In [1]:
import random
import time
import torch
import numpy as np
import matplotlib.pyplot as plt
import rlcard
from rlcard.envs.registration import register, make
from IPython.display import clear_output
from IPython.display import display, HTML

from briscolaAI.briscola import BriscolaEnv
from briscolaAI.game import BriscolaGame
from briscolaAI.agent import BriscolaRuleAgent
from briscolaAI.human import HumanAgent
from briscolaAI.dqn_agent import DQNAgent
from briscolaAI.load_image import image

In [2]:
# Load the saved DQN checkpoint from disk; the path is relative to the
# notebook's working directory.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
# NOTE(review): despite its name, `model` is handed to
# DQNAgent.from_checkpoint in the next cell -- presumably it is a checkpoint
# object rather than a bare network; confirm.
model = torch.load("./models/checkpoint_dqn_episode_94000.pt") 

In [3]:
# Build the DQN agent from the checkpoint object loaded in the previous cell.
agent = DQNAgent.from_checkpoint(model)


INFO - Restoring model from checkpoint...


In [4]:
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The maximum of ``x`` is subtracted before exponentiating so that large
    inputs do not overflow; the result sums to 1.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / exps.sum()
    
def rl_based_action(player, env, state):
    """Choose the agent's move for the current state.

    ``player.action_if_win(env.game)`` is consulted first. If it returns
    something other than ``None``, that object's position in ``player.hand``
    is played. Otherwise the move is derived from ``player.predict(state)``.

    Args:
        player: Agent exposing ``action_if_win(game)``, ``hand`` and
            ``predict(state)``.
        env: Environment; ``env.game`` and ``env.game.dealer.deck`` are read.
        state: Current observation, passed through to ``player.predict``.

    Returns:
        ``(action, would_be_winning)``: ``action`` is the index to play and
        ``would_be_winning`` is True only when ``action_if_win`` supplied it.
    """
    winning_card = player.action_if_win(env.game)
    # Identity test on purpose: `!= None` would go through the card's own
    # comparison methods, which may not cope with None.
    if winning_card is not None:
        return player.hand.index(winning_card), True

    # Softmax of the predicted values, used as a distribution over actions.
    action_probs = softmax(player.predict(state))
    # Play greedily when one action clearly dominates or while at least 10
    # cards remain in the deck; otherwise sample in proportion to the weights.
    if np.max(action_probs) >= 0.666 or len(env.game.dealer.deck) >= 10:
        action = np.argmax(action_probs)
    else:
        action = random.choices(range(len(action_probs)), action_probs, k=1)[0]
    return action, False

def random_action(player, env):
    """Return the hand index of the card chosen by ``player.random_card()``.

    ``env`` is unused; it is accepted so that every opponent policy shares the
    ``(player, env)`` call shape.
    """
    position_of = player.hand.index
    chosen = player.random_card()
    return position_of(chosen)
    
def rule_based_action(player, env, epsilon = 0.0):
    """Return the hand index of the card chosen by ``player.rule_card``.

    ``env.game`` and ``epsilon`` are forwarded to ``player.rule_card``
    unchanged.
    """
    position_of = player.hand.index
    chosen = player.rule_card(env.game, epsilon)
    return position_of(chosen)

def human_action(player, env):
    """Return whatever ``player.throw_card()`` yields as the action.

    ``env`` is unused; it is accepted so that every opponent policy shares the
    ``(player, env)`` call shape.
    """
    chosen_action = player.throw_card()
    return chosen_action

def run_eval_games(env, action_func, n, print_game = False):
    """Play ``n`` games between the RL agent and an opponent and tally results.

    On the agent's turn the move comes from ``rl_based_action``; on every
    other turn it comes from ``action_func(player, env)``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and a ``game``
            object with ``players``, ``agent_id``, ``judger`` and ``winner``.
        action_func: Callable ``(player, env) -> action`` for the opponent.
        n: Number of games to play.
        print_game: When true, the game is shown while it is played: status
            text, the AI's card image, and a pause plus output refresh after
            every second play. When false, none of that happens, so batch
            evaluation runs without rendering or sleeping.

    Returns:
        ``(game_results, points_results)``: a dict of counts keyed by
        ``"win"``, ``"odds"`` and ``"loss"``, and a list holding the agent's
        points for each game.

    Side effects:
        Always prints, once at the end, the number of games in which the
        agent finished with 60 points or fewer although ``rl_based_action``
        reported at least one ``would_be_winning`` move.
    """
    game_results = {"win": 0, "odds": 0, "loss": 0}
    points_results = []
    rule_not_learned = 0
    for _ in range(n):
        if print_game: print("\nNew Game\n")
        state, player_id = env.reset()
        if print_game: print(f"Briscola is {env.game.judger.briscola}\n")
        agent_id = env.game.agent_id
        would_win_times = 0
        plays = 0
        while True:
            player = env.game.players[player_id]
            if player_id == agent_id:
                action, would_win = rl_based_action(player, env, state)
                if print_game:
                    print("AI plays:")
                    display(HTML(f"""
        			<div class="row">
        					<img src={image(str(player.hand[action]))} style="width:10%"> </img>
        			</div>
        			"""))
                would_win_times += would_win
            else:
                action = action_func(player, env)
            plays += 1
            # After every second play, leave the cards on screen briefly,
            # then wipe the output and re-print the briscola. Display only.
            if print_game and plays % 2 == 0:
                time.sleep(2)
                clear_output(wait=False)
                time.sleep(0.1)
                print(f"Briscola is {env.game.judger.briscola}\n")
            next_state, player_id = env.step(action)
            # env.step returning no next player ends the game loop.
            if player_id is None: break
            state = next_state
        winner = env.game.winner
        points = env.game.judger.compute_points(env.game.players[env.game.agent_id].pile)
        # The opponent index is derived modulo 2, i.e. this assumes two players.
        other_points = env.game.judger.compute_points(env.game.players[(env.game.agent_id - 1) % 2].pile)
        if print_game:
            print(f"AI points: {points}")
            print(f"Human points: {other_points}\n")
            print(f"\nWinner is {'AI' if points > 60 else ('None' if points == 60 else 'Human')}")
        rule_not_learned += points <= 60 and would_win_times >= 1
        points_results.append(points)

        # `winner` is compared against a 1-tuple with the agent id; a winner
        # of length 2 is counted as "odds".
        result_k = "win" if winner == (agent_id,) else ("odds" if len(winner) == 2 else "loss")
        game_results[result_k] += 1
    print(rule_not_learned)
    return game_results, points_results

def play_agent(env, n_games = 10000):
    """Play interactive games of the agent against a human and report results.

    Runs ``run_eval_games`` with ``human_action`` as the opponent policy and
    ``print_game=True``, then prints the result counts via ``print_results``.

    Args:
        env: Environment to play in.
        n_games: Number of games to play. Defaults to 10000, the value that
            was previously hard-coded.

    Returns:
        The list of the agent's points per game returned by ``run_eval_games``.
    """
    human_results, human_points_results = run_eval_games(env, human_action, n_games, print_game = True)
    print_results(human_results, "human")
    return human_points_results

def print_results(results, player_type):
    """Print a one-line summary of ``results`` for games against ``player_type``.

    Each key of ``results`` is rendered as ``key: value``, in iteration order,
    separated by single spaces.
    """
    parts = []
    for outcome in results:
        parts.append(f"{outcome}: {results[outcome]}")
    summary = " ".join(parts)
    print(f"agent vs. {player_type} -> {summary}")

In [None]:
# Register the Briscola environment.
# NOTE(review): the ValueError is presumably what `register` raises when the
# id is already registered (e.g. when this cell is re-run) -- confirm; it is
# deliberately ignored so the cell can be executed more than once.
try:
    register(env_id='briscola', entry_point='briscolaAI.briscola:BriscolaEnv')
except ValueError:
    pass

# The restored DQN agent is listed first; the human player is listed second.
config = {"players": [agent, HumanAgent(1, np.random)]}

env = make('briscola', config = config)

# Start the interactive session; as the last expression of the cell, the
# returned per-game points are also shown as the cell's output.
play_agent(env)

Briscola is 7Coins

AI points: 96
Human points: 24


Winner is AI

New Game

Briscola is 6Batons

Choose a card:
