In [None]:
pip install rlcard

In [None]:
pip install rlcard[torch]

# Imports

In [None]:
import rlcard
import numpy as np
import torch
import torch.nn as nn
import os
from copy import deepcopy

from datetime import datetime
from pytz import timezone

In [None]:
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.gin_rummy_rule_models import GinRummyNoviceRuleAgent

from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.dqn_agent import Estimator
from rlcard.agents.dqn_agent import EstimatorNetwork
from rlcard.agents.dqn_agent import Memory

from rlcard.utils import reorganize
from rlcard.utils import get_device
from rlcard.utils import set_seed
from rlcard.utils import Logger
from rlcard.utils import tournament
from rlcard.utils import plot_curve

from rlcard.games.gin_rummy.game import GinRummyGame
from rlcard.games.gin_rummy.player import GinRummyPlayer
from rlcard.games.gin_rummy.utils.move import ScoreNorthMove, ScoreSouthMove, KnockMove, GinMove, DeclareDeadHandMove
from rlcard.games.gin_rummy.utils.action_event import KnockAction, GinAction

# Get current time

In [None]:
def get_current_time():
    """Return the current time in the America/Chicago zone as 'HH:MM:SS AM/PM'."""
    chicago_zone = timezone('America/Chicago')
    now_in_chicago = datetime.now(chicago_zone)
    return now_in_chicago.strftime('%I:%M:%S %p')

# Train Environment

In [None]:
def save_model(agent, log_dir: str, model_name: str):
    """Serialize ``agent`` to ``<log_dir>/<model_name>.pth`` using ``torch.save``.

    Args:
        agent: Object to serialize (the whole object is pickled, not just weights).
        log_dir (str): Directory that receives the checkpoint. It is created,
            including missing parents, when it does not exist yet.
        model_name (str): Checkpoint file name without the ``.pth`` extension.
    """
    # torch.save does not create missing directories; an empty log_dir means
    # "current directory" and must not be passed to makedirs.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    agent_path = os.path.join(log_dir, f'{model_name}.pth')
    torch.save(agent, agent_path)
    print('Model saved in', agent_path)

In [None]:
def train_environment(env, seed, num_episodes, num_eval_games, evaluate_every, log_dir, model_name: str,
                      save_every: int = 100000):
    """Train the agent in seat 0 of ``env`` and log, plot and checkpoint the run.

    Args:
        env: Environment whose ``agents[0]`` is the agent being trained.
        seed: Value handed to ``set_seed`` before training starts.
        num_episodes (int): Number of training episodes to play.
        num_eval_games (int): Number of games passed to ``tournament`` per evaluation.
        evaluate_every (int): Evaluate whenever ``episode % evaluate_every == 0``.
        log_dir (str): Directory used by ``Logger`` and for checkpoints.
        model_name (str): Checkpoint file name (without extension) for ``save_model``.
        save_every (int): Write an intermediate checkpoint every this many
            episodes (episode 0 is skipped). Defaults to 100000.
    """
    # Seed numpy, torch, random
    set_seed(seed)

    # Start training
    algorithm = 'dqn'
    agent = env.agents[0]
    game_statistics = GameStatistics()
    with Logger(log_dir) as logger:
        for episode in range(num_episodes):
            # Generate data from the environment
            trajectories, payoffs = env.run(is_training=True)

            # Record statistics right away: the evaluation tournament below plays
            # further games on this same env, after which env.game no longer
            # describes the training game that was just played.
            game_statistics.update_statistics(game=env.game)

            # Reorganize the data to be state, action, reward, next_state, done
            trajectories = reorganize(trajectories, payoffs)

            # Feed transitions into agent memory, and train the agent.
            # Only the transitions of seat 0 (the learning agent) are used.
            for ts in trajectories[0]:
                agent.feed(ts)

            # Evaluate the performance against the env's current opponents.
            if episode % evaluate_every == 0:
                logger.log("\n----------------------------------------")
                logger.log(f"\nEpisode: {episode}")
                logger.log_performance(env.timestep, tournament(env, num_eval_games)[0])

            # Periodic checkpoint so a long run can be resumed.
            if episode > 0 and episode % save_every == 0:
                save_model(agent=agent, log_dir=log_dir, model_name=model_name)

    # Show game statistics
    game_statistics.show_statistics()

    # Get the paths
    csv_path, fig_path = logger.csv_path, logger.fig_path

    # Plot the learning curve
    plot_curve(csv_path, fig_path, algorithm)

    # Save model
    save_model(agent=agent, log_dir=log_dir, model_name=model_name)

# Get DQNAgent

1. replay_memory_size = 200000 rather than the default 20000
2. mlp_layers = [128, 128, 128] rather than [64, 64]

In [None]:
def get_dqn_agent(env, model_name: str, log_dir: str):
    """Return the DQN agent to train: resumed from a checkpoint or freshly built.

    If ``<log_dir>/<model_name>.pth`` exists, the pickled agent is loaded from it
    and ``set_device`` is called on it with the current device. Otherwise a new
    ``DQNAgent`` is created with the hyperparameters defined below.

    Args:
        env: Environment; only consulted when a new agent is built
            (``env.game.get_num_actions()`` and ``env.state_shape[0]``).
        model_name (str): Checkpoint file name without the ``.pth`` extension.
        log_dir (str): Directory that holds the checkpoint.

    Returns:
        The loaded or newly created agent.
    """
    device = get_device()
    agent_path = os.path.join(log_dir, f'{model_name}.pth')

    if os.path.exists(agent_path):
        train_id = 2 # increment for each train iteration
        # SECURITY: the checkpoint is a fully pickled object, so loading it can
        # execute arbitrary code. Only resume from files you created yourself.
        try:
            # Recent PyTorch releases default to weights_only=True, which refuses
            # to unpickle a whole agent object; opt out explicitly.
            dqn_agent = torch.load(agent_path, map_location=device, weights_only=False)
        except TypeError:
            # Older PyTorch releases do not know the weights_only keyword.
            dqn_agent = torch.load(agent_path, map_location=device)
        dqn_agent.set_device(device)
        # Optional overrides when resuming (hyperparameters otherwise come from the checkpoint):
        #dqn_agent.train_every = 100  # wch: note this
        #dqn_agent.batch_size = 128  # wch: note this
        #dqn_agent.memory.batch_size = dqn_agent.batch_size  # wch: note this
        #dqn_agent.replay_memory_init_size = 1000  # wch: note this
        #dqn_agent.q_estimator.optimizer.param_groups[0]['lr'] = 0.000005
        #dqn_agent.target_estimator.optimizer.param_groups[0]['lr'] = 0.000005
        print(f'train_id={train_id}; agent={dqn_agent}')
        print(f'train_steps={dqn_agent.train_t} time_steps={dqn_agent.total_t}')
        return dqn_agent

    # First training iteration: build a new agent.
    # Trailing comments give the value that was used previously.
    replay_memory_size = 200000
    replay_memory_init_size = 1000 #100
    update_target_estimator_every = 1000
    discount_factor = 0.99
    epsilon_start = 1.0
    epsilon_end = 0.1
    epsilon_decay_steps = 20000
    batch_size = 128 #32
    train_every = 100 #1
    mlp_layers = [128, 128, 128]
    learning_rate = 0.00005
    num_actions = env.game.get_num_actions()
    state_shape = env.state_shape[0] # state_shape for player_id = 0
    return DQNAgent(
        replay_memory_size,
        replay_memory_init_size,
        update_target_estimator_every,
        discount_factor,
        epsilon_start,
        epsilon_end,
        epsilon_decay_steps,
        batch_size,
        num_actions,
        state_shape,
        train_every,
        mlp_layers,
        learning_rate,
        device=device)

# Gin Rummy Scoring

In [None]:
def get_payoff_gin_rummy(player: GinRummyPlayer, game: GinRummyGame) -> float:
    ''' Reward a player only for going out by knock or by gin.

    The payoff is 1 when ``player`` is the one who went out and the going-out
    action is a knock or a gin; every other outcome pays 0. The intent is to
    have the agent learn how to knock and gin.

    Returns:
        payoff (int or float): payoff for player (higher is better)
    '''
    current_round = game.round
    going_out_action = current_round.going_out_action
    going_out_player_id = current_round.going_out_player_id

    # Someone else (or nobody) went out: nothing to reward.
    if going_out_player_id != player.player_id:
        return 0

    went_out_by_knock_or_gin = isinstance(going_out_action, (KnockAction, GinAction))
    return 1 if went_out_by_knock_or_gin else 0

# Game Statistics

In [None]:
class GameStatistics(object):
    """Running counters describing how the recorded games ended.

    Knocks and gins are counted only when made by player 0; dead-hand
    declarations are counted regardless of the player.
    """

    def __init__(self):
        super().__init__()
        self.game_count = 0
        self.knock_count = 0
        self.gin_count = 0
        self.declare_dead_hand_count = 0
        # Total number of moves over all recorded games; the per-game average
        # is derived from it in show_statistics().
        self.moves_per_game = 0

    def update_statistics(self, game):
        """Fold one finished game into the counters.

        Args:
            game: Game object exposing ``round.move_sheet``.
        """
        move_sheet = game.round.move_sheet
        self.game_count += 1
        self.moves_per_game += len(move_sheet)
        if len(move_sheet) <= 2:
            return
        # The going-out move is read from the third-to-last entry.
        # NOTE(review): presumably the last two entries are the two scoring
        # moves appended at the end of a round - confirm against rlcard.
        going_out_move = move_sheet[-3]
        if isinstance(going_out_move, KnockMove):
            if going_out_move.player.player_id == 0:
                self.knock_count += 1
        elif isinstance(going_out_move, GinMove):
            if going_out_move.player.player_id == 0:
                self.gin_count += 1
        elif isinstance(going_out_move, DeclareDeadHandMove):
            self.declare_dead_hand_count += 1

    def show_statistics(self):
        """Print the counters and the average number of moves per game."""
        # Guard against division by zero when no game has been recorded.
        if self.game_count:
            average_moves = self.moves_per_game / self.game_count
        else:
            average_moves = 0.0
        print(f'game_count={self.game_count}')
        print(f'knock_count={self.knock_count}')
        print(f'gin_count={self.gin_count}')
        print(f'declare_dead_hand_count={self.declare_dead_hand_count}')
        print(f'average_moves_per_game={average_moves}')

# Train DQNAgent

In [None]:
def train_dqn_agent(log_dir: str, model_name: str, multiplier: int, seed):
    """Build a Gin Rummy environment and train a DQN agent against the novice rule agent.

    Args:
        log_dir (str): Directory for logs and checkpoints.
        model_name (str): Checkpoint file name without extension.
        multiplier (int): Scales the run length: ``1000 * multiplier`` training
            episodes, evaluated every ``20 * multiplier`` episodes.
        seed: Seed passed to ``rlcard.make`` and on to ``train_environment``.
    """
    # Make the environment with seed
    gin_rummy_env = rlcard.make('gin-rummy', config={'seed': seed})
    num_actions = gin_rummy_env.game.get_num_actions()

    # Patch the gin rummy game: use the custom payoff defined in this notebook
    # and switch on the is_always_knock setting.
    gin_rummy_env.game.judge.scorer.get_payoff = get_payoff_gin_rummy # wch: Note this
    gin_rummy_env.game.settings.is_always_knock = True # wch: Note this

    # Seat 0 is the learning DQN agent, seat 1 the rule-based opponent.
    novice_opponent = GinRummyNoviceRuleAgent()
    dqn_agent = get_dqn_agent(env=gin_rummy_env, model_name=model_name, log_dir=log_dir)
    gin_rummy_env.set_agents([dqn_agent, novice_opponent])

    # print info
    state_shape = gin_rummy_env.state_shape[0] # state_shape for player_id = 0
    action_shape = gin_rummy_env.action_shape
    print(f'log_dir={log_dir}')
    print(f'model_name={model_name}')
    print(f'state_shape={state_shape}')
    print(f'action_shape={action_shape}')
    print(f'num_actions={num_actions}')
    print(dqn_agent.q_estimator.qnet)

    # train the agent
    num_episodes = 1000 * multiplier
    evaluate_every = 20 * multiplier
    num_eval_games = 200 #100
    train_environment(
        gin_rummy_env,
        seed=seed,
        num_episodes=num_episodes,
        num_eval_games=num_eval_games,
        evaluate_every=evaluate_every,
        log_dir=log_dir,
        model_name=model_name)

In [None]:
# Run configuration: where logs/checkpoints go, the checkpoint name, and the
# factor that scales the number of training episodes.
my_log_dir = 'experiments/gin_rummy_dqn_result/'
my_model_name = 'model_gin_rummy_dqnagent'
my_multiplier = 10

In [None]:
%%time
# Launch a training run and time the whole cell; %%time must stay on the first line.
print(f"Start: {get_current_time()}")
# Show the GPU status when CUDA is available.
if torch.cuda.is_available():
    !nvidia-smi
# NOTE(review): seed=None is forwarded to rlcard.make and set_seed; presumably this
# leaves the RNGs unseeded so runs are not reproducible - confirm this is intended.
train_dqn_agent(log_dir=my_log_dir, model_name=my_model_name, multiplier=my_multiplier, seed=None)