# Training a Napoleon AI named Brumaire

In [None]:
import torch
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

from brumaire.model import BrumaireHParams, BrumaireController
from brumaire.agent import RandomAgent, BrumaireAgent
from brumaire.session import Game

## Hyperparameters

In [None]:
# --- Schedule ---------------------------------------------------------------
EPISODE_NUM = 500  # iterations of the play-and-train loop

TRAIN_EPOCH = 3    # `epoch` argument given to both controller train calls
BATCH_SIZE = 300   # each training game is created with BATCH_SIZE + TEST_SIZE
TEST_SIZE = 100    # boards; both sizes are also passed to the train calls

EVAL_BOARD_NUM = 10_000  # size of each evaluation game
EVAL_INTERVALS = 5       # evaluate once every this many episodes

# Every this many episodes the opponents are replaced by a copy of the
# current controller.
# NOTE(review): with EPISODE_NUM = 500 this interval is never reached, so the
# opponents stay RandomAgent for the whole run - confirm this is intended.
AGENT_SWITCH_INTERVALS = 1_000

# --- Hyperparameters copied to the `decl_*` fields of BrumaireHParams -------
DECL_L1_NODE = 4_000
DECL_L2_NODE = 2_000

DECL_ITA = 1e-3
DECL_CLIP_GRAD = 10.0

# --- Hyperparameters copied to the remaining BrumaireHParams fields ---------
L1_NODE = 4_000
L2_NODE = 2_000
L3_NODE = 1_000

ITA = 5e-4
GAMMA = 0.97
CLIP_GRAD = 10.0

# --- Exploration ------------------------------------------------------------
EPSILON = 0.8            # initial `epsilon` handed to BrumaireAgent
EPSILON_DEC_RATE = 0.99  # epsilon is multiplied by this after every episode

# Unique run label built from the current local time; used as the directory
# name under ./runs for both the TensorBoard log and the saved model.
RUN_NAME = "trial-" + datetime.now().strftime('%Y-%m-%dT%H-%M-%S.%f')

Record the hyperparameters in the log so that they can be viewed with TensorBoard.

In [None]:
# TensorBoard log writer; every run writes to its own directory under ./runs.
writer = SummaryWriter(f"./runs/{RUN_NAME}")

# Copy the notebook-level constants onto a fresh hyperparameter object, in the
# same order as the fields are listed here, then write them to the log.
h_params = BrumaireHParams()
for _field_name, _field_value in (
    ("decl_l1_node", DECL_L1_NODE),
    ("decl_l2_node", DECL_L2_NODE),
    ("decl_ita", DECL_ITA),
    ("decl_clip_grad", DECL_CLIP_GRAD),
    ("l1_node", L1_NODE),
    ("l2_node", L2_NODE),
    ("l3_node", L3_NODE),
    ("ita", ITA),
    ("gamma", GAMMA),
    ("clip_grad", CLIP_GRAD),
):
    setattr(h_params, _field_name, _field_value)
h_params.write_summary(writer)

Use a CUDA device if available.

In [None]:
# Run on the GPU when PyTorch reports a usable CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Set up an agent and its model

In [None]:
# Controller for the model being trained; it receives the hyperparameters,
# the target device and the TensorBoard writer.
controller = BrumaireController(h_params, device, writer)
# The learning agent acts through `controller` and starts with epsilon = EPSILON
# (presumably an epsilon-greedy exploration rate - confirm in BrumaireAgent).
agent = BrumaireAgent(controller, epsilon=EPSILON)
# Initial opponent: a single RandomAgent instance, reused for every opponent slot.
opponent_agent = RandomAgent()

In [None]:
# Five participants: index 0 is the learning agent, and the remaining four
# entries all refer to the same opponent instance.
AGENTS = [agent] + [opponent_agent] * 4

## Play and train the model

In [None]:
def _play_game(board_num, agents, *, trick_num=10, **game_kwargs):
    """Run the complete play sequence once and return the finished game.

    Args:
        board_num: First argument given to ``Game`` (the number of boards).
        agents: Participating agents, passed to ``Game`` unchanged.
        trick_num: How many times ``game.trick`` is called, with indices
            ``0 .. trick_num - 1``.
        **game_kwargs: Extra keyword arguments forwarded to ``Game``
            (for example ``log_enabled=True``).

    Returns:
        The ``Game`` instance after ``check_result()`` has been called.
    """
    game = Game(board_num, agents, **game_kwargs)
    game.decide_napoleon()
    game.discard_additional_cards()
    for trick_idx in range(trick_num):
        game.trick(trick_idx)
    game.check_result()
    return game


for ep in range(EPISODE_NUM):
    # Generate this episode's data; the game is sized so that the train calls
    # below can be given both BATCH_SIZE and TEST_SIZE.
    game = _play_game(BATCH_SIZE + TEST_SIZE, AGENTS)

    # Log the epsilon that was in effect while this episode was played.
    writer.add_scalar("epsilon", agent.epsilon, controller.global_step)

    controller.train_decl(game.recorder, BATCH_SIZE, TEST_SIZE, epoch=TRAIN_EPOCH)
    controller.train(game.recorder, BATCH_SIZE, TEST_SIZE, epoch=TRAIN_EPOCH)

    # Decay epsilon geometrically, once per episode.
    agent.epsilon *= EPSILON_DEC_RATE

    if (ep + 1) % EVAL_INTERVALS == 0:
        # NOTE(review): epsilon is not reset for evaluation, so the agent is
        # evaluated with its current epsilon - confirm this is intended.
        game = _play_game(EVAL_BOARD_NUM, AGENTS, log_enabled=True)

        # Index 0 selects the entry for `agent`, the first element of AGENTS
        # (assumes the recorder arrays are indexed by player on axis 0 - TODO confirm).
        reward = np.sum(np.sum(game.recorder.rewards, axis=1), axis=1)[0] / EVAL_BOARD_NUM
        win_rate = np.sum(game.recorder.winners, axis=1)[0] / EVAL_BOARD_NUM
        # Mean win rate over all participants, used as the baseline for the diff.
        total_win_rate = np.sum(game.recorder.winners) / EVAL_BOARD_NUM / len(AGENTS)

        writer.add_scalar("eval/reward", reward, controller.global_step)
        writer.add_scalar("eval/win rate", win_rate, controller.global_step)
        writer.add_scalar("eval/win rate diff", win_rate - total_win_rate, controller.global_step)

    if (ep + 1) % AGENT_SWITCH_INTERVALS == 0:
        # Replace the opponents with a copy of the current controller; the copy
        # gets no writer (None is passed in its place).
        opponent_controller = BrumaireController(h_params, device, None)
        opponent_controller.copy_from_other(controller)
        opponent_agent = BrumaireAgent(opponent_controller)
        # Keep `agent` at index 0 and fill every other slot with the new opponent.
        AGENTS = [agent] + [opponent_agent] * (len(AGENTS) - 1)

## Save the model

In [None]:
# Store the trained model in this run's directory, next to its TensorBoard log.
save_dir = f"./runs/{RUN_NAME}"
controller.save(save_dir)