# Training a Napoleon AI named Brumaire

In [1]:
import torch
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

from brumaire.model import BrumaireHParams, BrumaireController
from brumaire.agent import RandomAgent, BrumaireAgent
from brumaire.session import Game

## Hyperparameters

In [2]:
# --- Schedule -------------------------------------------------------------
BOARD_NUM = 5_000    # number of boards handed to each Game
EPISODE_NUM = 20     # iterations of the play-then-train loop
TRAIN_EPOCH = 10     # `epoch` argument given to controller.train

# --- Data split, expressed as fractions of BOARD_NUM -----------------------
BATCH_RATE = 0.7
TEST_RATE = 0.3

# --- Model size -------------------------------------------------------------
# Copied onto h_params as linear{1,2,3}_node_num
# (presumably the widths of three linear layers — TODO confirm).
LINEAR1_NODE_NUM = 4_000
LINEAR2_NODE_NUM = 2_000
LINEAR3_NODE_NUM = 1_000

# --- Optimisation and exploration ------------------------------------------
ITA = 5e-4               # stored as h_params.ita (presumably the learning rate)
GAMMA = 0.95             # stored as h_params.gamma (presumably the discount factor)
CLIP_GRAD = 10.0         # stored as h_params.clip_grad
EPSILON = 0.8            # initial epsilon of the BrumaireAgent
EPSILON_DEC_RATE = 0.8   # agent.epsilon is multiplied by this after every episode

# Sample counts derived from the fractions above (truncated to int).
batch_size, test_size = (
    int(BOARD_NUM * rate) for rate in (BATCH_RATE, TEST_RATE)
)

Write the hyperparameters to a log so that they can be viewed with TensorBoard.

In [3]:
# Every run logs into its own directory, named after the start time
# (microsecond resolution).
run_stamp = datetime.now().strftime('%Y-%m-%dT%H-%M-%S.%f')
log_dir = f"./runs/trial-{run_stamp}"
writer = SummaryWriter(log_dir)

# Copy the notebook-level constants onto the hyperparameter object ...
h_params = BrumaireHParams()

h_params.linear1_node_num = LINEAR1_NODE_NUM
h_params.linear2_node_num = LINEAR2_NODE_NUM
h_params.linear3_node_num = LINEAR3_NODE_NUM

h_params.ita = ITA
h_params.gamma = GAMMA
h_params.clip_grad = CLIP_GRAD

# ... and let it record itself through the writer.
h_params.write_summary(writer)

Use a CUDA device if available.

In [4]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Set up an agent and its model

In [5]:

# The controller is built from the hyperparameters, the compute device and the
# TensorBoard writer created in the cells above.
controller = BrumaireController(h_params, device, writer)
# The agent wraps the controller and starts with epsilon=EPSILON (presumably an
# epsilon-greedy exploration rate — TODO confirm). The training loop multiplies
# agent.epsilon by EPSILON_DEC_RATE after every episode.
agent = BrumaireAgent(controller, epsilon=EPSILON)

In [6]:
# Five entries: the learning agent first, followed by four independent
# RandomAgent instances.
AGENTS = [agent] + [RandomAgent() for _ in range(4)]

## Play and train the model

In [7]:
# Each episode plays BOARD_NUM boards in a single Game, logs the results for
# index 0 of the recorder arrays, trains the controller on the recorded data
# and finally shrinks the agent's epsilon.
for _ in range(EPISODE_NUM):
    game = Game(BOARD_NUM, AGENTS, log_enabled=True)
    game.decide_napoleon()
    game.discard_additional_cards()
    for idx in range(10):  # ten tricks are played per game
        game.trick(idx)
    game.check_result()

    # Per-board averages for index 0 of the recorder arrays
    # (presumably the learning agent, AGENTS[0] — TODO confirm).
    reward = np.sum(np.sum(game.recorder.rewards, axis=1), axis=1)[0] / BOARD_NUM
    win_rate = np.sum(game.recorder.winners, axis=1)[0] / BOARD_NUM

    writer.add_scalar("reward", reward, controller.global_step)
    # The "win rate" tag must carry win_rate; logging reward here would make
    # this curve a duplicate of the "reward" curve.
    writer.add_scalar("win rate", win_rate, controller.global_step)
    writer.add_scalar("epsilon", agent.epsilon, controller.global_step)

    controller.train(game.recorder, batch_size, test_size, epoch=TRAIN_EPOCH)
    # Decay epsilon geometrically after every episode.
    agent.epsilon *= EPSILON_DEC_RATE