# Trained-Model Evaluation

In [1]:
import torch
import numpy as np

from brumaire.model import BrumaireHParams, BrumaireController
from brumaire.agent import RandomAgent, BrumaireAgent
from brumaire.session import Game

## Hyperparameters

In [2]:
# Number of boards handed to Game for the evaluation run.
EVAL_BOARD_NUM = 10_000

# Layer widths copied onto BrumaireHParams.decl_l*_node.
DECL_L1_NODE = 1_000
DECL_L2_NODE = 1_000

# Layer widths copied onto BrumaireHParams.l*_node.
L1_NODE = 4_000
L2_NODE = 2_000
L3_NODE = 1_000

# Run directory that the controller is loaded from.
SAVED_DIR = "./runs/trial-2024-02-21T16-46-28.860778/"

In [3]:
# Start from the default hyperparameters and overwrite the layer widths with
# the notebook-level constants, one (attribute, value) pair at a time.
h_params = BrumaireHParams()
for _attr, _width in (
    ("decl_l1_node", DECL_L1_NODE),
    ("decl_l2_node", DECL_L2_NODE),
    ("l1_node", L1_NODE),
    ("l2_node", L2_NODE),
    ("l3_node", L3_NODE),
):
    setattr(h_params, _attr, _width)

Use a CUDA device if available.

In [4]:
# Prefer the GPU when PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Setup

Load the trained model from `SAVED_DIR` and create an agent that plays according to that model.

In [5]:
# Build a controller from the hyperparameters on the selected device.
# NOTE(review): the third positional argument is passed as None; its meaning is
# not visible in this notebook — confirm against BrumaireController.
controller = BrumaireController(h_params, device, None)
# Load the trained model stored under SAVED_DIR into the controller.
controller.load(SAVED_DIR)
# Wrap the controller in the agent that is evaluated below.
agent = BrumaireAgent(controller)

Choose opponent agents.

In [6]:
# The trained agent is the first entry; the remaining four entries all refer
# to the same RandomAgent instance.
opponent_agent = RandomAgent()
AGENTS = [agent] + [opponent_agent] * 4

## Evaluation

In [7]:
# Create the game from EVAL_BOARD_NUM and the AGENTS table, with logging
# enabled; the cells below read their statistics from game.recorder.
# NOTE(review): no RNG seed is set in the visible cells, so the reported
# numbers may differ between runs — confirm whether seeding is needed.
game = Game(EVAL_BOARD_NUM, AGENTS, log_enabled=True)
# Pre-play phases, run in this order before any trick.
game.decide_napoleon()
game.discard_additional_cards()
# Play tricks 0 through 9 in order.
for idx in range(10):
    game.trick(idx)
# Finalize the game after the last trick.
game.check_result()

The average total reward the agent earns per board.

In [8]:
# Collapse the recorded rewards over axis 1 twice, leaving one total per entry
# of the leading axis, then take entry 0.
# NOTE(review): entry 0 is presumed to be the trained agent (first in AGENTS)
# — confirm against the recorder layout.
rewards_once_summed = np.sum(game.recorder.rewards, axis=1)
rewards_total = np.sum(rewards_once_summed, axis=1)
reward = rewards_total[0] / EVAL_BOARD_NUM
print(reward)

0.8528840000000124


The agent's win rate.

In [9]:
# Sum the recorded winner flags over axis 1 and take entry 0.
# NOTE(review): entry 0 is presumed to be the trained agent (first in AGENTS)
# — confirm against the recorder layout.
wins_total = np.sum(game.recorder.winners, axis=1)
win_rate = wins_total[0] / EVAL_BOARD_NUM
print(win_rate)

0.7571


The average win rate across all agents. This value is not obvious in advance, because the number of winners per game in Napoleon is not constant (it ranges from 1 to 4).

In [10]:
# Average win rate over every entry in AGENTS: all recorded wins divided by
# the number of boards and by the number of agents. The agent count is taken
# from AGENTS rather than hard-coded, so it stays correct if the table changes.
total_win_rate = np.sum(game.recorder.winners) / EVAL_BOARD_NUM / len(AGENTS)
print(total_win_rate)

0.64074


The difference between the agent's win rate and the average win rate across all agents (the agent itself included).

In [11]:
# Margin of the agent's win rate over the all-agent average computed above.
win_rate_margin = win_rate - total_win_rate
print(win_rate_margin)

0.11636000000000002
