In [1]:
import os
import sys

# Put the parent directory on sys.path (once) so that the
# `reinforcement_yatzy` package can be imported from this notebook.
lib_path = os.path.abspath(os.pardir)
if sys.path.count(lib_path) == 0:
    sys.path.append(lib_path)

In [2]:
import numpy as np
import pandas as pd

from reinforcement_yatzy.nn_models.autoencoders.pca_encoder import PCAEncoder
from reinforcement_yatzy.reinforcement_agents.q_agent import DeepQYatzyPlayer
from reinforcement_yatzy.nn_models.xvariant_mlp import DiceSelector, EntrySelector, InvariantPoolingParams

In [3]:
# --- Scoreboard encoder ------------------------------------------------------
# PCA encoder constructed from the scoreboard dataset CSV. The same encoder
# instance is handed to both selector models below.
scoreboard_dataset_path = os.path.join(os.pardir, 'datasets', '1_million_scoreboards.csv')
pca_encoder = PCAEncoder(pd.read_csv(scoreboard_dataset_path), n_components=5)

# --- Selector models ---------------------------------------------------------
# Model given to the agent as `select_dice_model`.
dice_model = DiceSelector(
    n_dice=DeepQYatzyPlayer.NUM_DICE,
    dice_embed_dim=5,
    mlp_channels=[32, 16, 8, 4],
    scoreboard_encoder=pca_encoder,
)
# Model given to the agent as `select_entry_model`.
entry_model = EntrySelector(
    n_dice=DeepQYatzyPlayer.NUM_DICE,
    n_entries=DeepQYatzyPlayer.NUM_ENTRIES,
    dice_pre_mlp_channels=[5, 5],
    scoreboard_encoder=pca_encoder,
    mlp_dims=[64, 32, 16, 8, 4],
)

# --- Agent -------------------------------------------------------------------
agent = DeepQYatzyPlayer(
    # NOTE(review): ln(2) / N would be the rate of an exponential decay with
    # half-life N (1_000 and 10_000 here) — confirm how DeepQYatzyPlayer
    # interprets the gamma_* arguments.
    gamma_dice=np.log(2) / 1_000,
    gamma_entries=np.log(2) / 10_000,
    invalid_entry_factor=0.01,
    select_dice_model=dice_model,
    select_entry_model=entry_model,
    dice_buffer_size=1_000,
    entry_buffer_size=100,
    target_change_interval=1_000,
    batch_size=32,
    penalty_scratch=-30,
)

In [4]:
n_games = 100_000
print_interval = 1_000

# Total score of every game played, indexed by 0-based game number.
scores = np.zeros([n_games])
for epoch in range(n_games):
    agent.play_game()
    scores[epoch] = agent.get_total_score()

    # Report the mean score of the last `print_interval` games as soon as
    # that window is complete. Counting *completed* games (epoch + 1) rather
    # than the 0-based index means the final window is reported too; the
    # index-based check never fired for it because `epoch` stops at
    # n_games - 1.
    games_played = epoch + 1
    if games_played % print_interval == 0:
        recent_scores = scores[games_played - print_interval : games_played]
        print(f'Epoch {games_played} - Score: {np.mean(recent_scores)}')

Epoch 1000 - Score: 2.507
Epoch 2000 - Score: 2.52
Epoch 3000 - Score: 2.578
Epoch 4000 - Score: 2.451
Epoch 5000 - Score: 2.504
Epoch 6000 - Score: 2.531
Epoch 7000 - Score: 2.48
Epoch 8000 - Score: 2.469
Epoch 9000 - Score: 2.536
Epoch 10000 - Score: 2.605
Epoch 11000 - Score: 2.516
Epoch 12000 - Score: 2.536
Epoch 13000 - Score: 2.643
Epoch 14000 - Score: 2.592
Epoch 15000 - Score: 2.556
Epoch 16000 - Score: 2.586
Epoch 17000 - Score: 2.632
Epoch 18000 - Score: 2.548
Epoch 19000 - Score: 2.614
Epoch 20000 - Score: 2.648
Epoch 21000 - Score: 2.607
Epoch 22000 - Score: 2.706
Epoch 23000 - Score: 2.655
Epoch 24000 - Score: 2.688
Epoch 25000 - Score: 2.701
Epoch 26000 - Score: 2.654
Epoch 27000 - Score: 2.577
Epoch 28000 - Score: 2.631


KeyboardInterrupt: 