## Setup

In [1]:
from src.game_logic import Game
import math
import time
import numpy
import pandas
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Initialize Model and Optimizer

In [3]:
# Fully connected network: 16 inputs -> 16 -> 16 -> 8 -> 4 outputs.
# play_game feeds it `game.state.flatten(1)` and takes the highest of the
# 4 outputs as the action to play.
# The final ReLU keeps every output >= 0; play_game relies on that when it
# marks unavailable actions with -1.
# `.to(device)` instead of `.cuda()`: the notebook already selects `device`
# with a CPU fallback, whereas an unconditional `.cuda()` fails on a machine
# without a usable GPU.
model = nn.Sequential(
    nn.Linear(16, 16),
    nn.ReLU(),
    nn.Linear(16, 16),
    nn.ReLU(),
    nn.Linear(16, 8),
    nn.ReLU(),
    nn.Linear(8, 4),
    nn.ReLU(),
).to(device)

In [4]:
# RMSprop over all of the model's parameters, using the optimizer's default settings.
optimizer = optim.RMSprop(params=model.parameters())

## Train

In [5]:
# Training configuration.
num_games = 5  # how many Game batches train() creates, one per iteration
batch_size = 1_000_000  # first argument handed to Game(...) inside train()
discount = 0.999  # forwarded to train(); presumably the reward discount factor -- TODO confirm
# Presumably the start / end / decay constants of the exploration schedule behind
# `eps_func`; nothing in the visible cells reads them -- TODO confirm.
eps_start = 0.75
eps_end = 0.005
eps_decay = 75

In [None]:
def _train_on_game(index, game,):
    while not game.game_over():
        _train_on_game()

In [9]:
def train(model, loss_func, optimizer, num_games, batch_size, discount, eps_func):
    """Yield score statistics for ``num_games`` freshly created games.

    NOTE(review): as written, nothing between creating the game and reading
    its score plays the game or updates ``model``. ``model``, ``loss_func``,
    ``optimizer``, ``discount`` and ``eps_func`` are accepted but never used;
    the training step still has to be added inside the loop.

    Args:
        model: Unused in the current body.
        loss_func: Unused in the current body.
        optimizer: Unused in the current body.
        num_games: Number of iterations, one ``Game`` per iteration.
        batch_size: First argument passed to ``Game``.
        discount: Unused in the current body.
        eps_func: Unused in the current body.

    Yields:
        Tuples ``(index, elapsed, mean, std)`` where ``elapsed`` is the number
        of seconds since ``train`` started (cumulative, not per game) and
        ``mean`` / ``std`` are Python floats computed from ``game.score``.
    """
    # perf_counter is monotonic, so the elapsed time cannot jump if the
    # system clock is adjusted during a long run.
    start = time.perf_counter()
    for index in range(num_games):
        game = Game(batch_size, 4)

        elapsed = time.perf_counter() - start
        # Convert the scores to float once and reuse them for both statistics.
        scores = game.score.float()
        yield index, elapsed, float(scores.mean()), float(scores.std())

In [18]:
# One pre-allocated row per game, filled in as each result arrives from train().
record = pandas.DataFrame(
    data=numpy.zeros(shape=(num_games, 3)),
    columns=['Time (s)', 'Mean', 'Std'],
)
# NOTE(review): `eps_func` is not defined in any cell shown in this notebook;
# it must already exist in the kernel for this cell to run -- confirm where it
# is meant to come from.
session = train(
    model=model,
    loss_func=F.smooth_l1_loss,
    optimizer=optimizer,
    num_games=num_games,
    batch_size=batch_size,
    discount=discount,
    eps_func=eps_func,
)
for index, game_time, mean, std in session:
    # Progress line per game, then store the same numbers in the table.
    print(game_time, mean, std)
    record.loc[index] = [game_time, float(mean), float(std)]
record.round(decimals=2)

24.23522973060608 tensor(1040.1421, device='cuda:0') tensor(511.7341, device='cuda:0')
45.5584921836853 tensor(1036.5852, device='cuda:0') tensor(509.4788, device='cuda:0')
69.16304731369019 tensor(1032.3250, device='cuda:0') tensor(508.7476, device='cuda:0')
93.71033763885498 tensor(1110.8843, device='cuda:0') tensor(543.5995, device='cuda:0')
116.52382326126099 tensor(1074.1494, device='cuda:0') tensor(501.5773, device='cuda:0')


Unnamed: 0,Time,Mean,Std
0,24.23523,1040.14209,511.7341
1,45.558492,1036.585205,509.47876
2,69.163047,1032.324951,508.747559
3,93.710338,1110.884277,543.599548
4,116.523823,1074.149414,501.577301


In [19]:
# Show the collected statistics rounded to two decimals.
record.round(decimals=2)

Unnamed: 0,Time,Mean,Std
0,24.24,1040.14,511.73
1,45.56,1036.59,509.48
2,69.16,1032.32,508.75
3,93.71,1110.88,543.6
4,116.52,1074.15,501.58


## Play a Game

In [13]:
def play_game(game, policy=None):
    """Play ``game`` until it is over, always taking the highest-valued available action.

    Args:
        game: Object exposing ``state``, ``game_over()``,
            ``available_actions()`` and ``do_action(actions, n)``, as used below.
        policy: Callable mapping the flattened float state to one value per
            action. Defaults to the notebook-level ``model``.

    Returns:
        None; the only effect is the sequence of ``game.do_action`` calls.
    """
    net = model if policy is None else policy
    # Inference only: without no_grad every forward pass would record an
    # autograd graph that is never used.
    with torch.no_grad():
        while not game.game_over():
            # Value of each action in the current state.
            state_values = net(game.state.flatten(1).float())
            # Rule out unavailable actions. -1 only works as a floor while the
            # network's values stay above it; the notebook's model ends in a
            # ReLU, so its values are >= 0.
            state_values[game.available_actions().logical_not()] = -1
            # Pick the best remaining action per row.
            _, actions = state_values.max(1)
            game.do_action(actions.to(torch.int8), 1)

In [14]:
game = Game(int(1e6), 4)
%time play_game(game)

CPU times: user 8.47 s, sys: 3.98 ms, total: 8.47 s
Wall time: 8.49 s


In [15]:
# Mean of game.score after play_game has run.
torch.mean(game.score.float())

tensor(811.8593, device='cuda:0')

In [16]:
# Standard deviation of game.score after play_game has run.
torch.std(game.score.float())

tensor(508.4912, device='cuda:0')

In [18]:
# Largest value in game.score after play_game has run.
torch.max(game.score)

tensor(5848, device='cuda:0', dtype=torch.int32)