In [1]:
import src
import statistics
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Game Engine

In [2]:
def play_random_game(game):
    """Play `game` from a fresh start until it is over, choosing moves at random.

    Each step draws uniform random weights for the 4 possible actions,
    multiplies them by ``game.available_actions()`` and takes the index of the
    largest product as the move (presumably the mask is 0 for illegal moves,
    so only legal ones can win -- confirm against ``src.Game``).

    ``game.batch_size == 0`` selects the unbatched path (a flat 4-vector);
    any other value selects the batched path with one row of 4 weights per
    game in the batch.

    Returns ``game.score`` as it stands once ``game.game_over()`` is true.
    """
    game.new_game()
    while not game.game_over():
        if game.batch_size != 0:
            # Batched: one row of random weights per game, arg-max along the action axis.
            noise = torch.rand((game.batch_size, 4), dtype=torch.float32, device=game.device)
            weighted = game.available_actions() * noise
            _, chosen = weighted.max(1)
        else:
            # Unbatched: a single 4-vector of random weights.
            noise = torch.rand(4, dtype=torch.float32, device=game.device)
            weighted = game.available_actions() * noise
            chosen = weighted.argmax()
        # The game is handed the action index as an int8 tensor.
        game.do_action(chosen.to(torch.int8))
        game.add_tile()
    return game.score

### CPU

In [3]:
# Run the game-engine benchmarks that follow on the CPU; `device` is passed to
# every src.Game created until it is reassigned.
device = torch.device('cpu')

In [4]:
# batch_size == 0 takes play_random_game's unbatched branch (one flat 4-way
# random draw per move).
batch_size = 0
game = src.Game(batch_size, device=device)
# The same game object is reused by every timing loop; play_random_game calls
# game.new_game() at the start of each run.
%timeit play_random_game(game)

34.2 ms ± 3.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
# Batched branch of play_random_game: a (10, 4) random tensor is drawn per step.
batch_size = int(1e1)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

39.4 ms ± 2.13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# Same CPU random-play benchmark with batch_size = 100.
batch_size = int(1e2)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

79.2 ms ± 1.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Same CPU random-play benchmark with batch_size = 1,000.
batch_size = int(1e3)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

372 ms ± 25.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Same CPU random-play benchmark with batch_size = 10,000.
batch_size = int(1e4)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

3.94 s ± 403 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### GPU

In [9]:
# Switch the remaining game-engine benchmarks to the CUDA device.
device = torch.device('cuda')

In [10]:
# Unbatched random play (batch_size == 0) on the CUDA device.
batch_size = 0
game = src.Game(batch_size, device=device)
# NOTE(review): CUDA work is launched asynchronously; this wall-clock timing is
# only meaningful if the loop forces a sync each step (presumably game_over()
# reads a value back to the host) -- confirm against src.Game.
%timeit play_random_game(game)

960 ms ± 309 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# GPU random-play benchmark with batch_size = 100 (batched branch).
batch_size = int(1e2)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

1.7 s ± 106 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
# GPU random-play benchmark with batch_size = 10,000.
batch_size = int(1e4)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

2.45 s ± 132 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# GPU random-play benchmark with batch_size = 1,000,000.
# NOTE(review): this cell has no execution count and no recorded output in the
# saved notebook, so it was presumably never run to completion -- either run it
# and keep the result or remove the cell.
batch_size = int(1e6)
game = src.Game(batch_size, device=device)
%timeit play_random_game(game)

## Agent Play

In [13]:
# Epsilon settings forwarded unchanged by time_agent to
# agent.init_training_session for every agent benchmark below.
# Presumably an epsilon-greedy exploration schedule, with start = end = 0
# meaning no random exploration -- confirm against src.Agent.
eps_start = 0
eps_end = 0
eps_decay = 1

In [14]:
def time_session(session):
    """Summarise the per-game durations of a training session.

    Parameters
    ----------
    session : iterable of (index, game_time, mean, std)
        Only ``game_time`` is used. It is treated as a running timestamp:
        each game's duration is its ``game_time`` minus the previous one,
        with the first game measured from 0.

    Returns
    -------
    (mean, std)
        Mean and sample standard deviation of the per-game durations.
        When the session yields exactly one game the spread is reported as
        ``0.0`` instead of raising.

    Raises
    ------
    statistics.StatisticsError
        If the session yields no games at all (there is no mean to report).
    """
    game_times = []
    last_game_time = 0
    for _index, game_time, _mean, _std in session:
        game_times.append(game_time - last_game_time)
        last_game_time = game_time
    mean_time = statistics.mean(game_times)
    # statistics.stdev needs at least two samples and would raise for a
    # one-game session; report zero spread for that case instead.
    std_time = statistics.stdev(game_times) if len(game_times) > 1 else 0.0
    return mean_time, std_time

In [15]:
def time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, **kwargs):
    """Run a training session for `agent` and report its throughput.

    The session comes from ``agent.init_training_session`` (all positional
    arguments and ``**kwargs`` are forwarded unchanged) and is timed with
    ``time_session``. The mean time per step is inverted into a rate, scaled
    by the number of games per step (``game.batch_size``, with 0 counted as a
    single game).

    Prints the rate and its uncertainty, and returns them as
    ``(inverse_mean, inverse_std)``. The printed unit is "games per sec",
    which presumes the session reports times in seconds -- confirm against
    ``src.Agent``.
    """
    step_mean, step_std = time_session(
        agent.init_training_session(num_games, game, eps_start, eps_end, eps_decay, **kwargs)
    )

    # An unbatched game (batch_size == 0) still plays one game per step.
    games_per_step = game.batch_size
    games_per_step = 1 if games_per_step == 0 else games_per_step

    rate = games_per_step / step_mean
    # First-order error propagation for r = n / t:  sigma_r = n * sigma_t / t**2.
    rate_err = games_per_step * step_std / step_mean**2

    message = 'Agent can play {inverse_mean: .2f} +/- {inverse_std: .2f} games per sec'.format(
        inverse_mean=rate,
        inverse_std=rate_err,
    )
    print(message)
    return rate, rate_err

### CPU

In [16]:
# Agent driven by src.value_func.RandomFunc on the CPU; reused by the CPU agent
# benchmarks that follow.
device = torch.device('cpu')
value_func = src.value_func.RandomFunc(device)
agent = src.Agent(value_func)

In [17]:
# Unbatched agent benchmark: time_agent counts batch_size == 0 as one game per
# timed step. num_games is forwarded to agent.init_training_session.
batch_size = 0
num_games = 1000
game = src.Game(batch_size, device=device)
# Trailing ';' hides the returned (rate, error) tuple; time_agent prints the summary.
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  10.29 +/-  3.23 games per sec


In [18]:
# CPU agent benchmark with batch_size = 10.
batch_size = int(1e1)
num_games = 300
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  62.17 +/-  10.23 games per sec


In [19]:
# CPU agent benchmark with batch_size = 100.
batch_size = int(1e2)
num_games = 100
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  334.21 +/-  28.64 games per sec


In [20]:
# CPU agent benchmark with batch_size = 1,000.
# NOTE(review): this assignment was previously misspelled, so the cell silently
# reused num_games from the preceding cell; re-run to refresh the recorded output.
batch_size = int(1e3)
num_games = 30
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  827.91 +/-  38.79 games per sec


In [21]:
# CPU agent benchmark with batch_size = 10,000.
batch_size = int(1e4)
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  922.67 +/-  30.71 games per sec


### GPU

In [22]:
# Rebuild the RandomFunc-backed agent on the CUDA device for the GPU agent
# benchmarks that follow.
device = torch.device('cuda')
value_func = src.value_func.RandomFunc(device)
agent = src.Agent(value_func)

In [23]:
# Unbatched agent benchmark on the GPU (batch_size == 0 is counted as one game
# per timed step by time_agent).
batch_size = 0
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  0.40 +/-  0.12 games per sec


In [24]:
# GPU agent benchmark with batch_size = 100.
batch_size = int(1e2)
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  18.63 +/-  3.06 games per sec


In [25]:
# GPU agent benchmark with batch_size = 10,000.
batch_size = int(1e4)
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  1499.22 +/-  55.80 games per sec


In [26]:
# GPU agent benchmark with batch_size = 1,000,000.
batch_size = int(1e6)
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay);

Agent can play  96295.83 +/-  7485.64 games per sec


## Neural Agent Play

### CPU

In [36]:
# Network handed to src.value_func.NNFunc for the CPU runs: five Linear layers
# narrowing 16 -> 16 -> 8 -> 4 -> 2 -> 1, each followed by a Hardsigmoid
# (including the output layer).
# NOTE(review): this cell's execution count is out of sequence with the cells
# that follow it; re-run the notebook top to bottom to confirm the recorded
# CPU numbers belong to this definition.
_layer_widths = (16, 16, 8, 4, 2, 1)
_layers = []
for _fan_in, _fan_out in zip(_layer_widths[:-1], _layer_widths[1:]):
    _layers.append(nn.Linear(_fan_in, _fan_out))
    _layers.append(nn.Hardsigmoid())
model = nn.Sequential(*_layers)

# Smooth L1 loss, and Adam with its default hyperparameters over the model's parameters.
optimizer = optim.Adam(model.parameters())
loss_func = F.smooth_l1_loss

In [28]:
# Agent whose value function wraps `model` via src.value_func.NNFunc on the CPU.
device = torch.device('cpu')
value_func = src.value_func.NNFunc(model, device)
agent = src.Agent(value_func)

In [29]:
# Unbatched neural-agent benchmark on the CPU. loss_func and optimizer reach
# agent.init_training_session through time_agent's **kwargs.
batch_size = 0
num_games = 100
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  1.42 +/-  0.51 games per sec


In [30]:
# CPU neural-agent benchmark with batch_size = 100.
batch_size = int(1e2)
num_games = 30
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  60.91 +/-  6.45 games per sec


In [31]:
# CPU neural-agent benchmark with batch_size = 10,000.
batch_size = int(1e4)
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  460.12 +/-  33.96 games per sec


### GPU

In [36]:
# Fresh network (new weights, new optimizer state) for the GPU runs: five
# Linear layers narrowing 16 -> 16 -> 8 -> 4 -> 2 -> 1, each followed by a
# Hardsigmoid (including the output layer).
_layer_widths = (16, 16, 8, 4, 2, 1)
_layers = []
for _fan_in, _fan_out in zip(_layer_widths[:-1], _layer_widths[1:]):
    _layers.append(nn.Linear(_fan_in, _fan_out))
    _layers.append(nn.Hardsigmoid())
model = nn.Sequential(*_layers)

# Smooth L1 loss, and Adam with its default hyperparameters over the model's parameters.
optimizer = optim.Adam(model.parameters())
loss_func = F.smooth_l1_loss

In [37]:
# Agent whose value function wraps `model` via src.value_func.NNFunc on the
# CUDA device.
device = torch.device('cuda')
value_func = src.value_func.NNFunc(model, device)
agent = src.Agent(value_func)

In [38]:
# Unbatched neural-agent benchmark on the GPU. loss_func and optimizer reach
# agent.init_training_session through time_agent's **kwargs.
batch_size = 0
num_games = 100
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  0.23 +/-  0.09 games per sec


In [39]:
# GPU neural-agent benchmark with batch_size = 100.
batch_size = int(1e2)
num_games = 30
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  10.05 +/-  1.24 games per sec


In [40]:
# GPU neural-agent benchmark with batch_size = 10,000.
batch_size = int(1e4)
num_games = 10
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  689.55 +/-  47.91 games per sec


In [41]:
# GPU neural-agent benchmark with batch_size = 1,000,000. With num_games = 3
# the reported spread comes from very few timing samples.
batch_size = int(1e6)
num_games = 3
game = src.Game(batch_size, device=device)
time_agent(agent, num_games, game, eps_start, eps_end, eps_decay, loss_func=loss_func, optimizer=optimizer);

Agent can play  23361.03 +/-  695.68 games per sec
