# AlphaZero Algorithm - Testing

This notebook was built to conduct experiences on the AlphaZero Algorithm and better understand its implementation details

# Import libraries / modules

In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np

import games_mod # Games
import policy_mod # neural network
from play_mod import Play #functionalities of game
import training_mod #neural network training
from replay_buffer_dict import ReplayBuffer #centralized buffer
from utils import DotDict #other utilities
from log_data import LogData #logging class for monitoring purposes

# Game, Training, Competition, Benchmark and Play Settings

In [2]:
# Game settings
game_settings = DotDict({
    "board_size": (3,3),
    "N": 3
})

# Self-play training settings
game_training_settings = DotDict({
    "generations": 100,
    "self_play_iterations": 50,
    "data_augmentation_times": 1
})
# alpha = 10 / average legal moves 
# https://medium.com/oracledevs/lessons-from-alphazero-part-3-parameter-tweaking-4dceb78ed1e5 

# Self-play training settings
mcts_settings = DotDict({
    "explore_steps": 50,
    "temp": 1.0,
    "dir_enabled": True,
    "dir_eps": 0.25,
    "dir_alpha": 2.0,
})

# neural network settings
nn_training_settings = DotDict({
    "load_policy": False,
    "policy_path": "ai_ckp.pth",
    "ckp_folder":"../ckp",
    "lr": .005, 
    "weight_decay": 1.e-4,
    "buffer_size_target": 1000,
    "n_epochs": 1,
    "batch_size": 50
})
# set compet_freq at 0 for disabling the competition between current and trained network. 
# In this case the trained network replaces the current network at every generation

benchmark_competition_settings = DotDict({
    "compet_freq":0,
    "compet_rounds": 2,
    "net_compet_threshold": 0.0,
    "benchmark_freq": 5,
    "benchmark_rounds": 50,
    "mcts_iterations": 1000,
    "mcts_random_moves":0
})

# play settings
play_settings = DotDict({
    "explore_steps": 50,
    "temperature": 0.01                         
})

# Training the AI

In [3]:
log_data = LogData()
log_data.add_chart("nn_loss", ["nn_loss.csv", ['iter', 'loss', 'value_loss', 'prob_loss']])
log_data.add_chart("buffer", ["buffer.csv", ['iter', 'wins', 'losses', 'draws']])
log_data.add_chart("compet", ["compet.csv",['iter', 'scores']])

game=games_mod.ConnectN(game_settings)

policy = policy_mod.Policy(nn_training_settings.policy_path, 
                           nn_training_settings, 
                           log_data)
policy.save_weights()

buffer = ReplayBuffer(nn_training_settings.buffer_size_target, 
                      nn_training_settings.batch_size, 
                      log_data)

In [4]:
t0 = time.time()
alpha_0 = training_mod.AlphaZeroTraining(
    game_settings, 
    game_training_settings,
    mcts_settings,
    nn_training_settings,
    benchmark_competition_settings,
    play_settings,
    policy,
    log_data)
alpha_0.training_pipeline(buffer)
t1 = time.time()
print (t1 - t0)

Generations:  14%|█▍        | 14/100 [03:48<18:12, 12.71s/it]Network replaced at generation 14
Generations:  19%|█▉        | 19/100 [05:26<17:23, 12.88s/it]Network replaced at generation 19
Generations:  24%|██▍       | 24/100 [07:03<16:24, 12.95s/it]Network replaced at generation 24
Generations:  24%|██▍       | 24/100 [07:23<23:25, 18.50s/it]


FileNotFoundError: [Errno 2] No such file or directory: 'ckp/ai_temp_ckp.pth'

# Testing some positions

In [7]:
import torch
import numpy as np
import policy_mod  # neural network

def test_final_positions(game_state):

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    frame = torch.tensor(game_state, dtype=torch.float, device=device).unsqueeze(0).unsqueeze(0)
    policy_path = "ai_ckp.pth"
    policy = policy_mod.Policy(policy_path, nn_training_settings)
    policy.load_weights(policy_path)
    print (frame)
    v, p = policy.forward_batch(frame)
    print("Probabilities = {}; Values = {}".format(p, v))
    

In [8]:
game_state1 = np.array([[-1, 1, -1], [0, 1, 0], [0, 0, 0]])
test_final_positions (game_state1)

tensor([[[[-1.,  1., -1.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 8.4970e-08,  8.4970e-08,  8.4970e-08,  1.2820e-03,  8.4970e-08,
          1.3472e-03,  7.9750e-04,  9.9490e-01,  1.6758e-03]]); Values = tensor([[ 0.8834]])


In [9]:
game_state2 = np.array([[-1, 1, -1], [0, 1, -1], [0, 0, 0]])
test_final_positions (game_state2)

tensor([[[[-1.,  1., -1.],
          [ 0.,  1., -1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 1.0181e-07,  1.0181e-07,  1.0181e-07,  1.5234e-04,  1.0181e-07,
          1.0181e-07,  2.5640e-04,  9.9826e-01,  1.3306e-03]]); Values = tensor([[ 0.9417]])


## Game with 2nd position not in the center

In [10]:
game_state3 = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
test_final_positions (game_state3)

tensor([[[[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.1265,  0.0394,  0.1808,  0.0311,  0.3087,  0.0458,  0.0772,
          0.0497,  0.1408]]); Values = tensor([[-0.1421]])


In [11]:
game_state4 = np.array([[-1, 0, 0], [0, 0, 0], [0, 0, 0]])
test_final_positions (game_state4)

tensor([[[[-1.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0011,  0.0643,  0.0552,  0.0646,  0.5669,  0.0362,  0.0459,
          0.0493,  0.1165]]); Values = tensor([[-0.6335]])


In [12]:
game_state5 = np.array([[1, 0, 0], [0, 0, -1], [0, 0, 0]])
test_final_positions (game_state5)

tensor([[[[ 1.,  0.,  0.],
          [ 0.,  0., -1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0004,  0.1166,  0.4792,  0.0829,  0.1865,  0.0004,  0.1082,
          0.0050,  0.0208]]); Values = tensor([[ 0.9842]])


In [13]:
game_state6 = np.array([[-1, 0, -1], [0, 0, 1], [0, 0, 0]])
test_final_positions (game_state6)

tensor([[[[-1.,  0., -1.],
          [ 0.,  0.,  1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0002,  0.4954,  0.0002,  0.1477,  0.1413,  0.0002,  0.0997,
          0.0775,  0.0378]]); Values = tensor([[-0.9286]])


In [14]:
game_state7 = np.array([[1, -1, 1], [0, 0, -1], [0, 0, 0]])
test_final_positions (game_state7)

tensor([[[[ 1., -1.,  1.],
          [ 0.,  0., -1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0000,  0.0000,  0.0000,  0.0092,  0.6929,  0.0000,  0.2636,
          0.0016,  0.0327]]); Values = tensor([[ 0.9997]])


In [15]:
game_state8 = np.array([[-1, 1, -1], [0, -1, 1], [0, 0, 0]])
test_final_positions (game_state8)

tensor([[[[-1.,  1., -1.],
          [ 0., -1.,  1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0001,  0.0001,  0.0001,  0.3398,  0.0001,  0.0001,  0.5126,
          0.1091,  0.0381]]); Values = tensor([[-0.8974]])


# Testing Symetries

In [34]:
import torch
import numpy as np
def flip(x, dim):

    indices = [slice(None)] * x.dim()
    indices[dim] = torch.arange(
        x.size(dim) - 1, -1, -1, dtype=torch.long, device=x.device
    )
    return x[tuple(indices)]

In [87]:
t0 = lambda x: x
t1 = lambda x: x[:, ::-1].copy()
t2 = lambda x: x[::-1, :].copy()
t3 = lambda x: x[::-1, ::-1].copy()
t4 = lambda x: x.T
# TO DO
t5 = lambda x: x[:, ::-1].T.copy()
t6 = lambda x: x[::-1, :].T.copy()
t7 = lambda x: x[::-1, ::-1].T.copy()

tlist = [t0, t1, t2, t3, t4, t7]
tlist_half = [t0, t1, t2, t3]

# inverse transformations
t0inv = lambda x: x
t1inv = lambda x: flip(x, 1)
t2inv = lambda x: flip(x, 0)
t3inv = lambda x: flip(flip(x, 0), 1)
t4inv = lambda x: x.t()
# TO DO
t5inv = lambda x: flip(x, 1).t()
t6inv = lambda x: flip(x, 0).t()
t7inv = lambda x: flip(flip(x, 0), 1).t()

In [100]:
t, tinv = t7, t7inv

In [101]:
input_board = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
new_board = t(input_board)
new_board_tensor = torch.tensor(input_board)
prob = new_board_tensor.reshape(3, 3)
old_board = tinv(prob)

In [102]:
new_board, old_board

(array([[9, 6, 3],
        [8, 5, 2],
        [7, 4, 1]]),
 tensor([[ 9,  6,  3],
         [ 8,  5,  2],
         [ 7,  4,  1]]))