# AlphaZero Algorithm - Testing

This notebook was built to run experiments on the AlphaZero algorithm and to better understand its implementation details.

# Import libraries / modules

In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np

import games_mod # Games
import policy_mod # neural network
from play_mod import Play #functionalities of game
import training_mod #neural network training
from replay_buffer_dict import ReplayBuffer #centralized buffer
from utils import DotDict #other utilities
from log_data import LogData #logging class for monitoring purposes

# Game, Training, Competition, Benchmark and Play Settings

In [2]:
# Game settings: board dimensions and N
# (presumably the N-in-a-row needed to win -- confirm in games_mod).
game_settings = DotDict({
    "board_size": (3, 3),
    "N": 3,
})

# Self-play training settings
game_training_settings = DotDict({
    "generations": 100,
    "self_play_iterations": 50,
    "data_augmentation_times": 1,
})

# MCTS settings
# Heuristic noted by the author (presumably for `dir_alpha`):
#   alpha = 10 / average legal moves
# https://medium.com/oracledevs/lessons-from-alphazero-part-3-parameter-tweaking-4dceb78ed1e5
mcts_settings = DotDict({
    "explore_steps": 50,
    "temp": 1,
    "dir_enabled": True,
    "dir_eps": 0.25,
    "dir_alpha": 2.0,
})

# Neural network settings
nn_training_settings = DotDict({
    "load_policy": False,
    "policy_path": "ai_ckp.pth",
    "ckp_folder": "ckp",
    "lr": 0.005,
    "weight_decay": 1e-4,
    "buffer_size_target": 1000,
    "n_epochs": 1,
    "batch_size": 50,
})

# Competition and benchmark settings
benchmark_competition_settings = DotDict({
    "compet_freq": 0,
    "compet_rounds": 10,
    "net_compet_threshold": 0,
    "benchmark_freq": 5,
    "benchmark_rounds": 50,
    "mcts_iterations": 1000,
    "mcts_random_moves": 1,
})

# Play settings
play_settings = DotDict({
    "explore_steps": 50,
    "temperature": 0.01,
})

# Training the AI

In [3]:
# Monitoring: register the charts to log. Each call passes a chart name plus
# a [CSV file name, column names] pair.
log_data = LogData()
log_data.add_chart(
    "nn_loss",
    ["nn_loss.csv", ["iter", "loss", "value_loss", "prob_loss"]],
)
log_data.add_chart(
    "buffer",
    ["buffer.csv", ["iter", "wins", "losses", "draws"]],
)
log_data.add_chart(
    "compet",
    ["compet.csv", ["iter", "scores"]],
)

# Game instance built from the game settings.
game = games_mod.ConnectN(game_settings)

# Policy network; its weights are saved right after construction.
policy = policy_mod.Policy(
    nn_training_settings.policy_path,
    nn_training_settings,
    log_data,
)
policy.save_weights()

# Replay buffer configured from the neural network training settings.
buffer = ReplayBuffer(
    nn_training_settings.buffer_size_target,
    nn_training_settings.batch_size,
    log_data,
)

In [4]:
# Build the trainer from all settings groups, run the training pipeline on the
# replay buffer, and print the elapsed wall-clock time in seconds.
t0 = time.time()
alpha_0 = training_mod.AlphaZeroTraining(
    game_settings, game_training_settings, mcts_settings,
    nn_training_settings, benchmark_competition_settings, play_settings,
    policy, log_data,
)
alpha_0.training_pipeline(buffer)
t1 = time.time()
print(t1 - t0)

KeyboardInterrupt: 

50

# Testing some positions

In [9]:
import torch
import numpy as np
import policy_mod  # neural network

def test_final_positions(game_state, policy_path="ckp/ai_ckp.pth"):
    """Print the network's move probabilities and value estimate for a board.

    Args:
        game_state: 2-D array-like board; the notebook passes 3x3 NumPy
            arrays with entries in {-1, 0, 1}.
        policy_path: Checkpoint file used both to construct the policy and to
            load its weights. Defaults to the path that used to be hard-coded
            in this function, so existing calls behave as before.

    Returns:
        None. The input frame and the network outputs are printed.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Two leading singleton axes are added, giving shape (1, 1, rows, cols)
    # (presumably batch and channel -- confirm against policy_mod).
    frame = torch.tensor(game_state, dtype=torch.float, device=device).unsqueeze(0).unsqueeze(0)
    policy = policy_mod.Policy(policy_path)
    policy.load_weights(policy_path)
    print(frame)
    # forward_batch returns (value, probabilities) in that order.
    v, p = policy.forward_batch(frame)
    print("Probabilities = {}; Values = {}".format(p, v))
    

In [10]:
# Position 1
game_state1 = np.array([
    [-1, 1, -1],
    [0, 1, 0],
    [0, 0, 0],
])
test_final_positions(game_state1)

tensor([[[[-1.,  1., -1.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 3.4858e-06,  3.4858e-06,  3.4858e-06,  3.6218e-02,  3.4858e-06,
          4.3896e-02,  6.2306e-03,  9.1239e-01,  1.2473e-03]]); Values = tensor([[ 0.9937]])


In [11]:
# Position 2
game_state2 = np.array([
    [-1, 1, -1],
    [0, 1, -1],
    [0, 0, 0],
])
test_final_positions(game_state2)

tensor([[[[-1.,  1., -1.],
          [ 0.,  1., -1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 1.0748e-06,  1.0748e-06,  1.0748e-06,  7.5641e-03,  1.0748e-06,
          1.0748e-06,  1.1053e-02,  9.7835e-01,  3.0257e-03]]); Values = tensor([[ 0.7763]])


## Games where the second move is not in the center

In [12]:
# Position 3: empty board
game_state3 = np.array([
    [0, 0, 0],
    [0, 0, 0],
    [0, 0, 0],
])
test_final_positions(game_state3)

tensor([[[[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.1330,  0.0343,  0.1149,  0.0498,  0.1819,  0.0421,  0.2458,
          0.0524,  0.1457]]); Values = tensor(1.00000e-02 *
       [[-2.8003]])


In [13]:
# Position 4
game_state4 = np.array([
    [-1, 0, 0],
    [0, 0, 0],
    [0, 0, 0],
])
test_final_positions(game_state4)

tensor([[[[-1.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0004,  0.1201,  0.1103,  0.1005,  0.3945,  0.0353,  0.1392,
          0.0302,  0.0695]]); Values = tensor([[-0.3695]])


In [14]:
# Position 5
game_state5 = np.array([
    [1, 0, 0],
    [0, 0, -1],
    [0, 0, 0],
])
test_final_positions(game_state5)

tensor([[[[ 1.,  0.,  0.],
          [ 0.,  0., -1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0000,  0.0161,  0.3758,  0.0582,  0.1935,  0.0000,  0.3206,
          0.0050,  0.0306]]); Values = tensor([[ 0.9192]])


In [15]:
# Position 6
game_state6 = np.array([
    [-1, 0, -1],
    [0, 0, 1],
    [0, 0, 0],
])
test_final_positions(game_state6)

tensor([[[[-1.,  0., -1.],
          [ 0.,  0.,  1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0001,  0.3831,  0.0001,  0.1559,  0.2523,  0.0001,  0.1125,
          0.0479,  0.0479]]); Values = tensor([[-0.2738]])


In [16]:
# Position 7
game_state7 = np.array([
    [1, -1, 1],
    [0, 0, -1],
    [0, 0, 0],
])
test_final_positions(game_state7)

tensor([[[[ 1., -1.,  1.],
          [ 0.,  0., -1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0000,  0.0000,  0.0000,  0.0097,  0.7339,  0.0000,  0.2310,
          0.0051,  0.0201]]); Values = tensor([[ 0.9843]])


In [17]:
# Position 8
game_state8 = np.array([
    [-1, 1, -1],
    [0, -1, 1],
    [0, 0, 0],
])
test_final_positions(game_state8)

tensor([[[[-1.,  1., -1.],
          [ 0., -1.,  1.],
          [ 0.,  0.,  0.]]]])
Probabilities = tensor([[ 0.0001,  0.0001,  0.0001,  0.1038,  0.0001,  0.0001,  0.3113,
          0.1818,  0.4026]]); Values = tensor([[-0.7754]])



# New MCTS

In [18]:
from competition import match_net_mcts
from utils import DotDict #other utilities

# Game settings for the MCTS benchmark below; same values as the
# game_settings defined in the settings cell near the top of the notebook.
game_settings = DotDict({
    "board_size": (3,3),
    "N": 3
})
# NOTE(review): the triple-quoted block below is disabled code kept as a bare
# string literal. Because it is the last expression of the cell, the notebook
# echoes it as the cell's output. It would rebuild the game and policy and
# save the policy weights if re-enabled.
'''
game=games_mod.ConnectN(game_settings)
policy = policy_mod.Policy(nn_training_settings.policy_path, 
                           nn_training_settings, 
                           log_data)
policy.save_weights()
'''

'\ngame=games_mod.ConnectN(game_settings)\npolicy = policy_mod.Policy(nn_training_settings.policy_path, \n                           nn_training_settings, \n                           log_data)\npolicy.save_weights()\n'

In [19]:
# Benchmark parameters passed to match_net_mcts
# (presumably a network-vs-MCTS match -- confirm in the competition module).
benchmark_rounds, mcts_iterations, mcts_random_moves = 1, 5000, 0

scores1 = match_net_mcts(
    game_settings, benchmark_rounds, mcts_iterations, mcts_random_moves
)

In [20]:
# Display the value returned by match_net_mcts in the previous cell.
scores1

1.0