# AlphaZero Algorithm - Testing

This notebook was built to run experiments on the AlphaZero algorithm and to better understand its implementation details.

# Import libraries / modules

In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np

import games_mod # Games
import policy_mod # neural network
from play_mod import Play #functionalities of game
import training_mod #neural network training
from replay_buffer_dict import ReplayBuffer #centralized buffer
from utils import DotDict #other utilities
from log_data import LogData #logging class for monitoring purposes

# Game, Training, Competition, Benchmark and Play Settings

In [2]:
# Game settings
game_settings = DotDict(dict(
    board_size=(3, 3),
    N=3,
    discount_enabled=False,
))

# Self-play training settings
game_training_settings = DotDict(dict(
    generations=100,
    self_play_iterations=50,
    data_augmentation_times=1,
))

# MCTS settings
# alpha = 10 / average legal moves (presumably the rule of thumb behind
# dir_alpha -- see the article below)
# https://medium.com/oracledevs/lessons-from-alphazero-part-3-parameter-tweaking-4dceb78ed1e5
mcts_settings = DotDict(dict(
    explore_steps=50,
    temp=1.0,
    dir_enabled=True,
    dir_eps=0.25,
    dir_alpha=2.0,
))

# Neural network settings
nn_training_settings = DotDict(dict(
    load_policy=False,
    policy_path="ai_ckp.pth",
    ckp_folder="../ckp",
    lr=0.005,
    weight_decay=1e-4,
    buffer_size_target=1000,
    n_epochs=1,
    batch_size=50,
))

# Benchmark / competition settings
# Set compet_freq to 0 to disable the competition between the current and the
# trained network. In that case the trained network replaces the current
# network at every generation.
benchmark_competition_settings = DotDict(dict(
    compet_freq=0,
    compet_rounds=2,
    net_compet_threshold=0.0,
    benchmark_freq=5,
    benchmark_rounds=50,
    mcts_iterations=1000,
    mcts_random_moves=0,
))

# Play settings
play_settings = DotDict(dict(
    explore_steps=50,
    temperature=0.01,
))

In [3]:
# Display the configured competition frequency (0 disables the competition,
# per the comment in the settings cell).
benchmark_competition_settings.compet_freq

0

# Training the AI

In [4]:
# Logging: register one chart per CSV file, each with its column names.
log_data = LogData()
log_data.add_chart(
    "nn_loss",
    ["nn_loss.csv", ["iter", "loss", "value_loss", "prob_loss"]],
)
log_data.add_chart(
    "buffer",
    ["buffer.csv", ["iter", "wins", "losses", "draws"]],
)
log_data.add_chart(
    "compet",
    ["compet.csv", ["iter", "scores"]],
)

# Game instance built from the game settings.
game = games_mod.ConnectN(game_settings)

# Policy network; its weights are saved right after construction.
policy = policy_mod.Policy(
    nn_training_settings.policy_path,
    nn_training_settings,
    log_data,
)
policy.save_weights()

# Replay buffer configured from the target size and the batch size.
buffer = ReplayBuffer(
    nn_training_settings.buffer_size_target,
    nn_training_settings.batch_size,
    log_data,
)

In [5]:
# Run the full training pipeline and report how long it took (in seconds).
# time.perf_counter() is used instead of time.time(): it is monotonic, so the
# measured duration cannot be distorted by system clock adjustments.
t0 = time.perf_counter()
alpha_0 = training_mod.AlphaZeroTraining(
    game_settings,
    game_training_settings,
    mcts_settings,
    nn_training_settings,
    benchmark_competition_settings,
    play_settings,
    policy,
    log_data,
)
alpha_0.training_pipeline(buffer)
t1 = time.perf_counter()
print(t1 - t0)

NameError: name 'add_to_buffer' is not defined

# Testing some positions

In [None]:
import torch
import numpy as np
import policy_mod  # neural network

def test_final_positions(game_state, policy_path="ai_ckp.pth"):
    """Print the policy network's output for a single board position.

    Parameters
    ----------
    game_state : array-like
        Board to evaluate. It is converted to a float tensor and given two
        leading singleton dimensions before being passed to the network.
    policy_path : str, optional
        Checkpoint path handed to ``policy_mod.Policy`` and ``load_weights``.
        Defaults to ``"ai_ckp.pth"``, the value that used to be hard-coded.

    Returns
    -------
    None
        The input tensor, then the probabilities and values returned by
        ``policy.forward_batch``, are printed.

    Notes
    -----
    Reads the notebook-level ``nn_training_settings``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    frame = (
        torch.tensor(game_state, dtype=torch.float, device=device)
        .unsqueeze(0)
        .unsqueeze(0)
    )

    # NOTE(review): the training setup cell passes `log_data` as a third
    # argument to Policy; it is omitted here as in the original -- confirm
    # that Policy accepts this.
    policy = policy_mod.Policy(policy_path, nn_training_settings)
    policy.load_weights(policy_path)

    print(frame)
    # Pure inspection: no gradients are needed for the forward pass.
    with torch.no_grad():
        v, p = policy.forward_batch(frame)
    print("Probabilities = {}; Values = {}".format(p, v))
    

In [None]:
# Board literal (cells are -1, 0 or 1) fed to the position check.
game_state1 = np.array([
    [-1, 1, -1],
    [0, 1, 0],
    [0, 0, 0],
])
test_final_positions(game_state1)

In [None]:
# Board literal (cells are -1, 0 or 1) fed to the position check.
game_state2 = np.array([
    [-1, 1, -1],
    [0, 1, -1],
    [0, 0, 0],
])
test_final_positions(game_state2)

## Game with 2nd position not in the center

In [None]:
# All-zero board fed to the position check.
game_state3 = np.array([
    [0, 0, 0],
    [0, 0, 0],
    [0, 0, 0],
])
test_final_positions(game_state3)

In [None]:
# Board with a single -1 in the top-left cell, fed to the position check.
game_state4 = np.array([
    [-1, 0, 0],
    [0, 0, 0],
    [0, 0, 0],
])
test_final_positions(game_state4)

In [None]:
# Board literal (cells are -1, 0 or 1) fed to the position check.
game_state5 = np.array([
    [1, 0, 0],
    [0, 0, -1],
    [0, 0, 0],
])
test_final_positions(game_state5)

In [None]:
# Board literal (cells are -1, 0 or 1) fed to the position check.
game_state6 = np.array([
    [-1, 0, -1],
    [0, 0, 1],
    [0, 0, 0],
])
test_final_positions(game_state6)

In [None]:
# Board literal (cells are -1, 0 or 1) fed to the position check.
game_state7 = np.array([
    [1, -1, 1],
    [0, 0, -1],
    [0, 0, 0],
])
test_final_positions(game_state7)

In [None]:
# Board literal (cells are -1, 0 or 1) fed to the position check.
game_state8 = np.array([
    [-1, 1, -1],
    [0, -1, 1],
    [0, 0, 0],
])
test_final_positions(game_state8)

# Testing Symmetries

In [None]:
import torch
import numpy as np
def flip(x, dim):
    """Return a copy of tensor ``x`` with its entries reversed along ``dim``.

    Parameters
    ----------
    x : torch.Tensor
        Tensor to reverse.
    dim : int
        Dimension along which the order of the entries is reversed.

    Returns
    -------
    torch.Tensor
        A new tensor (not a view) with the same shape, dtype and device as
        ``x``.
    """
    # torch.flip replaces the hand-built reversed-index gather.
    return torch.flip(x, dims=(dim,))

In [None]:
# Board transformations applied to NumPy arrays. Reversed slices are
# materialised with .copy() (presumably so the result has no negative
# strides when it is later turned into a tensor).
t0 = lambda x: x                       # identity
t1 = lambda x: x[:, ::-1].copy()       # reverse the columns
t2 = lambda x: x[::-1, :].copy()       # reverse the rows
t3 = lambda x: x[::-1, ::-1].copy()    # reverse rows and columns
t4 = lambda x: x.T                     # transpose
t5 = lambda x: x[:, ::-1].T.copy()     # reverse the columns, then transpose
t6 = lambda x: x[::-1, :].T.copy()     # reverse the rows, then transpose
t7 = lambda x: x[::-1, ::-1].T.copy()  # reverse both, then transpose

# NOTE(review): t5 and t6 are still left out of tlist, exactly as before;
# add them here if all eight transformations are wanted.
tlist = [t0, t1, t2, t3, t4, t7]
tlist_half = [t0, t1, t2, t3]

# Inverse transformations, applied to torch tensors via flip() / .t().
t0inv = lambda x: x
t1inv = lambda x: flip(x, 1)
t2inv = lambda x: flip(x, 0)
t3inv = lambda x: flip(flip(x, 0), 1)
t4inv = lambda x: x.t()
# t5 and t6 are not their own inverses: t5 maps x[i, j] to position
# [n-1-j, i], so undoing it needs "reverse rows, then transpose" (the t6
# operation), and vice versa. The two definitions were previously swapped.
t5inv = lambda x: flip(x, 0).t()
t6inv = lambda x: flip(x, 1).t()
t7inv = lambda x: flip(flip(x, 0), 1).t()

In [None]:
# Select the transformation / inverse pair to exercise in the following cells.
t, tinv = t7, t7inv

In [None]:
# Round-trip check: transform the board on the NumPy side with `t`, then undo
# the transformation on the tensor side with `tinv`.
input_board = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
new_board = t(input_board)
# The tensor must be built from the *transformed* board; it was previously
# built from `input_board`, so `tinv` was applied to an untransformed board.
new_board_tensor = torch.tensor(new_board)
prob = new_board_tensor.reshape(3, 3)
old_board = tinv(prob)
# A correct inverse restores the original board.
assert torch.equal(old_board, torch.tensor(input_board)), "tinv did not undo t"

In [None]:
# Display the transformed board next to the result of applying the inverse.
new_board, old_board

In [None]:
import main

In [None]:
# Call launch() from the imported `main` module (its behaviour is defined
# outside this notebook).
main.launch()

In [None]:
# Scratch data: four lists (arr1 and arr2 have equal contents), each paired
# with an integer.
arr1, arr2 = [4, 2, 3], [4, 2, 3]
arr3, arr4 = [1, 2, 3], [3, 2, 3]
p1, p2, p3, p4 = 1, 2, 3, 4
list_arr = [
    [arr1, p1],
    [arr2, p2],
    [arr3, p3],
    [arr4, p4],
]

In [None]:
# Collect each distinct list from `list_arr` once, keeping first occurrences.
# The original cell was unfinished (`if arr i new_list:` with no body), so it
# could not even be parsed.
# NOTE(review): this keep-first de-duplication is an assumed completion;
# confirm the intent.
new_list = []

for arr, p in list_arr:
    if arr in new_list:
        # Duplicate contents: skip.
        # TODO: decide how the paired value `p` of a duplicate should be used.
        continue
    new_list.append(arr)


In [None]:
import numpy as np

In [None]:
# Scratch array containing one positive, one zero and one negative entry.
a = np.array([1,0,-1])

In [None]:
# Sum of the absolute values of `a`.
np.abs(a).sum()

In [None]:
# Scratch calculation: 0.9 squared.
0.9 ** 2