# AlphaZero Algorithm - Testing

This notebook was built to conduct experiences on the AlphaZero Algorithm and better understand its implementation details

# Import libraries / modules

In [1]:
# Python libraries
import time
import random
from copy import copy
# z3rd party libraries
import matplotlib.pyplot as plt
import numpy as np

# Game-related libraries
import games_mod # Games
import policy_mod # neural network
from play_mod import Play
import training_mod
from game_utils import DotDict, policy_player_mcts, random_player, match_ai, network_only
from log_data import LogData
import plain_mcts

# Game, Training and Play Settings

In [2]:
# Game settings
game_settings = DotDict({
    "board_size": (3,3),
    "N": 3
})

# Self-play training settings
game_training_settings = DotDict({
    "comp_interval":1000,
    "episods": 300,
    "self_play_iterations": 50,
    "explore_steps": 500,
    "temp_threshold": [30, 0.01],
    "dir_eps": 0.25,
    "dir_alpha": 1.0 
})

# temp_threshold: [x,y] means "up to x episods, applies y temperature"

# neural network settings
nn_training_settings = DotDict({
    "load_policy": False,
    "ai_ckp": "",
    "lr": .01, 
    "weight_decay": 1.e-4,
    "training_steps":30,
    "buffer_size":1500,
    "batch_size": 20
})

# play settings
play_settings = DotDict({
    "explore_steps": 50,
    "temperature": 0.01                         
})

buffer_size = nn_training_settings.buffer_size
batch_size = nn_training_settings.batch_size

In [3]:
from competition import match_net_mcts

In [4]:
policy = policy_mod.Policy()

In [5]:
match_net_mcts(policy, game_settings)

9 0
7 1
9 0
9 0
5 1
7 1
9 0
9 0
7 1
7 1


15

# Testing MCTS

## Parameters

In [6]:
def roll_out (game, nb_roll_out = 5):
    scores = []
    for _ in range(nb_roll_out):
        sim_game = copy(game)
        while sim_game.score == None:          
            random_move = random.choice(sim_game.available_moves())
            #print (random_move)
            sim_game.move(random_move)
            #print (game.state)
        scores.append(sim_game.score)
    return np.average(np.array(scores))

In [7]:
new_game = games_mod.ConnectN(game_settings)
mytree = plain_mcts.Node(new_game)
#new_game.state = np.array([[0,1,1],[0,-1, -1],[0,0,0]])
#new_game.n_moves = 4

In [8]:
while mytree.outcome is None:
    
    for _ in range (1000):
        mytree.explore()

    mytree = mytree.next(temperature=0.01)
    print (mytree.game.state)
    #memory.append()
    # print ("Sent to memory {}".format([state * current_player, p]))
    mytree.detach_mother()
    outcome = mytree.outcome
print ("Game outcome {}".format(outcome))

[[0. 0. 0.]
 [0. 1. 0.]
 [0. 0. 0.]]
[[ 0.  0. -1.]
 [ 0.  1.  0.]
 [ 0.  0.  0.]]
[[ 0.  1. -1.]
 [ 0.  1.  0.]
 [ 0.  0.  0.]]
[[ 0.  1. -1.]
 [ 0.  1.  0.]
 [ 0. -1.  0.]]
[[ 0.  1. -1.]
 [ 0.  1.  1.]
 [ 0. -1.  0.]]
[[ 0.  1. -1.]
 [-1.  1.  1.]
 [ 0. -1.  0.]]
[[ 1.  1. -1.]
 [-1.  1.  1.]
 [ 0. -1.  0.]]
[[ 1.  1. -1.]
 [-1.  1.  1.]
 [ 0. -1. -1.]]
[[ 1.  1. -1.]
 [-1.  1.  1.]
 [ 1. -1. -1.]]
Game outcome 0


In [9]:
print (mytree.child[(0,0)].N)
print (mytree.child[(0,1)].N)
print (mytree.child[(0,2)].N)
print (mytree.child[(1,0)].N)
print (mytree.child[(1,1)].N)
print (mytree.child[(1,2)].N)
print (mytree.child[(2,0)].N)
print (mytree.child[(2,1)].N)
print (mytree.child[(2,2)].N)

KeyError: (0, 0)

# Agent1 = receives last game move

In [None]:
new_game = games_mod.ConnectN(game_settings)
agent1 = plain_mcts.Node(new_game)
agent2 = plain_mcts.Node(new_game)

In [None]:
def play_agent(agent, iterations):
    for _ in range (iterations):
        agent.explore()
    agent = agent.next(temperature=0.01)
    return agent.game.last_move

In [None]:
scores = 0
for _ in range(100):
    new_game = games_mod.ConnectN(game_settings)
    agent1 = plain_mcts.Node(new_game)
    agent2 = plain_mcts.Node(new_game)
    turn = 0
    seq_states = []

    while new_game.score is None and turn < 2:
        #print (new_game.state)
        if turn % 2 == 0 and turn > 1:
            agent1 = plain_mcts.Node(new_game)
            new_game.move (play_agent(agent1, 500))
        elif turn % 2 == 0:
            #new_game.move (random.choice(new_game.available_moves()))
            new_game.move ((2,1))
        else:
            agent2 = plain_mcts.Node(new_game)
            new_game.move (play_agent(agent2, 2000))
        seq_states.append(copy(new_game.state))
        turn +=1
    if new_game.score == 1:
        stored_game = new_game
        stored_seq_states = seq_states
        break
    #scores += new_game.score
    #print (turn, new_game.score)
    del new_game