# AlphaZero Algorithm - Testing Self-Play

# Import libraries / modules

In [None]:
# Python libraries
import time

# 3rd party libraries
import matplotlib.pyplot as plt
import numpy as np

# Game-related libraries
import games_mod # Games
import policy_mod # neural network
from play_mod import Play
import training_mod
from game_utils import DotDict, policy_player_mcts, random_player, match_ai

# Game, Training and Play Settings

In [None]:
# Game settings: board dimensions plus the "N" parameter
# (presumably the number of aligned marks needed to win -- TODO confirm).
game_settings = DotDict(dict(board_size=(3, 3), N=3))

# Self-play training settings.
# NOTE: the "episods" key spelling is kept as-is; it is a runtime key and
# other modules presumably look it up under this exact name.
game_training_settings = DotDict(dict(
    comp_interval=100,
    episods=1000,
    explore_steps=50,
    # [x, y] means "up to x episodes, apply temperature y"
    temp_threshold=[300, 0.01],
))

# Neural network settings: checkpoint loading flag/path and the
# learning-rate / weight-decay values.
nn_training_settings = DotDict(dict(
    load_policy=False,
    ai_ckp="",
    lr=0.01,
    weight_decay=1e-4,
))

# Play settings: exploration step count and temperature used when playing.
play_settings = DotDict(dict(explore_steps=50, temperature=0.01))

## Testing the `execute_self_play` function

In [None]:
from self_play import execute_self_play

# Smoke test: run 100 self-play games with a freshly constructed policy and
# a fixed number of exploration steps, then tally the results.
policy = policy_mod.Policy()
temp = 1
explore_steps = 10

# For every game, keep only element [0][1] of the returned experience; it is
# the value compared against 1 / 0 / -1 below.
outcomes = [
    execute_self_play(game_settings, explore_steps, policy, temp)[0][1]
    for _ in range(100)
]

# Any value other than 1, 0 or -1 is simply not counted.
wins = sum(1 for outcome in outcomes if outcome == 1)
draws = sum(1 for outcome in outcomes if outcome == 0)
losses = sum(1 for outcome in outcomes if outcome == -1)

summary = "Games won / lost by player1 = {} / {}; number of draws = {}"
print(summary.format(wins, losses, draws))


In [None]:
from self_play import execute_self_play

# Per-setting tallies, consumed by the plotting cell.
wins_count = []
draws_count = []
mcts_explore = []

# Sweep the exploration step count (1, 6, ..., 46). Each setting gets a
# brand-new policy and 100 self-play games.
for explore_steps in range(1, 51, 5):
    policy = policy_mod.Policy()
    temp = 1

    # Element [0][1] of each returned experience is the value compared
    # against 1 / 0 / -1 below.
    outcomes = [
        execute_self_play(game_settings, explore_steps, policy, temp)[0][1]
        for _ in range(100)
    ]

    wins = sum(1 for outcome in outcomes if outcome == 1)
    draws = sum(1 for outcome in outcomes if outcome == 0)
    # Losses are tallied for parity with wins/draws but are not recorded.
    losses = sum(1 for outcome in outcomes if outcome == -1)

    mcts_explore.append(explore_steps)
    wins_count.append(wins)
    draws_count.append(draws)

In [None]:
# Plot the win and draw counts against the exploration step count.
for series, tag in ((wins_count, "wins"), (draws_count, "draws")):
    plt.plot(mcts_explore, series, label=tag)

plt.xlabel("MCTS explore_steps")
plt.ylabel("player 1 wins / draws")
plt.legend()
plt.show()