In [None]:
from Nnet import NNet 
from MCTS import mcts
from Node import Node
from Board import Board
from config import Config 

In [None]:
from Game import game

In [None]:
from AlphaBetaPlayer import AlphaBetaPlayer
from BasicPlayer import BasicPlayer
from RandomPlayer import RandomPlayer
from AlphaZeroPlayer import AlphaZeroPlayer
from handPlay import handPlayer

In [None]:
# Colab only: mount Google Drive at /content/drive. Config.save_path and the
# AlphaZeroPlayer model path used later in this notebook both live under this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
from copy import deepcopy
import random
import tensorflow.keras.backend as K
from keras.models import load_model

In [None]:
import os.path

In [None]:
# Display the installed Keras version (the recorded run used 2.4.3).
import keras
keras.__version__

'2.4.3'

In [None]:
# Training hyper-parameters, overriding whatever defaults config.Config ships with.
# Each setting is annotated with how Train (defined in the next cell) consumes it.
Config.epochs = 10            # epochs passed to model.fit in every training iteration
Config.self_games = 20        # self-play games generated per training iteration
Config.batch_size = 32        # batch size passed to model.fit
Config.mcts_iterations = 20   # number of outer training iterations in Train.start_training (despite the name)
Config.verbose = 1            # verbosity passed to model.fit
Config.callbacks = None       # callbacks passed to model.fit
Config.eval_games = 15        # games played between the fitted and the pre-fit network per iteration
Config.win_threshold = 0.55   # fitted network is kept only if its win rate is strictly above this
Config.save_path = '/content/drive/MyDrive/AlphaZeroModels/'  # prefix for model.h5 / best_model.h5 (note trailing slash)

In [None]:
class Train():
    """Self-play training loop for an AlphaZero-style network.

    Every iteration generates self-play games with the current network, fits
    the network on the collected (state, policy, value) samples, then plays
    the fitted network against a snapshot taken just before fitting. The
    fitted network is kept only if its win rate is strictly greater than
    ``Config.win_threshold``.
    """

    def __init__(self, board, model):
        """Set up the trainer.

        Args:
            board: template board; it is copied for every game and never
                played on directly. Must expose ``width``, ``height`` and
                ``make_copy()``.
            model: network wrapper whose ``.model`` attribute is the Keras
                model being trained.
        """
        self.board = board
        self.model = model
        # (height, width) order, matching the sample buffer below. The
        # previous (width, height) order built a transposed network.
        self.eval_model = NNet(board.height, board.width)
        self.training_states = np.empty((0, board.height, board.width, 2))
        self.training_ps = np.empty((0, board.width))
        self.training_vs = np.empty((0))

    def start_training(self):
        """Run ``Config.mcts_iterations`` rounds of self-play, fitting and evaluation.

        If ``best_model.h5`` exists under ``Config.save_path`` it is loaded
        first, so training resumes from the last accepted network.
        """
        best_path = Config.save_path + 'best_model.h5'
        snapshot_path = Config.save_path + 'model.h5'

        # Look in the same place the accepted model is written to below;
        # checking the bare file name would only ever search the working directory.
        if os.path.isfile(best_path):
            print('loading best model')
            self.model.model = load_model(best_path)

        for iteration in range(Config.mcts_iterations):

            print('Iteration :', iteration)

            # Samples are not carried over between iterations.
            self.training_states = np.empty((0, self.board.height, self.board.width, 2))
            self.training_ps = np.empty((0, self.board.width))
            self.training_vs = np.empty((0))

            # Preparing training data
            print('Launching self simulation')
            for i in range(Config.self_games):
                board = self.board.make_copy()
                self.self_play(board)

            print('Self simulations done')

            # Snapshot the pre-fit weights; they are the opponent in evaluation
            # and the fallback if the fitted network is rejected.
            self.model.model.save(snapshot_path)
            self.eval_model.model = load_model(snapshot_path)

            self.model.model.fit(self.training_states,
                                 [self.training_ps, self.training_vs],
                                 batch_size=Config.batch_size,
                                 epochs=Config.epochs,
                                 verbose=Config.verbose,
                                 callbacks=Config.callbacks)

            current_mcts = mcts(self.model, self.board.width)
            eval_mcts = mcts(self.eval_model, self.board.width)

            # Play multiple games between the old and the new network
            wins, losses = self.evaluate(current_mcts, eval_mcts)
            total_games = wins + losses

            if total_games == 0:
                win_rate = 0
            else:
                win_rate = wins / total_games

            print('Win rate of the new model vs the old model:', win_rate)

            if win_rate > Config.win_threshold:
                print('New model accepted', end='')
                self.model.model.save(best_path)
                print(' (Saved)')
            else:
                print('New model rejected ( Deleted )')
                # Roll back to the last accepted network, or to the pre-fit
                # snapshot when nothing has been accepted yet.
                if os.path.isfile(best_path):
                    self.model.model = load_model(best_path)
                else:
                    self.model.model = load_model(snapshot_path)

    def self_play(self, board):
        """Play one game against itself on ``board`` and record training samples.

        For every move, the current state and the policy read from the search
        root's children are appended to ``training_states`` / ``training_ps``.
        Once the game ends, one value target per recorded move is appended to
        ``training_vs``.

        Args:
            board: board to play on; it is mutated by the moves played.
        """
        mct = mcts(self.model, self.board.width)
        root = Node()

        game_over = False
        count = 0

        while not game_over:

            # Use the initial temperature for the first Config.early_training
            # moves, then switch to the final temperature.
            if count < Config.early_training:
                best_child = mct.get_move(board, root, temp=Config.initial_temperature)
            else:
                best_child = mct.get_move(board, root, temp=Config.final_temperature)

            state_iter = board.get_curr_state()
            ps_iter = best_child.parent.get_children_ps()

            self.training_states = np.append(self.training_states, state_iter, axis=0)
            self.training_ps = np.append(self.training_ps, [ps_iter], axis=0)

            board.play_move(best_child.move)

            # Detach the chosen child so it becomes the root of the next search.
            best_child.parent = None
            root = best_child

            count += 1
            game_over, result = board.game_over()

        # NOTE(review): every position of the game gets the same `result`; it is
        # not flipped per side to move. Confirm this matches the perspective
        # used by get_curr_state() and by the search.
        v_data = np.ones(count) * result

        self.training_vs = np.append(self.training_vs, v_data, axis=0)

    def evaluate(self, mcts1, mcts2):
        """Play ``Config.eval_games`` games between two searchers.

        ``mcts1`` moves whenever ``board.current_player == 1``; ``mcts2``
        moves otherwise.

        Args:
            mcts1: searcher backed by the newly fitted network.
            mcts2: searcher backed by the pre-fit snapshot.

        Returns:
            Tuple ``(wins, losses)`` from the point of view of ``mcts1``. A
            draw adds ``1e-4`` to ``wins``, so ``wins`` may be a float.
        """
        wins, losses = 0, 0
        print('Start evaluation')
        for i in range(Config.eval_games):
            #print('Evaluation number:', i)

            # Start from a copy of the trainer's own board, as self-play does,
            # so evaluation uses the same board configuration as training.
            board = self.board.make_copy()
            root = Node()

            # Query the terminal state once per move; this also guarantees
            # `result` is bound even if the loop body never runs.
            game_over, result = board.game_over()
            while not game_over:

                # NOTE(review): both searchers continue from the same tree, so
                # each one reuses nodes expanded by the other - confirm intended.
                if board.current_player == 1:
                    child = mcts1.get_move(board, root, temp=Config.final_temperature)
                else:
                    child = mcts2.get_move(board, root, temp=Config.final_temperature)

                board.play_move(child.move)
                game_over, result = board.game_over()

                # Detach the chosen child so it becomes the next search root.
                child.parent = None
                root = child
            board.pretty_print()

            if result == 1:
                print('New model wins')
                wins += 1
            elif result == -1:
                print('New model looses')
                losses += 1
            else:
                print('Draw')
                # NOTE(review): with only draws this makes the caller's win
                # rate 1.0 (wins / (wins + 0)) - confirm that is intended.
                wins += 1e-4

        print('Evaluation done')

        return wins, losses


In [None]:
# Build a board, a network sized (height, width) from it, and the trainer that uses both.
B = Board()
N = NNet(B.height, B.width)
T = Train(B, N)

Initializing model:
conv: (None, 6, 7, 256)
res (None, 6, 7, 256)
pi (None, 7)
v (None, 1)
Initializing model:
conv: (None, 7, 6, 256)
res (None, 7, 6, 256)
pi (None, 7)
v (None, 1)


In [None]:
 # Long-running: starts the full training loop (the recorded run was interrupted by hand).
 T.start_training()

Iteration : 0
Launching self simulation


KeyboardInterrupt: ignored

# Testing 

Available players:

In [None]:
# Opponents available for the test games below.
random_player = RandomPlayer("random player")
basic = BasicPlayer("Basic player")
# Was bound to the misspelled name `alha_beta`, leaving `alpha_beta` undefined on a fresh kernel.
alpha_beta = AlphaBetaPlayer("Alpha Beta Player")
hand_player = handPlayer()
# You can change path to the model
alpha_zero = AlphaZeroPlayer("Alpha Zero Player", '/content/drive/MyDrive/AlphaZeroModels/backup/best_model.h5' )

# Legacy alias for cells that still refer to the random player as `random`.
# It shadows the stdlib `random` module imported earlier, so prefer `random_player`.
random = random_player



Initializing model:
conv: (None, 6, 7, 256)
res (None, 6, 7, 256)
pi (None, 7)
v (None, 1)


In [None]:
# One game, AlphaZero agent moving first against the random player.
# The cell's displayed value is whatever game() returns.
game(alpha_zero, random)

Alpha Zero Player played: 0
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . . . . . 

random player played: 6
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . . . . 0 

Alpha Zero Player played: 1
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X X . . . . 0 

random player played: 4
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X X . . 0 . 0 

Alpha Zero Player played: 3
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X X . X 0 . 0 

random player played: 3
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . 0 . . . 
X X . X 0 . 0 

Alpha Zero Player played: 2
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . 0 . . . 
X X X X 0 . 0 

Alpha Zero Player wins


1

In [None]:
# One game, AlphaZero agent moving first against the basic player.
game(alpha_zero, basic)

Alpha Zero Player played: 0
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . . . . . 

Basic player played: 3
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . 0 . . . 

Alpha Zero Player played: 4
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . 0 X . . 

Basic player played: 2
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . 0 0 X . . 

Alpha Zero Player played: 2
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . X . . . . 
X . 0 0 X . . 

Basic player played: 1
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . X . . . . 
X 0 0 0 X . . 

Alpha Zero Player played: 4
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . X . X . . 
X 0 0 0 X . . 

Basic player played: 5
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . X . X . . 
X 0 0 0 X 0 . 

Alpha Zero Player played: 3
. . . . . . . 
. . . . . . . 
. . . . . 

1

In [None]:
# One game, AlphaZero agent moving first against the alpha-beta player.
game(alpha_zero, alpha_beta)

Alpha Zero Player played: 0
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . . . . . 

alphaBeta1 played: 3
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . 0 . . . 

Alpha Zero Player played: 0
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . . . . . 
X . . 0 . . . 

alphaBeta1 played: 2
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . . . . . . 
X . 0 0 . . . 

Alpha Zero Player played: 2
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . X . . . . 
X . 0 0 . . . 

alphaBeta1 played: 1
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . X . . . . 
X 0 0 0 . . . 

Alpha Zero Player played: 4
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . X . . . . 
X 0 0 0 X . . 

alphaBeta1 played: 3
. . . . . . . 
. . . . . . . 
. . . . . . . 
. . . . . . . 
X . X 0 . . . 
X 0 0 0 X . . 

Alpha Zero Player played: 3
. . . . . . . 
. . . . . . . 
. . . . . . . 
. .

1

In [None]:
# One game, AlphaZero agent moving first against the hand player
# (presumably interactive input - confirm in handPlay).
game(alpha_zero, hand_player)

## Testing 50 games between AlphaZero and random player

In [None]:
# Create the opponent here so the cell does not depend on a name left over
# from an earlier kernel session.
random_player = RandomPlayer("random player")

# Tally game() results over num_games games: 1 = win, -1 = loss, anything else = draw.
wins, looses, draws = 0, 0, 0
num_games = 50
for i in range(num_games):
    result = game(alpha_zero, random_player, 0)
    print('game:', i, 'result:', result)
    if result == 1:
        wins += 1
    elif result == -1:  # elif: a win must not fall through to the draw branch
        looses += 1
    else:
        draws += 1

game: 0 result: 1
game: 1 result: 1
game: 2 result: 1
game: 3 result: -1
game: 4 result: 1
game: 5 result: 1
game: 6 result: 1
game: 7 result: 1
game: 8 result: 1
game: 9 result: 1
game: 10 result: 1
game: 11 result: 1
game: 12 result: 1
game: 13 result: 1
game: 14 result: 1
game: 15 result: 1
game: 16 result: 1
game: 17 result: 1
game: 18 result: 1
game: 19 result: 1
game: 20 result: -1
game: 21 result: 1
game: 22 result: -1
game: 23 result: 1
game: 24 result: 1
game: 25 result: 1
game: 26 result: 1
game: 27 result: 1
game: 28 result: -1
game: 29 result: 1
game: 30 result: 1
game: 31 result: 1
game: 32 result: 1
game: 33 result: 1
game: 34 result: 1
game: 35 result: 1
game: 36 result: 1
game: 37 result: 1
game: 38 result: -1
game: 39 result: -1
game: 40 result: 1
game: 41 result: 1
game: 42 result: 1
game: 43 result: 1
game: 44 result: -1
game: 45 result: 1
game: 46 result: 1
game: 47 result: 1
game: 48 result: 1
game: 49 result: 1


In [None]:
# Number of games (out of num_games) won by the AlphaZero agent in the loop above.
print('Wins against random:', wins)

Wins against random: 43


In [None]:
# Scale the 0-1 fraction to a percentage so the value agrees with the '%' suffix.
print('Win ratio against random:', 100 * wins / num_games, '%')

Win ratio against random: 0.86 %
