This notebook contains everything needed to run our algorithm.
First run all the import and class-definition cells at the top; after that, there is one cell to test models against each other and one cell
to train the model.

In [1]:
import gym
import random
import time
import os

import numpy as np
import copy
import tensorflow as tf
print(tf.__version__)
from keras.layers import Conv2D
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras import Input
from keras.layers import Flatten
from tensorflow.keras.optimizers import Adam
import keras

2.8.0


Implémentation de puissance 4. Code provenant de https://github.com/IASIAI/gym-connect-four/blob/master/gym_connect_four/envs/connect_four_env.py

In [None]:
#!pip install pygame
#!pip install gym

In [2]:
#Connect Four implementation using gym, taken from https://github.com/IASIAI/gym-connect-four/blob/master/gym_connect_four/envs/connect_four_env.py
import random
from abc import ABC, abstractmethod
from collections import deque
from enum import Enum, unique
from operator import itemgetter
from typing import Tuple, NamedTuple, Hashable, Optional

import gym
import numpy as np
import pygame
from gym import error
from gym import spaces
#from keras.engine.saving import load_model

#unique
from PIL import Image, ImageDraw

class Color:
    """Named RGB triples used when drawing the board."""

    WHITE = (255, 255, 255)
    RED = (255, 0, 0)
    BLUE = (0, 0, 255)
    YELLOW = (255, 255, 0)

def render_board(board,
                 image_width=512,
                 image_height=512,
                 board_percent_x=0.8,
                 board_percent_y=0.8,
                 items_padding_x=0.05,
                 items_padding_y=0.05,
                 slot_padding_x=0.1,
                 slot_padding_y=0.1,
                 background_color=Color.WHITE,
                 board_color=Color.BLUE,
                 empty_slot_color=Color.WHITE,
                 player1_slot_color=Color.RED,
                 player2_slot_color=Color.YELLOW):
    """Draw a Connect Four board and return the picture as a NumPy array.

    Args:
        board: 2-D array indexed as ``board[row, column]``; a cell equal to 1
            is drawn with ``player1_slot_color``, -1 with
            ``player2_slot_color`` and anything else with ``empty_slot_color``.
        image_width, image_height: size of the generated image in pixels.
        board_percent_x, board_percent_y: fraction of the image covered by the
            board rectangle along each axis.
        items_padding_x, items_padding_y: extra margin (fraction of the image
            size) between the board rectangle and the outermost slots.
        slot_padding_x, slot_padding_y: margin inside each grid cell (fraction
            of the cell size) left free around the disc.
        background_color, board_color, empty_slot_color, player1_slot_color,
        player2_slot_color: RGB triples.

    Returns:
        ``np.array`` view of the RGB image.
    """
    # PIL takes the size as (width, height); the two were swapped before,
    # which only went unnoticed because the default image is square.
    image = Image.new('RGB', (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)

    board_width = int(image_width * board_percent_x)
    board_height = int(image_height * board_percent_y)

    padding_x = image_width - board_width
    padding_y = image_height - board_height

    padding_top = padding_y // 2
    padding_bottom = padding_y - padding_top

    padding_left = padding_x // 2
    padding_right = padding_x - padding_left

    draw.rectangle([
        (padding_left, padding_top),
        (image_width - padding_right, image_height - padding_bottom)
    ], fill=board_color)

    # From here on the paddings delimit the area that holds the slots.
    padding_left += int(items_padding_x * image_width)
    padding_right += int(items_padding_x * image_width)

    padding_top += int(items_padding_y * image_height)
    padding_bottom += int(items_padding_y * image_height)

    n_rows, n_columns = board.shape[0], board.shape[1]
    cage_width = int((image_width - padding_left - padding_right) / n_columns)
    # The vertical cell size must come from the image *height* (it used the
    # width, which is wrong for non-square images).
    cage_height = int((image_height - padding_top - padding_bottom) / n_rows)

    radius_x = int((cage_width - 2 * int(cage_width * slot_padding_x)) // 2)
    radius_y = int((cage_height - 2 * int(cage_height * slot_padding_y)) // 2)

    for row in range(n_rows):
        # Row 0 of the array is drawn at the bottom of the image.
        actual_row = n_rows - row - 1
        origin_y = padding_top + int(actual_row * cage_height + cage_height // 2)
        for column in range(n_columns):
            origin_x = padding_left + int(column * cage_width + cage_width // 2)

            player = board[row, column]
            color = empty_slot_color
            if player == 1:
                color = player1_slot_color
            elif player == -1:
                color = player2_slot_color

            draw.ellipse([
                (origin_x - radius_x, origin_y - radius_y),
                (origin_x + radius_x, origin_y + radius_y)
            ], fill=color)

    return np.array(image)
class ResultType(Enum):
    """Outcome of a move: game still running, draw, or a win for one side."""

    NONE = None   # game not finished
    DRAW = 0
    WIN1 = 1      # player  1 won
    WIN2 = -1     # player -1 won

    def __eq__(self, other):
        """
        Compare members by value.

        Need to implement this due to an unfixed bug in Python since 2017: https://bugs.python.org/issue30545

        Operands without a ``value`` attribute (plain ints, None, ...) are
        simply "not equal" instead of raising AttributeError.
        """
        if not hasattr(other, 'value'):
            return NotImplemented
        return self.value == other.value

    def __hash__(self):
        # Defining __eq__ alone would make members unhashable (unusable in
        # sets / as dict keys); hash by value to stay consistent with __eq__.
        return hash(self._value_)


class ConnectFourEnv(gym.Env):
    """
    Description:
        ConnectFour game environment. Player 1 is stored as 1 on the board,
        player 2 as -1, empty cells as 0. Row 0 of the board is the top row.
    Observation:
        Type: Box(low=-1, high=1, shape=board_shape, dtype=int)
    Actions:
        Type: Discrete(number of columns)
        Num     Action
        x       Column in which to insert next token (0-6 on the default board)
    Reward (from the point of view of the player who just moved):
        DEF_REWARD (0) for every non-terminal step.
        DRAW_REWARD (0) when the board is full without a winner.
        WIN_REWARD (1) when the move connects four.
        LOSS_REWARD (-1) when the result is a win for the other player.
    Starting State:
        All observations are assigned a value of 0
    Episode Termination:
        No more spaces left for pieces
        4 pieces are present in a line: horizontal, vertical or diagonally
    Invalid moves:
        Playing in a full column prints a message, places nothing and still
        passes the turn to the other player.
    """

    metadata = {'render.modes': ['human', 'console']}

    LOSS_REWARD = -1
    DEF_REWARD = 0
    DRAW_REWARD = 0
    WIN_REWARD = 1

    class StepResult(NamedTuple):
        """Result of one move, wrapping the ResultType it produced."""

        res_type: ResultType

        def get_reward(self, player: int):
            """Return the reward of this result as seen by `player` (1 or -1)."""
            if self.res_type is ResultType.NONE:
                return ConnectFourEnv.DEF_REWARD
            elif self.res_type is ResultType.DRAW:
                return ConnectFourEnv.DRAW_REWARD
            else:
                # winner * player is 1 when `player` is the winner, -1 otherwise.
                return {ResultType.WIN1.value: ConnectFourEnv.WIN_REWARD, ResultType.WIN2.value: ConnectFourEnv.LOSS_REWARD}[
                    self.res_type.value * player]

        def is_done(self):
            """True when the game is finished (draw or win)."""
            return self.res_type != ResultType.NONE

    def __init__(self, board_shape=(6, 7), window_width=512, window_height=512):
        """Create an empty board of `board_shape` (rows, columns).

        `window_width` / `window_height` are the pixel size of the pygame
        window and of the rendered image used by the 'human' render mode.
        """
        super(ConnectFourEnv, self).__init__()

        self.board_shape = board_shape

        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=board_shape,
                                            dtype=int)
        self.action_space = spaces.Discrete(board_shape[1])

        self.__current_player = 1
        self.__board = np.zeros(self.board_shape, dtype=int)

        self.__player_color = 1
        self.__screen = None
        self.__window_width = window_width
        self.__window_height = window_height
        self.__rendered_board = self._update_board_render()

    # NOTE: the upstream `run(player1, player2, ...)` game/learning loop was
    # disabled in this notebook (it relies on Player classes that are not
    # defined here); games are driven through `_step` instead.

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, dict]:
        """Play `action` for the current player (gym API).

        Returns:
            (board copy, reward for the player who just moved, done, info dict)
        """
        # _step() hands the turn to the opponent, so remember who is moving
        # first; otherwise a winning move would be rewarded with LOSS_REWARD.
        mover = self.__current_player
        step_result = self._step(action)
        reward = step_result.get_reward(mover)
        done = step_result.is_done()
        return self.__board.copy(), reward, done, {}

    def _step(self, action: int) -> StepResult:
        """Drop a token of the current player in column `action` and switch player.

        Returns the StepResult describing the board after the move.
        """
        result = ResultType.NONE

        if not self.is_valid_action(action):
            # Best effort: the move is skipped (nothing is placed below) but
            # the turn still changes.
            print('invalid move, return reward')

        # Place the token in the lowest free cell of the column.
        for index in reversed(range(self.board_shape[0])):
            if self.__board[index][action] == 0:
                self.__board[index][action] = self.__current_player
                break

        # A win has priority over a draw: the move filling the last free cell
        # may also connect four.
        if self.is_win_state():
            result = ResultType.WIN1 if self.__current_player == 1 else ResultType.WIN2
        elif np.count_nonzero(self.__board[0]) == self.board_shape[1]:
            result = ResultType.DRAW
        self.__current_player *= -1
        return self.StepResult(result)

    @property
    def board(self):
        """Copy of the current board."""
        return self.__board.copy()

    def reset(self, board: Optional[np.ndarray] = None) -> np.ndarray:
        """Restart the game with player 1 to move.

        Args:
            board: optional starting position; it is copied so later moves do
                not modify the caller's array. An empty board is used if None.

        Returns:
            A copy of the starting board.
        """
        self.__current_player = 1
        if board is None:
            self.__board = np.zeros(self.board_shape, dtype=int)
        else:
            self.__board = np.array(board)
        self.__rendered_board = self._update_board_render()
        return self.board

    def render(self, mode: str = 'console', close: bool = False) -> None:
        """Show the board.

        Args:
            mode: 'console' prints an ASCII grid (A = player 1, B = player 2);
                'human' draws the board in a pygame window.
            close: in 'human' mode, shut the pygame window down instead of
                drawing.

        Raises:
            gym.error.UnsupportedMode: for any other mode.
        """
        if mode == 'console':
            replacements = {
                self.__player_color: 'A',
                0: ' ',
                -1 * self.__player_color: 'B'
            }

            def render_line(line):
                return "|" + "|".join(
                    ["{:>2} ".format(replacements[x]) for x in line]) + "|"

            # One '---' segment per column ('|---+---+---+---+---+---+---|' for 7).
            hline = '|' + '+'.join(['---'] * self.board_shape[1]) + '|'
            print(hline)
            for row in self.__board:
                print(render_line(row))
            print(hline)

        elif mode == 'human':
            if close:
                # Nothing can be drawn once pygame has been shut down.
                pygame.quit()
                self.__screen = None
                return

            if self.__screen is None:
                pygame.init()
                self.__screen = pygame.display.set_mode(
                    (round(self.__window_width), round(self.__window_height)))

            self.__rendered_board = self._update_board_render()
            frame = self.__rendered_board
            surface = pygame.surfarray.make_surface(frame)
            surface = pygame.transform.rotate(surface, 90)
            self.__screen.blit(surface, (0, 0))

            pygame.display.update()
        else:
            raise error.UnsupportedMode()

    def close(self) -> None:
        """Shut pygame down; a later 'human' render re-creates the window."""
        pygame.quit()
        self.__screen = None

    def is_valid_action(self, action: int) -> bool:
        """True when the top cell of column `action` is still empty."""
        return self.__board[0][action] == 0

    def _update_board_render(self) -> np.ndarray:
        """Render the current board to an RGB array of the window size."""
        return render_board(self.__board,
                            image_width=self.__window_width,
                            image_height=self.__window_height)

    def is_win_state(self) -> bool:
        """True when any player has four tokens in a row on the board.

        Cells hold -1, 0 or 1, so four aligned cells belong to one player
        exactly when the absolute value of their sum is 4.
        """
        board = self.__board
        n_rows, n_cols = self.board_shape[0], self.board_shape[1]
        # (row step, column step): horizontal, vertical, diagonal, anti-diagonal
        directions = ((0, 1), (1, 0), (1, 1), (1, -1))
        for d_row, d_col in directions:
            for row in range(n_rows):
                for col in range(n_cols):
                    end_row = row + 3 * d_row
                    end_col = col + 3 * d_col
                    # Skip windows that would leave the board.
                    if not (0 <= end_row < n_rows and 0 <= end_col < n_cols):
                        continue
                    total = sum(board[row + k * d_row][col + k * d_col]
                                for k in range(4))
                    if abs(total) == 4:
                        return True
        return False

    def available_moves(self) -> frozenset:
        """Columns that can still receive a token."""
        return frozenset(
            (i for i in range(self.board_shape[1]) if self.is_valid_action(i)))

    def game_over(self):
        """True when the board is full or a player has connected four."""
        if np.count_nonzero(self.__board[0]) == self.board_shape[1]:
            return True
        return self.is_win_state()

pygame 2.1.2 (SDL 2.0.18, Python 3.9.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


La classe node est la classe qui est utilisée pour représenter les différents nœuds de l’arbre du MCTS.

In [3]:
class Node(object):
    """One node of the MCTS tree.

    Attributes:
        state: game environment at this node. Only the root (and the leaf
            returned by the last `moveToLeaf`) carry one; other nodes keep
            None to avoid creating too many environment instances.
        numbervisits: number of times the node was backed up through.
        Q: running average of the values backed up through the node.
        U: exploration bonus (PUCT term), refreshed by `updateU`.
        moveprobability: prior probability of the move leading to this node.
        parent: parent Node, or None for the root.
        children: dict mapping an action (column index) to the child Node.
        action: action that leads from the parent to this node (None for root).
    """

    def __init__(self, parent, probability, state=None, action=None):
        #we use state = none for all nodes that aren't root to avoid creating too much instance of state
        self.state = state
        self.numbervisits = 0
        #Q = average of estimated outcome for all the children of the node
        self.Q = 0
        #U = node value to assure exploration (see : https://www.nature.com/articles/nature24270.epdf?author_access_token=VJXbVjaSHxFoctQQ4p2k4tRgN0jAjWel9jnR3ZoTv0PVW4gB86EEpGqTRDtpIz-2rmo8-KG06gqVobU5NSCFeHILHcVFUeMsbvwS-lxjqQGg98faovwjxeTUgZAUMnRQ page8)
        self.U = 0
        self.moveprobability = probability
        self.parent = parent
        self.children = {}
        self.action = action

    def updateU(self):
        """Recompute the exploration bonus from the prior and the visit counts.

        Must only be called on a node that has a parent.
        """
        #cpuct
        csteExpl = 2
        self.U = csteExpl * self.moveprobability * np.sqrt(self.parent.numbervisits) / (1 + self.numbervisits)

    def updateNode(self, value):
        """Count one more visit and fold `value` into the running mean Q."""
        self.numbervisits += 1
        self.Q += (value - self.Q) / self.numbervisits

    def backFill(self, value):
        """Back `value` up from this node to the root.

        The sign is flipped at every level because a parent represents the
        opposing player.
        """
        if self.parent is not None:
            self.parent.backFill(-value)
        self.updateNode(value)

    def addChildren(self, NN_move_probability):
        """Create one child per action whose prior probability is positive.

        Args:
            NN_move_probability: sequence indexed by action.
        """
        for action, probability in enumerate(NN_move_probability):
            if probability > 0:
                self.children[action] = Node(self, probability, action=action)

    def moveToLeaf(self):
        """Descend from this node to a leaf, always following the best Q + U.

        The moves are replayed on a deep copy of this node's state, which is
        then stored on the leaf; this node's own state is left untouched
        (unless this node is itself the leaf, in which case it receives the copy).

        Returns:
            The leaf Node reached.
        """
        currentNode = self
        current_state = copy.deepcopy(self.state)
        #while we are not on a leaf :
        while currentNode.children:
            #update this to add noise see : https://drive.google.com/drive/folders/1qFpWV_xuGIPKwK0cFV7yzJg2glx825in
            # Acquisition of Chess Knowledge in AlphaZero page 8
            candidates = list(currentNode.children.values())
            for node in candidates:
                node.updateU()
            # max() keeps the first child on ties and always returns a child,
            # so the descent cannot get stuck on the same node.
            currentNode = max(candidates, key=lambda node: node.Q + node.U)
            if currentNode.action is not None:
                current_state._step(currentNode.action)
        currentNode.state = current_state
        return currentNode

    def isLeaf(self):
        """True when the node has not been expanded yet (no children)."""
        return len(self.children) == 0



La classe MCTS est la classe utilisée pour effectuer la recherche arborescente Monte Carlo. 

In [74]:
class MCTS():
    """Runs the Monte Carlo tree search guided by a policy/value network.

    Args:
        NeuralNetwork: object whose `predict(batch)` returns
            (move probabilities, value) for a batch of boards.
        rootNode: Node holding the position to search from (its `state` must
            be set).
    """
    def __init__(self, NeuralNetwork, rootNode):
        self.NN = NeuralNetwork
        self.root = rootNode

    def oneIteration(self):
        """Run one simulation: select a leaf, evaluate it, expand it, back up.

        Returns:
            The value that was backed up from the leaf.
        """
        #we use the moveToLeaf method to go to the node leaf node based on the scores of every nodes
        leafNode = self.root.moveToLeaf()
        state = leafNode.state
        #if the game is over we use  the true reward
        if state.is_win_state():
            # The player who moved into the leaf has just won.
            value = 1
            leafNode.backFill(value)
            return value
        legalMoves = state.available_moves()
        if not legalMoves:
            # Full board without a winner: a draw. Without this guard the
            # policy below would be normalised by 0.
            value = 0
            leafNode.backFill(value)
            return value
        #we get p and v from our NN
        board = state._ConnectFourEnv__board
        player = state._ConnectFourEnv__current_player
        # The board is multiplied by the player to move so the network always
        # sees that player's tokens as +1.
        probabilities, value = self.NN.predict([board.reshape(1, *board.shape) * player])
        probabilities = np.asarray(probabilities[0], dtype=float)
        # Back up a plain float rather than the (1, 1) prediction array.
        value = float(np.asarray(value).reshape(-1)[0])
        #keep only the legal moves and renormalise
        legalMovesMask = np.array([1.0 if i in legalMoves else 0.0 for i in range(len(probabilities))])
        probabilities = probabilities * legalMovesMask
        total = np.sum(probabilities)
        if total > 0:
            probabilities = probabilities / total
        else:
            # The network put no mass on any legal move: fall back to uniform.
            probabilities = legalMovesMask / np.sum(legalMovesMask)
        #we create new nodes from our leaf
        leafNode.addChildren(probabilities)
        leafNode.backFill(value)
        return value

    def selectAction(self):
        """Return (child, action) for the most visited child of the root.

        Returns (None, None) when the root has not been expanded.
        """
        candidates = list(self.root.children.values())
        if not candidates:
            return None, None
        maxNode = max(candidates, key=lambda node: node.numbervisits)
        return maxNode, maxNode.action

    def nIterations(self, nb_of_iteration):
        """Run `nb_of_iteration` simulations from the root.

        Returns:
            dict mapping each root action to its share of the root children's
            visits. When no child has been visited yet, every expanded action
            gets the same share.
        """
        for _ in range(nb_of_iteration):
            self.oneIteration()
        rootChildren = list(self.root.children.values())
        sum_numberofvisit = sum(child.numbervisits for child in rootChildren)
        if sum_numberofvisit == 0:
            # Avoid 0/0 (e.g. after a single simulation the root is expanded
            # but none of its children has been visited).
            return {child.action: 1 / len(rootChildren) for child in rootChildren}
        return {child.action: child.numbervisits / sum_numberofvisit for child in rootChildren}

In [387]:
def softmax(p):
    """Return the softmax of the 1-D sequence `p` as a NumPy array.

    The maximum is subtracted before exponentiating, which leaves the result
    unchanged but avoids overflow (and NaN) for large inputs. An empty input
    gives an empty array.
    """
    values = np.asarray(p, dtype=float)
    if values.size == 0:
        return values
    exponentials = np.exp(values - np.max(values))
    return exponentials / np.sum(exponentials)


def modelAgainstModel(model1, model2, numberOfGames, nbIterations=75):
    """Make two models play against each other and report player 1's win rate.

    model1 always plays as player 1 (moves first), model2 as player -1.

    Args:
        model1, model2: networks handed to MCTS (must provide `predict`).
        numberOfGames: number of games to play.
        nbIterations: MCTS simulations run before every move.

    Returns:
        Fraction of the games won by model1; draws count as non-wins.
        0.0 when `numberOfGames` is not positive.
    """
    if numberOfGames <= 0:
        return 0.0
    winsPlayer1 = 0
    env = ConnectFourEnv()
    nbColumns = env.board_shape[1]
    for _ in range(numberOfGames):
        #we create the game
        env.reset()
        GameOver = False
        while not GameOver:
            # Only the model whose turn it is searches from the current position.
            model = model1 if env._ConnectFourEnv__current_player == 1 else model2
            currentStateNode = Node(None, 0, state=env, action=None)
            probabilitiesnodemcts = MCTS(model, currentStateNode).nIterations(nbIterations)
            probabilities = [0 for _ in range(nbColumns)]
            for move, probability in probabilitiesnodemcts.items():
                probabilities[move] = probability
            # NOTE(review): softmax over visit *fractions* flattens the
            # distribution a lot; sampling with p=visit fractions (or argmax)
            # may be what was intended — confirm before changing.
            action = np.random.choice(list(probabilitiesnodemcts.keys()), p=softmax(list(probabilitiesnodemcts.values())))
            env._step(action)
            GameOver = env.game_over()
            print(f'probabilities of doing each moves : {probabilities}')
            print(np.array(env._ConnectFourEnv__board))
        # _step() already passed the turn, so the last mover is the opposite
        # of the current player. A full board without four in a row is a draw
        # (winner 0) and must not be credited to the last mover.
        winner = -1 * env._ConnectFourEnv__current_player if env.is_win_state() else 0
        if winner == 1:
            winsPlayer1 += 1
        print(f'gagnant :  {winner}')
    return winsPlayer1 / numberOfGames


class Agent():
    """Wraps the policy/value network with self-play data generation and training."""

    def __init__(self, input_shape, output_shape, model=None):
        """Use `model` if given, otherwise build a new small network.

        Args:
            input_shape: shape of one network input, e.g. (6, 7, 1).
            output_shape: number of possible moves.
            model: optional already built / loaded Keras model.
        """
        if model is None:
            self.neuralNetwork = createModelSmall(input_shape, output_shape, l2const =0.2)
        else:
            self.neuralNetwork = model

    def generateData(self, gamesPerBundle, nbIterations=50):
        """Play `gamesPerBundle` self-play games and collect training samples.

        Args:
            gamesPerBundle: number of games to play.
            nbIterations: MCTS simulations run before every move.

        Returns:
            (statesBatch, policiesBatch, valuesBatch), one entry per recorded
            position:
              * state: board of shape (rows, columns, 1) multiplied by the
                player to move (that player's tokens are +1);
              * policy: MCTS visit fractions, one per column;
              * value: outcome of the game for the player who moved *into*
                the position (+1 win, -1 loss, 0 draw). This is the
                convention MCTS.oneIteration uses when it backs the network
                value up from a leaf.
        """
        statesBatch = []
        policiesBatch = []
        valuesBatch = []
        model = self.neuralNetwork
        env =  ConnectFourEnv()
        nbColumns = env.board_shape[1]
        for game in range(gamesPerBundle):
            print(f'Game {game} in progress')
            GameOver = False
            #we create the game
            env.reset()
            # -1 * (player to move) for every recorded position, i.e. the
            # player who moved into it; turned into targets once the winner
            # is known.
            previousMovers = []

            while not GameOver:
                currentPlayer = env._ConnectFourEnv__current_player
                board = env._ConnectFourEnv__board
                currentStateNode = Node(None, 0, state=env, action=None)
                probabilitiesnodemcts = MCTS(model, currentStateNode).nIterations(nbIterations)
                #we don't take every position because "positions on subsequent moves are strongly correlated,
                #and including all of them may lead to increased overfitting."
                # (the threshold is currently 1, so every position is kept)
                if np.random.rand(1)[0] < 1:
                    statesBatch.append(board.reshape(*board.shape, 1) * currentPlayer)
                    probabilities = [0 for _ in range(nbColumns)]
                    for move, probability in probabilitiesnodemcts.items():
                        probabilities[move] = probability
                    policiesBatch.append(probabilities)
                    previousMovers.append(-1 * currentPlayer)
                # NOTE(review): softmax over visit *fractions* flattens the
                # distribution a lot; sampling with p=visit fractions may be
                # what was intended — confirm before changing.
                action = np.random.choice(list(probabilitiesnodemcts.keys()), p=softmax(list(probabilitiesnodemcts.values())))
                env._step(action)
                GameOver = env.game_over()
            #once the game is over we check the reward
            # _step() already passed the turn, so the winner (if any) is the
            # opposite of the current player; a full board without four in a
            # row is a draw. The targets used to ignore the outcome entirely,
            # which trained the value head on "whose turn it is".
            winner = -1 * env._ConnectFourEnv__current_player if env.is_win_state() else 0
            valuesBatch.extend(np.array(previousMovers) * winner)
        return (statesBatch, policiesBatch, valuesBatch)

    def Train(self, statesBatch, policiesBatch, valuesBatch):
        """Fit the network for 2 epochs on the given samples (batch size 32)."""
        self.neuralNetwork.fit(np.array(statesBatch), [np.array(policiesBatch), np.array(valuesBatch)],epochs=2,batch_size=32)

    def selfPlay(self, gamesPerBundle):
        """Placeholder: does nothing and returns True."""
        return True

    def saveModel(self, path):
        """Save the network under '<cwd>/modelSaved<path>'."""
        cwd = os.getcwd()
        self.neuralNetwork.save(cwd + '/modelSaved' + path)

    def loadModel(self, path):
        """Load the network stored at `path`, install it on the agent and return it.

        path = 'path/to/location'
        """
        # Load once and return the very model the agent now uses.
        self.neuralNetwork = keras.models.load_model(path)
        return self.neuralNetwork

    def train(self, gamesPerBundle, nbCheckpoint):
        """Alternate self-play and training `nbCheckpoint` times.

        After every round the generated samples are written to
        'statesBatchSaved<k>', 'policiesBatchSaved<k>', 'valuesBatchSaved<k>'
        (np.save) and the model is saved with `saveModel('<k>')`.

        Returns:
            True once all rounds are done.
        """
        for checkpoint in range(nbCheckpoint):
            statesBatch, policiesBatch, valuesBatch = self.generateData(gamesPerBundle)
            self.Train(statesBatch, policiesBatch, valuesBatch)
            np.save('statesBatchSaved'+ f'{checkpoint}', statesBatch)
            np.save('policiesBatchSaved' + f'{checkpoint}', policiesBatch)
            np.save('valuesBatchSaved'+ f'{checkpoint}', valuesBatch)
            self.saveModel(f'{checkpoint}')
        return True






In [388]:
def createModelSmall(input_shape ,output_shape, l2const):
    """Build and compile the small policy/value residual network.

    Args:
        input_shape: shape of one input board, e.g. (6, 7, 1).
        output_shape: number of moves, i.e. size of the policy output.
        l2const: L2 regularisation factor of the residual convolutions.

    Returns:
        Compiled Keras model with outputs [policy (softmax), value (tanh)].
    """
    board_input = Input(input_shape)
    # NOTE(review): this convolution already applies a ReLU before the batch
    # normalisation + ReLU below (createModel does not); kept as is so the
    # architecture of already trained models does not change.
    layer = Conv2D(75, (3,3), use_bias=False, activation='relu',padding='same',strides=1)(board_input)
    layer = BatchNormalization()(layer)
    layer = Activation('relu')(layer)
    #smaller version of the deep resnet used in alpha zero :
    for _ in range(3):
        res_layer = layer
        layer = keras.layers.Conv2D(75, (4, 4), kernel_regularizer = keras.regularizers.l2(l2const),padding='same',strides=1)(layer)
        layer = keras.layers.Activation("relu")(layer)
        layer = keras.layers.Conv2D(75, (2, 2), kernel_regularizer = keras.regularizers.l2(l2const),padding='same',strides=1)(layer)
        layer = keras.layers.BatchNormalization()(layer)
        layer = keras.layers.Add()([layer, res_layer])
        layer = keras.layers.Activation("relu")(layer)

    # Value head: one scalar squashed to [-1, 1] by tanh.
    value_layer = layer
    value_layer = Conv2D(1, (1,1))(value_layer)
    value_layer = BatchNormalization()(value_layer)
    value_layer = Flatten()(value_layer)
    value_layer = Activation('relu')(value_layer)
    value_layer = Dense(16, activation='relu')(value_layer)
    value_layer = Dense(1, activation='tanh', name = 'value')(value_layer)

    # Policy head: one probability per move. The size follows `output_shape`
    # instead of a hard-coded 7.
    policy_layer = layer
    policy_layer = Conv2D(2, (1,1),padding='same',strides=1)(policy_layer)
    policy_layer = BatchNormalization()(policy_layer)
    policy_layer = Flatten()(policy_layer)
    policy_layer = Activation("relu")(policy_layer)
    policy_layer = Dense(output_shape)(policy_layer)
    policy_layer = keras.layers.Activation("softmax", name = "policy")(policy_layer)

    model = keras.models.Model(inputs = [board_input], outputs = [policy_layer, value_layer])
    model.compile(
            optimizer = Adam(),
            loss = [keras.losses.categorical_crossentropy, keras.losses.mean_squared_error],
            loss_weights = [0.5, 0.5],
            metrics=["accuracy"])
    return model

def createModel(input_shape, output_shape, l2const):
    """Build and compile the larger policy/value residual network.

    Args:
        input_shape: shape of one input board, e.g. (6, 7, 1).
        output_shape: number of moves, i.e. size of the policy output.
        l2const: L2 regularisation factor of the residual convolutions.

    Returns:
        Compiled Keras model with outputs [policy (softmax), value (tanh)].
    """
    board_input = Input(input_shape)
    layer = Conv2D(64, (3,3), use_bias=False, padding='same',strides=1)(board_input)
    layer = BatchNormalization()(layer)
    layer = Activation('relu')(layer)
    #smaller version of the deep resnet used in alpha zero :
    for _ in range(4):
        res_layer = layer
        layer = keras.layers.Conv2D(64, (4, 4), kernel_regularizer = keras.regularizers.l2(l2const),padding='same',strides=1)(layer)
        layer = keras.layers.Activation("relu")(layer)
        layer = keras.layers.Conv2D(64, (2, 2), kernel_regularizer = keras.regularizers.l2(l2const),padding='same',strides=1)(layer)
        layer = keras.layers.BatchNormalization()(layer)
        layer = keras.layers.Add()([layer, res_layer])
        layer = keras.layers.Activation("relu")(layer)

    # Value head: one scalar squashed to [-1, 1] by tanh.
    value_layer = layer
    value_layer = Conv2D(1, (1,1))(value_layer)
    value_layer = BatchNormalization()(value_layer)
    value_layer = Flatten()(value_layer)
    value_layer = Activation('relu')(value_layer)
    value_layer = Dense(128, activation='relu')(value_layer)
    value_layer = Dense(1, activation='tanh', name = 'value')(value_layer)

    # Policy head: one probability per move.
    policy_layer = layer
    policy_layer = Conv2D(output_shape, (1,1),padding='same',strides=1)(policy_layer)
    policy_layer = BatchNormalization()(policy_layer)
    policy_layer = Activation("relu")(policy_layer)
    policy_layer = keras.layers.Flatten()(policy_layer)
    # Flattening alone leaves rows * columns * output_shape values, which
    # cannot be trained against a policy target of size output_shape; project
    # down to one logit per move before the softmax.
    policy_layer = Dense(output_shape)(policy_layer)
    policy_layer = keras.layers.Activation("softmax", name = "policy")(policy_layer)

    model = keras.models.Model(inputs = [board_input], outputs = [policy_layer, value_layer])
    model.compile(
            optimizer = Adam(),
            loss = [keras.losses.categorical_crossentropy, keras.losses.mean_squared_error],
            loss_weights = [0.5, 0.5],
            metrics=["accuracy"])
    return model

In [369]:
# Build an agent with a new (untrained) network: input shape (6, 7, 1), 7 possible moves,
# then print the layer-by-layer summary of that network.
agent = Agent((6,7,1), 7)
agent.neuralNetwork.summary()

Model: "model_32"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_33 (InputLayer)          [(None, 6, 7, 1)]    0           []                               
                                                                                                  
 conv2d_476 (Conv2D)            (None, 6, 7, 75)     675         ['input_33[0][0]']               
                                                                                                  
 batch_normalization_286 (Batch  (None, 6, 7, 75)    300         ['conv2d_476[0][0]']             
 Normalization)                                                                                   
                                                                                                  
 activation_476 (Activation)    (None, 6, 7, 75)     0           ['batch_normalization_286[

In [380]:
# Draw the network graph; this needs pydot and graphviz to be installed (see the message below).
keras.utils.vis_utils.plot_model(agent.neuralNetwork)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [None]:
# Create the two agents, then load the chosen model into each of them. An agent for which no model
# is loaded keeps a newly created, untrained network (close to a classic MCTS with no NN guidance).
agent2 = Agent((6,7,1), 7)
agent1 = Agent((6,7,1), 7)
cwd = os.getcwd()
#agent1.neuralNetwork = keras.models.load_model(cwd + '/bestModel')
agent2.neuralNetwork = keras.models.load_model(cwd + '/bestModel')
# modelAgainstModel(model1, model2, numberOfGames): model1 plays as player 1, model2 as player -1
winRatePlayer1 = modelAgainstModel(agent1.neuralNetwork, agent2.neuralNetwork,10)


In [389]:
# Value returned by modelAgainstModel for the match above (player 1 is agent1).
winRatePlayer1

0.1

Use this cell to train the model:

In [None]:
#agent.train(60,1)
#parameter 1: number of self-play games played before each network update
#parameter 2: number of times we play that many games and then update the network