In [None]:
!pip install chess

Collecting chess
  Downloading chess-1.11.1.tar.gz (156 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/156.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.5/156.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.1-py3-none-any.whl size=148497 sha256=7be1e0d78edcf78a0cf086e7436d999b1a86113580773cd9a79514c05629a29d
  Stored in directory: /root/.cache/pip/wheels/f0/3f/76/8783033e8524d407e1bebaf72fdd3f3eba27e0c030e92bbd87
Successfully built chess
Installing collected packages: chess
Successfully installed chess-1.11.1


In [None]:
from chess import Board, Move
import chess
import math
import numpy as np


class Edge:
    """Directed link in the search tree: playing ``action`` in ``in_node`` leads to ``out_node``.

    Statistics kept on every edge:
        N: number of times this action has been taken from ``in_node``.
        W: running total of the values added to this edge.
        P: prior probability of selecting this action.
    """

    def __init__(self, in_node, out_node, action, prior):
        self.in_node, self.out_node = in_node, out_node
        self.action = action

        # The second space-separated field of the parent's state string is the
        # side to move; True when it is "w".
        side_to_move = self.in_node.state.split(" ")[1]
        self.player_turn = side_to_move == "w"

        self.N = 0
        self.W = 0
        self.P = prior

    def upper_confidence_bound(self, noise:float) ->float:
        """Return the selection score of this edge.

        The score is a mean-value term ``W / (N + 1)`` plus an exploration
        bonus scaled by the prior, ``noise`` and the parent's visit count.
        The mean-value term is negated when the parent's side to move is not
        white.
        """
        parent = self.in_node
        exploration_constant = math.sqrt(2)

        mean_value = self.W/(self.N+1)
        visit_ratio = math.log(parent.N+1)/(self.N+1)
        bonus = exploration_constant*(self.P*noise)*math.sqrt(visit_ratio)

        signed_value = mean_value if parent.turn==chess.WHITE else -1*mean_value
        return signed_value+bonus
class Node:
    """A board position, stored as a FEN string, inside the Monte Carlo tree."""

    def __init__(self, state:str):
        self.state = state
        # Side to move, as reported by python-chess for this position.
        self.turn = Board(state).turn

        # Outgoing Edge objects, one per action registered through add_child.
        self.edges = []
        # Visit count and value estimate of this node.
        self.N = 0
        self.value = 0

    def step(self, action:Move):
        """Return the FEN reached by playing ``action`` from this node's position.

        The node itself is not modified; the move is played on a temporary board.
        """
        position = Board(self.state)
        position.push(action)
        return position.fen()

    def is_game_over(self):
        """Return True when the position held by this node is finished, else False."""
        return True if Board(self.state).is_game_over() else False

    def add_child(self, child, action, prior):
        """Connect ``child`` to this node through a new edge and return that edge."""
        new_edge = Edge(in_node = self, out_node = child, action = action, prior = prior)
        self.edges.append(new_edge)
        return new_edge

    def get_edge(self, action):
        """Return the first outgoing edge whose action equals ``action``, or None."""
        return next((edge for edge in self.edges if edge.action==action), None)

In [None]:
class ChessEnv:
    """Wraps a python-chess board and provides the network input encoding."""

    def __init__(self, fen: str = chess.STARTING_FEN):
        # The starting FEN is remembered so reset() can restore it.
        self.fen = fen
        self.board = Board(self.fen)

    def step(self, move: Move) -> Board:
        """Play ``move`` on the wrapped board and return that same board object."""
        self.board.push(move)
        return self.board

    def reset(self):
        """Replace the board with a fresh one set up from the starting FEN."""
        self.board = Board(self.fen)

    @staticmethod
    def state_to_input(fen):
        """Encode a FEN position as a boolean array of shape (1, 8, 8, 19).

        The last axis holds 19 feature planes, in this order: side to move (1),
        castling rights W-queenside, W-kingside, B-queenside, B-kingside (4),
        fifty-move claim flag (1), piece placement for each colour in
        ``chess.COLORS`` and each type in ``chess.PIECE_TYPES`` (12), and the
        legal en passant square (1). Within a plane, row 0 is rank 8 and
        column 0 is file a.

        Args:
            fen: FEN string of the position to encode.

        Returns:
            numpy bool array of shape (1, 8, 8, 19).
        """
        board = Board(fen)

        def filled(flag):
            # A whole plane of ones when the flag is set, zeros otherwise.
            return np.ones((8, 8)) if flag else np.zeros((8, 8))

        is_white_turn = filled(board.turn)
        castling = [
            filled(board.has_queenside_castling_rights(chess.WHITE)),
            filled(board.has_kingside_castling_rights(chess.WHITE)),
            filled(board.has_queenside_castling_rights(chess.BLACK)),
            filled(board.has_kingside_castling_rights(chess.BLACK)),
        ]
        fifty_move = filled(board.can_claim_fifty_moves())

        pieces = []
        for color in chess.COLORS:
            for piece_type in chess.PIECE_TYPES:
                plane = np.zeros((8, 8))
                for square in board.pieces(piece_type, color):
                    plane[7 - square // 8][square % 8] = True
                pieces.append(plane)

        en_passant = np.zeros((8, 8))
        if board.has_legal_en_passant():
            en_passant[7 - board.ep_square // 8][board.ep_square % 8] = True

        # Stacking gives shape (19, 8, 8). The plane axis has to be *moved* to
        # the end: a plain reshape to (8, 8, 19) keeps the memory order and
        # would smear each plane across rows, columns and channels.
        planes = np.stack([is_white_turn, *castling, fifty_move, *pieces, en_passant])
        r = np.moveaxis(planes, 0, -1)[np.newaxis, ...]

        return r.astype(bool, order="C")

    def __str__(self):
        return str(self.board)

In [None]:
import tensorflow as tf
from keras.api.models import Sequential
from keras.api.layers import Activation, Dense, Dropout, Flatten, Conv2D, BatchNormalization, LeakyReLU, Input
from keras.api.optimizers import Adam

from keras.api.layers import add as add_layer
from keras.api.models import Model
#from tensorflow.python.keras.engine.keras_tensor import Kerastensor
from tensorflow.python.keras.engine.keras_tensor import KerasTensor
from tensorflow.python.types.core import ConcreteFunction



In [None]:
# Chess board is 8x8
n = 8

# Input planes, in the order ChessEnv.state_to_input stacks them:
# side to move (1), castling rights (4), fifty-move flag (1),
# piece placement (6 piece types x 2 colours) and en passant (1) -> 19 planes.
amount_of_input_planes = 1 + 4 + 1 + 6 * 2 + 1

INPUT_SHAPE = (n, n, amount_of_input_planes)


# Neural network outputs
# The model outputs a policy and a value.
# OUTPUT_SHAPE[0] is the number of policy entries: for each of the 8x8 squares
# there are 56 queen-like move planes, 8 knight move planes and
# 9 underpromotion planes, i.e. 8*8*(56+8+9) = 4672 values.
# OUTPUT_SHAPE[1] is the size of the value output, a single scalar (v).
queen_planes, knight_planes, underpromotion_planes = 56, 8, 9
amount_of_planes = sum((queen_planes, knight_planes, underpromotion_planes))

OUTPUT_SHAPE = (n * n * amount_of_planes, 1)


# Neural network params
LEARNING_RATE = 0.2
CONVOLUTION_FILTERS = 256
AMOUNT_OF_RESIDUAL_BLOCKS = 19

In [None]:
class ModelBuilder:
    """
    Builds the neural network architecture: a convolutional stem, a tower of
    residual blocks and two heads (policy and value).
    """

    def __init__(self, input_shape, output_shape):
        """
        Neural network f that takes as input the raw board representation and outputs move probabilities p and a value v:
        f(s) = (p,v), where p is a vector of move probabilities and v is the expected value of the position

        Args:
            input_shape: (rows, cols, planes) of one encoded position, channels last.
            output_shape: (policy size, value size).
        """
        self.input_shape = input_shape
        self.output_shape = output_shape
        # Depth of the residual tower; taken from the notebook-level constant
        # instead of repeating the literal 19 here.
        self.nr_hidden_layers = AMOUNT_OF_RESIDUAL_BLOCKS
        self.convolution_filters = CONVOLUTION_FILTERS

    def _head_input_shape(self):
        """Shape of the residual tower's output for one sample: (rows, cols, filters)."""
        return (self.input_shape[0], self.input_shape[1], self.convolution_filters)

    def build_convolutional_layer(self, input_layer):
        """Conv (3x3, stride 1, 'same' padding, no bias) -> batch norm -> ReLU."""
        # The input is (rows, cols, planes), so the channel axis is the last one.
        # 'channels_first' here would treat a board dimension as the channels.
        layer = Conv2D(filters = self.convolution_filters, kernel_size = (3,3), strides = (1,1),
                       padding = 'same', data_format = 'channels_last', use_bias = False)(input_layer)
        layer = BatchNormalization(axis = -1)(layer)
        layer = Activation('relu')(layer)

        return layer

    def build_residual_layer(self,input_layer):
        """Residual block: conv layer, second conv + batch norm, skip connection, ReLU."""
        layer = self.build_convolutional_layer(input_layer)

        layer = Conv2D(filters = self.convolution_filters, kernel_size = (3,3), strides = (1,1),
                       padding = 'same', data_format = 'channels_last', use_bias = False)(layer)
        layer = BatchNormalization(axis = -1)(layer)

        # Skip connection: add the block input back before the activation.
        layer = add_layer([layer,input_layer])

        layer = Activation('relu')(layer)
        return layer

    def build_value_head(self) -> Model:
        """
        Builds the value head: 1x1 conv -> batch norm -> ReLU -> Dense(256) -> ReLU
        -> Dense(output_shape[1]) with tanh.
        """
        model = Sequential(name = 'value_head')
        # An explicit Input object replaces the input_shape= layer argument,
        # which Keras warns about inside Sequential models.
        model.add(Input(shape = self._head_input_shape()))
        model.add(Conv2D(1, kernel_size=(1,1), strides = (1,1),
                         padding = 'same', data_format='channels_last'))
        model.add(BatchNormalization(axis=-1))
        model.add(Activation('relu'))
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation('relu'))

        model.add(Dense(self.output_shape[1],
                        activation='tanh',name='value_head'))

        return model

    def build_policy_head(self):
        """
        Builds the policy head: 1x1 conv -> batch norm -> ReLU -> flatten
        -> Dense(output_shape[0]) with sigmoid.
        """
        model = Sequential(name='policy_head')
        model.add(Input(shape = self._head_input_shape()))
        model.add(Conv2D(1, kernel_size=(1,1), strides = (1,1),
                         padding = 'same', data_format='channels_last'))
        model.add(BatchNormalization(axis=-1))
        model.add(Activation('relu'))
        # A second ReLU after Flatten was a no-op (ReLU of a ReLU) and is dropped.
        model.add(Flatten())

        # NOTE(review): the output is trained with categorical cross-entropy but
        # uses sigmoid, so it is not a normalised distribution. Softmax would be
        # the usual pairing, but the search currently consumes these values as
        # unnormalised priors -- change both together if at all.
        model.add(Dense(self.output_shape[0],
                        activation='sigmoid',name='policy_head'))
        return model

    def build_model(self):
        """Assemble stem, residual tower and both heads, then compile the model.

        Returns:
            A compiled Keras model with outputs [policy_head, value_head].
        """
        board_input = Input(shape = self.input_shape, name = 'input')

        x = self.build_convolutional_layer(board_input)

        # add residual blocks
        for _ in range(self.nr_hidden_layers):
            x = self.build_residual_layer(x)

        policy_head = self.build_policy_head()
        value_head = self.build_value_head()

        model = Model(inputs = board_input, outputs = [policy_head(x),value_head(x)])

        model.compile(
            loss = {
                'policy_head':'categorical_crossentropy',
                'value_head': 'mean_squared_error'
            },
            optimizer = Adam(learning_rate=LEARNING_RATE),
            loss_weights = {
                'policy_head':0.5,
                'value_head':0.5
            }
        )

        return model

import os

# Build the network with the configured input/output shapes.
model_builder = ModelBuilder(input_shape = INPUT_SHAPE, output_shape = OUTPUT_SHAPE)
model = model_builder.build_model()

# Create the output folder if needed; exist_ok avoids the check-then-create
# race of os.path.exists followed by os.makedirs.
os.makedirs("models", exist_ok=True)

# Save the freshly built model in the native Keras format.
model.save(os.path.join("models", "base_model.keras"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:

import numpy as np
from tqdm import tqdm
from chess import Board
from chess import PieceType
import logging
import cProfile
import pstats
import io


class MCTS:
    """Monte Carlo tree search over chess positions, guided by an agent's network.

    Each simulation walks from the root along the highest-scoring edges,
    expands the node it stops at using the agent's policy/value prediction and
    adds that node's value to every edge that was followed.
    """

    # Plane layout of the policy output (73 planes in total):
    #   0..55  queen-like moves, 8 directions x 7 distances
    #   56..63 knight moves
    #   64..72 underpromotions, 3 pieces x 3 directions
    QUEEN_DISTANCES = 7
    KNIGHT_PLANE_OFFSET = 56
    UNDERPROMOTION_PLANE_OFFSET = 64

    def __init__(self,agent, state = chess.STARTING_FEN,stochastic = False):
        """Create a tree rooted at ``state`` (a FEN string) and expand the root.

        Args:
            agent: object whose ``predict(input)`` returns ``(p, v)``.
            state: FEN of the root position.
            stochastic: stored on the instance; not read by the search itself.
        """
        self.root = Node(state=state)

        # Edges followed during the current simulation, root first.
        self.game_path = []
        # Board built by the most recent probabilities_to_actions call.
        self.cur_board = None

        self.agent = agent
        self.stochastic = stochastic

        self.expand(leaf=self.root)

    def run_simulations(self, n, profile = True):
        """Run ``n`` select / expand / back-propagate simulations from the root.

        Args:
            n: number of simulations.
            profile: when True (the default, as before) the run is profiled
                with cProfile and the report is written to
                ``expand_profiling.txt``; pass False to skip that overhead.
        """
        profiler = cProfile.Profile() if profile else None
        if profiler is not None:
            profiler.enable()
        try:
            for _ in tqdm(range(n)):
                self.game_path = []
                leaf = self.select_child(self.root)
                # Building a Board just for the log line is expensive, so only
                # do it when DEBUG records are actually wanted.
                if logging.getLogger().isEnabledFor(logging.DEBUG):
                    logging.debug(f'selected leaf as the state {Board(leaf.state)}')
                leaf.N +=1
                logging.debug(f'leaf n value is now {leaf.N}')
                leaf = self.expand(leaf)
                leaf = self.back_propagate(leaf,leaf.value)
        finally:
            if profiler is not None:
                profiler.disable()
                self._write_profile(profiler, "expand_profiling.txt")

    @staticmethod
    def _write_profile(profiler, path):
        """Write the cumulative-time report of ``profiler`` to ``path``."""
        s = io.StringIO()
        ps = pstats.Stats(profiler, stream=s).strip_dirs().sort_stats('cumulative')
        ps.print_stats()
        with open(path, "w") as f:
            f.write(s.getvalue())

    def select_child(self, node):
        """Descend from ``node`` along the best-scoring edges.

        Stops at, and returns, the first node that has never been visited
        (``N == 0``) or that has no outgoing edges. Every edge followed is
        appended to ``self.game_path``.
        """
        logging.debug(f'before selecting children')
        debug_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)

        while node.N!=0:
            if debug_enabled:
                logging.debug(f'checkign node with current state as {Board(node.state)}')
            if len(node.edges)==0:
                logging.debug(f'current node has no outgoing edges')
                return node

            # Neutral exploration noise, sized for *this* node. Sizing it once
            # for the root raised IndexError as soon as a deeper node had more
            # legal moves than the root.
            noise = [1] * len(node.edges)

            best = None
            best_score = -np.inf
            for edge, edge_noise in zip(node.edges, noise):
                if debug_enabled:
                    logging.debug(f'checking edge with state to node {Board(edge.out_node.state)}')
                cur_score = edge.upper_confidence_bound(noise=edge_noise)
                # `best is None` keeps a valid choice even if a score is NaN.
                if best is None or cur_score>best_score:
                    best_score = cur_score
                    best = edge

            node = best.out_node
            self.game_path.append(best)

        return node

    @staticmethod
    def _queen_plane(direction, distance):
        """Plane index of a queen-like move: 7 consecutive planes per direction.

        ``distance`` runs from 1 to 7, so it is shifted to 0..6; without the
        shift plane 0 is never used and direction 7 / distance 7 lands on the
        first knight plane.
        """
        return direction * MCTS.QUEEN_DISTANCES + (distance - 1)

    def get_index(self, piece_type, direction, distance):
        """Plane index for a knight move or a queen-like move.

        Uses the same layout as probabilities_to_actions.
        """
        # chess.PieceType is an alias of int and has no KNIGHT attribute; the
        # piece constants live on the chess module.
        if piece_type == chess.KNIGHT:
            return self.KNIGHT_PLANE_OFFSET + direction
        return self._queen_plane(direction, distance)

    @staticmethod
    def get_underpromotion_move(piece_type, from_square, to_square):
        """Classify an underpromotion.

        Returns:
            ``(piece_index, direction)`` where piece_index is 0 for knight,
            1 for bishop, 2 for rook, and direction is the file offset term
            computed from the square difference (one of -1, 0, 1 for a pawn
            moving one rank onto the last rank).

        Raises:
            Exception: for any other promotion piece, or when ``to_square`` is
                not on the first or last rank.
        """
        # Piece constants come from the chess module (chess.PieceType is int).
        if piece_type==chess.KNIGHT:
            under_promotion_piece_type = 0
        elif piece_type==chess.BISHOP:
            under_promotion_piece_type = 1
        elif piece_type==chess.ROOK:
            under_promotion_piece_type = 2
        else:
            raise Exception("Underpromotion piece not valid")

        diff = from_square-to_square
        if to_square<8:
            direction = diff-8
        elif to_square>55:
            direction = diff+8
        else:
            # Previously this fell through with `direction` unbound.
            raise Exception("Underpromotion piece not valid")
        return (under_promotion_piece_type,direction)

    @staticmethod
    def get_knight_move(from_square, to_square):
        """Map a knight move to its direction index 0..7 by square difference.

        Raises:
            Exception: when the difference is not one of the eight knight jumps.
        """
        knight_directions = {
            15: 0,   # NORTH_LEFT
            17: 1,   # NORTH_RIGHT
            10: 2,   # EAST_UP
            -6: 3,   # EAST_DOWN
            -15: 4,  # SOUTH_RIGHT
            -17: 5,  # SOUTH_LEFT
            -10: 6,  # WEST_DOWN
            6: 7,    # WEST_UP
        }
        diff = to_square-from_square
        if diff not in knight_directions:
            raise Exception("invalid knight moves")
        return knight_directions[diff]

    @staticmethod
    def get_queen_like_move(from_square, to_square):
        """Classify a straight or diagonal move.

        Returns:
            ``(direction, dist)`` with direction 0=NW, 1=N, 2=NE, 3=E, 4=SE,
            5=S, 6=SW, 7=W and dist the number of squares travelled.

        Raises:
            Exception: when the squares are not on a common line.
        """
        diff = to_square-from_square
        # The order of these checks matters: e.g. 56 is a multiple of both 8
        # and 7, and a same-rank move of 7 files is also a multiple of 7.
        if diff%8==0:
            direction = 1 if diff>0 else 5      # NORTH / SOUTH
            dist = abs(diff)//8
        elif diff%9==0:
            direction = 2 if diff>0 else 6      # NORTHEAST / SOUTHWEST
            dist = abs(diff)//9
        elif from_square//8==to_square//8:
            direction = 3 if diff>0 else 7      # EAST / WEST
            dist = abs(diff)
        elif diff%7==0:
            direction = 0 if diff>0 else 4      # NORTHWEST / SOUTHEAST
            dist = abs(diff)//7
        else:
            raise Exception("NOT a valid queen-like move")
        return (direction,dist)

    def probabilities_to_actions(self,probabilities,board):
        """Look up the policy value of every legal move.

        Args:
            probabilities: policy output; reshaped to (amount_of_planes, n, n).
            board: FEN string of the position.

        Returns:
            dict mapping each legal move's UCI string to its policy value.
        """
        probabilities=probabilities.reshape((amount_of_planes,n,n))

        self.cur_board = chess.Board(board)
        self.outputs = []

        for move in self.cur_board.generate_legal_moves():
            from_square = move.from_square
            to_square = move.to_square

            if move.promotion and move.promotion!=chess.QUEEN:
                piece_type, direction = self.get_underpromotion_move(move.promotion, from_square, to_square)
                plane_index = self.UNDERPROMOTION_PLANE_OFFSET + 3*piece_type + (1-direction)
            elif self.cur_board.piece_at(from_square).piece_type==chess.KNIGHT:
                plane_index = self.KNIGHT_PLANE_OFFSET + self.get_knight_move(from_square,to_square)
            else:
                direction, dist = self.get_queen_like_move(from_square,to_square)
                plane_index = self._queen_plane(direction, dist)

            # Row 0 is rank 8, column 0 is file a.
            col = from_square % 8
            row = 7- from_square//8
            self.outputs.append((move,plane_index, row, col))

        return {
            move.uci(): probabilities[plane_index][row][col]
            for move, plane_index, row, col in self.outputs
        }

    def expand(self, leaf):
        """Evaluate ``leaf`` and attach one child per legal move.

        A position without legal moves gets its value from the game outcome:
        +1 when white won, -1 when black won, 0 otherwise. This matches the
        sign convention Game.play_game uses for results and that
        Edge.upper_confidence_bound relies on when it negates the value for
        black. Any other position gets the network's value, and each child
        edge gets the policy value of its move as prior.

        A node that already has edges is returned unchanged, so expanding it a
        second time (which happens to the root on the first simulation) does
        not duplicate its children.
        """
        if leaf.edges:
            return leaf

        board = chess.Board(leaf.state)

        possible_actions = list(board.generate_legal_moves())
        if len(possible_actions) == 0:
            outcome = board.outcome(claim_draw=True)
            if outcome is None or outcome.winner is None:
                leaf.value = 0
            elif outcome.winner == chess.WHITE:
                leaf.value = 1
            else:
                leaf.value = -1
            return leaf

        network_input = ChessEnv.state_to_input(leaf.state)
        p, v = self.agent.predict(network_input)

        actions = self.probabilities_to_actions(p, leaf.state)
        leaf.value = v

        for action in possible_actions:
            new_state = leaf.step(action)
            leaf.add_child(Node(new_state), action, actions[action.uci()])

        return leaf

    def back_propagate(self, end_node, value):
        """Add ``value`` to every edge on the current path and bump visit counts.

        Each edge's own ``N`` and its parent node's ``N`` are incremented.
        Returns ``end_node`` unchanged.
        """
        for edge in self.game_path:
            edge.in_node.N+=1
            edge.N+=1
            edge.W+=value
        return end_node


In [None]:
from keras.api.models import Model, load_model

class Agent:
    """A player: a policy/value network plus the search tree that uses it."""

    def __init__(self, model_path = None, state = chess.STARTING_FEN):
        """Load (or build) the network and create a search tree rooted at ``state``.

        Args:
            model_path: path of a saved Keras model. With the default None a
                fresh network is built instead of calling load_model(None),
                which fails.
            state: FEN of the position the initial tree starts from.
        """
        self.model = self.build_model() if model_path is None else load_model(model_path)
        self.mcts = MCTS(self, state = state)

    def build_model(self):
        """Build a new network with the notebook's configured shapes."""
        model_builder = ModelBuilder(INPUT_SHAPE,OUTPUT_SHAPE)
        model = model_builder.build_model()
        return model

    def predict(self,data):
        """Run the network on one encoded position.

        Returns:
            ``(p, v)``: the policy output as a numpy array and the value of
            the first sample as a plain Python float. Returning a float keeps
            the search statistics built from it as ordinary numbers instead
            of framework tensors.
        """
        p,v = self.model(data)
        return p.numpy(), float(v[0][0])

   # def save_model(self):
       # self.model.save(f"{MODEL_FOLDER}/model-{time.time()}.h5")

In [None]:
import os
from chess.pgn import Game as ChessGame
import logging
class Game:
    """Plays a game between two agents on a ChessEnv and records search statistics."""

    def __init__(self,env, white, black):
        self.env = env
        self.white = white
        self.black = black

        # One list per game played; every entry is
        # (fen, {uci move: share of root visits}, None).
        self.memory = []

    def reset(self):
        """Reset the environment and re-read whose turn it is."""
        self.env.reset()
        self.turn = self.env.board.turn

    def play_game(self, stochastic = True):
        """Play one game to the end (or to the move cap) and return the result.

        Returns:
            1 for a white win, -1 for a black win, 0 otherwise. After more
            than 350 moves the result is estimated by guess_winner().
        """
        self.reset()
        self.memory.append([])

        move_counter = 0
        previous_edges = (None, None)

        winner = None

        while not self.env.board.is_game_over():
            previous_edges = self.play_move(stochastic = stochastic, previous_moves = previous_edges)
            move_counter+=1
            print(f'board after move {move_counter} is {self.env.board}')

            if move_counter>350:
                winner = self.guess_winner()
                break

        if winner is None:
            game_result = self.env.board.result()
            if game_result == "1-0":
                winner = 1
            elif game_result=="0-1":
                winner = -1
            else:
                winner = 0

        # Rebuild the game as a PGN object from the starting position and log it.
        game = ChessGame()
        game.setup(self.env.fen)
        # Starting from the game node itself also covers an empty move stack
        # (a start position that is already finished), where indexing
        # move_stack[0] used to raise IndexError.
        node = game
        for move in self.env.board.move_stack:
            node = node.add_variation(move)

        logging.info(game)

        return winner

    def guess_winner(self):
        """Estimate the winner from material when a game hits the move cap.

        Returns:
            1 when white is ahead by more than 2.5 points, -1 when black is,
            0 otherwise.
        """
        piece_scores = {
            chess.PAWN: 1,
            chess.KNIGHT:3,
            chess.BISHOP:3,
            chess.ROOK:5,
            chess.QUEEN:9,
            chess.KING:20
        }

        score = 0
        # piece_map is a method and has to be called to get the dict.
        for piece in self.env.board.piece_map().values():
            piece_value = piece_scores[piece.piece_type]
            score += piece_value if piece.color== chess.WHITE else -piece_value

        if score>2.5:
            return 1
        elif score<-2.5:
            return -1
        return 0

    @staticmethod
    def _follow_edges(node, played_edges):
        """Walk from ``node`` along the actions of ``played_edges``.

        Returns the node reached, or None when ``node`` is None or some action
        has no matching edge in the tree.
        """
        for played in played_edges:
            if node is None:
                return None
            edge = node.get_edge(played.action)
            if edge is None:
                return None
            node = edge.out_node
        return node

    @staticmethod
    def _visit_shares(moves):
        """Share of visits per edge; uniform when no edge has been visited yet."""
        total_visits = sum(e.N for e in moves)
        if total_visits == 0:
            return [1 / len(moves) for _ in moves]
        return [e.N / total_visits for e in moves]

    def play_move(self, stochastic = True, previous_moves = (None,None),save_moves = True):
        """Search from the current position and play one move.

        Args:
            stochastic: sample the move in proportion to visit counts when
                True, otherwise play the most visited move.
            previous_moves: the last two edges played (the mover's own
                previous move, then the opponent's reply). When both are known
                the mover's existing tree is re-rooted at the current position
                instead of being rebuilt.
            save_moves: record the root visit distribution in ``self.memory``.

        Returns:
            ``(previous_moves[1], edge played)`` for the next call.
        """
        current = self.white if self.turn else self.black
        print(f'playing move as {current} with board as {self.env.board}')

        reused_root = None
        existing_tree = getattr(current, "mcts", None)
        if existing_tree is not None and previous_moves[0] is not None and previous_moves[1] is not None:
            # Edge stores its child as `out_node`. The old `.output_node`
            # lookup always raised AttributeError, which was swallowed and
            # silently rebuilt the tree on every move.
            reused_root = self._follow_edges(existing_tree.root, previous_moves)

        if reused_root is None:
            current.mcts = MCTS(current,state = self.env.board.fen(),stochastic = stochastic)
        else:
            current.mcts.root = reused_root

        logging.debug(f'before running simulations')
        current.mcts.run_simulations(300)
        logging.debug(f'after mcts running simulations')

        moves = current.mcts.root.edges

        if save_moves:
            self.save_to_memory(self.env.board.fen(),moves)

        probs = self._visit_shares(moves)

        if stochastic:
            # Sample an index rather than handing Edge objects to numpy.
            best = moves[np.random.choice(len(moves), p = probs)]
        else:
            best = moves[int(np.argmax(probs))]

        self.env.step(best.action)

        self.turn = not self.turn

        return (previous_moves[1], best)

    def save_to_memory(self, state, moves):
        """Append ``(state, {uci: visit share}, None)`` to the current game's memory."""
        probabilities = {
            e.action.uci(): share for e, share in zip(moves, self._visit_shares(moves))
        }

        self.memory[-1].append((state,probabilities,None))

In [None]:
from chess import *
import sys
import os
# Smoke test: load the saved base model into an Agent and run a handful of
# MCTS simulations from the default starting position.

base_model_path = "models/base_model.keras"
test_mcts = MCTS(agent=Agent(model_path = base_model_path))

test_mcts.run_simulations(10)

  saveable.load_own_variables(weights_store.get(inner_path))
100%|██████████| 10/10 [00:02<00:00,  4.17it/s]


In [None]:
class Game:
    """Plays a game between two agents on a ChessEnv and records search statistics."""

    def __init__(self,env, white, black):
        self.env = env
        self.white = white
        self.black = black

        # One list per game played; every entry is
        # (fen, {uci move: share of root visits}, None).
        self.memory = []

    def reset(self):
        """Reset the environment and re-read whose turn it is."""
        self.env.reset()
        self.turn = self.env.board.turn

    def play_game(self, stochastic = True):
        """Play one game to the end (or to the move cap) and return the result.

        Returns:
            1 for a white win, -1 for a black win, 0 otherwise. After more
            than 350 moves the result is estimated by guess_winner().
        """
        self.reset()
        self.memory.append([])

        move_counter = 0
        previous_edges = (None, None)

        winner = None

        while not self.env.board.is_game_over():
            previous_edges = self.play_move(stochastic = stochastic, previous_moves = previous_edges)
            move_counter+=1
            print(f'board after move {move_counter} is {self.env.board}')

            if move_counter>350:
                winner = self.guess_winner()
                break

        if winner is None:
            game_result = self.env.board.result()
            if game_result == "1-0":
                winner = 1
            elif game_result=="0-1":
                winner = -1
            else:
                winner = 0

        # Rebuild the game as a PGN object from the starting position and log it.
        game = ChessGame()
        game.setup(self.env.fen)
        # Starting from the game node itself also covers an empty move stack
        # (a start position that is already finished), where indexing
        # move_stack[0] used to raise IndexError.
        node = game
        for move in self.env.board.move_stack:
            node = node.add_variation(move)

        logging.info(game)

        return winner

    def guess_winner(self):
        """Estimate the winner from material when a game hits the move cap.

        Returns:
            1 when white is ahead by more than 2.5 points, -1 when black is,
            0 otherwise.
        """
        piece_scores = {
            chess.PAWN: 1,
            chess.KNIGHT:3,
            chess.BISHOP:3,
            chess.ROOK:5,
            chess.QUEEN:9,
            chess.KING:20
        }

        score = 0
        # piece_map is a method and has to be called to get the dict.
        for piece in self.env.board.piece_map().values():
            piece_value = piece_scores[piece.piece_type]
            score += piece_value if piece.color== chess.WHITE else -piece_value

        if score>2.5:
            return 1
        elif score<-2.5:
            return -1
        return 0

    @staticmethod
    def _follow_edges(node, played_edges):
        """Walk from ``node`` along the actions of ``played_edges``.

        Returns the node reached, or None when ``node`` is None or some action
        has no matching edge in the tree.
        """
        for played in played_edges:
            if node is None:
                return None
            edge = node.get_edge(played.action)
            if edge is None:
                return None
            node = edge.out_node
        return node

    @staticmethod
    def _visit_shares(moves):
        """Share of visits per edge; uniform when no edge has been visited yet."""
        total_visits = sum(e.N for e in moves)
        if total_visits == 0:
            return [1 / len(moves) for _ in moves]
        return [e.N / total_visits for e in moves]

    def play_move(self, stochastic = True, previous_moves = (None,None),save_moves = True):
        """Search from the current position and play one move.

        Args:
            stochastic: sample the move in proportion to visit counts when
                True, otherwise play the most visited move.
            previous_moves: the last two edges played (the mover's own
                previous move, then the opponent's reply). When both are known
                the mover's existing tree is re-rooted at the current position
                instead of being rebuilt.
            save_moves: record the root visit distribution in ``self.memory``.

        Returns:
            ``(previous_moves[1], edge played)`` for the next call.
        """
        current = self.white if self.turn else self.black
        print(f'playing move as {current} with board as {self.env.board}')

        reused_root = None
        existing_tree = getattr(current, "mcts", None)
        if existing_tree is not None and previous_moves[0] is not None and previous_moves[1] is not None:
            # Edge stores its child as `out_node`. The old `.output_node`
            # lookup always raised AttributeError, which was swallowed and
            # silently rebuilt the tree on every move.
            reused_root = self._follow_edges(existing_tree.root, previous_moves)

        if reused_root is None:
            current.mcts = MCTS(current,state = self.env.board.fen(),stochastic = stochastic)
        else:
            current.mcts.root = reused_root

        logging.debug(f'before running simulations')
        current.mcts.run_simulations(300)
        logging.debug(f'after mcts running simulations')

        moves = current.mcts.root.edges

        if save_moves:
            self.save_to_memory(self.env.board.fen(),moves)

        probs = self._visit_shares(moves)

        if stochastic:
            # Sample an index rather than handing Edge objects to numpy.
            best = moves[np.random.choice(len(moves), p = probs)]
        else:
            best = moves[int(np.argmax(probs))]

        self.env.step(best.action)

        self.turn = not self.turn

        return (previous_moves[1], best)

    def save_to_memory(self, state, moves):
        """Append ``(state, {uci: visit share}, None)`` to the current game's memory."""
        probabilities = {
            e.action.uci(): share for e, share in zip(moves, self._visit_shares(moves))
        }

        self.memory[-1].append((state,probabilities,None))

In [None]:

# Send DEBUG-and-above log records to app.log, overwriting any previous file.
# NOTE(review): basicConfig does nothing when the root logger already has
# handlers (notebook environments often install one, and an earlier
# module-level logging.debug call sets up a default handler) -- confirm that
# app.log is actually written, and consider force=True if it is not.
logging.basicConfig(filename='app.log', filemode='w', level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Self-play: both sides are separate Agents loaded from the same saved model.
new_game = Game(ChessEnv(), Agent(model_path = base_model_path), Agent(model_path=base_model_path))
new_game.play_game()

  saveable.load_own_variables(weights_store.get(inner_path))


playing move as <__main__.Agent object at 0x7d28facd6090> with board as r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


100%|██████████| 300/300 [01:30<00:00,  3.33it/s]


board after move 1 is r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . P . . . . .
P P . P P P P P
R N B Q K B N R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . P . . . . .
P P . P P P P P
R N B Q K B N R


100%|██████████| 300/300 [01:29<00:00,  3.34it/s]


board after move 2 is r . b q k b n r
p p p p p p p p
. . n . . . . .
. . . . . . . .
. . . . . . . .
. . P . . . . .
P P . P P P P P
R N B Q K B N R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b n r
p p p p p p p p
. . n . . . . .
. . . . . . . .
. . . . . . . .
. . P . . . . .
P P . P P P P P
R N B Q K B N R


100%|██████████| 300/300 [00:57<00:00,  5.24it/s]


board after move 3 is r . b q k b n r
p p p p p p p p
. . n . . . . .
. . . . . . . .
. . . . . . . .
. . P . . . . N
P P . P P P P P
R N B Q K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b n r
p p p p p p p p
. . n . . . . .
. . . . . . . .
. . . . . . . .
. . P . . . . N
P P . P P P P P
R N B Q K B . R


100%|██████████| 300/300 [01:31<00:00,  3.26it/s]


board after move 4 is r . b q k b . r
p p p p p p p p
. . n . . . . n
. . . . . . . .
. . . . . . . .
. . P . . . . N
P P . P P P P P
R N B Q K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b . r
p p p p p p p p
. . n . . . . n
. . . . . . . .
. . . . . . . .
. . P . . . . N
P P . P P P P P
R N B Q K B . R


100%|██████████| 300/300 [01:31<00:00,  3.28it/s]


board after move 5 is r . b q k b . r
p p p p p p p p
. . n . . . . n
. . . . . . . .
. P . . . . . .
. . P . . . . N
P . . P P P P P
R N B Q K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b . r
p p p p p p p p
. . n . . . . n
. . . . . . . .
. P . . . . . .
. . P . . . . N
P . . P P P P P
R N B Q K B . R


100%|██████████| 300/300 [01:32<00:00,  3.24it/s]


board after move 6 is r . b q k b . r
p p p p p p p p
. . . . . . . n
. . . . . . . .
. P . n . . . .
. . P . . . . N
P . . P P P P P
R N B Q K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b . r
p p p p p p p p
. . . . . . . n
. . . . . . . .
. P . n . . . .
. . P . . . . N
P . . P P P P P
R N B Q K B . R


100%|██████████| 300/300 [01:32<00:00,  3.23it/s]


board after move 7 is r . b q k b . r
p p p p p p p p
. . . . . . . n
. . . . . . . .
. P . n . . . .
. Q P . . . . N
P . . P P P P P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b . r
p p p p p p p p
. . . . . . . n
. . . . . . . .
. P . n . . . .
. Q P . . . . N
P . . P P P P P
R N B . K B . R


100%|██████████| 300/300 [01:38<00:00,  3.05it/s]


board after move 8 is r . b q k b . r
p p p p p p p p
. . . . . . . n
. n . . . . . .
. P . . . . . .
. Q P . . . . N
P . . P P P P P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b . r
p p p p p p p p
. . . . . . . n
. n . . . . . .
. P . . . . . .
. Q P . . . . N
P . . P P P P P
R N B . K B . R


100%|██████████| 300/300 [01:35<00:00,  3.15it/s]


board after move 9 is r . b q k b . r
p p p p p p p p
. . . . . . . n
. n . . . . . .
. P Q . . . . .
. . P . . . . N
P . . P P P P P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b . r
p p p p p p p p
. . . . . . . n
. n . . . . . .
. P Q . . . . .
. . P . . . . N
P . . P P P P P
R N B . K B . R


100%|██████████| 300/300 [01:34<00:00,  3.19it/s]


board after move 10 is r . b q k b . r
. p p p p p p p
. . . . . . . n
p n . . . . . .
. P Q . . . . .
. . P . . . . N
P . . P P P P P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b . r
. p p p p p p p
. . . . . . . n
p n . . . . . .
. P Q . . . . .
. . P . . . . N
P . . P P P P P
R N B . K B . R


100%|██████████| 300/300 [01:38<00:00,  3.04it/s]


board after move 11 is r . b q k b . r
. p p p p p p p
. . . . . . . n
p n . . . . . .
. P Q . . . . .
. . P . . . P N
P . . P P P . P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b . r
. p p p p p p p
. . . . . . . n
p n . . . . . .
. P Q . . . . .
. . P . . . P N
P . . P P P . P
R N B . K B . R


100%|██████████| 300/300 [01:37<00:00,  3.09it/s]


board after move 12 is r . b q k b . r
n p p p p p p p
. . . . . . . n
p . . . . . . .
. P Q . . . . .
. . P . . . P N
P . . P P P . P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b . r
n p p p p p p p
. . . . . . . n
p . . . . . . .
. P Q . . . . .
. . P . . . P N
P . . P P P . P
R N B . K B . R


100%|██████████| 300/300 [01:40<00:00,  2.99it/s]


board after move 13 is r . b q k b . r
n p p p p p p p
. . . . . . . n
p . . . . . . .
. P . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b . r
n p p p p p p p
. . . . . . . n
p . . . . . . .
. P . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R


100%|██████████| 300/300 [01:32<00:00,  3.24it/s]


board after move 14 is r . b q k b . r
n p p p p . p p
. . . . . p . n
p . . . . . . .
. P . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b . r
n p p p p . p p
. . . . . p . n
p . . . . . . .
. P . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R


100%|██████████| 300/300 [01:39<00:00,  3.02it/s]


board after move 15 is r . b q k b . r
n p p p p . p p
. . . . . p . n
P . . . . . . .
. . . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b . r
n p p p p . p p
. . . . . p . n
P . . . . . . .
. . . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R


100%|██████████| 300/300 [01:33<00:00,  3.20it/s]


board after move 16 is r . b q k b n r
n p p p p . p p
. . . . . p . .
P . . . . . . .
. . . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b n r
n p p p p . p p
. . . . . p . .
P . . . . . . .
. . . . . Q . .
. . P . . . P N
P . . P P P . P
R N B . K B . R


100%|██████████| 300/300 [01:39<00:00,  3.01it/s]


board after move 17 is r . b q k b n r
n p p p p . p p
. . . . . p . .
P . . . . . . .
. . . . . Q . .
. . P . . . P N
P B . P P P . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b n r
n p p p p . p p
. . . . . p . .
P . . . . . . .
. . . . . Q . .
. . P . . . P N
P B . P P P . P
R N . . K B . R


100%|██████████| 300/300 [01:32<00:00,  3.26it/s]


board after move 18 is r . b q k b n r
n p p . p . p p
. . . . . p . .
P . . p . . . .
. . . . . Q . .
. . P . . . P N
P B . P P P . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b n r
n p p . p . p p
. . . . . p . .
P . . p . . . .
. . . . . Q . .
. . P . . . P N
P B . P P P . P
R N . . K B . R


100%|██████████| 300/300 [01:39<00:00,  3.02it/s]


board after move 19 is r . b q k b n r
n p p . p . p p
. . . . . p . .
P . . p . . . .
. . . . . Q . .
. . P . . P P N
P B . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b n r
n p p . p . p p
. . . . . p . .
P . . p . . . .
. . . . . Q . .
. . P . . P P N
P B . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:37<00:00,  3.08it/s]


board after move 20 is r . b q k b n r
n p p . p . . p
. . . . . p . .
P . . p . . p .
. . . . . Q . .
. . P . . P P N
P B . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b n r
n p p . p . . p
. . . . . p . .
P . . p . . p .
. . . . . Q . .
. . P . . P P N
P B . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:39<00:00,  3.01it/s]


board after move 21 is r . b q k b n r
n p p . p . . p
. . . . . p . .
P . . p . . p .
. . . . . Q . .
B . P . . P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b n r
n p p . p . . p
. . . . . p . .
P . . p . . p .
. . . . . Q . .
B . P . . P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:38<00:00,  3.05it/s]


board after move 22 is r . b q k b n r
n . p . p . . p
. . . . . p . .
P p . p . . p .
. . . . . Q . .
B . P . . P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . b q k b n r
n . p . p . . p
. . . . . p . .
P p . p . . p .
. . . . . Q . .
B . P . . P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:41<00:00,  2.96it/s]


board after move 23 is r . b q k b n r
n . p . p . . p
. . . B . p . .
P p . p . . p .
. . . . . Q . .
. . P . . P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . b q k b n r
n . p . p . . p
. . . B . p . .
P p . p . . p .
. . . . . Q . .
. . P . . P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:39<00:00,  3.01it/s]


board after move 24 is r . . q k b n r
n . p . p . . p
. . . B . p . .
P p . p . b p .
. . . . . Q . .
. . P . . P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . . q k b n r
n . p . p . . p
. . . B . p . .
P p . p . b p .
. . . . . Q . .
. . P . . P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:40<00:00,  2.99it/s]


board after move 25 is r . . q k b n r
n . p . p . . p
. . . B . p . .
P p . p . b p .
. . . . . . . .
. . P . Q P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . . q k b n r
n . p . p . . p
. . . B . p . .
P p . p . b p .
. . . . . . . .
. . P . Q P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:44<00:00,  2.88it/s]


board after move 26 is r . . q k b n r
n . . . p . . p
. . p B . p . .
P p . p . b p .
. . . . . . . .
. . P . Q P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . . q k b n r
n . . . p . . p
. . p B . p . .
P p . p . b p .
. . . . . . . .
. . P . Q P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:43<00:00,  2.89it/s]


board after move 27 is r . . q k b n r
n . . . p . . p
. . p B . p . .
P p . p . b p .
. . . . Q . . .
. . P . . P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . . q k b n r
n . . . p . . p
. . p B . p . .
P p . p . b p .
. . . . Q . . .
. . P . . P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:40<00:00,  2.98it/s]


board after move 28 is r . . q k b . r
n . . . p . . p
. . p B . p . n
P p . p . b p .
. . . . Q . . .
. . P . . P P N
P . . P P . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . . q k b . r
n . . . p . . p
. . p B . p . n
P p . p . b p .
. . . . Q . . .
. . P . . P P N
P . . P P . . P
R N . . K B . R


100%|██████████| 300/300 [01:44<00:00,  2.86it/s]


board after move 29 is r . . q k b . r
n . . . p . . p
. . p B . p . n
P p . p . b p .
. . . . Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . . q k b . r
n . . . p . . p
. . p B . p . n
P p . p . b p .
. . . . Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:42<00:00,  2.93it/s]


board after move 30 is r . . q k b . r
n . . . p . . p
. . p B b p . n
P p . p . . p .
. . . . Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . . q k b . r
n . . . p . . p
. . p B b p . n
P p . p . . p .
. . . . Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:46<00:00,  2.82it/s]


board after move 31 is r B . q k b . r
n . . . p . . p
. . p . b p . n
P p . p . . p .
. . . . Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r B . q k b . r
n . . . p . . p
. . p . b p . n
P p . p . . p .
. . . . Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:40<00:00,  2.98it/s]


board after move 32 is r B . q k b . r
n . . . p . . p
. . p . b p . n
P p . . . . p .
. . . p Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r B . q k b . r
n . . . p . . p
. . p . b p . n
P p . . . . p .
. . . p Q . . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:45<00:00,  2.85it/s]


board after move 33 is r . . q k b . r
n . . . p . . p
. . p . b p . n
P p . . . . p .
. . . p Q B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . . q k b . r
n . . . p . . p
. . p . b p . n
P p . . . . p .
. . . p Q B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:46<00:00,  2.82it/s]


board after move 34 is r . . . k b . r
n . . . p . . p
. q p . b p . n
P p . . . . p .
. . . p Q B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . . . k b . r
n . . . p . . p
. q p . b p . n
P p . . . . p .
. . . p Q B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:43<00:00,  2.89it/s]


board after move 35 is r . . . k b . r
n . . . p . . p
. q p . b p . n
P p . Q . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . . . k b . r
n . . . p . . p
. q p . b p . n
P p . Q . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:43<00:00,  2.89it/s]


board after move 36 is r . . . k b . r
n q . . p . . p
. . p . b p . n
P p . Q . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . . . k b . r
n q . . p . . p
. . p . b p . n
P p . Q . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:45<00:00,  2.85it/s]


board after move 37 is r . . . k b . r
n q . . p . . p
. . p . Q p . n
P p . . . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . . . k b . r
n q . . p . . p
. . p . Q p . n
P p . . . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:36<00:00,  3.10it/s]


board after move 38 is r . n . k b . r
. q . . p . . p
. . p . Q p . n
P p . . . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28facd6090> with board as r . n . k b . r
. q . . p . . p
. . p . Q p . n
P p . . . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


100%|██████████| 300/300 [01:43<00:00,  2.89it/s]


board after move 39 is r . Q . k b . r
. q . . p . . p
. . p . . p . n
P p . . . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R
playing move as <__main__.Agent object at 0x7d28757084d0> with board as r . Q . k b . r
. q . . p . . p
. . p . . p . n
P p . . . . p .
. . . p . B . .
. . P . P P P N
P . . P . . . P
R N . . K B . R


  2%|▏         | 7/300 [00:01<01:07,  4.33it/s]


IndexError: list index out of range