In [66]:
import chess
from IPython.display import display, clear_output
import random
import time

def who(player):
    return "White" if player == chess.WHITE else "Black"

class ChessGame:
    """
    Base class where the chess game is played
    """
    
    def __init__(self, player1, player2):
        self.board = chess.Board()
        
        # Setting up the players
        self.white = player1
        self.black = player2
        self.white.board = self.board
        self.black.board = self.board
        self.white.my_color = chess.WHITE
        self.black.my_color = chess.BLACK
        
        # Game outcome
        self.winner = None
        self.over = False
        
        
    def white_move(self):
        """
        White makes a move according to their best move/model
        """
        assert self.board.turn == chess.WHITE
        self.white.get_move()

        
    def black_move(self):
        """
        Black makes a move according to their best move/model
        
        :param str action: black move (must be in UCI notation)
        """
        assert self.board.turn == chess.BLACK
        self.black.get_move()
        
    def display_board(self, use_svg = 1):
        """
        Method for use in iPython Notebook to visualize board
        """
        
        display(self.board)
        
    def play_game(self, pause=0.1, display = False):
        """
        Game play by the two agents comprising the game.
        
        :param float pause: how long to pause between displays (seconds)
        :param bool display: display the board after each move, verbose output
        """
        
        try:
            while not self.board.is_game_over(claim_draw=True):
                if self.board.turn == chess.WHITE:
                    self.white_move()
                else:
                    self.black_move()
                if display:
                    self.display_board()
                    time.sleep(pause)
                    clear_output(wait=True)
                    
                    
        except KeyboardInterrupt:
            msg = "Game interrupted!"
            return (None, msg)
        result = None
        if self.board.is_checkmate():
            msg = "checkmate: " + who(not self.board.turn) + " wins!"
            result = not self.board.turn
        elif self.board.is_stalemate():
            msg = "draw: stalemate"
        elif self.board.is_fivefold_repetition():
            msg = "draw: 5-fold repetition"
        elif self.board.is_insufficient_material():
            msg = "draw: insufficient material"
        elif self.board.can_claim_draw():
            msg = "draw: claim"
        if display:
            print(msg)
        return (result, msg)
        
        
class HumanPlayer:
    """
    A human player can provide input to which move to take.
    """
    
    def __init__(self, notation = "UCI"):
        self.notation = notation
        self.board = None
        self.my_color = None
    
    def get_move(self):
        """
        User inputs move, must be a legal move in correct format.
        User gets 4 tries
        """
        for x in range(0, 4):  
            try:
                move = input("Please provide your move in %s notation (qqq to exit):" %self.notation)
                if move == "qqq":
                    raise KeyboardInterrupt
                if self.notation == "SAN":
                    self.board.push_san(move)
                else:
                    self.board.push_uci(move)
                return (move)
            except KeyboardInterrupt:
                raise KeyboardInterrupt
            except ValueError:
                pass
                
        
        
    
class RandomPlayer:
    """
    Implements a random player that picks a random move 
    out of all possible moves.
    """
    def __init__(self):
        self.board = None
        self.my_color = None
    
    def get_move(self):
        move = random.choice(list(self.board.legal_moves))
        self.board.push(move)
        
class PieceValuePlayer:
    """
    Implements a player that selects the move that will maximize 
    their piece values on the board.
    
    The only improvement over a random player is that a PieceValuePlayer
    will capture a piece if they can.
    """
    def __init__(self):
        self.board = None
        self.my_color = None
    
    def get_move(self):
        move = self.pick_highest_value_move()
        self.board.push(move)
        
    def pick_highest_value_move(self):
        """
        For each move, evaluate the position after the move,
        then select the highest value move
        """
        # Evaluate move
        moves = list(self.board.legal_moves)
        for move in moves:
            hypothetical_board = self.board.copy()
            move.score = self.static_analysis(move, hypothetical_board, self.board.turn)

        # Select random move among the moves that do best
        best_move_score = max([move.score for move in moves])
        best_moves = [move for move in moves if move.score == best_move_score]
        random_best_move = random.choice(best_moves)
        return random_best_move
    
        
    def static_analysis(self, move, board, my_color):
        """
        Evaluate the board position according to point values 
        purely based on the pieces on the board
        
        Piece values are:
            Pawn: 1
            Bishop: 4
            Queen: 10
            Knight: 3
            Rook: 5
            
        :param chess.Move move: move to evaluate
        :param chess.Board board: board on which to evaluate move
        :param bool my_color: perspective with which to evaluate moves
        """
        board.push(move)
        score = 0
        for (piece, value) in [(chess.PAWN, 1), 
                           (chess.BISHOP, 4), 
                           (chess.QUEEN, 10), 
                           (chess.KNIGHT, 3),
                           (chess.ROOK, 5)]:
            score += len(board.pieces(piece, my_color)) * value
            score -= len(board.pieces(piece, not my_color)) * value
        score += 100 if board.is_checkmate() else 0
        return score
    
class MinimaxPlayer:
    """
    Implements a player that expands the game tree up to a certain depth
    and selects the minimax move.
    
    The minimax move is the best case scenario assuming the a worst case 
    opponent. Assuming the opponent is as strong as possible and will respond 
    with their best move, we select the move that will maximize our payoff
    in this worst case scenario.
    """
    def __init__(self, depth = 2):
        """
        Class constructor for minimax player.
        
        
        """
        self.board = None
        self.max_depth = depth
        self.my_color = None
    
    def get_move(self):
        move = self.minimax(self.max_depth)
        self.board.push(move)
        
    def pick_highest_value_move_depth_2(self):
        moves = list(self.board.legal_moves)
        for move in moves:
            print ("Considering move", move)
            self.board.push(move)
            opponent_moves = list(self.board.legal_moves)
            next_moves = [self.static_analysis(move, self.board, self.board.turn) for move in opponent_moves]
            minimax = -max(next_moves)
            move.score = minimax
            self.board.pop()


        best_move_score = max([move.score for move in moves])
        best_moves = [move for move in moves if move.score == best_move_score]
        random_best_move = random.choice(best_moves)
        return random_best_move
    
    def minimax(self, depth, simulate_opponent = False):
        """
        Recursive function that expands every 
        """
        moves = list(self.board.legal_moves) 
        # If no more legal moves, then game has ended
        if len(moves) == 0:
            moves = None
            print (self.board.result)
            return self.static_analysis(None, self.board, self.my_color)
            
        # At leaf node, evaluate board position
        if depth == 1: 
            leaf_scores = [self.static_analysis(move, self.board, self.my_color) for move in moves]
            if simulate_opponent:
                return min(leaf_scores)
            else:
                return max(leaf_scores)
        
        # If simulating opponent, opponent tries to minimize my score
        if simulate_opponent:
            for move in moves:
                self.board.push(move)
                move.score = self.minimax(depth - 1, simulate_opponent = not simulate_opponent) 
                #the better the opponent scores, the worse we score
                self.board.pop()
            return min([move.score for move in moves])
        else:
        # If simulating my moves, I try to maximize my own score
            for move in moves:
                self.board.push(move)
                move.score = self.minimax(depth - 1, simulate_opponent = not simulate_opponent) 
                print ("Move:", self.board.uci(move), "score", move.score)
                #the better the opponent scores, the worse we score
                self.board.pop()
            if depth < self.max_depth:
                return max([move.score for move in moves])

        best_move_score = max([move.score for move in moves])
        best_moves = [move for move in moves if move.score == best_move_score]
        random_best_move = random.choice(best_moves)
        return random_best_move
    
        
    def static_analysis(self, move, board, my_color):
        """
        Evaluate the board position according to point values previously
        described
        
        Piece values are:
            Pawn: 1
            Bishop: 4
            Queen: 10
            Knight: 3
            Rook: 5
        """
        # Return score if the game has ended
        if move == None:
            if board.result == "1-0":
                score = 100 if my_color == 1 else -100
            elif board.result == "0-1":
                score = -100 if my_color == 1 else 100
            else:
                score = 0 
            return score
        else:
            score = 0
                
        board.push(move)
        
        for (piece, value) in [(chess.PAWN, 1), 
                           (chess.BISHOP, 4), 
                           (chess.QUEEN, 10), 
                           (chess.KNIGHT, 3),
                           (chess.ROOK, 5)]:
            score += len(board.pieces(piece, my_color)) * value
            score -= len(board.pieces(piece, not my_color)) * value
        score += 100 if board.is_checkmate() else 0
        board.pop()
        return score
    


In [69]:

PAWN_position_value = np.array([[0,  0,  0,  0,  0,  0,  0,  0],
                [50, 50, 50, 50, 50, 50, 50, 50],
                [10, 10, 20, 30, 30, 20, 10, 10],
                [ 5,  5, 10, 25, 25, 10,  5,  5],
                [ 0,  0,  0, 20, 20,  0,  0,  0],
                [ 5, -5,-10,  0,  0,-10, -5,  5],
                [ 5, 10, 10,-20,-20, 10, 10,  5],
                [ 0,  0,  0,  0,  0,  0,  0,  0]]).ravel()[::-1]

KNIGHT_position_value = np.array([[-50,-40,-30,-30,-30,-30,-40,-50],
                [-40,-20,  0,  0,  0,  0,-20,-40],
                [-30,  0, 10, 15, 15, 10,  0,-30],
                [-30,  5, 15, 20, 20, 15,  5,-30],
                [-30,  0, 15, 20, 20, 15,  0,-30],
                [-30,  5, 10, 15, 15, 10,  5,-30],
                [-40,-20,  0,  5,  5,  0,-20,-40],
                [-50,-40,-30,-30,-30,-30,-40,-50]]).ravel()[::-1]

BISHOP_position_value = np.array([[-20,-10,-10,-10,-10,-10,-10,-20],
                [-10,  0,  0,  0,  0,  0,  0,-10],
                [-10,  0,  5, 10, 10,  5,  0,-10],
                [-10,  5,  5, 10, 10,  5,  5,-10],
                [-10,  0, 10, 10, 10, 10,  0,-10],
                [-10, 10, 10, 10, 10, 10, 10,-10],
                [-10,  5,  0,  0,  0,  0,  5,-10],
                [-20,-10,-10,-10,-10,-10,-10,-20]]).ravel()[::-1]

ROOK_position_value= np.array([[0,  0,  0,  0,  0,  0,  0,  0],
                [  5, 10, 10, 10, 10, 10, 10,  5],
                [ -5,  0,  0,  0,  0,  0,  0, -5],
                [ -5,  0,  0,  0,  0,  0,  0, -5],
                [ -5,  0,  0,  0,  0,  0,  0, -5],
                [ -5,  0,  0,  0,  0,  0,  0, -5],
                [ -5,  0,  0,  0,  0,  0,  0, -5],
                [  0,  0,  0,  5,  5,  0,  0,  0]]).ravel()[::-1]

QUEEN_position_value = np.array([[-20,-10,-10, -5, -5,-10,-10,-20],
                [-10,  0,  0,  0,  0,  0,  0,-10],
                [-10,  0,  5,  5,  5,  5,  0,-10],
                [ -5,  0,  5,  5,  5,  5,  0, -5],
                [  0,  0,  5,  5,  5,  5,  0, -5],
                [-10,  5,  5,  5,  5,  5,  0,-10],
                [-10,  0,  5,  0,  0,  0,  0,-10],
                [-20,-10,-10, -5, -5,-10,-10,-20]]).ravel()[::-1]

KING_MIDposition_value = np.array([[-30,-40,-40,-50,-50,-40,-40,-30],
                [-30,-40,-40,-50,-50,-40,-40,-30],
                [-30,-40,-40,-50,-50,-40,-40,-30],
                [-30,-40,-40,-50,-50,-40,-40,-30],
                [-20,-30,-30,-40,-40,-30,-30,-20],
                [-10,-20,-20,-20,-20,-20,-20,-10],
                [ 20, 20,  0,  0,  0,  0, 20, 20],
                [ 20, 30, 10,  0,  0, 10, 30, 20]]).ravel()[::-1]

KING_ENDposition_value = np.array([[-50,-40,-30,-20,-20,-30,-40,-50],
                [-30,-20,-10,  0,  0,-10,-20,-30],
                [-30,-10, 20, 30, 30, 20,-10,-30],
                [-30,-10, 30, 40, 40, 30,-10,-30],
                [-30,-10, 30, 40, 40, 30,-10,-30],
                [-30,-10, 20, 30, 30, 20,-10,-30],
                [-30,-30,  0,  0,  0,  0,-30,-30],
                [-50,-30,-30,-30,-30,-30,-30,-50]]).ravel()[::-1]


POSITION_dictionary = {1: PAWN_position_value,
                       2: KNIGHT_position_value,
                       3: BISHOP_position_value,
                       4: ROOK_position_value,
                       5: QUEEN_position_value,
                       6: [KING_MIDposition_value, KING_ENDposition_value]
                      }

import numpy as np

class AlphaBetaPlayer:
    """
    Implements a player that expands the game tree up to a certain depth,
    prunes the tree according to the alpha-beta pruning algorith,
    and selects the minimax move.
    
    The minimax move is the best case scenario assuming the a worst case 
    opponent. Assuming the opponent is as strong as possible and will respond 
    with their best move, we select the move that will maximize our payoff
    in this worst case scenario.
    
    The alpha-beta pruning algorithm avoids searching through nodes that 
    are guaranteed to violate the assumption that the opponent is as 
    strong as possible, allowing a more in-depth search.
    """

    
    def __init__(self, depth = 2):
        """
        Class constructor for minimax player.
        
        
        """
        self.board = None
        self.max_depth = depth
        self.my_color = None
    
    def get_move(self):
        move = self.alphabeta(move = 0, depth = self.max_depth)
        self.board.push(move)
    
    def alphabeta(self, move, depth, simulate_opponent = False, 
                alpha = -np.inf, beta = np.inf):
        """
        Recursive function that expands the minimax game tree and prunes
        it according to the alpha-beta pruning algorithm
        """
        
        # At leaf node, evaluate board position
        if depth == 0: 
            return self.static_analysis(move, self.board, self.my_color) 
        
        moves = list(self.board.legal_moves) 
        
        # Heuristics for optimal ordering for alpha-beta pruning
        moves = sorted(moves, key = self.ordering_heuristics, reverse=True)
        
        # If no more legal moves, then game has ended. Evaluate leaf node
        if len(moves) == 0:
            return self.static_analysis(None, self.board, self.my_color)
            
        # If simulating opponent, opponent tries to minimize my score
        if simulate_opponent:
            best_move_score = np.inf
            for move in moves:
                self.board.push(move)
                move.score = self.alphabeta(move, depth - 1, not simulate_opponent,
                                         alpha, beta) 
                best_move_score = min(best_move_score, move.score)
                beta = min(beta, best_move_score)
                self.board.pop()
                #the better the opponent scores, the worse we score
                if beta < alpha:
                    break
            return best_move_score
        else:
        # If simulating my moves, I try to maximize my own score
            best_move_score = -np.inf
            for move in moves:
                self.board.push(move)
                move.score = self.alphabeta(move, depth - 1, not simulate_opponent,
                                         alpha, beta) 
                best_move_score = max(best_move_score, move.score)
                alpha = max(alpha, best_move_score)
                self.board.pop()

                if beta <= alpha:
                    break

            if depth < self.max_depth:
                return best_move_score

        # Select a best move from a list of best moves
        best_move_score = max([move.score for move in moves])
        best_moves = [move for move in moves if move.score == best_move_score]
        random_best_move = random.choice(best_moves)
        
        return random_best_move
    
        
    def static_analysis(self, move, board, my_color):
        """
        Evaluate the board position according to point values previously
        described
        
        Piece values are:
            Pawn: 1
            Bishop: 4
            Queen: 10
            Knight: 3
            Rook: 5
        """
        # Return score if the game has ended
        if move == None:
            if board.result == "1-0":
                score = 100 if my_color == 1 else -100
            elif board.result == "0-1":
                score = -100 if my_color == 1 else 100
            else:
                score = 0 
            return score
        else:
            score = 0
                
        board.push(move)
        
        for (piece, value) in [(chess.PAWN, 100), 
                           (chess.BISHOP, 330), 
                           (chess.QUEEN, 900), 
                           (chess.KNIGHT, 320),
                           (chess.ROOK, 500)]:
            my_piece_position = board.pieces(piece, my_color)
            score += len(my_piece_position) * value
            for position in my_piece_position:
                score += POSITION_dictionary[piece][position]
            opponent_piece_position = board.pieces(piece, not my_color)
            score -= len(opponent_piece_position) * value
            for position in opponent_piece_position:
                score -= POSITION_dictionary[piece][position]
                
        # Evaluate king safety/activity depending on mid/end game
        my_king = list(board.pieces(chess.KING, my_color))[0]
        opponent_king = list(board.pieces(chess.KING, not my_color))[0]
        if self.board.fullmove_number < 50:
            score += POSITION_dictionary[chess.KING][0][my_king]
            score -= POSITION_dictionary[chess.KING][0][opponent_king]
        else:
            score += POSITION_dictionary[chess.KING][1][my_king]
            score -= POSITION_dictionary[chess.KING][1][opponent_king]
        
    
        score += 20000 if board.is_checkmate() else 0
        board.pop()
        return score
    
    def ordering_heuristics(self, move):
        move_order = self.board.is_capture(move)
        move_order += self.board.is_attacked_by(not self.board.turn, move.from_square)
        
        return move_order
    
    
    


In [76]:
tic = time.time()
game = ChessGame(AlphaBetaPlayer(depth = 2), HumanPlayer("SAN"))
res, msg = game.play_game(display = 0)
toc = time.time()
print (toc-tic)

fen = game.board.fen()
print(fen)
foo = fen.split(' ')
rows = foo[0].split('/')
print (rows)
def swapcase(a):
        if a.isalpha():
            return a.lower() if a.isupper() else a.upper()
        return a
def swapall(aa):
    return "".join([swapcase(a) for a in aa])
print ("/".join([swapall(row) for row in reversed(rows)]) \
    + " " + ('w' if foo[1] == 'b' else 'b') \
    + " " + "".join(sorted(swapall(foo[2]))) \
    + " " + foo[3] + " " + foo[4] + " " + foo[5])

def replace_tags_board(board_san):
    board_san = board_san.split(" ")[0]
    board_san = board_san.replace("2", "11")
    board_san = board_san.replace("3", "111")
    board_san = board_san.replace("4", "1111")
    board_san = board_san.replace("5", "11111")
    board_san = board_san.replace("6", "111111")
    board_san = board_san.replace("7", "1111111")
    board_san = board_san.replace("8", "11111111")
    return board_san.replace("/", "")

print(replace_tags_board(fen))
print (res, msg, game.board.fullmove_number)

Please provide your move in SAN notation (qqq to exit):qqq
1.9946479797363281
rnbqkbnr/pppppppp/8/8/8/2N5/PPPPPPPP/R1BQKBNR b KQkq - 1 1
['rnbqkbnr', 'pppppppp', '8', '8', '8', '2N5', 'PPPPPPPP', 'R1BQKBNR']
r1bqkbnr/pppppppp/2n5/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 1 1
rnbqkbnrpppppppp11111111111111111111111111N11111PPPPPPPPR1BQKBNR
None Game interrupted! 1


In [282]:
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from logging import getLogger
from threading import Lock
from multiprocessing import connection, Pipe
from threading import Thread

from keras.engine.topology import Input
from keras.engine.training import Model
from keras.layers.convolutional import Conv2D
from keras.layers.core import Activation, Dense, Flatten
from keras.layers.merge import Add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
import numpy as np 


class ModelConfig:
    cnn_filter_num = 256
    cnn_first_filter_size = 5
    cnn_filter_size = 3
    res_layer_num = 3
    l2_reg = 1e-4
    value_fc_size = 256
    distributed = True
    input_depth = 18
    
class NodeStatistics:
    """
    Holds the stats needed for the AGZ MCTS algorithm for a specific action taken from a specific state.
    Attributes:
        :ivar int n: number of visits to this action by the algorithm
        :ivar float w: every time a child of this action is visited by the algorithm,
            this accumulates the value (calculated from the value network) of that child. This is modified
            by a virtual loss which encourages threads to explore different nodes.
        :ivar float q: mean action value (total value from all visits to actions
            AFTER this action, divided by the total number of visits to this action)
            i.e. it's just w / n.
        :ivar float p: prior probability of taking this action, given
            by the policy network.
            
    ## EDIT
    """
    def __init__(self):
        self.n = 0
        self.w = 0
        self.q = 0
        self.p = 0
        
class RLPlayer:
    """
    Implements a player that uses neural networks to play chess.
    
    Attributes:
    """
    def __init__(self, depth = 2, config = None):
        """
        Class constructor for minimax player.
        
        
        """
        self.board = None
        self.max_depth = depth
        self.my_color = None
        self.model = None
        if config == None:
            self.config_model = ModelConfig()
            self.config = Config()
        self.node_lock = defaultdict(Lock)
        self.game_tree = {}
        self.feed_input, self.return_policy_value = [], []
        
        # Dictionary to facilitate converting from network outputs to move
        self.move_code = {i: chess.Move.from_uci(move) 
                          for move, i in zip(create_uci_labels(), range(len(create_uci_labels())))}
        self.move_lookup = {chess.Move.from_uci(move): i
                          for move, i in zip(create_uci_labels(), range(len(create_uci_labels())))}
        
    def create_pipes(self):
        self.feed_input, self.return_policy_value = [], []
        for thread in range(30):
            me, you = Pipe()
            self.feed_input.append(me)
            self.return_policy_value.append(you)
        
        return self.feed_input, self.return_policy_value
    
    def get_move(self):
        # Set up multiprocessing for speed 
        self.feed_input, self.return_policy_value = self.create_pipes()
        
        # Starts a thread to listen on the pipe and make predictions
        prediction_worker = Thread(target=self._predict_batch_worker, name="prediction_worker")
        prediction_worker.daemon = True
        prediction_worker.start()
        
        move = self.minimax(self.max_depth)
        self.board.push(move)
        
    
        
    def build_model(self):
        """
        Builds the full Keras model and stores it in self.model.
        """
        mc = self.config_model
        in_x = x = Input((18, 8, 8))

        # (batch, channels, height, width)
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_first_filter_size, padding="same",
                   data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg),
                   name="input_conv-"+str(mc.cnn_first_filter_size)+"-"+str(mc.cnn_filter_num))(x)
        x = BatchNormalization(axis=1, name="input_batchnorm")(x)
        x = Activation("relu", name="input_relu")(x)

        for i in range(mc.res_layer_num):
            x = self._build_residual_block(x, i + 1)

        res_out = x
        
        # for policy output
        x = Conv2D(filters=2, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg),
                    name="policy_conv-1-2")(res_out)
        x = BatchNormalization(axis=1, name="policy_batchnorm")(x)
        x = Activation("relu", name="policy_relu")(x)
        x = Flatten(name="policy_flatten")(x)
        # no output for 'pass'
        policy_out = Dense(self.config.n_labels, kernel_regularizer=l2(mc.l2_reg), activation="softmax", name="policy_out")(x)

        # for value output
        x = Conv2D(filters=4, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg),
                    name="value_conv-1-4")(res_out)
        x = BatchNormalization(axis=1, name="value_batchnorm")(x)
        x = Activation("relu",name="value_relu")(x)
        x = Flatten(name="value_flatten")(x)
        x = Dense(mc.value_fc_size, kernel_regularizer=l2(mc.l2_reg), activation="relu", name="value_dense")(x)
        value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg), activation="tanh", name="value_out")(x)

        self.model = Model(in_x, [policy_out, value_out], name="chess_model")

    def _build_residual_block(self, x, index):
        mc = self.config_model
        in_x = x
        res_name = "res"+str(index)
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same",
                   data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 
                   name=res_name+"_conv1-"+str(mc.cnn_filter_size)+"-"+str(mc.cnn_filter_num))(x)
        x = BatchNormalization(axis=1, name=res_name+"_batchnorm1")(x)
        x = Activation("relu",name=res_name+"_relu1")(x)
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same",
                   data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 
                   name=res_name+"_conv2-"+str(mc.cnn_filter_size)+"-"+str(mc.cnn_filter_num))(x)
        x = BatchNormalization(axis=1, name="res"+str(index)+"_batchnorm2")(x)
        x = Add(name=res_name+"_add")([in_x, x])
        x = Activation("relu", name=res_name+"_relu2")(x)
        return x

    def visualize_model(self):
        """
        Print out model summary (contains layer names, shape of input, 
        number of parameters, and connection to)
        """
        self.model.summary()
        
    
        
    
                
    def MCTS(self):
        """
        Using 30 workers (max_workers=self.play_config.search_threads)
        self.play_config.simulation_num_per_move = 800
        """
        futures = []
        with ThreadPoolExecutor(max_workers = 30) as executor:
            for _ in range(800):
                futures.append(executor.submit(self.select_move,board=self.board.copy(),is_root_node=True))

#         vals = [f.result() for f in futures]
        
    def select_move(self, board, is_root_node=False):
        with self.node_lock[board]:
            if board not in self.game_tree:
                policy, value = self.forward_pass(board)
                self.game_tree[board]['policy'] = policy
                self.game_tree[board]['action'] = defaultdict(NodeStatistics)
                self.game_tree[board]['total_visits'] = 0
                return value
            action = self.best_q_move(board)
            board.push(action)
            
        # Simulate enemy_move
        enemy_value = self.select_move(board)
        value = -enemy_value
            
        actions = self.game_tree[board]['action']
        with self.node_lock[board]:
            self.game_tree[board]['total_visits'] += 1
            actions[action].n += 1
            actions[action].w += value
            actions[action].q = actions[action].w / actions[action].n
            
        return value
        
    def best_q_move(board):
        """
        c_puct = 1.5
        """
        policy = self.game_tree[board]['policy']
        actions = self.game_tree[board]['action']
        unnormalized_prior = [policy[self.move_lookup[move]] for move in board.legal_moves]
        prior = unnormalized_prior / sum(unnormalized_prior)
        sqrt_total_visits = np.sqrt(self.game_tree[board]['total_visits'] + 1)
        
        UCT = []
        c_puct = 1.5
        for index, move in enumerate(board.legal_moves):
            UCT.append(actions[move].q + c_puct * prior[index] * sqrt_total_visits / (1 + actions[move].n))
            
            
            
        
        
        
    def forward_pass(self, board):
        input_planes = self.board_to_input(board, board.turn)
        input_pipe = self.feed_input.pop()
        input_pipe.send(input_planes)
        policy, value = input_pipe.recv()
        self.feed_input.append(input_pipe)
        return policy, value

                
    def _predict_batch_worker(self):
        """
        Thread worker which listens on each pipe in self.pipes for an observation, and then outputs
        the predictions for the policy and value networks when the observations come in. Repeats.
        
        ## CITE
        """
        while True:
            ready = connection.wait(self.return_policy_value,timeout=0.001)
            if not ready:
                continue
            data, result_pipes = [], []
            for pipe in ready:
                while pipe.poll():
                    data.append(pipe.recv())
                    result_pipes.append(pipe)

            data = np.asarray(data, dtype=np.float32)
            policy_array, value_array = self.model.predict_on_batch(data)
            for pipe, policy, value in zip(result_pipes, policy_array, value_array):
                pipe.send((policy, float(value)))
        
    def board_to_input(self, board, my_color = None):
        """
        FIX YOUR COLOR PROBLEM: ASSUME THAT THE NEURAL NETWORK RECEIVES THE INPUT FROM WHITE'S PERSPECTIVE
    
        Input: 18 planes of size (8,8) representing the entire board
        Boolean values: first 6 planes represent my pawn, knight, bishop, rook, queen, king
        Next 6 planes represent opponent's pieces (in the same order)
        Next 4 planes represent my king queen castling and opponents king queen castling
        Next plane represents half move clock (50 move without pawn advance or piece capture is a draw)
        Next plane represents the en passant square (if available)
        """
        if my_color == None:
            my_color = self.my_color
        pieces_planes = np.zeros(shape=(12, 8, 8), dtype=np.float32)
        board_colors = [not my_color, my_color]
        en_passant = np.zeros((8, 8), dtype=np.float32)
        
#         print (board_colors)
        if my_color == 0:
            for my_board, color in enumerate(board_colors):
                for piece in range(1, 7):
                    my_piece_position = board.pieces(piece, color)
                    rank, file = np.array([[(int(i / 8)) for i in list(my_piece_position) ], 
                                     [(7-(i % 8)) for i in list(my_piece_position) ]])
                    pieces_planes[(piece - 1) + (my_board + 1) % 2 * 6, rank, file] = 1
            en_passant[int(board.ep_square / 8), 7 - (board.ep_square % 8)] = 1
        else:
            for my_board, color in enumerate(board_colors):
                for piece in range(1, 7):
                    my_piece_position = board.pieces(piece, color)
                    rank, file = np.array([[(7 - int(i / 8)) for i in list(my_piece_position) ], 
                                     [(i % 8) for i in list(my_piece_position) ]])
                    pieces_planes[(piece - 1) + (my_board + 1) % 2 * 6, rank, file] = 1
            en_passant[7 - int(board.ep_square / 8), (board.ep_square % 8)] = 1
        
        
        auxiliary_planes = np.array([np.full((8, 8), board.has_kingside_castling_rights(my_color), dtype=np.float32),
                        np.full((8, 8), board.has_queenside_castling_rights(my_color), dtype=np.float32),
                        np.full((8, 8), board.has_kingside_castling_rights(not self.my_color), dtype=np.float32),
                        np.full((8, 8), board.has_queenside_castling_rights(not my_color), dtype=np.float32),
                        np.full((8, 8), board.halfmove_clock, dtype=np.float32),
                        en_passant])
                
        return (np.vstack((pieces_planes, auxiliary_planes)))
    

In [283]:
v = RLPlayer()
v.board = chess.Board(fen="rnb1k2r/pp4pp/3bpq2/3p2B1/1PpP1pP1/2P2N2/P4PBP/RN1QK2R b KQkq b3 0 10")
v.my_color = 1
tic = time.time()
# v.MCTS()
# print(v.board.piece_map())
x = {1: 3}
print (type(v.move_code[0]))
toc = time.time()
print(toc-tic)

<class 'chess.Move'>
0.0016341209411621094


In [223]:
v = RLPlayer()
v.board = chess.Board(fen="rnb1k2r/pp4pp/3bpq2/3p2B1/1PpP1pP1/2P2N2/P4PBP/RN1QK2R b KQkq b3 0 10")
v.my_color = 0
v.board_to_input()

[True, 0]
[[[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 1.  1.  0. ...,  0.  1.  1.]
  [ 0.  0.  0. ...,  0.  0.  0.]]

 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  1.  0.]]

 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  1.  0.  0.]]

 ..., 
 [[ 1.  1.  1. ...,  1.  1.  1.]
  [ 1.  1.  1. ...,  1.  1.  1.]
  [ 1.  1.  1. ...,  1.  1.  1.]
  ..., 
  [ 1.  1.  1. ...,  1.  1.  1.]
  [ 1.  1.  1. ...,  1.  1.  1.]
  [ 1.  1.  1. ...,  1.  1.  1.]]

 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0. 

In [123]:
tic = time.time()
game = ChessGame(AlphaBetaPlayer(depth = 1), AlphaBetaPlayer(depth = 1))
# res, msg = game.play_game(display = 0)
toc = time.time()
print (toc-tic)

fen = game.board.fen()
print(fen)
foo = fen.split(' ')
rows = foo[0].split('/')
print (rows)
def swapcase(a):
        if a.isalpha():
            return a.lower() if a.isupper() else a.upper()
        return a
def swapall(aa):
    return "".join([swapcase(a) for a in aa])
print ("/".join([swapall(row) for row in reversed(rows)]) \
    + " " + ('w' if foo[1] == 'b' else 'b') \
    + " " + "".join(sorted(swapall(foo[2]))) \
    + " " + foo[3] + " " + foo[4] + " " + foo[5])

def replace_tags_board(board_san):
    board_san = board_san.split(" ")[0]
    board_san = board_san.replace("2", "11")
    board_san = board_san.replace("3", "111")
    board_san = board_san.replace("4", "1111")
    board_san = board_san.replace("5", "11111")
    board_san = board_san.replace("6", "111111")
    board_san = board_san.replace("7", "1111111")
    board_san = board_san.replace("8", "11111111")
    return board_san.replace("/", "")

tic = time.time()
my_piece_position = game.board.pieces(1, 1)
# int(i / 8), i % 8
print([(int(i / 8)) for i in list(my_piece_position) ], [(i % 8) for i in list(my_piece_position) ])
# print(list(zip(*[(int(i / 8), i % 8) for i in list(my_piece_position)])))
pieces_planes = np.zeros(shape=(12, 8, 8), dtype=np.float32)
for color in range(2):
    for piece in range(1, 7):
        
        my_piece_position = game.board.pieces(piece, color)
        rank, file = np.array([[(int(i / 8)) for i in list(my_piece_position) ], 
                         [(i % 8) for i in list(my_piece_position) ]])
        
        
        pieces_planes[(piece - 1) + ((color + 1) % 2) * 6, rank, file] = 1
#         print ((pieces_planes[(piece - 1) + ((color + 1) % 2) * 6, rank, file]))
print (pieces_planes)
toc = time.time()
print (toc-tic)
print (res, msg, game.board.fullmove_number)


7.987022399902344e-05
rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
['rnbqkbnr', 'pppppppp', '8', '8', '8', '8', 'PPPPPPPP', 'RNBQKBNR']
rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR b KQkq - 0 1
[1, 1, 1, 1, 1, 1, 1, 1] [0, 1, 2, 3, 4, 5, 6, 7]
[[[ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 1.  1.  1.  1.  1.  1.  1.  1.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]]

 [[ 0.  1.  0.  0.  0.  0.  1.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]]

 [[ 0.  0.  1.  0.  0.  1.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0

In [65]:
rl = RLPlayer()
rl.build_model()
rl.visualize_model()
rl.predict()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_14 (InputLayer)           (None, 18, 8, 8)     0                                            
__________________________________________________________________________________________________
input_conv-5-256 (Conv2D)       (None, 256, 8, 8)    115200      input_14[0][0]                   
__________________________________________________________________________________________________
input_batchnorm (BatchNormaliza (None, 256, 8, 8)    1024        input_conv-5-256[0][0]           
__________________________________________________________________________________________________
input_relu (Activation)         (None, 256, 8, 8)    0           input_batchnorm[0][0]            
__________________________________________________________________________________________________
res1_conv1

In [276]:
def create_uci_labels():
    """
    Creates the labels for the universal chess interface into an array and returns them
    :return:
    """
    labels_array = []
    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    numbers = ['1', '2', '3', '4', '5', '6', '7', '8']
    promoted_to = ['q', 'r', 'b', 'n']

    for l1 in range(8):
        for n1 in range(8):
            destinations = [(t, n1) for t in range(8)] + \
                           [(l1, t) for t in range(8)] + \
                           [(l1 + t, n1 + t) for t in range(-7, 8)] + \
                           [(l1 + t, n1 - t) for t in range(-7, 8)] + \
                           [(l1 + a, n1 + b) for (a, b) in
                            [(-2, -1), (-1, -2), (-2, 1), (1, -2), (2, -1), (-1, 2), (2, 1), (1, 2)]]
            for (l2, n2) in destinations:
                if (l1, n1) != (l2, n2) and l2 in range(8) and n2 in range(8):
                    move = letters[l1] + numbers[n1] + letters[l2] + numbers[n2]
                    labels_array.append(move)
    for l1 in range(8):
        l = letters[l1]
        for p in promoted_to:
            labels_array.append(l + '2' + l + '1' + p)
            labels_array.append(l + '7' + l + '8' + p)
            if l1 > 0:
                l_l = letters[l1 - 1]
                labels_array.append(l + '2' + l_l + '1' + p)
                labels_array.append(l + '7' + l_l + '8' + p)
            if l1 < 7:
                l_r = letters[l1 + 1]
                labels_array.append(l + '2' + l_r + '1' + p)
                labels_array.append(l + '7' + l_r + '8' + p)
    return labels_array

def flipped_uci_labels():
    """
    Seems to somehow transform the labels used for describing the universal chess interface format, putting
    them into a returned list.
    :return:
    """
    def repl(x):
        return "".join([(str(9 - int(a)) if a.isdigit() else a) for a in x])

    return [repl(x) for x in create_uci_labels()]

class Config:
    """
    Config describing how to run the application
    Attributes (best guess so far):
        :ivar list(str) labels: labels to use for representing the game using UCI
        :ivar int n_lables: number of labels
        :ivar list(str) flipped_labels: some transformation of the labels
        :ivar int unflipped_index: idk
        :ivar Options opts: options to use to configure this config
        :ivar ResourceConfig resources: resources used by this config.
        :ivar ModelConfig mode: config for the model to use
        :ivar PlayConfig play: configuration for the playing of the game
        :ivar PlayDataConfig play_date: configuration for the saved data from playing
        :ivar TrainerConfig trainer: config for how training should go
        :ivar EvaluateConfig eval: config for how evaluation should be done
    """
    labels = create_uci_labels()
    n_labels = int(len(labels))
    flipped_labels = flipped_uci_labels()
    unflipped_index = None

#     def __init__(self, config_type="mini"):
#         """
#         :param str config_type: one of "mini", "normal", or "distributed", representing the set of
#             configs to use for all of the config attributes. Mini is a small version, normal is the
#             larger version, and distributed is a version which runs across multiple GPUs it seems
#         """
#         self.opts = Options()
#         self.resource = ResourceConfig()

#         if config_type == "mini":
#             import chess_zero.configs.mini as c
#         elif config_type == "normal":
#             import chess_zero.configs.normal as c
#         elif config_type == "distributed":
#             import chess_zero.configs.distributed as c
#         else:
#             raise RuntimeError(f"unknown config_type: {config_type}")
#         self.model = c.ModelConfig()
#         self.play = c.PlayConfig()
#         self.play_data = c.PlayDataConfig()
#         self.trainer = c.TrainerConfig()
#         self.eval = c.EvaluateConfig()
#         self.labels = Config.labels
#         self.n_labels = Config.n_labels
#         self.flipped_labels = Config.flipped_labels

#     @staticmethod
#     def flip_policy(pol):
#         """
#         :param pol policy to flip:
#         :return: the policy, flipped (for switching between black and white it seems)
#         """
#         return np.asarray([pol[ind] for ind in Config.unflipped_index])


Config.unflipped_index = [Config.labels.index(x) for x in Config.flipped_labels]
import chess
print (create_uci_labels())
print ({i: chess.Move.from_uci(move) for move, i in zip(create_uci_labels(), range(len(create_uci_labels())))})

['a1b1', 'a1c1', 'a1d1', 'a1e1', 'a1f1', 'a1g1', 'a1h1', 'a1a2', 'a1a3', 'a1a4', 'a1a5', 'a1a6', 'a1a7', 'a1a8', 'a1b2', 'a1c3', 'a1d4', 'a1e5', 'a1f6', 'a1g7', 'a1h8', 'a1c2', 'a1b3', 'a2b2', 'a2c2', 'a2d2', 'a2e2', 'a2f2', 'a2g2', 'a2h2', 'a2a1', 'a2a3', 'a2a4', 'a2a5', 'a2a6', 'a2a7', 'a2a8', 'a2b3', 'a2c4', 'a2d5', 'a2e6', 'a2f7', 'a2g8', 'a2b1', 'a2c1', 'a2c3', 'a2b4', 'a3b3', 'a3c3', 'a3d3', 'a3e3', 'a3f3', 'a3g3', 'a3h3', 'a3a1', 'a3a2', 'a3a4', 'a3a5', 'a3a6', 'a3a7', 'a3a8', 'a3b4', 'a3c5', 'a3d6', 'a3e7', 'a3f8', 'a3b2', 'a3c1', 'a3b1', 'a3c2', 'a3c4', 'a3b5', 'a4b4', 'a4c4', 'a4d4', 'a4e4', 'a4f4', 'a4g4', 'a4h4', 'a4a1', 'a4a2', 'a4a3', 'a4a5', 'a4a6', 'a4a7', 'a4a8', 'a4b5', 'a4c6', 'a4d7', 'a4e8', 'a4b3', 'a4c2', 'a4d1', 'a4b2', 'a4c3', 'a4c5', 'a4b6', 'a5b5', 'a5c5', 'a5d5', 'a5e5', 'a5f5', 'a5g5', 'a5h5', 'a5a1', 'a5a2', 'a5a3', 'a5a4', 'a5a6', 'a5a7', 'a5a8', 'a5b6', 'a5c7', 'a5d8', 'a5b4', 'a5c3', 'a5d2', 'a5e1', 'a5b3', 'a5c4', 'a5c6', 'a5b7', 'a6b6', 'a6c6', 'a6d6',

In [10]:
!export PATH="/opt/local/bin:/opt/local/sbin:/Users/alng/anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/TeX/texbin:/opt/local/bin:/Applications/Visual Studio Code.app/Contents/Resources/app/bin/usr/local/lib/python3.6/site-packages"

In [15]:
!printenv


path_merged=/Users/alng/Documents/Datascience/TREK/data
VIRTUALENVWRAPPER_PROJECT_FILENAME=.project
TERM_PROGRAM=Apple_Terminal
VIRTUALENVWRAPPER_SCRIPT=/usr/local/bin/virtualenvwrapper.sh
SHELL=/bin/bash
TERM=xterm-color
CLICOLOR=1
TMPDIR=/var/folders/2c/xyn8g_s90939jkf_j_4c8ywc0000gn/T/
Apple_PubSub_Socket_Render=/private/tmp/com.apple.launchd.S4y57F2wAD/Render
TERM_PROGRAM_VERSION=361.1
TERM_SESSION_ID=7E3CC992-5F71-4DA8-9525-4E0151E571FC
USER=alng
SSH_AUTH_SOCK=/private/tmp/com.apple.launchd.BGSHftPR31/Listeners
__CF_USER_TEXT_ENCODING=0x1F5:0x0:0x0
JPY_PARENT_PID=43717
PAGER=cat
WORKON_HOME=/Users/alng/.virtualenvs
VIRTUALENVWRAPPER_PYTHON=/usr/local/bin/python3
PATH=/opt/local/bin:/opt/local/sbin:/Users/alng/anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/TeX/texbin:/opt/local/bin:/Applications/Visual Studio Code.app/Contents/Resources/app/bin
VIRTUALENVWRAPPER_HOOK_DIR=/Users/alng/.virtualenvs
_=/usr/bin/printenv
PWD=/Users/al

In [16]:
!export PATH="/opt/local/bin:/opt/local/sbin:/Users/alng/anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/TeX/texbin:/opt/local/bin:/Applications/Visual Studio Code.app/Contents/Resources/app/bin:/usr/local/lib/python3.6"

In [17]:
!echo $PATH

/opt/local/bin:/opt/local/sbin:/Users/alng/anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/TeX/texbin:/opt/local/bin:/Applications/Visual Studio Code.app/Contents/Resources/app/bin


In [21]:
python --version

NameError: name 'python' is not defined

In [22]:
import sys; sys.path

['',
 '/Users/alng/anaconda/envs/py36/lib/python36.zip',
 '/Users/alng/anaconda/envs/py36/lib/python3.6',
 '/Users/alng/anaconda/envs/py36/lib/python3.6/lib-dynload',
 '/Users/alng/anaconda/envs/py36/lib/python3.6/site-packages',
 '/Users/alng/anaconda/envs/py36/lib/python3.6/site-packages/IPython/extensions',
 '/Users/alng/.ipython']