In [1]:
%%capture capt
!pip install matplotlib
!pip install pandas
!pip install seaborn
!pip install scikit-learn
!pip install chess
!pip install tensorflow_addons

In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from collections import Counter
import time
import seaborn as sns
import math

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

import chess

# Load data

In [3]:
%run S3.ipynb

In [4]:
#moves_df = pd.read_csv("../Data/moves_df.csv")
moves_df = open_csv("moves_df.csv")

In [5]:
moves_df

Unnamed: 0,game_index,moves,evaluation,fen,zobrist_key
0,0,e2e4,35,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,9384546495678726550
1,0,e7e5,48,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,595762792459712928
2,0,g1f3,111,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...,15213300192948443293
3,0,b8c6,47,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...,8704797333742910878
4,0,f1b5,52,r1bqkbnr/pppp1ppp/2n5/1B2p3/4P3/5N2/PPPP1PPP/R...,5409798013178080797
...,...,...,...,...,...
9023518,118318,a8c8,-6,2rq1rk1/pp2bppp/2n1pn2/3p4/8/P1NP1BP1/1P1BPP1P...,13935396515866781493
9023519,118318,f3g2,-2,2rq1rk1/pp2bppp/2n1pn2/3p4/8/P1NP2P1/1P1BPPBP/...,18028698229637126573
9023520,118318,a7a6,12,2rq1rk1/1p2bppp/p1n1pn2/3p4/8/P1NP2P1/1P1BPPBP...,2937820813377462641
9023521,118318,a1c1,25,2rq1rk1/1p2bppp/p1n1pn2/3p4/8/P1NP2P1/1P1BPPBP...,11978245410268853311


# Create board representations

### Functions

In [6]:
def ohe_piece_index(piece):
    piece = piece.upper()
    if piece == "P":
        return 0
    if piece == "N":
        return 1
    if piece == "B":
        return 2
    if piece == "R":
        return 3
    if piece == "Q":
        return 4
    if piece == "K":
        return 5

def ohe_piece(piece):
    #lowercase = black pieces
    #uppercase = white pieces
    ohe = [0, 0, 0, 0, 0, 0]
    index = ohe_piece_index(piece)
    if piece == piece.lower():
        #is a black piece
        ohe[index] = -1
        return ohe
    ohe[index] = 1
    return ohe

In [7]:
def get_coord(square):
    row = 7 - square // 8
    col = square % 8
    return row, col

def fen_to_matrix(fen):
    matrix = np.zeros((8, 8, 6), dtype=np.uint8)
    pieces = chess.Board(fen).piece_map()
    for square in pieces.keys():
        row, col = get_coord(square)
        piece = pieces[square]
        piece_symbol = piece.symbol()
        matrix[row, col] = ohe_piece(piece_symbol)
    return matrix

### Multiprocessing

In [8]:
import multiprocessing
from multiprocessing import Pool

In [9]:
nb_cpu_cores = multiprocessing.cpu_count()

In [10]:
## we'll divide the fens into the nb of cpu cores we have, to use parallelize the creation of board representations

def get_starting_indices(len_data, divide_into):
    linspace = np.linspace(0, len_data, divide_into)
    indices = [math.floor(x) for x in linspace]
    return indices    

def get_lengths(len_data, indices):
    if len(indices) <= 1:
        return [len_data]
    lengths = [indices[1]]
    for i in range(2, len(indices)):
        lengths.append(indices[i]-indices[i-1])
    lengths.append(len_data-indices[-1])
    return lengths

In [11]:
def get_matrix_representations__subpart(all_fens, starting_index, length):
    fens = all_fens[starting_index:starting_index+length]
    X = np.zeros((len(fens), 8, 8, 6), dtype = np.uint8)
    for i, fen in enumerate(fens):
        X[i] = fen_to_matrix(fen)
    return X

def get_matrix_representations(df, total_size, divide_into=nb_cpu_cores):
    X = np.zeros((total_size, 8, 8, 6), dtype = np.uint8)
    fens = list(df.iloc[:total_size].fen)
    
    starting_indices = get_starting_indices(total_size, divide_into)
    lengths = get_lengths(total_size, starting_indices)

    with Pool() as pool:
        async_results = [pool.apply_async(get_matrix_representations__subpart,
                                        args = (fens.copy(), starting_indices[i], lengths[i])) for i in range(len(lengths))]

        for i in range(len(async_results)):
            start_index, length = starting_indices[i], lengths[i]
            #sub_X = async_results[i].get()
            X[start_index:start_index+length] = async_results[i].get()
    return X

In [12]:
total_size = 4_000_000
fens = list(moves_df.fen)[:total_size]

In [13]:
if __name__ == "__main__": 
    X = get_matrix_representations(moves_df, total_size, divide_into=nb_cpu_cores)

### Split into train/val/test sets

In [14]:
y = np.array(moves_df.iloc[:total_size].evaluation)

In [15]:
def train_val_test(dataX, dataY, train_ratio=0.75, validation_ratio=0.15, test_ratio=0.10):
    if train_ratio+test_ratio+validation_ratio != 1:
        print("Ratios do not add up to 1")
    else:
        x_train, x_test, y_train, y_test = train_test_split(dataX, dataY, test_size=1 - train_ratio)
        x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=test_ratio/(test_ratio + validation_ratio)) 

        return x_train, x_val, x_test, y_train, y_val, y_test

In [16]:
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test(X, y)

In [17]:
print(f"Training set shape : X_train -> {X_train.shape}, y_train -> {y_train.shape}")
print(f"Validation set shape : X_val -> {X_val.shape}, y_val -> {y_val.shape}")
print(f"Test set shape : X_test -> {X_test.shape}, y_train -> {y_test.shape}")

Training set shape : X_train -> (3000000, 8, 8, 6), y_train -> (3000000,)
Validation set shape : X_val -> (600000, 8, 8, 6), y_val -> (600000,)
Test set shape : X_test -> (400000, 8, 8, 6), y_train -> (400000,)


# Try Deep Learning

In [18]:
import tensorflow as tf
from keras.callbacks import EarlyStopping
from tensorflow.keras import datasets, layers, models
from tensorflow_addons.metrics import RSquare
from tensorflow.keras import regularizers

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #remove warnings caused by tensorflow

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")

2023-06-29 17:35:00.825439: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Version:  2.12.0
Eager mode:  True
GPU is available


In [19]:
model = models.Sequential()

model.add(layers.Conv2D(1024, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(8, 8, 6)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(512, (3, 3), activation='relu'))

model.add(layers.Flatten())

model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

model.add(layers.Dense(2048, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(1024, activation='relu'))

model.add(layers.BatchNormalization())
model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(1))

2023-06-29 17:36:25.993148: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13545 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:04:00.0, compute capability: 7.5


In [20]:
#model.build(X_train_us.shape)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 6, 6, 1024)        56320     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 3, 3, 1024)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 1, 1, 512)         4719104   
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 batch_normalization (BatchN  (None, 512)              2048      
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 512)               0

In [21]:
initial_learning_rate = 0.001
decay_steps = 1000
decay_rate = 0.9

# Define the learning rate schedule
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps, decay_rate)

model.compile(loss=tf.keras.losses.MeanSquaredError(),
          optimizer=tf.keras.optimizers.Adam(
              learning_rate=lr_schedule
              #learning_rate = 1e-4
          ),
          metrics=[RSquare()])

In [22]:
es = EarlyStopping(monitor='val_loss', patience=8, verbose=1)

callbacks = [es]

In [None]:
start=time.time()

history = model.fit(X_train, y_train, batch_size = 256, epochs=100, 
                    validation_data=(X_val, y_val), 
                    verbose=1, 
                    #callbacks=callbacks
                   )

stop=time.time()

Epoch 1/100


2023-06-29 17:36:38.980648: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8900
2023-06-29 17:36:40.297639: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f93a8013480 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-06-29 17:36:40.297678: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2023-06-29 17:36:40.305305: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-06-29 17:36:40.517072: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100

In [None]:
print(f"Took {round(stop-start, 2)}s to run. ")

In [None]:
def plot_graphs(history, metric):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, 'val_'+metric])
    
def plot_history(history):
    plt.figure(figsize=(10, 7))
    plt.subplot(1, 2, 1)
    plot_graphs(history, 'loss')
    #plt.ylim(None, 1)
    plt.subplot(1, 2, 2)
    plot_graphs(history, 'r_square')
    #plt.ylim(0, None)
    plt.show()

In [None]:
plot_history(history)

### Keras Tuner

Keras Tuner is the same idea as GridSearch, but in this case it is to try different NN designs to determine which one would be the best. 

In [32]:
import keras_tuner
from tensorflow.keras import layers
from tensorflow import keras

In [33]:
nb_classes = len(y_test_us_dl[0])

In [34]:
def build_model(hp):
    model = keras.Sequential()
    # Tune the number of layers.
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(
            layers.Dense(
                # Tune number of units separately.
                units=hp.Int(f"units_{i}", min_value=32, max_value=512, step=32),
                activation=hp.Choice("activation", ["elu", "selu", "relu"]),
            )
        )
        
    if hp.Boolean("dropout"):
        dropout_rate = hp.Float("dropout_rate", min_value=0.2, max_value=0.4, step = 0.1)
        model.add(layers.Dropout(rate=dropout_rate))
        
    if hp.Boolean("batchNormalization"):
        model.add(layers.BatchNormalization())
        
    if hp.Boolean("LSTM"):
        model.add(layers.Reshape((-1, 1)))
        lstm_dropout = hp.Float("dropout_rate", min_value=0.2, max_value=0.4, step = 0.1)
        model.add(layers.Bidirectional(tf.keras.layers.LSTM(64, dropout=lstm_dropout)))

    for i in range(hp.Int("num_layers", 0, 2)):
        model.add(
            layers.Dense(
                # Tune number of units separately.
                units=hp.Int(f"units_{i}", min_value=32, max_value=512, step=32),
                activation=hp.Choice("activation", ["elu", "selu"]),
            )
        )          
                  
    model.add(layers.Dense(nb_classes, activation="softmax"))


    learning_rate = hp.Float("lr", min_value=1e-5, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [44]:
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    #max_trials=5,
    executions_per_trial=3,
    overwrite=True,
    directory="prediction_models",
    project_name="tuner_search_chess",
)

In [45]:
tuner.search_space_summary()

Search space summary
Default search space size: 7
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
activation (Choice)
{'default': 'elu', 'conditions': [], 'values': ['elu', 'selu', 'relu'], 'ordered': False}
dropout (Boolean)
{'default': False, 'conditions': []}
batchNormalization (Boolean)
{'default': False, 'conditions': []}
LSTM (Boolean)
{'default': False, 'conditions': []}
lr (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [46]:
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, verbose=1)
callbacks = [es]

In [None]:
tuner.search(X_train_us, y_train_us_dl, epochs=30, validation_data=(X_test_us, y_test_us_dl), callbacks=callbacks)

Trial 7 Complete [00h 02m 49s]
val_accuracy: 0.20367111265659332

Best val_accuracy So Far: 0.2072751671075821
Total elapsed time: 00h 49m 24s

Search: Running Trial #8

Value             |Best Value So Far |Hyperparameter
1                 |1                 |num_layers
480               |96                |units_0
selu              |relu              |activation
True              |True              |dropout
False             |True              |batchNormalization
True              |False             |LSTM
5.3902e-05        |7.56e-05          |lr
0.3               |0.4               |dropout_rate
512               |None              |units_1
384               |None              |units_2

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 10: early stopping
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
 27/498 [>.............................] - 

We can see that the best accuracy that was found via the keras tuner search was 20%. We have 5 classes, so basically the neural network does not learn any pattern within the data, it just guesses every class for each move, and gets a 1/5 chance everytime to guess the right class. </br>
There is not enough information contained in the zobrist key to represent well enough a chess position for the neural network. From our research, we would need to work with matrices that represent a chess board and its pieces, and use convolutional neural networks to train our AI to understand what a goor or bad move is. 