In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

import io
import pickle
import os
import glob

import chess.pgn
import chess.polyglot

from sklearn.model_selection import train_test_split

## Import the database with moves and evaluations

In [2]:
moves_df = pd.read_csv("../Data/moves_df.csv", dtype={"fen": str, 'zobrist_key': str})
moves_df.fillna('', inplace=True)

In [3]:
moves_df

Unnamed: 0,game_index,moves,evaluation,fen,zobrist_key
0,0,e2e4,35,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,9384546495678726550
1,0,e7e5,48,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,595762792459712928
2,0,g1f3,111,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...,15213300192948443293
3,0,b8c6,47,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...,8704797333742910878
4,0,f1b5,52,r1bqkbnr/pppp1ppp/2n5/1B2p3/4P3/5N2/PPPP1PPP/R...,5409798013178080797
...,...,...,...,...,...
9023518,118318,a8c8,-6,2rq1rk1/pp2bppp/2n1pn2/3p4/8/P1NP1BP1/1P1BPP1P...,13935396515866781493
9023519,118318,f3g2,-2,2rq1rk1/pp2bppp/2n1pn2/3p4/8/P1NP2P1/1P1BPPBP/...,18028698229637126573
9023520,118318,a7a6,12,2rq1rk1/1p2bppp/p1n1pn2/3p4/8/P1NP2P1/1P1BPPBP...,2937820813377462641
9023521,118318,a1c1,25,2rq1rk1/1p2bppp/p1n1pn2/3p4/8/P1NP2P1/1P1BPPBP...,11978245410268853311


## Create the difference of evaluation for each move

In [4]:
def getDiffPerGame(game_index, index):
    diff = [moves_df.iloc[index].evaluation]
    index+=1
    while index < len(moves_df) and moves_df.iloc[index].game_index == game_index:
        diff_i = moves_df.iloc[index].evaluation - moves_df.iloc[index-1].evaluation
        diff.append(diff_i)
        index += 1
    return diff, index

In [34]:
nb_total_games = moves_df.iloc[-1].game_index+1

In [7]:
differences = []
index = 0
for game_index in tqdm(range(nb_total_games)):
    diff_game, index = getDiffPerGame(game_index, index)
    differences += diff_game

100%|██████████████████████████████████████████████████████████████████████████| 118318/118318 [41:23<00:00, 47.63it/s]


In [8]:
moves_df['eval_difference'] = pd.Series(differences)

In [58]:
q0, q1, q2 = np.quantile(moves_df.eval_difference, [0.25, 0.5, 0.75])

In [59]:
q0, q1, q2

(-27.0, 1.0, 29.0)

In [64]:
move_class = []
for i in tqdm(range(len(moves_df))):
    diff = moves_df.iloc[i].eval_difference
    if diff <= q0:
        move_class.append(0)
    elif diff <= q1:
        move_class.append(1)
    elif diff <= q2:
        move_class.append(2)
    else:
        move_class.append(3)

100%|█████████████████████████████████████████████████████████████████████| 9023523/9023523 [14:14<00:00, 10560.83it/s]


In [65]:
moves_df["move_class"] = pd.Series(move_class)

In [66]:
moves_df

Unnamed: 0,game_index,moves,evaluation,fen,zobrist_key,eval_difference,move_class
0,0,e2e4,35,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,9384546495678726550,35.0,3
1,0,e7e5,48,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,595762792459712928,13.0,2
2,0,g1f3,111,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...,15213300192948443293,63.0,3
3,0,b8c6,47,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...,8704797333742910878,-64.0,0
4,0,f1b5,52,r1bqkbnr/pppp1ppp/2n5/1B2p3/4P3/5N2/PPPP1PPP/R...,5409798013178080797,5.0,2
...,...,...,...,...,...,...,...
9023518,118318,a8c8,-6,2rq1rk1/pp2bppp/2n1pn2/3p4/8/P1NP1BP1/1P1BPP1P...,13935396515866781493,-6.0,1
9023519,118318,f3g2,-2,2rq1rk1/pp2bppp/2n1pn2/3p4/8/P1NP2P1/1P1BPPBP/...,18028698229637126573,4.0,2
9023520,118318,a7a6,12,2rq1rk1/1p2bppp/p1n1pn2/3p4/8/P1NP2P1/1P1BPPBP...,2937820813377462641,14.0,2
9023521,118318,a1c1,25,2rq1rk1/1p2bppp/p1n1pn2/3p4/8/P1NP2P1/1P1BPPBP...,11978245410268853311,13.0,2


In [67]:
moves_df.to_csv("../Data/full_moves_df.csv", index=False)

In [5]:
moves_df = pd.read_csv("../Data/full_moves_df.csv")

## Create Sets

In [11]:
X = []
y = []
for i in tqdm(range(len(moves_df))):
    move = hash(moves_df.iloc[i].moves)
    fen = hash(moves_df.iloc[i].fen)
    zobrist = moves_df.iloc[i].zobrist_key
    eval_ = moves_df.iloc[i]['evaluation']
    
    move_class = moves_df.iloc[i].move_class
    
    X.append([move, fen, zobrist, eval_])
    y.append(move_class)

100%|████████████████████████████████████████████████████████████████████| 9023523/9023523 [1:35:15<00:00, 1578.71it/s]


In [12]:
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

## Random Forests

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [18]:
clf = RandomForestClassifier(n_estimators=200, n_jobs=-1)
clf.fit(X_train, y_train)

MemoryError: Unable to allocate 27.5 MiB for an array with shape (7218818, 1) and data type int32

In [None]:
prediction = clf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
acc = accuracy_score(prediction, y_test)
print(f"Accuracy = {round(100*acc, 2)}%")

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
cf_matrix = confusion_matrix(y_test, prediction)

In [None]:
fig = plt.figure(figsize=(15,10))
ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt=".2f")

ax.set_title('Matrice de Confusion avec labels \n');
ax.set_xlabel('\nValeurs Prédites')
ax.set_ylabel('Valeurs Réelles');
#for t in ax.texts: t.set_text(t.get_text() + " %")
## Ticket labels - List must be in alphabetical order
#ax.xaxis.set_ticklabels(['-', '0', '+' ])
#ax.yaxis.set_ticklabels(['-', '0', '+'])

## Display the visualization of the Confusion Matrix.
plt.show()