# Bonus: Construct the dataset for the website

## 1. Library import 

In [1]:
import pandas as pd 

from pathlib import Path
import sys 

# Root of the project. This assumes the notebook is run from a direct
# sub-folder of the repository, so the parent of the working directory is the
# project root -- TODO confirm if the notebook is ever moved.
PROJECT_ROOT = Path.cwd().parent

# Make the local `src` package importable. The membership check keeps this
# cell idempotent: re-running it no longer appends duplicate entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))


from src.utils.config import (
    FIGURES_DIR,
    METRICS_DIR,
    MODELS_DIR
)

from src.data_loading.data_sample_pred import predict_on_sample
from src.utils.io import load_model
from src.utils.boards import download_boards


## 2. Data import 

We first import the dataset that we will use to show our results on the website. It contains the moves played by Magnus and his opponents in 5 different games. These games are not included in the training dataset.

In [2]:

# Read the sample of moves (stored as a CSV in the metrics directory) that
# will be shown on the website, then display it.
sample_csv_path = METRICS_DIR / "data_sample_magnus.csv"
df_samp = pd.read_csv(sample_csv_path)
df_samp

Unnamed: 0,ID_game,ID_move,PlayerName,PlayerSide,PlayerElo,PlayerTitle,MagnusElo,OpponentElo,OpponentTitle,OpponentName,...,FEN_after,OpponentTimeSpend,TimeSpend,time_spend_before,TimeRatio,TimePressure,MoveTimeFraction,IsCastling,IsPromotion,IsEnPassant
0,2291,1,"Carlsen, Magnus",1,3298.0,4,3298,2683.0,2,"Dowgird, Filip",...,rnbqkbnr/pppppppp/8/8/8/1P6/P1PPPPPP/RNBQKBNR ...,0.0,1.0,0.0,0.994475,0,0.005525,0,0,0
1,2291,2,"Dowgird, Filip",0,2683.0,2,3298,2683.0,2,"Dowgird, Filip",...,rnbqkbnr/ppp1pppp/8/3p4/8/1P6/P1PPPPPP/RNBQKBN...,1.0,3.0,0.0,0.994475,0,0.016575,0,0,0
2,2291,3,"Carlsen, Magnus",1,3298.0,4,3298,2683.0,2,"Dowgird, Filip",...,rnbqkbnr/ppp1pppp/8/3p4/8/1P6/PBPPPPPP/RN1QKBN...,3.0,2.0,1.0,1.005587,0,0.011050,0,0,0
3,2291,4,"Dowgird, Filip",0,2683.0,2,3298,2683.0,2,"Dowgird, Filip",...,rnbqkb1r/ppp1pppp/5n2/3p4/8/1P6/PBPPPPPP/RN1QK...,2.0,1.0,3.0,0.988889,0,0.005587,0,0,0
4,2291,5,"Carlsen, Magnus",1,3298.0,4,3298,2683.0,2,"Dowgird, Filip",...,rnbqkb1r/ppp1pppp/5n2/3p4/8/1P3N2/PBPPPPPP/RN1...,1.0,2.0,2.0,1.000000,0,0.011111,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347,4688,68,"Carlsen, Magnus",0,3221.0,0,3221,3258.0,0,"Nakamura, Hikaru",...,3r4/1p4kp/p5p1/4Np2/2P1p3/1PN5/4K3/8 w - - 2 35,1.0,14.0,1.0,9.400000,0,0.291667,0,0,0
348,4688,69,"Nakamura, Hikaru",1,3258.0,0,3221,3258.0,0,"Nakamura, Hikaru",...,3r4/1p4kp/p5p1/3NNp2/2P1p3/1P6/4K3/8 b - - 3 35,14.0,0.0,1.0,0.114286,1,0.000000,0,0,0
349,4688,70,"Carlsen, Magnus",0,3221.0,0,3221,3258.0,0,"Nakamura, Hikaru",...,3r4/1p4kp/p7/3NNpp1/2P1p3/1P6/4K3/8 w - - 0 36,0.0,5.0,14.0,5.666667,0,0.142857,0,0,0
350,4688,71,"Nakamura, Hikaru",1,3258.0,0,3221,3258.0,0,"Nakamura, Hikaru",...,3r4/1p4kp/p7/3NNpp1/2P1p3/1P2K3/8/8 b - - 1 36,5.0,4.0,0.0,0.161290,1,0.666667,0,0,0


In [3]:
# Load the saved time-spent model and check that loading works.
# `load_model` receives the models directory and the file name.
# NOTE(review): the .pkl extension suggests pickle deserialization -- only
# load model files that come from a trusted source.
model_filename = "random_forest_time_spent.pkl"
model_timespent = load_model(MODELS_DIR, model_filename)

Model loaded successfully


## 3. Create example dataset

### 3.0 Functions 

In [4]:
# Columns passed to `predict_on_sample` as the model's input features.
# NOTE(review): "Phase" is listed twice. The duplicate is kept deliberately,
# because the number and order of columns presumably has to match what the
# model was trained on -- confirm against the training code before removing it.
TRAIN_FEATURES = [
    "ID_move",
    "PlayerSide",
    "PlayerTimeLeft",
    "OpponentTimeLeft",
    "Phase",
    "Increment",
    "IsCapture",
    "IsCheck",
    "OppIsCaptured",
    "OppIsCheck",
    "NumLegalMoves",
    "OpponentTimeSpend",
    "TimeTotal",
    "Phase",
    "TimeRatio",
    "TimePressure",
    "IsCastling",
    "IsPromotion",
    "IsEnPassant",
]

# Columns kept in the dataset exported for the website.
FINAL_FEATURES = [
    "ID_game",
    "ID_move",
    "PlayerName",
    "PlayerSide",
    "FEN",
    "TimeSpend",
    "TimeSpendPred",
]

We use a function (`predict_on_sample`, imported above) to build the dataset that will showcase our results. For every move Magnus plays in the sample dataframe, this function attaches the time spent predicted by the model. Moves played by his opponents receive the value `NaN` instead.

### 3.1 Predict the time spent on our data sample

In [5]:
# Attach a prediction to every move of the sample. The model reads the
# TRAIN_FEATURES columns and the returned frame is restricted to FINAL_FEATURES
# (as the displayed result shows).
df_samp_pred = predict_on_sample(
    df_samp,
    model_timespent,
    TRAIN_FEATURES,
    FINAL_FEATURES,
)

# Preview the first 25 moves together with their predictions.
df_samp_pred.head(25)

Unnamed: 0,ID_game,ID_move,PlayerName,PlayerSide,FEN,TimeSpend,TimeSpendPred
0,2291,1,"Carlsen, Magnus",1,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,1.0,2.894973
1,2291,2,"Dowgird, Filip",0,rnbqkbnr/pppppppp/8/8/8/1P6/P1PPPPPP/RNBQKBNR ...,3.0,
2,2291,3,"Carlsen, Magnus",1,rnbqkbnr/ppp1pppp/8/3p4/8/1P6/P1PPPPPP/RNBQKBN...,2.0,0.993453
3,2291,4,"Dowgird, Filip",0,rnbqkbnr/ppp1pppp/8/3p4/8/1P6/PBPPPPPP/RN1QKBN...,1.0,
4,2291,5,"Carlsen, Magnus",1,rnbqkb1r/ppp1pppp/5n2/3p4/8/1P6/PBPPPPPP/RN1QK...,2.0,1.262474
5,2291,6,"Dowgird, Filip",0,rnbqkb1r/ppp1pppp/5n2/3p4/8/1P3N2/PBPPPPPP/RN1...,1.0,
6,2291,7,"Carlsen, Magnus",1,rn1qkb1r/ppp1pppp/5n2/3p1b2/8/1P3N2/PBPPPPPP/R...,1.0,1.072982
7,2291,8,"Dowgird, Filip",0,rn1qkb1r/ppp1pppp/5n2/3p1b2/7N/1P6/PBPPPPPP/RN...,3.0,
8,2291,9,"Carlsen, Magnus",1,rn1qkb1r/ppp1pppp/5nb1/3p4/7N/1P6/PBPPPPPP/RN1...,6.0,4.618
9,2291,10,"Dowgird, Filip",0,rn1qkb1r/ppp1pppp/5nb1/3p4/7N/1P4P1/PBPPPP1P/R...,2.0,


### 3.2 Create boards images

In [6]:
# Generate one board image per move from the FEN positions in `df_samp_pred`
# and save them under FIGURES_DIR / "boards" for use on the website.
# Judging by the messages it prints, the helper deletes previously generated
# board images before writing the new ones.
download_boards(df_samp_pred, FIGURES_DIR / "boards")

Old board images successfully deleted
New board images successfully generated
