In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
for folder, _, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/train-a-strong-stockfish-wdl-model/sample_submission.csv
/kaggle/input/train-a-strong-stockfish-wdl-model/shuffled_fens.csv
/kaggle/input/train-a-strong-stockfish-wdl-model/test.csv


In [2]:
# Read the shuffled FEN file into `df`, then show its column names as a
# quick schema check.
TRAIN_CSV = '/kaggle/input/train-a-strong-stockfish-wdl-model/shuffled_fens.csv'
df = pd.read_csv(TRAIN_CSV)
df.columns

Index(['FEN', 'WDL'], dtype='object')

In [3]:
# A notebook cell only renders its LAST expression, so the random sample and
# the head were previously computed and thrown away. `display` (provided by
# the IPython kernel) renders them explicitly; the final bare expression
# still shows the distinct WDL label values.
display(df.sample(10))
display(df.head())
df['WDL'].unique()

array([1. , 0. , 0.5])

In [4]:
!pip install python-chess

Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147775 sha256=27535768f5477d578e2e403382c975d796974e716f049e4ee91adfd9e863a00a
  Stored in directory: /root/.cache/pip/wheels/fb/5d/5c/59a62d8a695285e59ec9c1f66add6f8a9ac4152499a2be0113
Successfully built chess
Installing collected packages: chess, python-chess
Successfully installed chess-1.11.2 python-chess-1.999


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import chess
import chess.engine
import chess.pgn

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error
from joblib import Parallel, delayed

In [6]:
# Sanity check: build a board from one FEN string and print its text diagram.
example_board = chess.Board('1r6/R3Pk2/6p1/2KB3p/5P2/8/8/8 b - - 5 50')
print(example_board)

. r . . . . . .
R . . . P k . .
. . . . . . p .
. . K B . . . p
. . . . . P . .
. . . . . . . .
. . . . . . . .
. . . . . . . .


In [7]:
def parse_fen(fen):
    """Convert one FEN string into a flat dictionary of board features.

    The FEN is loaded into a ``chess.Board`` and summarised as per-colour
    piece counts, castling rights, total piece counts, the board status,
    the full-move number, the side to move and the eight '/'-separated
    fields of the piece-placement string.

    Args:
        fen: FEN string accepted by ``chess.Board``.

    Returns:
        dict mapping feature name to value. ``'FEN'`` holds only the
        piece-placement part (``board_fen()``), not the full input string.
        Keys are inserted in a fixed order, which becomes the column order
        when a list of these dicts is turned into a DataFrame.
    """
    position = chess.Board(fen)
    # Piece-placement field only, e.g. "8/R7/5k2/2pqp3/6n1/8/8/1K6".
    placement = position.board_fen()
    white, black = chess.WHITE, chess.BLACK

    def count(piece_type, color):
        # Number of pieces of this type and colour on the board.
        return len(position.pieces(piece_type, color))

    features = {'FEN': placement}

    # Material: one count per piece type and colour (no separate king count).
    features.update({
        'num_white_pawns': count(chess.PAWN, white),
        'num_black_pawns': count(chess.PAWN, black),
        'num_white_knights': count(chess.KNIGHT, white),
        'num_black_knights': count(chess.KNIGHT, black),
        'num_white_bishops': count(chess.BISHOP, white),
        'num_black_bishops': count(chess.BISHOP, black),
        'num_white_rooks': count(chess.ROOK, white),
        'num_black_rooks': count(chess.ROOK, black),
        'num_white_queens': count(chess.QUEEN, white),
        'num_black_queens': count(chess.QUEEN, black),
    })

    # Castling options, stored as 0/1 flags.
    features.update({
        'can_white_cstl_ks': int(position.has_kingside_castling_rights(white)),
        'can_white_cstl_qs': int(position.has_queenside_castling_rights(white)),
        'can_black_cstl_ks': int(position.has_kingside_castling_rights(black)),
        'can_black_cstl_qs': int(position.has_queenside_castling_rights(black)),
    })

    # Total pieces for each color, summed over every entry of chess.PIECE_TYPES.
    features['num_white_pieces'] = sum(count(kind, white) for kind in chess.PIECE_TYPES)
    features['num_black_pieces'] = sum(count(kind, black) for kind in chess.PIECE_TYPES)

    # Raw result of board.status(); 0 means no issues.
    # NOTE(review): python-chess documents this as a bitmask of chess.Status
    # flags rather than decimal digits - confirm before interpreting values.
    features['status'] = position.status()
    # Full-move number taken from the FEN.
    features['round'] = position.fullmove_number
    # Next turn. 1-white, 0-black
    features['is_white_to_move'] = int(position.turn)

    # The eight fields of the placement string, in the order they appear
    # (row1 is the first field).
    ranks = placement.split("/")
    row_keys = ("row1", "row2", "row3", "row4", "row5", "row6", "row7", "row8")
    for idx, key in enumerate(row_keys):
        features[key] = ranks[idx]

    return features

In [8]:
# Parallelize FEN parsing
def parse_fen_parallel(fen_list, n_jobs=-1):
    """Run ``parse_fen`` on every FEN in ``fen_list`` using joblib workers.

    Args:
        fen_list: iterable of FEN strings.
        n_jobs: forwarded to ``joblib.Parallel`` (default -1).

    Returns:
        Whatever ``joblib.Parallel`` returns for the submitted tasks: one
        ``parse_fen`` result per input FEN.
    """
    tasks = (delayed(parse_fen)(one_fen) for one_fen in fen_list)
    runner = Parallel(n_jobs=n_jobs)
    return runner(tasks)

# Parse the first 100,000 rows of the shuffled file in parallel and collect
# the per-position feature dicts into one DataFrame.
# Larger subset, kept for reference:
# fen_list = df[0:1000000]['FEN'].tolist()
fen_list = df['FEN'].iloc[:100000]
fen_features = parse_fen_parallel(fen_list)
features_df = pd.DataFrame(data=fen_features)

features_df.head()

Unnamed: 0,FEN,num_white_pawns,num_black_pawns,num_white_knights,num_black_knights,num_white_bishops,num_black_bishops,num_white_rooks,num_black_rooks,num_white_queens,...,round,is_white_to_move,row1,row2,row3,row4,row5,row6,row7,row8
0,r1b4r/1p3kb1/p2pp1p1/3q3p/3N1Pp1/2P3R1/PP1Q1BP...,6,7,1,0,1,2,2,2,1,...,23,1,r1b4r,1p3kb1,p2pp1p1,3q3p,3N1Pp1,2P3R1,PP1Q1BPP,4R1K1
1,r1bqkb1r/pp3ppp/2np1n2/8/2BNP3/8/PP3PPP/RNBQK2R,6,6,2,2,2,2,2,2,1,...,4,1,r1bqkb1r,pp3ppp,2np1n2,8,2BNP3,8,PP3PPP,RNBQK2R
2,8/R7/5k2/2pqp3/6n1/8/8/1K6,0,2,0,1,0,0,1,0,0,...,50,0,8,R7,5k2,2pqp3,6n1,8,8,1K6
3,1r6/R3Pk2/6p1/2KB3p/5P2/8/8/8,2,2,0,0,1,0,1,1,0,...,50,0,1r6,R3Pk2,6p1,2KB3p,5P2,8,8,8
4,2RB2bk/p2n3p/4Np2/8/2B3P1/1P3p1P/1P3P1K/2br4,5,4,1,1,2,2,1,1,0,...,35,1,2RB2bk,p2n3p,4Np2,8,2B3P1,1P3p1P,1P3P1K,2br4


In [9]:
# Model inputs are picked by column NAME so the selection keeps working if
# features_df gains, loses or reorders columns (a missing column now raises
# KeyError instead of silently shifting the slice). The list reproduces
# exactly the 18 columns the former positional slice iloc[:, 1:19] selected.
# NOTE(review): 'is_white_to_move' sat just past that slice and is therefore
# NOT used as a feature; append it here if the omission was unintended (any
# code that builds inputs for prediction must then use the same list).
FEATURE_COLUMNS = [
    'num_white_pawns', 'num_black_pawns',
    'num_white_knights', 'num_black_knights',
    'num_white_bishops', 'num_black_bishops',
    'num_white_rooks', 'num_black_rooks',
    'num_white_queens', 'num_black_queens',
    'can_white_cstl_ks', 'can_white_cstl_qs',
    'can_black_cstl_ks', 'can_black_cstl_qs',
    'num_white_pieces', 'num_black_pieces',
    'status', 'round',
]
X = features_df.loc[:, FEATURE_COLUMNS]
# Kept disabled: casting WDL to int would truncate the 0.5 label to 0.
# y = df[0:100000]['WDL'].astype(int)

In [10]:
# Map WDL values to integer class ids. WDL takes the values 1, 0 and 0.5;
# 1 and 0 keep their value and 0.5 becomes class 2.
target_encoding = {1: 1, 0: 0, .5: 2}

# Same first 100,000 rows that were used to build the features.
wdl_subset = df['WDL'].iloc[:100000]
# y is a single-column DataFrame named 'target'.
y = wdl_subset.map(target_encoding).to_frame(name='target')

print(y.sample(10))

       target
86195       1
35867       1
60519       0
56484       1
42832       1
95300       2
83968       1
46933       2
90014       1
21715       0


In [12]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Hold out 20% of the rows for evaluation; the seed is fixed for repeatability.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Three hidden layers of 100 units trained with plain SGD. The very small
# tolerance makes early stopping on loss plateau unlikely, so training may run
# up to max_iter epochs. verbose=10 prints the loss after every iteration.
# NOTE(review): the inputs are fed unscaled; scikit-learn's MLP guidance
# recommends standardising features - consider StandardScaler (and apply the
# same transform wherever clf.predict is called).
clf = MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=500, alpha=0.0001,
                    solver='sgd', verbose=10, random_state=21, tol=1e-9)

# y_train is a single-column DataFrame; scikit-learn expects a 1-D target and
# otherwise emits a DataConversionWarning, so flatten it for fit(). y_train
# and y_test themselves are left unchanged for any later use.
clf.fit(x_train, y_train.to_numpy().ravel())

y_pred = clf.predict(x_test)

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 0.97166185
Iteration 2, loss = 0.92094040
Iteration 3, loss = 0.90642231
Iteration 4, loss = 0.89304108
Iteration 5, loss = 0.88897182
Iteration 6, loss = 0.88216278
Iteration 7, loss = 0.87691416
Iteration 8, loss = 0.87308160
Iteration 9, loss = 0.86786501
Iteration 10, loss = 0.86439778
Iteration 11, loss = 0.85985916
Iteration 12, loss = 0.85736701
Iteration 13, loss = 0.85670199
Iteration 14, loss = 0.85481775
Iteration 15, loss = 0.85127888
Iteration 16, loss = 0.85450441
Iteration 17, loss = 0.84971423
Iteration 18, loss = 0.85010298
Iteration 19, loss = 0.84587945
Iteration 20, loss = 0.84553303
Iteration 21, loss = 0.84411184
Iteration 22, loss = 0.84188226
Iteration 23, loss = 0.84065795
Iteration 24, loss = 0.84259747
Iteration 25, loss = 0.84160993
Iteration 26, loss = 0.83889681
Iteration 27, loss = 0.83948943
Iteration 28, loss = 0.83825262
Iteration 29, loss = 0.83615965
Iteration 30, loss = 0.83825291
Iteration 31, loss = 0.83768887
Iteration 32, los