In [2]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

Kaggle Data set
https://www.kaggle.com/datasets/ronakbadhe/chess-evaluations

In [9]:
df = pd.read_csv('Data/chessData.csv')
df = df[:len(df)//2].copy()

In [11]:
print(df.head())

                                                 FEN Evaluation
0  rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...        -10
1  rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBN...        +56
2  rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBN...         -9
3  rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKB...        +52
4  rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPPN1PPP/R1BQK...        -26


In [10]:
def clean_evals(entry:str):
    if entry[0] == '#':
        return int(entry[1:])
    else:
        return int(entry)



In [12]:
df['Evaluation'] = df['Evaluation'].apply(clean_evals)

In [13]:
def convert_to_index(pos: str) -> int:
    if pos == "-":
        return 0
    file_map = {value: key for key, value in enumerate("abcdefgh")}
    col = file_map[pos[0]]
    row = int(pos[1]) - 1
    # print(f"col: {col}, row: {row}")
    return 1 << (row * 8) + col


In [14]:

def convert_fen(fen: str):
    piece_map = {value: 0 for key, value in enumerate("KQRBNPkqrbnp")}
    # print(f"Piece_Map: {piece_map}")
    ranks = fen.split("/")
    # print(f"ranks: {ranks}")
    curr_rank = 8
    while curr_rank > 0:
        r = ranks[8-curr_rank]
        file_index = 0
        # print(f"rank: {curr_rank-1} | {r}")
        for i in range(len(r)):
            c = r[i]
            if file_index >= 8:
                states = r[i:].split(" ")[1:]
                # print(f"States: {states}")
                piece_map["moves"] = 0 if states[0] == 'w' else 1
                piece_map["castle"] = [0 if c == "-" else 1 for c in states[1]] # KQkq
                piece_map["en_passant"] = convert_to_index(states[2])
                piece_map["half_clock"] = int(states[3])
                piece_map["full_clock"] = int(states[4])
                return piece_map
            else:
                if c.isdigit():
                    file_index += int(c)
                else:
                    # print(f"c: {c}")
                    piece_map[c] += (1 << ((curr_rank * 8) + file_index))
                    file_index += 1
        curr_rank -= 1
    return piece_map
                


In [15]:
df['Longs'] = df['FEN'].apply(convert_fen)
df = pd.concat([df, pd.json_normalize(df['Longs'])], axis=1)
expanded_cols = pd.DataFrame(df['castle'].tolist(), index=df.index)
expanded_cols.columns = [s for s in ["WhiteKing", "WhiteQueen", "BlackKing", "BlackQueen"]]
df = pd.concat([df.drop(columns=['castle']), expanded_cols], axis=1)
df = df.drop(['FEN', 'Longs'], axis=1)
print(df.head())

   Evaluation     K     Q      R     B       N             P  \
0         -10  4096  2048  33024  9216   16896   68735139840   
1          56  4096  2048  33024  9216   16896   68735139840   
2          -9  4096  2048  33024  9216   16896  103094353920   
3          52  4096  2048  33024  9216   16896  103094353920   
4         -26  4096  2048  33024  9216  540672  103094353920   

                       k                      q                       r  ...  \
0  295147905179352825856  147573952589676412928  2379629985508532158464  ...   
1  295147905179352825856  147573952589676412928  2379629985508532158464  ...   
2  295147905179352825856  147573952589676412928  2379629985508532158464  ...   
3  295147905179352825856  147573952589676412928  2379629985508532158464  ...   
4  295147905179352825856  147573952589676412928  2379629985508532158464  ...   

                        n                     p  moves  en_passant  \
0  1217485108864830406656  18374686479671623680      1          

In [34]:
print(df.columns)

df['WhiteQueen'] = df['WhiteQueen'].astype(int)
df['BlackKing'] = df['BlackKing'].astype(int)
df["BlackQueen"] = df['BlackQueen'].astype(int)

Index(['Evaluation', 'K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b', 'n',
       'p', 'moves', 'en_passant', 'half_clock', 'full_clock', 'WhiteKing',
       'WhiteQueen', 'BlackKing', 'BlackQueen'],
      dtype='object')


IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [24]:
X = df[['K', 'Q', 'R', 'B', 'N', 'P', 
      'k', 'q', 'r', 'b', 'n','p', 
      'moves', 'en_passant', 'half_clock', 'full_clock', 
      'WhiteKing', 'WhiteQueen', 'BlackKing', 'BlackQueen']]
y = df['Evaluation']
nfeats = len(X.columns)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
class Chess_Dataset(Dataset):
    def __init__(self, X_int, X_bin, y):
        self.positions = torch.tensor(X_int, dtype=torch.int64)
        self.bins = torch.tensor(X_bin, dtype=torch.int8)
        self.y = torch.tensor(y.values, dtype=torch.int32).unsqueeze(1)
        self.features = torch.cat((self.positions, self.bins), dim=1)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.features[idx], self.y[idx]

In [19]:
BATCH_SIZE = 16
CKPT_DIR = "CKPT"
LR = 0.001
EPOCHS = 10

In [27]:
print(X_train)

                    K             Q           R                      B  \
2709583          1024        262144       34816                1048576   
1818301         16384  274877906944   268437504          2199023255552   
870192            256        524288       12288               16777216   
5987642         16384  274877906944       10240  590295810358705651712   
5187323           512             0        6144           137438953472   
...               ...           ...         ...                    ...   
4926484  274877906944             0        1024                      0   
4304572         16384          2048        4608           137438953472   
1692743         16384        262144        8448           137440002048   
6423388     268435456             0  8589934592          2199023255552   
6413414         16384          2048        9216              269484032   

                      N                P                       k  \
2709583     34359738880    8801468153856  1

In [26]:
train_dataset = Chess_Dataset(
    X_int=X_train[['K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b', 'n','p','en_passant']], 
    X_bin=X_train[['moves', 'half_clock', 'full_clock','WhiteKing', 'WhiteQueen', 'BlackKing', 'BlackQueen']], 
    y=y_train)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\austi\CS\Austin_chess\myenv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\austi\CS\Austin_chess\myenv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\austi\CS\Austin_chess\myenv\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File "c:

ValueError: could not determine the shape of object type 'DataFrame'