In [2]:
import chess.engine
import chess 
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import chess.pgn 
from stockfish import Stockfish
import pandas as pd
import time
import numpy as np
import json
import regex as re

In [121]:
path = "../data/Landing/fics_blitz_2010.ndjson"
df = pd.read_json(path, lines=True)
df.head(2)


Unnamed: 0,Event,Site,Date,Round,White,Black,Result,Moves,Move_times,WhiteElo,BlackElo
0,FICS rated blitz game,FICS freechess.org,2010-01-31,?,dcbadcba,Nelanie,1-0,"[d2d4, h7h6, c2c4, d7d6, b1c3, c7c6, e2e4, d8a...","[0.0, 0.0, 0.844, 0.581, 1.0, 0.581, 1.063, 0....",1454,1393
1,FICS rated blitz game,FICS freechess.org,2010-01-31,?,callipygian,ZlatkoVasilkoski,0-1,"[e2e4, e7e5, d2d4, e5d4, d1d4, b8c6, d4d5, g8f...","[0.0, 0.0, 0.1, 1.6019999999999999, 0.1, 1.671...",1303,1297


In [58]:
print(len(df))

14827


Here we are sampling our data, for now 15,000 games would be too much. 

I put a bit of extra preference on higher elo player as I think low elo players would not likely be playing in a tornament. 

In [59]:
# Assuming df is your DataFrame

# Define the number of bins
num_bins = 20  # Adjust as needed to get more or fewer bins

# Create bins based on the range of WhiteElo
df['EloBin'] = pd.cut(df['WhiteElo'], bins=np.linspace(700, 2500, num_bins+1))


# Calculate the number of samples per bin
total_samples = 1000
samples_per_bin = total_samples // num_bins

# Stratified sampling
sampled_df = df.groupby('EloBin', group_keys=False).apply(
    lambda x: x.sample(n=min(len(x), samples_per_bin), random_state=42)
)

# Optional: Shuffle the sampled_df to mix the data
sampled_df = sampled_df.sample(frac=1, random_state=42).reset_index(drop=True)

  sampled_df = df.groupby('EloBin', group_keys=False).apply(


In [62]:
# Dropping empty rows
print(len(sampled_df))
drop_na = ["WhiteElo", "BlackElo", "Move_times", "Moves"]
sampled_df.dropna(subset=drop_na, inplace=True)
print(len(sampled_df))

879
879


In [None]:
# Executing the maia cpl
import maia_functions as mf
stockfish=Stockfish("../stockfish/stockfish-windows-x86-64-avx2.exe")
stockfish.set_depth(12)#How deep the AI looks

sampled_df["maia_cpl_w"] = 0
sampled_df["maia_cpl_b"] = 0

starttime = time.time()
for index, row in sampled_df.iterrows():
    
    cpls = mf.maia_cpl(row, stockfish)
    sampled_df.loc[index, "maia_cpl_w"] = cpls[0]
    sampled_df.loc[index, "maia_cpl_b"] = cpls[1]
endtime = time.time()
print(f"time taken: {endtime - starttime}")

In [69]:
# Getting rid of none rows
print(len(sampled_df))
drop_na = ["maia_cpl_b", "maia_cpl_w"]
sampled_df.dropna(subset=drop_na, inplace=True)
print(len(sampled_df))

879
877


In [3]:
# Executing the stockfish cpl
# Time taken 48 minutes
import stockfish_functions as sf

sampled_df["stock_cpl_w"] = 0
sampled_df["stock_cpl_b"] = 0

starttime = time.time()
for index, row in sampled_df.iterrows():
    
    cpls = sf.stockfish_cpl(row)
    sampled_df.loc[index, "stock_cpl_w"] = cpls[0]
    sampled_df.loc[index, "stock_cpl_b"] = cpls[1]
endtime = time.time()
print(f"time taken: {endtime - starttime}")


  sampled_df.loc[index, "stock_cpl_w"] = cpls[0]
  sampled_df.loc[index, "stock_cpl_b"] = cpls[1]


time taken: 2853.3141565322876


In [109]:
sampled_df["avg_mt_w"] = -1
sampled_df["avg_mt_b"] = -1

starttime = time.time()
for index, row in sampled_df.iterrows():

    sampled_df.loc[index, "avg_mt_w"] = np.mean([row["Move_times"][j] for j in range(6, len(row["Move_times"]), 2)])
    sampled_df.loc[index, "avg_mt_b"] = np.mean([row["Move_times"][j] for j in range(7, len(row["Move_times"]), 2)])

endtime = time.time()
print(f"time taken: {endtime - starttime}")

time taken: 0.3086581230163574


  sampled_df.loc[index, "avg_mt_w"] = np.mean([row["Move_times"][j] for j in range(6, len(row["Move_times"]), 2)])
  sampled_df.loc[index, "avg_mt_b"] = np.mean([row["Move_times"][j] for j in range(7, len(row["Move_times"]), 2)])


In [None]:
output_file = '../data/Raw/fics_maia_cpl.ndjson'

# Write DataFrame to NDJSON
with open(output_file, 'w') as file:
    for _, row in sampled_df.iterrows():
        # Convert each row to a JSON object and write to file
        row_json = row.to_json()
        file.write(row_json + '\n')

Below we look at some of the distributions of our features

In [126]:
path = '../data/Raw/fics_maia_cpl.ndjson'
sampled_df = pd.read_json(path, lines=True)

Now we repeat with the wuocc dataset

In [3]:
path = "..\data\Raw\wuocc-blitz-div-d_raw.ndjson"
wuocc_df = pd.read_json(path, lines=True)

In [4]:
print(len(wuocc_df))
wuocc_df = wuocc_df[wuocc_df['Moves'].apply(len) >= 7]
print(len(wuocc_df))

437
425


In [None]:
# Executing the maia cpl
import maia_functions as mf
stockfish=Stockfish("../stockfish/stockfish-windows-x86-64-avx2.exe")
stockfish.set_depth(12)#How deep the AI looks

wuocc_df["maia_cpl_w"] = 0
wuocc_df["maia_cpl_b"] = 0

err_count = 0

starttime = time.time()
for index, row in wuocc_df.iterrows():
    try:
        cpls = mf.maia_cpl(row, stockfish)
        wuocc_df.loc[index, "maia_cpl_w"] = cpls[0]
        wuocc_df.loc[index, "maia_cpl_b"] = cpls[1]
    except:
        print(row["Moves"])
        print(index)
        err_count += 1
        
endtime = time.time()
print(f"time taken: {endtime - starttime}")

In [8]:
# Executing the stockfish cpl
# Time taken 48 minutes
import stockfish_functions as sf

wuocc_df["stock_cpl_w"] = 0
wuocc_df["stock_cpl_b"] = 0

starttime = time.time()
for index, row in wuocc_df.iterrows():
    
    cpls = sf.stockfish_cpl(row)
    wuocc_df.loc[index, "stock_cpl_w"] = cpls[0]
    wuocc_df.loc[index, "stock_cpl_b"] = cpls[1]
endtime = time.time()
print(f"time taken: {endtime - starttime}")


  wuocc_df.loc[index, "stock_cpl_w"] = cpls[0]
  wuocc_df.loc[index, "stock_cpl_b"] = cpls[1]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


time taken: 1469.4982805252075


In [20]:
output_file = '../data/Raw/wuocc_blitz_raw.ndjson'

# Write DataFrame to NDJSON
with open(output_file, 'w') as file:
    for _, row in wuocc_df.iterrows():
        # Convert each row to a JSON object and write to file
        row_json = row.to_json()
        file.write(row_json + '\n')

In [3]:
path = "..\data\Raw\wuocc_blitz_raw.ndjson"
wuocc_df = pd.read_json(path, lines=True)

path = '../data/Raw/fics_maia_cpl.ndjson'
sampled_df = pd.read_json(path, lines=True)

In [5]:
sampled_df.columns

Index(['Event', 'Site', 'Date', 'Round', 'White', 'Black', 'Result', 'Moves',
       'Move_times', 'WhiteElo', 'BlackElo', 'EloBin', 'maia_cpl_w',
       'maia_cpl_b', 'stock_cpl_w', 'stock_cpl_b', 'avg_mt_w', 'avg_mt_b'],
      dtype='object')

In [9]:
print(wuocc_df.columns)
print(sampled_df.columns)

Index(['Event', 'Site', 'Date', 'Round', 'White', 'Black', 'Result', 'Moves',
       'WhiteElo', 'BlackElo', 'maia_cpl_w', 'maia_cpl_b', 'stock_cpl_w',
       'stock_cpl_b', 'avg_mt_w', 'avg_mt_b'],
      dtype='object')
Index(['Event', 'Site', 'Date', 'Round', 'White', 'Black', 'Result', 'Moves',
       'Move_times', 'WhiteElo', 'BlackElo', 'EloBin', 'maia_cpl_w',
       'maia_cpl_b', 'stock_cpl_w', 'stock_cpl_b', 'avg_mt_w', 'avg_mt_b'],
      dtype='object')


In [4]:
curated = []
for index, row in sampled_df.iterrows():
    curated.append({
        "name": row["White"],
        "maia_cpl": row["maia_cpl_w"],
        "stockfish_cpl": row["stock_cpl_w"],
        "elo": row["WhiteElo"],
        "moves": len(row["Moves"])
    })

    curated.append({
        "name": row["Black"],
        "maia_cpl": row["maia_cpl_b"],
        "stockfish_cpl": row["stock_cpl_b"],
        "elo": row["BlackElo"],
        "moves": len(row["Moves"])
    })

In [38]:
out = df = pd.DataFrame(curated)
df.to_csv("../data/Curated/baseline_training.csv", index=False)

In [40]:
curated = []
for index, row in wuocc_df.iterrows():
    curated.append({
        "name": row["White"],
        "maia_cpl": row["maia_cpl_w"],
        "stockfish_cpl": row["stock_cpl_w"],
        "elo": row["WhiteElo"],
        "moves": len(row["Moves"])
    })

    curated.append({
        "name": row["Black"],
        "maia_cpl": row["maia_cpl_b"],
        "stockfish_cpl": row["stock_cpl_b"],
        "elo": row["BlackElo"],
        "moves": len(row["Moves"])
    })

In [42]:
out = df = pd.DataFrame(curated)
df.to_csv("../data/Curated/wuocc_blitz_div-d.csv", index=False)

## Potential new features


https://www.pawnalyze.com/elocator




