In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import ElasticNetCV
import numpy as np
from sklearn.metrics import r2_score
import warnings
import math
# Suppress FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [57]:
# Load the newline-delimited JSON dataset (one record per line) into `df`.
df = pd.read_json(
    path_or_buf='data/curated/FIDE_MAIA_shifted.ndjson',
    lines=True,
)
# Alternative inputs, kept for reference:
# df = pd.read_json('data/curated/FIDE_ALL.ndjson', lines = True)
# df = pd.read_json("data/curated/FICS_MAIA.ndjson", lines = True)
# Optional blanket NaN fill (disabled):
#df = df.fillna(0)

In [58]:
def _keep_rows_with_sequence(frame, column, min_len=1):
    """Return the rows of `frame` whose `column` holds a list/str of at least `min_len` items.

    The type check runs before `len()`, so missing values (NaN/None) are
    dropped instead of raising ``TypeError``.
    """
    mask = frame[column].apply(
        lambda value: isinstance(value, (list, str)) and len(value) >= min_len
    )
    # astype(bool) keeps the mask boolean even when the frame is empty.
    return frame[mask.astype(bool)]


# ---- Row filtering; the row count is printed after each stage ----
print(len(df))

# Drop games where either player name is the "?" placeholder.
df = df[(df['white'] != "?") & (df['black'] != "?")]
print(len(df))

# Require more than 7 engine evaluations per game (i.e. at least 8).
# This also covers the plain "non-empty" requirement for this column.
df = _keep_rows_with_sequence(df, 'stockfish_eval', min_len=8)
print(len(df))

# Both players' centipawn-loss sequences must be present and non-empty.
for _seq_col in ('white_cpl', 'black_cpl'):
    df = _keep_rows_with_sequence(df, _seq_col)
print(len(df))

# Same requirement for the two Maia-based sequences.
for _seq_col in ('maia_cpl_w', 'maia_cpl_b'):
    df = _keep_rows_with_sequence(df, _seq_col)
print(len(df))

# Drop games whose result is "*". The explicit copy makes `df` an independent
# frame, so the column assignments below do not raise SettingWithCopyWarning.
df = df[df['result'] != "*"].copy()
print(len(df))

# ---- Type conversions ----
# Non-numeric ratings become NaN rather than raising.
df['white_elo'] = pd.to_numeric(df['white_elo'], errors='coerce')
df['black_elo'] = pd.to_numeric(df['black_elo'], errors='coerce')

for _cat_col in ("ECO", "Opening", "result"):
    df[_cat_col] = df[_cat_col].astype('category')

# ---- Targets ----
# NOTE: DataFrame.mean skips NaN by default, so when only one rating is
# missing, mean_elos equals the other rating while diff_elos is NaN.
df["mean_elos"] = df[['white_elo', 'black_elo']].mean(axis=1)
df["diff_elos"] = df['white_elo'] - df['black_elo']

# ---- Per-game summary statistics of each sequence column ----
# Maps the output-column prefix to the source column holding the sequence.
_SEQUENCE_SOURCES = {
    'white': 'white_cpl',
    'black': 'black_cpl',
    'stockfish': 'stockfish_eval',
    'maia_w': 'maia_cpl_w',
    'maia_b': 'maia_cpl_b',
}
_SUMMARY_STATS = {'mean': np.mean, 'std': np.std, 'min': np.min, 'max': np.max}

# Creates e.g. white_mean, white_std, white_min, white_max, black_mean, ...
for _prefix, _source_col in _SEQUENCE_SOURCES.items():
    for _stat_name, _stat_func in _SUMMARY_STATS.items():
        df[f'{_prefix}_{_stat_name}'] = df[_source_col].apply(_stat_func)

dep_var = "mean_elos" # 19955  (meaning of this number is not documented -- TODO confirm)

40030
32506
28935
28934
28933
28914


In [59]:
# df.to_json('data/curated/FIDE_MAIA_curated.ndjson', orient='records', lines=True)
# df = pd.read_json('data/curated/FIDE_MAIA_curated.ndjson', lines = True)

In [60]:
# Names of the numeric feature columns taken from `df`, in a fixed order:
# move/check counters, end-of-game piece counts, two game flags, then the
# mean/std/min/max summary of every per-move sequence.
cont = (
    ['queen_moved_at', 'total_checks', 'first_check_at', 'queen_changed_at', 'total_moves']
    + ['end_r', 'end_p', 'end_b', 'end_n', 'end_q']
    + ['end_N', 'end_P', 'end_Q', 'end_B', 'end_R']
    + ['promotion', 'can_claim_draw']
    + [
        f'{prefix}_{stat}'
        for prefix in ('white', 'black', 'stockfish', 'maia_w', 'maia_b')
        for stat in ('mean', 'std', 'min', 'max')
    ]
)

In [61]:
import joblib

# One-hot encode the game result; each distinct result becomes its own column.
results = pd.get_dummies(df["result"])

# Feature matrix: the numeric columns listed in `cont` next to the result
# indicators, with any missing value replaced by 0.
X = pd.concat([df[cont], results], axis=1).fillna(0)

# Regression targets: the mean rating and the rating difference of each game.
y1, y2 = df["mean_elos"], df["diff_elos"]

In [67]:
from joblib import load

# Restore the two previously saved estimators from the working directory.
# NOTE(review): joblib files are pickle-based -- only load files you trust.
rf_mean, rf_diff = (load(model_path) for model_path in ('rf_mean.pkl', 'rf_diff.pkl'))

In [68]:

# Feature columns passed to `rf_mean`. The order is significant and must stay
# exactly as listed (presumably the order used when the model was fitted --
# TODO confirm).
to_keep_mean = [
    "black_mean", "white_mean", "stockfish_mean", "stockfish_std",
    "white_std", "first_check_at", "black_std", "total_moves",
    "maia_w_mean", "white_min", "black_min", "stockfish_min",
    "stockfish_max", "maia_b_mean", "white_max", "maia_w_max",
    "maia_b_max", "maia_w_min", "maia_b_std", "maia_b_min",
    "black_max", "maia_w_std", "queen_moved_at", "queen_changed_at",
    "total_checks", "end_P", "end_p",
]


# Feature columns passed to `rf_diff`. Unlike the mean-rating list, this one
# includes the '0-1' and '1-0' result indicator columns. The order is
# significant and must stay exactly as listed (presumably the order used when
# the model was fitted -- TODO confirm).
to_keep_diff = [
    "0-1", "stockfish_mean", "stockfish_min", "stockfish_max",
    "1-0", "black_max", "white_mean", "black_mean",
    "white_std", "maia_b_mean", "maia_w_max", "black_std",
    "white_max", "maia_b_max", "stockfish_std", "maia_w_mean",
    "maia_b_min", "maia_w_std", "maia_b_std", "white_min",
    "black_min", "maia_w_min", "total_moves", "first_check_at",
    "queen_moved_at", "queen_changed_at", "total_checks", "end_p",
    "end_P",
]


In [70]:

# Run each loaded model on its own feature subset of X.
pred_mean, pred_diff = (
    rf_mean.predict(X[to_keep_mean]),
    rf_diff.predict(X[to_keep_diff]),
)

In [71]:
# Attach the predictions to the frame. `pred_diff` and `pred_mean` were
# computed in the preceding cell from the same models and the same feature
# columns, so they are reused here rather than running both models again.
df["pred_diff"] = pred_diff
df["pred_mean"] = pred_mean

In [72]:
# Split the predicted (mean, difference) pair into one rating per player:
# white gets mean + diff/2 and black gets mean - diff/2.
df["white_pred"] = df["pred_mean"].add(df["pred_diff"].div(2))
df["black_pred"] = df["pred_mean"].sub(df["pred_diff"].div(2))

In [74]:
# df.to_json('data/curated/FIDE_MAIA_PREDS.ndjson', orient='records', lines=True)
# df = pd.read_json('data/curated/FIDE_MAIA_PREDS.ndjson', lines=True)

In [75]:
# Actual ratings as plain Python lists, taken directly from the columns.
# This avoids a row-by-row `iterrows` loop, which builds a Series per row.
white_actual = df["white_elo"].tolist()
black_actual = df["black_elo"].tolist()

# All white ratings first, then all black ratings. `all_preds` is built the
# same way, so the two lists line up element by element.
all_actual = white_actual + black_actual

In [76]:
# Predicted ratings as plain Python lists, taken directly from the columns.
# This avoids a row-by-row `iterrows` loop, which builds a Series per row.
white_pred = df["white_pred"].tolist()
black_pred = df["black_pred"].tolist()

# All white predictions first, then all black predictions. `all_actual` is
# built the same way, so the two lists line up element by element.
all_preds = white_pred + black_pred

In [77]:
import numpy as np
from sklearn.metrics import mean_absolute_error

# Boolean masking needs ndarrays, so convert both sequences first.
all_actual, all_preds = np.array(all_actual), np.array(all_preds)

# Keep only the positions where neither the actual nor the predicted value is NaN.
mask = ~(np.isnan(all_actual) | np.isnan(all_preds))
all_actual_clean, all_preds_clean = all_actual[mask], all_preds[mask]

# Mean absolute error over all remaining (white and black) ratings.
mae = mean_absolute_error(y_true=all_actual_clean, y_pred=all_preds_clean)
print(mae)

346.89167030260666


In [78]:
# Number of rating/prediction pairs that survived the NaN mask.
all_actual_clean.shape[0]

46277