In [1]:
import pandas as pd

In [12]:
# Load the newline-delimited JSON file (one JSON record per line) into a frame.
FIDE_PREDS_PATH = 'data/curated/FIDE_MAIA_PREDS.ndjson'
fide = pd.read_json(FIDE_PREDS_PATH, lines=True)

In [13]:
# List the column names of the loaded frame.
fide.columns

Index(['event', 'round', 'white', 'black', 'result', 'moves', 'white_elo',
       'black_elo', 'ECO', 'Opening', 'white_cpl', 'black_cpl',
       'stockfish_eval', 'queen_moved_at', 'queen_changed_at', 'total_checks',
       'first_check_at', 'total_moves', 'end_r', 'end_k', 'end_p', 'end_Q',
       'end_b', 'end_P', 'end_R', 'end_K', 'end_n', 'end_q', 'end_B', 'end_N',
       'promotion', 'can_claim_draw', 'insufficient_material', 'maia_cpl_w',
       'maia_cpl_b', 'mean_elos', 'diff_elos', 'white_mean', 'white_std',
       'white_min', 'white_max', 'black_mean', 'black_std', 'black_min',
       'black_max', 'stockfish_mean', 'stockfish_std', 'stockfish_min',
       'stockfish_max', 'maia_w_mean', 'maia_w_std', 'maia_w_min',
       'maia_w_max', 'maia_b_mean', 'maia_b_std', 'maia_b_min', 'maia_b_max',
       'pred_diff', 'pred_mean', 'white_pred', 'black_pred'],
      dtype='object')

In [14]:
# Reshape the frame from one row per game into one record per player:
# every game contributes a white record immediately followed by a black one.
#
# Each entry is (output key, source column for white, source column for black).
# "is_white" has no source column; it carries the colour flag (1 or 0) instead.
player_fields = [
    ("name", "white", "black"),
    ("event", "event", "event"),
    ("opening", "Opening", "Opening"),
    ("elo", "white_elo", "black_elo"),
    ("stockfish_mean", "white_mean", "black_mean"),
    ("stockfish_min", "white_min", "black_min"),
    ("stockfish_max", "white_max", "black_max"),
    ("stockfish_std", "white_std", "black_std"),
    ("maia_mean", "maia_w_mean", "maia_b_mean"),
    ("maia_min", "maia_w_min", "maia_b_min"),
    ("maia_max", "maia_w_max", "maia_b_max"),
    ("maia_std", "maia_w_std", "maia_b_std"),
    ("result", "result", "result"),
    ("is_white", None, None),
    ("moves", "moves", "moves"),
    ("pred", "white_pred", "black_pred"),
]

games = []
for _, game in fide.iterrows():
    # side_slot indexes into a player_fields entry: 1 -> white, 2 -> black.
    for side_slot, colour_flag in ((1, 1), (2, 0)):
        record = {}
        for field in player_fields:
            source = field[side_slot]
            record[field[0]] = colour_flag if source is None else game[source]
        games.append(record)

In [15]:
# Build the per-player frame from the list of record dicts (one row per record).
df = pd.DataFrame(data=games)

In [16]:
# List the column names of the frame built from `games`.
df.columns

Index(['name', 'event', 'opening', 'elo', 'stockfish_mean', 'stockfish_min',
       'stockfish_max', 'stockfish_std', 'maia_mean', 'maia_min', 'maia_max',
       'maia_std', 'result', 'is_white', 'moves', 'pred'],
      dtype='object')

In [21]:
# Summarise each (name, event) pair: mean/std/min/max of 'pred' and
# mean/std of 'stockfish_mean' across that pair's rows in df.
player_event_stats = {
    "pred_avg": pd.NamedAgg(column="pred", aggfunc="mean"),
    "pred_std": pd.NamedAgg(column="pred", aggfunc="std"),
    "pred_min": pd.NamedAgg(column="pred", aggfunc="min"),
    "pred_max": pd.NamedAgg(column="pred", aggfunc="max"),
    "stockfish_mean_avg": pd.NamedAgg(column="stockfish_mean", aggfunc="mean"),
    "stockfish_mean_std": pd.NamedAgg(column="stockfish_mean", aggfunc="std"),
}
grouped = df.groupby(["name", "event"])
# reset_index() turns the group keys back into ordinary columns.
agg_df = grouped.agg(**player_event_stats).reset_index()

# Show the first rows of the summary.
agg_df.head()

Unnamed: 0,name,event,pred_avg,pred_std,pred_min,pred_max,stockfish_mean_avg,stockfish_mean_std
0,"-, Shweta",Queen’s Chess Festival - Asia - Category D,1391.931294,72.419104,1292.834201,1544.952115,50.538535,17.783643
1,"., Kulvinder",WUOCC - Rapid Div C,1611.658029,144.109774,1396.156374,1792.85095,20.494096,17.379366
2,"A J, Athira",World Youth Cup 2021 O16-G12 - Girls 12,1539.521425,120.780218,1393.137722,1798.749348,29.273065,11.825482
3,"A R, Ilamparthi",FIDE World Youth Rapid Champ - Asia - U12 - Open,1522.815336,85.371958,1365.864051,1630.714765,24.292355,6.853954
4,"A, Anjana",Queens Festival 2022 Asia - Category D,1447.378033,115.050678,1290.701067,1680.503796,42.465708,20.371892


In [6]:
# Preview the first two rows of df.
# NOTE(review): the saved output below carries an earlier execution count and
# shows per-game columns (white, black, ...), not the columns of the df built
# from `games` — re-run this cell after the flattening cells to refresh it.
df.head(2)

Unnamed: 0,event,round,white,black,result,moves,white_elo,black_elo,ECO,Opening,...,maia_w_min,maia_w_max,maia_b_mean,maia_b_std,maia_b_min,maia_b_max,pred_diff,pred_mean,white_pred,black_pred
0,FIDE Online Olympiad for people with disabilities,1.1,"Tran, Thi Bich Thuy","Zhukovskaya, Nina",1-0,"[d2d4, g8f6, c1g5, d7d6, g5f6, g7f6, e2e3, e7e...",1422.0,1475.0,A45,"Trompovsky attack (Ruth, Opovcensky opening)",...,-117,104,-12.542857,185.118239,-634,624,57.89489,1461.106721,1490.054166,1432.159276
1,FIDE Online Olympiad for people with disabilities,1.1.1,"Molenda, Marcin","Alam, Md. Khorshed",1-0,"[d2d4, d7d5, c2c4, c7c6, g1f3, g8f6, d1b3, d8b...",2349.0,2065.0,D11,QGD Slav,...,-165,171,27.210526,104.022052,-89,451,86.833219,1463.971579,1507.388188,1420.554969


In [None]:
# Select the white-side records of the per-player frame.
# NOTE(review): the original `df[df["white"]]` cannot run — the df built from
# `games` has no "white" column, and a column of player names is not a boolean
# mask anyway. Presumably the white rows were wanted; confirm the intent.
temp = df[df["is_white"] == 1]

In [7]:
# Load the CSV file into a frame.
CHEATING_CSV_PATH = 'data/landing/Cheating.csv'
cheaters = pd.read_csv(CHEATING_CSV_PATH)

In [8]:
# Preview the first rows of the cheaters frame (head() defaults to 5 rows).
cheaters.head()

Unnamed: 0,Player name,Confidence level,Rating,Event name,File name,Special feature,Comments
0,"Alkortabi,, Abdalhakeem A.",80,1648,FIDE World Youth Rapid Champ - Africa - U18,fide-world-youth-rapid-champ-africa-u18,,
1,"Abhishek, Bhargav",80,1177,WUOCC - Blitz Div D,wuocc-blitz-div-d,,various exit interviews https://studio.youtube...
2,"Saurabh, Sharma",80,1381,WUOCC - Blitz Div D,wuocc-blitz-div-d,,
3,"Atharvaa, P Tayade",75,1926,WUOCC - Blitz Div B,wuocc-blitz-div-b,,
4,"Nandwal, Ankit",75,1340,WUOCC - Blitz Div B,wuocc-blitz-div-b,,
