In [50]:
import pandas as pd
import requests
import re
from pgn_parser import pgn, parser

In [116]:
class PGN_Parse():
    """Extract each side's moves and the winner from a single-game PGN string.

    The movetext is taken from the last non-empty line of ``raw_pgn`` and is
    expected to number every half-move (``1. e4 ... 1... e5 ...``), optionally
    followed by ``{[%clk ...]}``-style annotations. A token containing ``...``
    is counted as a Black move; every other non-result token is counted as a
    White move.

    Attributes set on the instance:
        white_moves -- list of White's moves, move numbers stripped
        black_moves -- list of Black's moves, move numbers stripped
        winner      -- 'white', 'black' or 'draw'
    """

    # Game-termination tokens mapped to the winner label. Matching these
    # exactly (instead of "any token containing '-'") keeps castling moves
    # such as O-O and O-O-O in the move lists.
    _RESULTS = {'1-0': 'white', '0-1': 'black', '1/2-1/2': 'draw'}

    def __init__(self, raw_pgn):
        # Use the last non-empty line so the parse does not depend on the
        # input ending with exactly one newline.
        non_empty_lines = [line for line in raw_pgn.split('\n') if line.strip()]
        movetext = non_empty_lines[-1] if non_empty_lines else ''

        # Drop {[...]} annotations. The \x01-9 range spells out what the
        # original '\1-9' escape actually matched (it covers the digits, '.',
        # and space), so the set of stripped annotations is unchanged.
        movetext = re.sub(r'\{\[[%\s\x01-9a-zA-Z:]*\]\}', '', movetext)
        # Glue move numbers to their move: "1. e4" -> "1.e4", "1... e5" -> "1...e5".
        movetext = re.sub(r'\.\s', '.', movetext)

        white_moves = []
        black_moves = []
        # NOTE(review): the winner stays 'white' when no result token is found
        # (kept from the original behavior) -- confirm this default is wanted.
        winner = 'white'
        # split() with no argument collapses runs of whitespace, so no empty
        # tokens can end up in the move lists.
        for token in movetext.split():
            if token in self._RESULTS:
                winner = self._RESULTS[token]
                continue
            if token == '*':  # PGN marker for an unfinished game; not a move
                continue
            san = token.split('.')[-1]
            if '...' in token:  # black reply
                black_moves.append(san)
            else:
                white_moves.append(san)
        self.black_moves = black_moves
        self.white_moves = white_moves
        self.winner = winner


def get_mat(char):
    """Return the material value of a lowercase piece letter.

    Values: king 0, queen 9, rook 5, bishop/knight 3, pawn 1.

    Any other character prints a warning and counts as 0, so a caller that
    accumulates the result with ``+=`` keeps working instead of failing on
    an implicit ``None``.
    """
    piece_values = {'k': 0, 'q': 9, 'r': 5, 'b': 3, 'n': 3, 'p': 1}
    if char not in piece_values:
        print('WARNING: Unknown piece type. Cannot calculate material worth', char)
        return 0
    return piece_values[char]


def calculate_final_mat(raw_fen):
    """Sum the material on the board described by a FEN string.

    Only the first space-separated field of ``raw_fen`` (the piece placement)
    is used. Digits and '/' separators are removed; each remaining character
    is scored with ``get_mat``. Uppercase letters count for White, everything
    else for Black.

    Returns:
        (white_mat, black_mat) tuple of material totals.
    """
    # Read the argument -- the original body read the global `game` instead,
    # so the result silently depended on notebook state.
    placement = raw_fen.split(' ')[0]
    pieces = re.sub(r'[0-9/]', '', placement)
    black_mat = 0
    white_mat = 0
    for char in pieces:
        if char.isupper():  # is white
            white_mat += get_mat(char.lower())
        else:
            black_mat += get_mat(char.lower())
    return white_mat, black_mat

def make_request(url, timeout=30):
    """GET ``url`` and return the response, raising on any non-200 status.

    Args:
        url: address to fetch.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook indefinitely.

    Returns:
        The ``requests`` response object.

    Raises:
        requests.HTTPError: if the status code is not 200. (The original
            raised an undefined ``ApiError``, which surfaced as NameError.)
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200:
        raise requests.HTTPError(
            'GET {} returned status {}'.format(url, resp.status_code),
            response=resp,
        )
    return resp

In [180]:
# Download every monthly archive for `username` and build one stats row per game.
MAX_GAMES = 10000  # upper bound on collected games, to keep memory use in check
username = 'its_totally_kyle'
stats_ls = []
archives = make_request(f'https://api.chess.com/pub/player/{username}/games/archives').json()['archives']
for archive in archives[::-1]: # go in reverse order. That way we can do early stopping if necessary for memory
    if len(stats_ls) >= MAX_GAMES:
        # The inner `break` alone only leaves the per-game loop; stop here too
        # so no further archives are downloaded once the cap is reached.
        break
    # The last two path segments of the archive URL are used as year and month.
    game_month = archive.split('/')[-1]
    game_year = archive.split('/')[-2]
    print(game_month, game_year)
    games = make_request(archive).json()['games']
    for game in games:
        if len(stats_ls) >= MAX_GAMES:
            break
        # Compare case-insensitively so a differently-cased username in the
        # response is still recognised as the user.
        user_is_white = game['white']['username'].lower() == username.lower()
        usercolor = 'white' if user_is_white else 'black'
        result = game['white']['result'] + '/' + game['black']['result']
        opponent = game['black']['username'] if user_is_white else game['white']['username']
        gamepgn = PGN_Parse(game['pgn'])
        white_mat, black_mat = calculate_final_mat(game['fen'])
        # A game can end before one (or either) side has moved; use an empty
        # string instead of raising IndexError on the missing first move.
        white_first_move = gamepgn.white_moves[0] if gamepgn.white_moves else ''
        black_first_move = gamepgn.black_moves[0] if gamepgn.black_moves else ''
        stats_ls.append({'username': username, 'user_color': usercolor, 'game_month': game_month, 'game_year': game_year,
                         'white_mat_end':white_mat, 'black_mat_end':black_mat,
                         'white_first_move':white_first_move, 'black_first_move':black_first_move,
                         'winner':gamepgn.winner, 'opponent':opponent, 'result':result
                        })
df_stats = pd.DataFrame(stats_ls)

06 2021
05 2021
04 2021
03 2021
02 2021
01 2021
12 2020


In [181]:
# Show the per-game stats frame (one row per entry appended to stats_ls).
df_stats

Unnamed: 0,username,user_color,game_month,game_year,white_mat_end,black_mat_end,white_first_move,black_first_move,winner,opponent,result
0,its_totally_kyle,white,06,2021,25,8,c4,Nc6,white,thengnathan,win/abandoned
1,its_totally_kyle,white,06,2021,20,26,c4,e5,white,Adria92,win/checkmated
2,its_totally_kyle,white,05,2021,19,14,e4,e5,white,Dan3762,win/resigned
3,its_totally_kyle,white,05,2021,17,31,c4,e5,black,oops_all_gambits,resigned/win
4,its_totally_kyle,black,05,2021,17,21,d4,d5,black,ToccoRoyale,resigned/win
...,...,...,...,...,...,...,...,...,...,...,...
407,its_totally_kyle,black,12,2020,19,2,e4,c5,white,gmason454,win/resigned
408,its_totally_kyle,white,12,2020,13,26,c4,c5,black,Roberto_96,timeout/win
409,its_totally_kyle,black,12,2020,0,23,e4,b5,black,Djagger7,checkmated/win
410,its_totally_kyle,white,12,2020,16,0,e4,h5,white,DiegoPro44,win/checkmated


In [183]:
# Derive user-centric columns with vectorized operations (no row-wise apply,
# no string-times-boolean arithmetic).
user_is_white = df_stats['user_color'] == 'white'
white_minus_black = df_stats['white_mat_end'] - df_stats['black_mat_end']
# mat_diff: final material balance from the user's side (positive = user ahead).
df_stats['mat_diff'] = white_minus_black.where(user_is_white, -white_minus_black)
# user_first_move: the first move played by whichever colour the user had.
df_stats['user_first_move'] = df_stats['white_first_move'].where(user_is_white, df_stats['black_first_move'])

In [184]:
# Games where the recorded winner is the user's colour and the result string
# mentions 'checkmated'; then summarise the user's material balance in them.
user_won = df_stats['winner'] == df_stats['user_color']
ended_in_mate = df_stats['result'].str.contains('checkmated')
wins = df_stats[user_won & ended_in_mate]
wins['mat_diff'].describe()

count    95.000000
mean      8.863158
std       8.139228
min      -8.000000
25%       2.500000
50%       8.000000
75%      15.000000
max      27.000000
Name: mat_diff, dtype: float64

In [185]:
# Show the won game(s) with the lowest material balance. The minimum is
# computed rather than hard-coded (-8 was copied from the describe() output),
# so the cell stays correct when the data changes.
wins[wins['mat_diff'] == wins['mat_diff'].min()]

Unnamed: 0,username,user_color,game_month,game_year,white_mat_end,black_mat_end,white_first_move,black_first_move,winner,opponent,result,mat_diff,user_first_move
96,its_totally_kyle,white,3,2021,21,29,c4,e5,white,Google-slide,win/checkmated,-8,c4


In [186]:
# Count how often each user first move appears among the rows in `wins`.
wins['user_first_move'].value_counts()

d5     42
c4     34
e4      6
e5      3
c5      3
Nf6     1
f5      1
Nf3     1
Nc3     1
d4      1
b5      1
f6      1
Name: user_first_move, dtype: int64

In [189]:
# Games where the recorded winner differs from the user's colour (on its own
# this mask would also match winner == 'draw') and the result string mentions
# 'checkmated'; then summarise the user's material balance in them.
user_did_not_win = df_stats['winner'] != df_stats['user_color']
ended_in_mate = df_stats['result'].str.contains('checkmated')
losses = df_stats[user_did_not_win & ended_in_mate]
losses['mat_diff'].describe()

count    73.000000
mean     -7.219178
std       7.513408
min     -23.000000
25%     -13.000000
50%      -7.000000
75%      -1.000000
max       9.000000
Name: mat_diff, dtype: float64

In [191]:
# Show the lost game(s) with the highest material balance. The maximum is
# computed rather than hard-coded (9 was copied from the describe() output),
# so the cell stays correct when the data changes.
losses[losses['mat_diff'] == losses['mat_diff'].max()]

Unnamed: 0,username,user_color,game_month,game_year,white_mat_end,black_mat_end,white_first_move,black_first_move,winner,opponent,result,mat_diff,user_first_move
121,its_totally_kyle,black,3,2021,26,35,d4,d5,white,Immortal22u,win/checkmated,9,d5


In [192]:
# Count how often each user first move appears among the rows in `losses`.
losses['user_first_move'].value_counts()

c4     34
d5     27
e4      5
e5      2
d4      2
Nc6     1
f6      1
c5      1
Name: user_first_move, dtype: int64

In [177]:
# Ad-hoc look at one game's White result field.
# NOTE(review): `game` is not defined in this cell -- it is whatever the fetch
# loop's loop variable was last bound to, so this relies on leftover kernel
# state (this cell's execution count, 177, is lower than the fetch cell's 180).
game['white']['result']

'win'