# Chess analytics project
## Description du projet
Voir : https://www.notion.so/aldbs/Dashboard-sur-mes-stats-Chess-com-b9b4b950d0e24d0c8e3a1655e0429d22

Documentation technique : https://pypi.org/project/chess.com/, https://chesscom.readthedocs.io/en/latest/#api-reference

Chess module python : https://python-chess.readthedocs.io/en/latest/pgn.html

## Etapes
1. Intégrer toutes les données d'un joueur
2. Calcul des KPI (nombre de matchs joués sur la période, nombre de victoires, analyse des variantes jouées)


In [None]:
pip install chess.com

In [None]:
!pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply()

In [None]:
import chessdotcom as chess
import json
import requests
import pandas as pd
import numpy as np
# import chess as ch
# import re
# import itertools

In [None]:
# chess.com username of the player analysed by the cells of this notebook
player_profile = 'ACENash'

## **Get general user statistics** 

In [None]:
# Get player stats
def player_stats(profile):
    """
    Return a dataframe of player's statistics

    The result has one row per game type that has a ``record`` entry, with
    the raw counters of that record and the share of each outcome in percent.

    Args:
        profile: chess.com profile of the player we want to analyze

    Returns:
        DataFrame with a ``game_type`` column, one column per key of the
        ``record`` payload (``win``, ``loss``, ``draw``, ...) and the
        ``winning% ``, ``losing% `` and ``drawing% `` columns (the trailing
        space in these three names is kept for compatibility).
    """
    # Get data from chess.com for the requested profile (the argument, not
    # the notebook-level global)
    raw_stats = chess.get_player_stats(profile).json

    # Format payload: drop the non-game sections, drop the rows with missing
    # values, then keep the 'record' entry of every remaining game type
    ps_df = pd.DataFrame.from_dict(raw_stats['stats'])
    ps_df = (
        ps_df.drop(columns=['fide', 'tactics', 'lessons', 'puzzle_rush'], errors='ignore')
        .dropna()
        .transpose()['record']
        .reset_index()
        .rename(columns={'index': 'game_type'})
    )

    # Expand the 'record' dictionaries into one column per key
    ps_df = pd.concat([ps_df.drop(['record'], axis=1), ps_df['record'].apply(pd.Series)], axis=1)

    # Compute basic statistics
    games_played = ps_df['win'] + ps_df['loss'] + ps_df['draw']
    ps_df['winning% '] = ps_df['win'] / games_played * 100
    ps_df['losing% '] = ps_df['loss'] / games_played * 100
    ps_df['drawing% '] = ps_df['draw'] / games_played * 100

    return ps_df

# Get the total amount of games played
def global_player_statistic(player_stat_df):
    """
    Aggregate the per-game-type counters into overall totals

    Args:
        player_stat_df: table exposing 'win', 'loss' and 'draw' columns

    Returns:
        Tuple (total games, total wins, total losses, total draws,
        winning %, losing %, drawing %)
    """
    total_win, total_loss, total_draw = (
        sum(player_stat_df[outcome]) for outcome in ('win', 'loss', 'draw')
    )
    total_games = total_win + total_loss + total_draw

    percentages = tuple(
        count / total_games * 100 for count in (total_win, total_loss, total_draw)
    )
    return (total_games, total_win, total_loss, total_draw) + percentages

## **Get games history**

In [None]:
# Retrieve the list of all monthly archives
# (tells for how many months the user has been playing)

def get_player_timeline(player_profile):
    """
    Return the last two path segments of every archive URL of the player

    Args:
        player_profile: chess.com profile whose game archives are listed

    Returns:
        List of two-item lists, e.g. ['2019', '08'] for an archive URL
        ending in '/2019/08'
    """
    archive_urls = chess.get_player_game_archives(player_profile).json['archives']

    timeline = []
    for archive_url in archive_urls:
        timeline.append(archive_url.split('/')[-2:])
    return timeline

In [None]:
# Convert a PGN file (str) into a list

def extract_pgn_to_list(pgn_str):
    """
    Split a PGN text into a list of [name, value] entries

    Every line except the last two is treated as a header line: the
    characters '[', ']' and '"' are removed and the line is split on its
    first space. The second-to-last line is taken as the move text and
    appended verbatim as ['Moves', <line>], so bracketed annotations inside
    the moves are preserved. The text is therefore expected to end with the
    move line followed by one trailing line.

    Args:
        pgn_str: PGN content; it is converted with str() before parsing

    Returns:
        List of lists. Header entries are [name, value]; a line without a
        space (e.g. a blank separator line) yields a single-item list.

    Raises:
        IndexError: if the text contains fewer than two lines
    """
    lines = str(pgn_str).split('\n')

    # Strip the PGN tag markup from the header lines only, in a single pass
    strip_markup = str.maketrans('', '', '[]"')
    entries = [
        line.translate(strip_markup).split(' ', maxsplit=1)
        for line in lines[:-2]
    ]

    entries.append(['Moves', lines[-2]])
    return entries

In [None]:
# Extract the game history and load it into a dataframe
def get_all_player_games(player_profile, player_timeline):
    """
    Download every monthly archive of a player into a single dataframe

    Args:
        player_profile: chess.com profile whose games are downloaded
        player_timeline: iterable of (year, month) pairs, one per archive

    Returns:
        DataFrame with one row per game; the per-month row indexes are kept
        as they are (not reset). An empty timeline gives an empty DataFrame.
    """
    # Collect the monthly frames and concatenate once: concatenating inside
    # the loop copies the accumulated data again for every month
    monthly_frames = []
    for month_year in player_timeline:
        monthly_game = chess.get_player_games_by_month(player_profile, month_year[0], month_year[1])
        monthly_frames.append(pd.DataFrame(monthly_game.json['games']))

    if not monthly_frames:
        return pd.DataFrame()
    return pd.concat(monthly_frames)



def convert_pgn_columns(df):
    """
    Expand the 'pgn' text of every game into one column per PGN entry

    Games whose 'rules' value is 'bughouse' are excluded. The input
    dataframe is not modified.

    Args:
        df: games dataframe exposing the 'rules' and 'pgn' columns

    Returns:
        DataFrame with one row per remaining game and one column per entry
        name returned by extract_pgn_to_list. Every row keeps index 0, so
        callers are expected to reset the index. An empty DataFrame is
        returned when no game remains.
    """
    # Boolean indexing yields a new frame; the parsed PGN is read straight
    # from the Series instead of being written back onto that slice
    kept_games = df[df['rules'] != 'bughouse']

    per_game_frames = []
    for entries in kept_games['pgn'].apply(extract_pgn_to_list):
        columns = [entry[0] for entry in entries]
        values = [entry[-1] for entry in entries]
        per_game_frames.append(pd.DataFrame([values], columns=columns))

    # Single concatenation instead of growing a dataframe inside the loop
    if not per_game_frames:
        return pd.DataFrame()
    return pd.concat(per_game_frames)

In [None]:
def _termination_type(termination):
    """Return the text following ' won ' or 'drawn', else the text itself."""
    text = str(termination)
    for marker in (' won ', 'drawn'):
        if marker in text:
            return text.split(marker)[-1].strip()
    return text


def data_transformation(df, profile=None):
    """
    Add the outcome flags, termination type and opening name to the games

    The dataframe is modified in place and also returned.

    Args:
        df: games dataframe exposing the 'Termination' and 'ECOUrl' columns
        profile: profile name searched for in 'Termination'; defaults to the
            notebook-level ``player_profile``

    Returns:
        The same dataframe with these columns added:
        isProfilePlayerWon   - 'Termination' contains the profile name
        isProfilePlayerLost  - 'Termination' contains 'won' but not the profile name
        isProfilePlayerDrawn - 'Termination' contains 'drawn'
        terminationType      - text after ' won ' or 'drawn' (e.g. 'by resignation')
        Opening              - 'ECOUrl' without the chess.com openings URL prefix
    """
    if profile is None:
        # Backward-compatible default: the profile defined at notebook level
        profile = player_profile

    termination = df['Termination']

    # na=False everywhere so that rows without a termination give False
    # instead of NaN; regex=False so that the texts are matched literally
    names_profile = termination.str.contains(profile, na=False, regex=False)
    has_winner = termination.str.contains('won', na=False, regex=False)

    df['isProfilePlayerWon'] = names_profile
    df['isProfilePlayerLost'] = has_winner & ~names_profile
    df['isProfilePlayerDrawn'] = termination.str.contains('drawn', na=False, regex=False)
    df['terminationType'] = termination.apply(_termination_type)
    df['Opening'] = df['ECOUrl'].str.replace('https://www.chess.com/openings/', '', regex=False)

    # The 'accuracies' field is deliberately not extracted.
    # TODO: add the Elo of the profile player and of the opponent for each game

    return df


In [None]:
# Fetching the accuracy is pointless: more than 78% of this field is empty
#d['blackAccuracy'].isna().sum() / d.shape[0]
#d.sort_values('EndDate', ascending=True)['blackAccuracy'].iloc[5000:].isna().sum() / d.iloc[5000:].shape[0]

In [None]:
# NOTE(review): this assignment rebinds the name `player_stats` from the
# function to the dataframe it returns, so this cell cannot be run twice
# without re-running the cell that defines the function.
player_stats = player_stats(player_profile)

(total_games, total_win, total_loss, total_draw,
 winnnig_perc, losing_perc, drawing_perc) = global_player_statistic(player_stats)

# Print the overall figures, one "label value" line each
for label, value in (
    ('Number of games played:', total_games),
    ('Number of won games: ', total_win),
    ('Number of lost games: ', total_loss),
    ('Number of draw games: ', total_draw),
    ('Winning % :', winnnig_perc),
    ('Losing % :', losing_perc),
    ('Drawing % :', drawing_perc),
):
    print(label, value)

Number of games played: 1227.0
Number of won games:  582.0
Number of lost games:  602.0
Number of draw games:  43.0
Winning % : 47.43276283618582
Losing % : 49.06275468622657
Drawing % : 3.5044824775876124


In [None]:
# Display the per-game-type statistics table
player_stats

Unnamed: 0,game_type,win,loss,draw,time_per_move,timeout_percent,winning%,losing%,drawing%
0,chess_daily,29.0,14.0,2.0,13812.0,0.0,64.444444,31.111111,4.444444
1,chess_rapid,35.0,29.0,6.0,,,50.0,41.428571,8.571429
2,chess_bullet,8.0,18.0,0.0,,,30.769231,69.230769,0.0
3,chess_blitz,510.0,541.0,35.0,,,46.961326,49.815838,3.222836


In [None]:
# List the [year, month] archives available for the player
player_timeline = get_player_timeline(player_profile)
print(player_timeline)

[['2019', '08'], ['2019', '09'], ['2019', '10'], ['2019', '11'], ['2019', '12'], ['2020', '01'], ['2020', '02'], ['2020', '03'], ['2020', '04'], ['2020', '05'], ['2020', '06'], ['2020', '09'], ['2020', '10'], ['2020', '11'], ['2020', '12'], ['2021', '01'], ['2021', '02'], ['2021', '03'], ['2021', '04'], ['2021', '05'], ['2021', '06'], ['2021', '07'], ['2021', '08'], ['2021', '09'], ['2021', '10'], ['2021', '11'], ['2021', '12'], ['2022', '01'], ['2022', '02'], ['2022', '03'], ['2022', '04']]


In [None]:
# Download every monthly archive of the timeline into one dataframe
# (no need to pre-create an empty dataframe: the result is assigned directly)
game_df = get_all_player_games(player_profile, player_timeline)

In [None]:
# Check which values of 'rules' appear in the downloaded games
game_df['rules'].unique()

array(['chess', 'chess960'], dtype=object)

In [None]:
# Parse the PGN text of every game into one column per PGN entry
converted_pgn = convert_pgn_columns(game_df)

In [None]:
# Attach the columns extracted from the PGN to the game dataframe
converted_pgn.reset_index(inplace=True, drop=True)
game_df.reset_index(inplace=True, drop=True)

# Join on the game URL rather than concatenating side by side:
# convert_pgn_columns drops the bughouse games, so the row positions of the
# two dataframes are not guaranteed to line up.
full_game_df = game_df.merge(converted_pgn, how='left', left_on='url', right_on='Link')

In [None]:
 # Final transformations

# Add the outcome flags, the termination type and the opening name columns
full_game_df = data_transformation(full_game_df)
#d.head()

  import sys


In [None]:
# Keep only the columns that are exported to the spreadsheet
full_game_df_selected_col = full_game_df[['url', 'time_control', 'EndDate', 'isProfilePlayerWon', 'isProfilePlayerLost',
       'isProfilePlayerDrawn', 'terminationType', 'Opening']]

    # Other headers noted as available (NOTE(review): list not re-checked against the current dataframe):
    #    , 'pgn_2', 'Event', 'Site', 'Date', 'Round', 'White',
    #    'Black', 'Result', 'CurrentPosition', 'Timezone', 'ECO', 
    #    'UTCDate', 'UTCTime', 'WhiteElo', 'BlackElo', 'TimeControl',
    #    'Termination', 'Link', 'Moves',
    #    'start_time'

In [None]:
# If a csv file is needed

# full_game_df_selected_col.to_csv('dataw.csv')

In [None]:
# Export the selected columns to a Google Sheet from a Colab session
import gspread
from google.auth import default
from google.colab import auth
from gspread_dataframe import set_with_dataframe

# Authenticate the Colab user first, then build the gspread client from the
# default credentials
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Open the target spreadsheet by key and select its 'raw data' worksheet
wb = gc.open_by_key('1m9YVjSN98lrzNDRuoDBrQ8O_jFAcLIEwYzdxe98JGZU')
ws = wb.worksheet('raw data')

# Write the dataframe with its column headers and without its index
set_with_dataframe(
    worksheet=ws,
    dataframe=full_game_df_selected_col,
    include_index=False,
    include_column_header=True,
    resize=True,
)


In [None]:
# KPI

In [None]:
# Headline figures: number of games and date of the first one
games_count = full_game_df_selected_col.shape[0]
print("You have played:", games_count,
      ", Since :", full_game_df_selected_col['EndDate'].min())

print('Winning % :', winnnig_perc)

# Most played time control and its share of all games
games_per_format = full_game_df.groupby(by='time_control')['url'].count()
format_share = games_per_format / games_count * 100
print("Your prefered format is :", games_per_format.idxmax(),
      "Which represents :", round(format_share.max(), 2), "% of your games played")

# Most frequent termination type and its share of all games
games_per_termination = full_game_df.groupby(by='terminationType')['url'].count()
termination_share = games_per_termination / games_count * 100
print("Most commun terminaion :", games_per_termination.idxmax(),
      "% of time :", round(termination_share.max(), 2), "%")


# Only openings played more than 20 times are ranked
games_per_opening = full_game_df.groupby(by='Opening')['url'].count()
index = games_per_opening[games_per_opening > 20].index

# Win rate (%) per opening, best first
wins_per_opening = full_game_df.groupby(by='Opening')['isProfilePlayerWon'].sum()
ranked_win_rate = (wins_per_opening / games_per_opening * 100)[index].sort_values(ascending=False)

print("Your top 3 opennings :",
    ranked_win_rate.head(3))

print("Your worst 3 opennings :",
    ranked_win_rate.tail(3))

You have played: 1248 , Since : 2019.08.29
Winning % : 47.43276283618582
Your prefered format is : 180+2 Which represents : 54.89 % of your games played
Most commun terminaion : by resignation % of time : 40.62 %
Your top 3 opennings : Opening
Scotch-Game                                50.0
Scotch-Game-3...exd4-4.Nxd4-Nxd4-5.Qxd4    50.0
Slav-Defense                               50.0
dtype: float64
Your worst 3 opennings : Opening
Scotch-Game...4.Nxd4-Nxd4-5.Qxd4-d6          41.666667
Center-Game-Accepted-Paulsen-Attack          40.909091
Caro-Kann-Defense-Hillbilly-Attack-2...d5    33.333333
dtype: float64
