# Lichess Games: Data Analysis

In [None]:
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mpc
from matplotlib.lines import Line2D
import seaborn as sns
from matplotlib import rcParams
import mplcatppuccin
from mplcatppuccin.colormaps import get_colormap_from_list

## Global Matplotlib Settings

In [None]:
# Default canvas for every figure: 12x6 inches rendered at 200 dpi.
rcParams.update({'figure.figsize': (12, 6), 'figure.dpi': 200})

In [None]:
# Load the games dataset from 'chess-games.csv' (path relative to the
# working directory) and preview the first rows.
df = pd.read_csv('chess-games.csv')
df.head()

## Exploratory Data Analysis

In [None]:
# Calculates the average ELO of the game
def average_game_elo(row):
    """Return the rounded mean of a game's two player ratings.

    `row` is any mapping-like object exposing 'white_rating' and
    'black_rating' (for example a DataFrame row).
    """
    white, black = row['white_rating'], row['black_rating']
    return round((white + black) / 2)

In [None]:
# Checks if one of the players castled during the game
def is_castle(row):
    """Return True if the game's move text contains a castling move.

    `row` is any mapping-like object exposing a 'moves' string. The
    substring "O-O" is searched for, which also matches queenside
    castling ("O-O-O"), so either kind of castling by either player counts.
    """
    return "O-O" in row['moves']

In [None]:
# Count how many "checks" occurred in each game
# Raw string: "\+" in a normal string literal is an invalid escape sequence
# that newer Python versions warn about.
check_pattern = re.compile(r"\+")


def check_count(moves):
    """Return the number of moves in `moves` that contain a "+" marker.

    `moves` is a single string of moves separated by single spaces. Each
    move is counted at most once, however many "+" characters it contains.
    An empty string yields 0.
    """
    return sum(1 for move in moves.split(" ") if check_pattern.search(move))

In [None]:
df.shape

In [None]:
df.describe()

In [None]:
df.info()

In [None]:
df.duplicated().sum()

In [None]:
# drop_duplicates() returns a new frame; without inplace=True (or assigning
# the result back) the duplicated rows stay in df for the rest of the analysis.
df.drop_duplicates(inplace=True)
df.shape

In [None]:
# Remove the two timestamp columns from df in place.
df.drop(columns=["created_at", "last_move_at"], inplace=True)

In [None]:
# Rename the 'opening_ply' column to 'opening_move_nbr' in place, then
# preview the result.
df.rename(columns={"opening_ply": "opening_move_nbr"}, inplace=True)
df.head()

In [None]:
df['increment_code'] = df['increment_code'].astype(str)

In [None]:
# game_duration is the text before the first "+" of increment_code
# (kept as a string, exactly as it appears in the code).
df['game_duration'] = df['increment_code'].str.split("+", n=1, regex=False).str[0]
df.head()

In [None]:
df.game_duration.value_counts()

In [None]:
plt.style.use("mocha")
# Bar chart of the number of games per game_duration value, most frequent first.
duration_counts = df.game_duration.value_counts().sort_values(ascending=False)
duration_counts.plot.bar()
plt.savefig("PopularGameDuration.png")

In [None]:
df.victory_status.value_counts()

In [None]:
plt.style.use("mocha")
# Bar chart of the number of games per victory_status value, most frequent first.
status_counts = df.victory_status.value_counts().sort_values(ascending=False)
status_counts.plot.bar()
plt.savefig("VictoryStatus.png")

In [None]:
df.winner.value_counts()

In [None]:
plt.style.use("mocha")
# value_counts() orders bars by frequency, so fixed positional tick labels
# would be wrong whenever the order is not white > black > draw. Renaming the
# index ties each display label to its own category instead.
winner_names = {"white": "White", "black": "Black", "draw": "Draw"}
df.winner.value_counts().rename(index=winner_names).plot(kind="bar")
plt.savefig("Winner.png")

In [None]:
df.opening_eco.value_counts()

In [None]:
plt.style.use("mocha")
df.opening_eco.value_counts()[:100].plot(kind="bar")
plt.xticks(fontsize=8, rotation=90)
plt.savefig("PopularOpening.png")

In [None]:
df.game_duration.value_counts().sort_values(ascending=False)

In [None]:
# Flag each game in which a castling move appears in the move text.
df["did_castle"] = df.apply(is_castle, axis=1)
df.head()

In [None]:
df.did_castle.value_counts()

In [None]:
plt.style.use("mocha")
# value_counts() orders bars by frequency, so fixed positional tick labels
# are only right when True is the more frequent value. Renaming the index
# ties each label to the value it describes.
castle_names = {True: "Castle", False: "No Castle"}
df.did_castle.value_counts().rename(index=castle_names).plot(kind="bar", rot=0)
plt.savefig("OneCastle.png")

In [None]:
df[["black_rating", "white_rating"]].max(axis=1).sort_values(ascending=False)

In [None]:
df[["black_rating", "white_rating"]].max(axis=1).sort_values(ascending=False).value_counts()

In [None]:
# Number of checking moves per game, computed from the move text.
df['check_count'] = df['moves'].apply(check_count)
df.head()

In [None]:
df.check_count.value_counts()

In [None]:
plt.style.use("mocha")
# Bar chart of how many games had each check_count value (bars ordered by frequency).
games_per_check_count = df.check_count.value_counts()
games_per_check_count.plot.bar()
plt.savefig("CheckCount.png")

In [None]:
# Rounded mean of the white and black ratings for every game.
df['average_game_elo'] = df.apply(average_game_elo, axis=1)

In [None]:
# Encode the winner as an integer: white -> 1, black -> 0, draw -> 2.
# The result is assigned back to the column: calling replace(inplace=True)
# on the df["winner"] selection is chained assignment, which pandas
# deprecates and which does not modify df under copy-on-write.
df["winner"] = df["winner"].replace(["white", "black", "draw"], [1, 0, 2])

In [None]:
df.head()

In [None]:
df.winner.value_counts()

In [None]:
df.average_game_elo.max()

In [None]:
# Show the games with exactly 51 checks.
# NOTE(review): 51 is presumably the maximum check_count in this dataset —
# confirm against df.check_count.max().
df[df["check_count"] == 51]

In [None]:
# game_duration holds the text before "+" in increment_code, so a plain
# .max() compares strings character by character (e.g. "9" > "10").
# Convert to numbers first; values that are not numeric become NaN and
# are ignored by max().
pd.to_numeric(df.game_duration, errors="coerce").max()

In [None]:
df.check_count.max()

In [None]:
df.rated.value_counts()

In [None]:
plt.style.use("mocha")
# Bar chart of the number of games per value of the rated column.
rated_counts = df.rated.value_counts()
rated_counts.plot.bar()
plt.savefig("RatedvNonRated.png")

In [None]:
df.victory_status.value_counts()

In [None]:
df.victory_status.unique()

In [None]:
# Apply the style before drawing so it affects this figure.
plt.style.use("mocha")

# Relabel the categories before plotting so every legend entry is tied to its
# own category. Passing a bare label list to plt.legend() assigns labels by
# handle position, which can silently mislabel (or drop) entries.
# NOTE(review): the raw keys below are assumed from the legend text —
# confirm against df.victory_status.unique(). Unmatched values keep their
# original text.
status_names = {
    "outoftime": "Out of Time",
    "resign": "Resign",
    "mate": "Mate",
    "draw": "Draw",
}
ax = sns.scatterplot(
    x=df["white_rating"],
    y=df["black_rating"],
    hue=df["victory_status"].replace(status_names),
)

plt.ylabel("Black Elo Rating", fontdict={"size": 8, "weight": "medium"})
plt.xlabel("White Elo Rating", fontdict={"size": 8, "weight": "medium"})
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
# Keep seaborn's own handles; only reposition the legend and drop its title.
sns.move_legend(ax, "best", title=None)

plt.savefig("RelationShipElo.png")

In [None]:
sns.set_style("whitegrid")
sns.set_context("paper")

def NonLinCdict(steps, hexcol_array):
    """Build a segment-data dict for a LinearSegmentedColormap.

    `steps` gives the position of each colour stop and `hexcol_array` the
    matching hex colour strings; the two are paired with zip, so extra
    items in the longer one are ignored. Returns a dict with keys 'red',
    'green' and 'blue', each a tuple of (step, value, value) triples.
    """
    channels = ('red', 'green', 'blue')
    stops = {name: [] for name in channels}
    for position, hexcol in zip(steps, hexcol_array):
        rgb = mpc.hex2color(hexcol)
        for name, level in zip(channels, rgb):
            stops[name].append((position, level, level))
    return {name: tuple(stops[name]) for name in channels}

# Hex colour stops and their positions on the 0-1 scale for the custom
# light-to-dark blue colormap.
hc = ['#dfe7ff', '#a1b6ff', '#7386fd', '#413fec', '#2d2ba8']
th = [0, 0.1, 0.5, 0.9, 1]

cdict = NonLinCdict(th, hc)
cm = mpc.LinearSegmentedColormap('test', cdict)

# Display names keyed by column, so tick labels follow whatever numeric
# columns df.corr() actually returns (and in that order) instead of relying
# on a fixed-length positional list. Columns without an entry fall back to
# their own name.
# NOTE(review): "both_castled" is an assumed column name for the
# "both castled" label — confirm or remove.
label_by_column = {
    "rated": "rated",
    "turns": "turns",
    "white_rating": "white elo",
    "black_rating": "black elo",
    "opening_move_nbr": "opening move nbr",
    "did_castle": "did castle",
    "both_castled": "both castled",
    "check_count": "nbr of checks",
    "average_game_elo": "average elo",
    "winner": "winner",
}
corr = df.corr(numeric_only=True)
tick_labels = [label_by_column.get(column, column) for column in corr.columns]

plt.figure()
sns.heatmap(
    data=corr,
    # Colour scale is fixed to [0, 1]; negative correlations are drawn with
    # the lightest colour but still annotated with their value.
    vmin=0.0,
    vmax=1.0,
    cbar=False,
    annot=True,
    annot_kws={"size": 8},
    square=True,
    xticklabels=tick_labels,
    yticklabels=tick_labels,
    cmap=cm,
    linewidths=0.75,
)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

plt.savefig("HeatmapCorrelation.png")

In [None]:
plt.style.use("mocha")
# Histogram with a KDE overlay of the white players' ratings.
label_font = {"size": 8}
sns.displot(df["white_rating"], kde=True)
plt.xlabel("White Elo Rating", fontdict=label_font)
plt.ylabel("Count", fontdict=label_font)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

plt.savefig("WhiteEloDistribution.png")

In [None]:
plt.style.use("mocha")
# Histogram with a KDE overlay of the black players' ratings.
label_font = {"size": 8}
sns.displot(df["black_rating"], kde=True, color="#f48ba8")
plt.xlabel("Black Elo Rating", fontdict=label_font)
plt.ylabel("Count", fontdict=label_font)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

plt.savefig("BlackEloDistribution.png")