In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the chess games dataset; 'games.csv' is resolved relative to the
# notebook's working directory.
#
# Column reference (kept as comments rather than a bare triple-quoted string:
# as the last expression of the cell, that string would be echoed as a raw,
# escaped output value):
#   id              Game ID
#   rated           Rated (T/F)
#   created_at      Start Time
#   last_move_at    End Time
#   turns           Number of Turns
#   victory_status  Game Status (how game was won)
#   winner          Winner
#   increment_code  Time Increment
#   white_id        White Player ID
#   white_rating    White Player Rating
#   black_id        Black Player ID
#   black_rating    Black Player Rating
#   moves           All Moves in Standard Chess Notation
#   opening_eco     Opening Eco (Standardised Code for any given opening)
#   opening_name    Opening Name
#   opening_ply     Opening Ply (Number of moves in the opening phase)
df = pd.read_csv('games.csv')

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Preview the first five rows of the frame.
df.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Data type pandas assigned to each column when reading the CSV.
df.dtypes

In [None]:
# Cap the number of rows pandas renders (this setting persists for later
# cells), then show only the numeric columns.
pd.options.display.max_rows = 10
df.select_dtypes(include='number')

In [None]:
# Show only the object-dtype columns.
df.select_dtypes(include='object')

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Attribute access (df.white_rating) and bracket access (df['white_rating'])
# select the same column, so the same maximum is printed on two lines.
print(df.white_rating.max(), df['white_rating'].max(), sep='\n')

In [None]:
# Boolean-mask filter: the expression inside the brackets is the row condition.
# Keeps the row(s) where White's rating equals the column maximum.
df[df['white_rating'].eq(df['white_rating'].max())]

In [None]:
# Row(s) where White has the highest white_rating in the frame, followed by
# the recorded winner of those game(s).
highestRatedPlayerWin = df[df['white_rating'].eq(df['white_rating'].max())]
highestRatedPlayerWin['winner']

In [None]:
# Games in which White's rating is 2622 or higher.
df.query('white_rating >= 2622')

In [None]:
# Games whose winner column is 'white'.
df[df['winner'].eq('white')]

In [None]:
# Rows whose winner is 'white', pulled out of the groupby object with get_group.
df.groupby('winner').get_group('white')

In [None]:
# Summary statistics of White's rating for each value of the winner column.
df.groupby('winner')['white_rating'].describe()

In [None]:
# Keep only games whose winner is 'black' or 'white'; rows with any other
# winner value are filtered out.
actWin = ['black', 'white']
win = df[df['winner'].isin(actWin)]
print(win)

# Summary statistics of White's rating per winning side, on the filtered games.
win.groupby('winner')['white_rating'].describe()

In [None]:
# Bar plot of the 20 most common openings.
# NOTE(review): the two options below remove pandas' row/column display limits
# for the rest of the session, so frames displayed afterwards render in full.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
counts = df['opening_name'].value_counts().head(20)

plot = sns.barplot(x=counts.index, y=counts)
# Rotate the x tick labels to vertical.
plot.set_xticklabels(plot.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Side-by-side box plots of White's and Black's ratings.
df[['white_rating', 'black_rating']].boxplot()

In [None]:
# Rating of each side aggregated per value of `rated`
# (pivot_table aggregates with the mean by default).
print(df.pivot_table(index=['rated'], values=['white_rating']))
print(df.pivot_table(index=['rated'], values=['black_rating']))

In [None]:
# Number of turns aggregated per winner value
# (pivot_table aggregates with the mean by default).
print(df.pivot_table(index=['winner'], values=['turns']))

In [None]:
# Correlation heatmap of the numeric/boolean columns.
#
# The frame also holds non-numeric columns. Since pandas 2.0, DataFrame.corr()
# no longer drops those silently (numeric_only defaults to False) and raises
# when it meets them. Selecting numeric and boolean columns up front
# reproduces the older implicit behaviour and works on every pandas version.
corr = df.select_dtypes(include=['number', 'bool']).corr()
# Hide the upper triangle (diagonal included): the matrix is symmetric, so
# the lower triangle already shows every pair once.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, mask=mask)

In [None]:
# Bar plot of the 40 most frequent increment_code values.
vc = df['increment_code'].value_counts().head(40)
chart = sns.barplot(x=vc.index, y=vc)
# Rotate the x tick labels to vertical.
chart.set_xticklabels(chart.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Column labels of the frame.
df.columns

In [None]:
# One bar plot per selected column, showing the counts of that column's
# 40 most frequent values.
df_cats = df[[
    'id', 'rated', 'turns', 'victory_status', 'winner',
    'increment_code', 'opening_name', 'white_rating', 'black_rating',
]]
for i, column_values in df_cats.items():
    # value_counts sorts by descending frequency, so head(40) is the top 40.
    cat_num = column_values.value_counts().head(40)
    print(f"graph for {i}")
    chart = sns.barplot(x=cat_num.index, y=cat_num)
    chart.set_xticklabels(chart.get_xticklabels(), rotation=90)
    plt.show()