In [None]:
# imports
import pandas as pd
import matplotlib.pyplot as plt

# Relative path to the CSV file that the next cell loads with pd.read_csv.
filepath = "games.csv"


In [None]:
# Read the raw games table from the CSV at `filepath`.
games_df = pd.read_csv(filepath)

# Show only the first row as a quick look at the available columns.
games_df.head(n = 1)

In [None]:
# Bucket each player's rating into 200-point intervals between 0 and 3000.
# Both colors share the same bin edges so their ranges are directly comparable.
rating_edges = range(0, 3001, 200)
for color in ("white", "black"):
    games_df[f"{color}_range"] = pd.cut(games_df[f"{color}_rating"], rating_edges)

In [None]:
# Rating gap from white's point of view (white_rating - black_rating):
#   positive -> white is the higher-rated player and is expected to win
#   negative -> black is the higher-rated player and is expected to win
games_df["elo_diff"] = games_df["white_rating"].sub(games_df["black_rating"])

## A – Flank openings

* White first moves other than 1.e4, 1.d4 (A00–A39)
* 1.d4 without 1...d5, 1...Nf6 or 1...f5: Atypical replies to 1.d4 (A40–A44)
* 1.d4 Nf6 without 2.c4: Atypical replies to 1...Nf6 (A45–A49)
* 1.d4 Nf6 2.c4 without 2...e6 or 2...g6: Atypical Indian systems (A50–A79)
* 1.d4 f5: Dutch Defence (A80–A99)

## B – Semi-Open Games other than the French Defense

* 1.e4 without 1...c5, 1...e6 or 1...e5 (B00–B19)
* 1.e4 c5: Sicilian Defence (B20–B99)

## C – Open Games and the French Defense

* 1.e4 e6: French Defence (C00–C19)
* 1.e4 e5: Open Game (C20–C99)

## D – Closed Games and Semi-Closed Games

* 1.d4 d5: Closed Game (D00–D69)
* 1.d4 Nf6 2.c4 g6 with 3...d5: Grünfeld Defence (D70–D99)

## E – Indian Defenses

* 1.d4 Nf6 2.c4 e6: Indian systems with ...e6 (E00–E59)
* 1.d4 Nf6 2.c4 g6 without 3...d5: Indian systems with ...g6 (except Grünfeld) (E60–E99)

In [None]:
# General summary of game outcomes: overall, and per ECO opening category.

## overall wins for white/black
# Count the outcomes once and reuse the result for the totals and the plots.
winner_counts = games_df["winner"].value_counts()
total_wins = winner_counts.get("white", 0)
total_losses = winner_counts.get("black", 0)
total_draws = winner_counts.get("draw", 0)

## counts of openings
openings = games_df["opening_eco"].value_counts()
# The first character of the ECO code is its letter category.
# .str[0] leaves a missing code as NaN instead of raising the way x[0] would.
games_df["opening_cat"] = games_df["opening_eco"].str[0]

## graphs to visualize these: one pie for all games, then one pie per category
categories = sorted(games_df["opening_cat"].dropna().unique())
n_cols = 3
n_rows = -(-(1 + len(categories)) // n_cols)  # ceiling division without math.ceil
# squeeze = False keeps `axes` two-dimensional even when only one row is needed.
fig, axes = plt.subplots(n_rows, n_cols, figsize = (10,10), squeeze = False)
panels = axes.ravel()

ax1 = panels[0]
ax1.pie(x = winner_counts, labels = winner_counts.index.map(str.capitalize), autopct = "%.1f%%")
ax1.set_title("Overall")

for ax, category in zip(panels[1:], categories):
    # Reindex to the overall outcome order so every pie lists the outcomes
    # in the same sequence (and therefore with the same colors).
    cat_counts = (
        games_df.loc[games_df["opening_cat"] == category, "winner"]
        .value_counts()
        .reindex(winner_counts.index, fill_value = 0)
    )
    ax.pie(x = cat_counts, labels = cat_counts.index.map(str.capitalize), autopct = "%.1f%%")
    ax.set_title(f"Category {category}")

# Blank out any grid cells that did not receive a pie.
for ax in panels[1 + len(categories):]:
    ax.set_axis_off()

fig.suptitle("Win rates by opening category")
plt.show()

In [None]:
# grouping by opening
# find the win rates for each opening/variant
# NOTE: only the groupby object is created here; the win-rate computation
# itself has not been written yet.
gby = games_df.groupby("opening_eco")

# TODO: graph these


In [None]:
# find the number of pieces taken by white/black each game

In [None]:
# grouping by opening, winner
# find out how many pieces the winner had taken 
#   vs the count of pieces the opp. had taken

# graph this separately for white as winner and black as winner

In [None]:
# Graph how often the lower-rated player wins, separately for each color.
# elo_diff is white_rating - black_rating, so a negative value means white is
# the lower-rated player and a positive value means black is.
white_lower = games_df["elo_diff"] < 0
black_lower = games_df["elo_diff"] > 0

# Upset masks: the lower-rated side won. Later cells use these to filter games_df.
white_ud = white_lower & (games_df["winner"] == "white")
black_ud = black_lower & (games_df["winner"] == "black")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (10,10))

# Each pie is limited to the games in which that color was the lower-rated
# player, so the "Wins" share is the upset rate for that color rather than a
# share of every game in the data set.
for ax, color, lower, upset in ((ax1, "white", white_lower, white_ud), (ax2, "black", black_lower, black_ud)):
    ax.set_title(f"Percent times {color} won\ndespite lower ELO rating")
    wins = int(upset.sum())
    others = int(lower.sum()) - wins  # games the lower-rated side lost or drew
    if wins + others == 0:
        # No games with this color as the lower-rated player: nothing to draw.
        ax.set_axis_off()
        continue
    ax.pie(x = [wins, others], labels = ["Wins", "Losses or draws"], autopct = "%.1f%%")

plt.show()

In [None]:
# Per-opening tallies for games in which white was the lower-rated player
# (elo_diff is white_rating - black_rating, so negative means white is lower).
white_underdog = games_df["elo_diff"] < 0

# Upsets: white was lower rated and still won.
white_eco_ud_wins = games_df[white_ud].groupby("opening_eco")["winner"].count()

# Games white did not win while lower rated (losses and draws). Restricting to
# white_underdog keeps games where white was the favorite out of this tally.
white_eco_ud_losses = games_df[white_underdog & ~white_ud].groupby("opening_eco")["winner"].count()


In [None]:
# repeat above work, but with the data binned by ranges of player ratings
# Number of upset wins by white within each white-rating bin.
# white_range is categorical (built with pd.cut); observed = False is stated
# explicitly so that empty rating bins keep appearing with a count of 0
# instead of depending on the groupby default for categorical keys.
games_df[white_ud].groupby("white_range", observed = False)["winner"].count()

In [None]:
# Number of upset wins by black within each black-rating bin.
# black_range is categorical (built with pd.cut); observed = False is stated
# explicitly so that empty rating bins keep appearing with a count of 0
# instead of depending on the groupby default for categorical keys.
games_df[black_ud].groupby("black_range", observed = False)["winner"].count()