In [9]:
import numpy as np
import pandas as pd
import chess
import matplotlib.pyplot as plt
%matplotlib inline

In [17]:
# Load the games table: one row per game, with both players' ratings,
# results and the opening name.
df = pd.read_csv('data/chess-openings.csv')

# Rating bands are left-closed, [low, high), so that the labels are accurate:
# a player rated exactly 1200 belongs to '1200-1399', not '<1200'.
# The top edge is infinite so no rating can fall outside the last band.
elo_bins = [0, 1200, 1400, 1600, 1800, 2000, 2200, 2400, np.inf]
elo_labels = ['<1200', '1200-1399', '1400-1599', '1600-1799', '1800-1999', '2000-2199', '2200-2399', '2400+']
# White and Black are binned with the same edges and labels so the two
# groupings are directly comparable.
df['white_group'] = pd.cut(df['white_rating'], bins=elo_bins, labels=elo_labels, right=False)
df['black_group'] = pd.cut(df['black_rating'], bins=elo_bins, labels=elo_labels, right=False)

df.head()

Unnamed: 0,white_username,black_username,white_id,black_id,white_rating,black_rating,white_result,black_result,time_class,time_control,rules,rated,fen,pgn,opening,white_group,black_group
0,-Amos-,miniman2804,https://api.chess.com/pub/player/-amos-,https://api.chess.com/pub/player/miniman2804,1708,1608,win,checkmated,daily,1/259200,chess,True,r2r4/p2p1p1p/b6R/n1p1kp2/2P2P2/3BP3/PP5P/4K2R ...,"[Event ""Enjoyable games 2 - Round 1""]\n[Site ""...",Nimzo-Indian-Defense-Spielmann-Variation,1600-1799,1600-1799
1,-Amos-,koltcho69,https://api.chess.com/pub/player/-amos-,https://api.chess.com/pub/player/koltcho69,1726,1577,win,resigned,daily,1/172800,chess,True,8/5Q1k/4n1pp/8/7P/2N2b2/PP3P2/5K2 b - - 1 33,"[Event ""Rapid Rats - Board 5""]\n[Site ""Chess.c...",Giuoco-Piano-Game-Main-Line,1600-1799,1400-1599
2,-Amos-,enhmandah,https://api.chess.com/pub/player/-amos-,https://api.chess.com/pub/player/enhmandah,1727,842,win,resigned,daily,1/172800,chess,True,rn1q1b1r/kb2p1pp/2p5/p1Q5/N1BP2n1/4PN2/1P3PPP/...,"[Event ""CHESS BOARD CLASH - Round 1""]\n[Site ""...",Queens-Pawn-Opening-1...d5-2.e3,1600-1799,<1200
3,enhmandah,-Amos-,https://api.chess.com/pub/player/enhmandah,https://api.chess.com/pub/player/-amos-,819,1727,checkmated,win,daily,1/172800,chess,True,r3kb1r/pp3ppp/3p1n2/2pKp3/P3P3/1P6/4qP1P/QNB5 ...,"[Event ""CHESS BOARD CLASH - Round 1""]\n[Site ""...",Sicilian-Defense-Snyder-Variation,<1200,1600-1799
4,-Amos-,Shalllow-Blue,https://api.chess.com/pub/player/-amos-,https://api.chess.com/pub/player/shalllow-blue,1729,1116,win,resigned,daily,1/172800,chess,True,r3b2r/pp6/2pPpR1k/4n3/2P3Q1/3B4/PP4PP/R5K1 b -...,"[Event ""CHESS BOARD CLASH - Round 1""]\n[Site ""...",Queens-Pawn-Opening-Horwitz-Defense-2.c4,1600-1799,<1200


In [12]:
# value_counts() already returns the openings ordered from most to least
# played, so no additional sort is required.
opening_counts = df['opening'].value_counts()

# Every opening -> number of games, most played first (plain Python ints).
total_counts = opening_counts.to_dict()

# The ten most played openings.
top10 = opening_counts.head(10).to_dict()
top10

{'Bishops-Opening': 1206,
 'Scandinavian-Defense': 1018,
 'Queens-Pawn-Opening-Accelerated-London-System': 966,
 'Vant-Kruijs-Opening': 927,
 'Sicilian-Defense-Bowdler-Attack': 849,
 'Queens-Pawn-Opening-1...d5-2.e3': 782,
 'Kings-Pawn-Opening-Owens-Defense': 709,
 'Scandinavian-Defense-Mieses-Kotrc-Variation': 690,
 'Vant-Kruijs-Opening-1...e5': 668,
 'Philidor-Defense-3.Bc4': 661}

In [13]:
def _most_played(openings):
    """Return the most frequent value in `openings`, or None if it has none.

    When several values tie, the first entry returned by `Series.mode()` is used.
    """
    modes = openings.mode()
    # mode() is empty when the group holds no non-null opening.
    return modes.iloc[0] if len(modes) else None


# Most frequently played opening within each rating band, grouped once by
# White's band and once by Black's band.
# `observed=False` is passed explicitly because the grouping columns are
# categorical: it pins the behaviour (every band is kept in the result)
# instead of relying on a pandas default that is deprecated.
white = df.groupby('white_group', observed=False)['opening'].apply(_most_played)
black = df.groupby('black_group', observed=False)['opening'].apply(_most_played)

In [14]:
# Display the `white` Series as a plain {index label: value} mapping.
white.to_dict()

{'<1200': 'Scandinavian-Defense',
 '1200-1399': 'Bishops-Opening',
 '1400-1599': 'Sicilian-Defense-Bowdler-Attack',
 '1600-1799': 'Scandinavian-Defense-Mieses-Kotrc-Variation',
 '1800-1999': 'Queens-Pawn-Opening-Zukertort-Variation',
 '2000-2199': 'Sicilian-Defense-Hyperaccelerated-Dragon-Fianchetto-Variation-3...cxd4-4.Nxd4',
 '2200-2399': 'Caro-Kann-Defense-Advance-Tal-Variation',
 '2400+': 'Alekhines-Defense-Modern-Main-Line'}

In [15]:
# Display the `black` Series as a plain {index label: value} mapping.
black.to_dict()

{'<1200': 'Scandinavian-Defense',
 '1200-1399': 'Bishops-Opening',
 '1400-1599': 'Sicilian-Defense-Bowdler-Attack',
 '1600-1799': 'Bishops-Opening',
 '1800-1999': 'Kings-Fianchetto-Opening',
 '2000-2199': 'Indian-Game',
 '2200-2399': 'Caro-Kann-Defense-Advance-Tal-Variation',
 '2400+': 'Queens-Pawn-Opening-Zukertort-Variation'}

In [165]:
# Games per opening, ascending by count. Only openings played more often than
# the average opening are kept, so that rates are not computed on tiny samples.
total = df.groupby('opening').size().sort_values()
mean = total.mean()
total = total[total > mean]

# Number of games won by each colour, per opening.
# NOTE: `white` / `black` are rebound here; the cells above used these names
# for the most-played opening per rating band.
white = df[df['white_result'] == 'win'].groupby('opening').size()
black = df[df['black_result'] == 'win'].groupby('opening').size()

# Align the win counts to the retained openings before dividing. An opening
# that a colour never won is absent from the win counts; it must count as
# 0 wins (rate 0.0) instead of turning into NaN and disappearing.
white_rates = (white.reindex(total.index, fill_value=0) / total).sort_values()
black_rates = (black.reindex(total.index, fill_value=0) / total).sort_values()

In [166]:
# Games per opening, restricted to openings played more often than the mean, ascending.
total

opening
Alekhines-Defense-Mokele-Mbembe-Variation                   22
French-Defense-Exchange-Monte-Carlo-Variation-4...Nf6       22
Kings-Pawn-Opening-Kings-Knight-Elephant-Gambit-3.Nxe5      22
Polish-Opening-Czech-Defense                                22
Italian-Game-Blackburne-Shilling-Gambit-4.Nxe5-Qg5          22
                                                          ... 
Sicilian-Defense-Bowdler-Attack                            849
Vant-Kruijs-Opening                                        927
Queens-Pawn-Opening-Accelerated-London-System              966
Scandinavian-Defense                                      1018
Bishops-Opening                                           1206
Length: 533, dtype: int64

In [167]:
# Share of each retained opening's games where white_result == 'win', ascending.
white_rates

opening
Unknown                                                              0.138940
Ruy-Lopez-Opening-Birds-Defense-4.Nxd4-exd4-5.d3                     0.208333
Birds-Opening-Dutch-Variation                                        0.222222
Grob-Opening                                                         0.244898
Sicilian-Defense-Open-Accelerated-Dragon-Modern-Variation-5...Bg7    0.250000
                                                                       ...   
Scandinavian-Defense-Modern-Scandinavian-Gambit                      0.708333
Kings-Indian-Attack                                                  0.730769
Sicilian-Defense-McDonnell-Attack                                    0.739130
Petrovs-Defense-Classical-Damiano-Variation-4.Qe2                    0.742857
Kings-Pawn-Opening                                                   0.876866
Length: 533, dtype: float64

In [168]:
# Share of each retained opening's games where black_result == 'win', ascending.
black_rates

opening
Kings-Pawn-Opening                                   0.093284
Petrovs-Defense-Classical-Damiano-Variation-4.Qe2    0.228571
Kings-Indian-Attack                                  0.230769
Giuoco-Piano-Game-4.Nc3                              0.238095
Scandinavian-Defense-Modern-Scandinavian-Gambit      0.250000
                                                       ...   
Sicilian-Defense-Delayed-Alapin-Variation-3...Nf6    0.727273
Grob-Opening                                         0.734694
Grob-Opening-1...e5                                  0.740741
Ruy-Lopez-Opening-Birds-Defense-4.Nxd4-exd4-5.d3     0.750000
Unknown                                              0.835466
Length: 533, dtype: float64