In [1]:
import chess.pgn
import chess
import chess.engine
import math
import numpy
import pandas as pd
import os
import pyarrow
import logging
import re

# Configure logging
logging.basicConfig(filename='process.log', level=logging.INFO, 
                    format='%(asctime)s - %(message)s', datefmt='%H:%M:%S')

final_path = './lichess_2016-09_valid_games.feather'

df = pd.read_feather(final_path)
len(df)

3744304

In [2]:
df = df.dropna(subset=['WhiteRatingDiff', 'BlackRatingDiff'])
len(df)

3742370

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 14 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   Event            object
 1   Site             object
 2   White            object
 3   Black            object
 4   Result           object
 5   WhiteElo         object
 6   BlackElo         object
 7   WhiteRatingDiff  object
 8   BlackRatingDiff  object
 9   ECO              object
 10  Opening          object
 11  TimeControl      object
 12  Termination      object
 13  Moves            object
dtypes: object(14)
memory usage: 428.3+ MB


In [4]:
df.drop(['Event', 'Site', 'Black', 'BlackRatingDiff', 'ECO', 'TimeControl', 'Termination', 'Moves'], axis=1, inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           object
 2   WhiteElo         object
 3   BlackElo         object
 4   WhiteRatingDiff  object
 5   Opening          object
dtypes: object(6)
memory usage: 199.9+ MB


In [5]:
df['Result'] = df['Result'].replace({'1-0': 1, '0-1': -1, '1/2-1/2': 0})
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int64 
 2   WhiteElo         object
 3   BlackElo         object
 4   WhiteRatingDiff  object
 5   Opening          object
dtypes: int64(1), object(5)
memory usage: 199.9+ MB


  df['Result'] = df['Result'].replace({'1-0': 1, '0-1': -1, '1/2-1/2': 0})


In [6]:
df['WhiteElo'] = df['WhiteElo'].astype(int)
df['BlackElo'] = df['BlackElo'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int64 
 2   WhiteElo         int64 
 3   BlackElo         int64 
 4   WhiteRatingDiff  object
 5   Opening          object
dtypes: int64(3), object(3)
memory usage: 199.9+ MB


In [7]:
df['WhiteRatingDiff'] = df['WhiteRatingDiff'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int64 
 2   WhiteElo         int64 
 3   BlackElo         int64 
 4   WhiteRatingDiff  int64 
 5   Opening          object
dtypes: int64(4), object(2)
memory usage: 199.9+ MB


In [8]:
int_columns = df.select_dtypes(include=['int64']).columns

for col in int_columns:
    df[col] = pd.to_numeric(df[col], downcast='integer')

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int8  
 2   WhiteElo         int16 
 3   BlackElo         int16 
 4   WhiteRatingDiff  int16 
 5   Opening          object
dtypes: int16(3), int8(1), object(2)
memory usage: 110.6+ MB


In [9]:
df['White'].value_counts()

White
bobificher      999
valmasia3       912
chaabanesami    886
r2d227          826
philippe941     784
               ... 
joaovitordf       1
HelderViana       1
Ayre              1
raezhugo          1
kamburmen         1
Name: count, Length: 115117, dtype: int64

In [10]:
games_played_per = df['White'].value_counts()
lt_fifty_games_played = games_played_per[games_played_per < 50]
lt_fifty_games_played.sum()

1117487

In [11]:
games_played_per = df['White'].value_counts()

lt_fifty_games_played_player = games_played_per[games_played_per < 50].index

df_filtered = df[~df['White'].isin(lt_fifty_games_played_player)]
df_filtered['White'].value_counts()

White
bobificher      999
valmasia3       912
chaabanesami    886
r2d227          826
philippe941     784
               ... 
Kent85           50
jerr68           50
szaszynkowo      50
SirOchsen        50
MrLight          50
Name: count, Length: 23924, dtype: int64

In [12]:
df = df_filtered

In [13]:
df = df[(df['Opening'] != '?')]
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2623531 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int8  
 2   WhiteElo         int16 
 3   BlackElo         int16 
 4   WhiteRatingDiff  int16 
 5   Opening          object
dtypes: int16(3), int8(1), object(2)
memory usage: 77.6+ MB


In [14]:
df = df.copy()
df.loc[:, 'EloDiff'] = df['WhiteElo'] - df['BlackElo']

In [15]:
df.drop(['WhiteElo', 'BlackElo', 'WhiteRatingDiff'], axis=1, inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2623531 entries, 2 to 6813118
Data columns (total 4 columns):
 #   Column   Dtype 
---  ------   ----- 
 0   White    object
 1   Result   int8  
 2   Opening  object
 3   EloDiff  int16 
dtypes: int16(1), int8(1), object(2)
memory usage: 67.6+ MB


In [16]:
df = df[(df['Result'] != 0)]
len(df)

2506025

In [17]:
df['Opening'] = df['Opening'].str.split(':').str[0].str.rstrip()
df['Opening'] = df['Opening'].str.split(',').str[0].str.rstrip()
df['Opening'] = df['Opening'].str.split('#').str[0].str.rstrip()
pd.set_option('display.max_rows', None)
df['Opening'].value_counts()

Opening
Sicilian Defense                     319040
French Defense                       177550
Queen's Pawn Game                    160411
Scandinavian Defense                 116909
King's Pawn Game                     104525
Italian Game                          95941
Philidor Defense                      94445
Caro-Kann Defense                     79739
English Opening                       75713
Ruy Lopez                             69981
Bishop's Opening                      59904
Scotch Game                           49585
Modern Defense                        45906
Van't Kruijs Opening                  44521
Queen's Gambit Declined               44010
Indian Game                           42283
Pirc Defense                          39476
Zukertort Opening                     34594
Horwitz Defense                       33573
King's Gambit Accepted                32273
Nimzowitsch Defense                   31642
Slav Defense                          28799
Queen's Gambit Accepted 

In [18]:
pd.reset_option('display.max_rows')

In [19]:
games_played_per = df['Opening'].value_counts()

lt_300_games_played = games_played_per[games_played_per < 300].index

df_filtered = df[~df['Opening'].isin(lt_300_games_played)]
df_filtered['Opening'].value_counts()
df = df_filtered

In [20]:
len(df)

2502866

In [21]:
player_opening_games_df = pd.pivot_table(df, index='White', columns='Opening', aggfunc='size', fill_value=0)
player_opening_games_df

Opening,Alekhine Defense,Amar Opening,Amazon Attack,Anderssen Opening,Barnes Defense,Benko Gambit,Benko Gambit Accepted,Benko Gambit Declined,Benoni Defense,Bird Opening,...,Torre Attack,Trompowsky Attack,Van Geet Opening,Van't Kruijs Opening,Vienna Game,Wade Defense,Ware Defense,Ware Opening,Yusupov-Rubinstein System,Zukertort Opening
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,0,0,0,0,0,0,0,0,3,0,...,5,0,0,0,0,0,0,0,0,0
-LEXX-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-chessnoob-,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
-sayen23-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
0203,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,0,0,0
zzulu,3,0,0,0,1,0,0,0,0,0,...,0,0,1,0,17,0,0,0,0,0
zzxc,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzz___zzz,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
temp_df = df[(df['Result'] == 1)]
len(temp_df)

1313983

In [23]:
player_opening_wins_df = pd.pivot_table(temp_df, index='White', columns='Opening', aggfunc='size', fill_value=0)
player_opening_wins_df

Opening,Alekhine Defense,Amar Opening,Amazon Attack,Anderssen Opening,Barnes Defense,Benko Gambit,Benko Gambit Accepted,Benko Gambit Declined,Benoni Defense,Bird Opening,...,Torre Attack,Trompowsky Attack,Van Geet Opening,Van't Kruijs Opening,Vienna Game,Wade Defense,Ware Defense,Ware Opening,Yusupov-Rubinstein System,Zukertort Opening
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,0,0,0,0,0,0,0,0,1,0,...,4,0,0,0,0,0,0,0,0,0
-LEXX-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-chessnoob-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-sayen23-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
0203,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
zzulu,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,14,0,0,0,0,0
zzxc,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzz___zzz,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
all_openings = set(player_opening_games_df.columns)

for opening in all_openings:
    if opening not in player_opening_wins_df.columns:
        player_opening_wins_df[opening] = 0

player_opening_games_df = player_opening_games_df[sorted(player_opening_games_df.columns)]
player_opening_wins_df = player_opening_wins_df[sorted(player_opening_wins_df.columns)]

merged_df = player_opening_games_df.merge(player_opening_wins_df, left_index=True, right_index=True, suffixes=('_g', '_w'))
merged_df

Opening,Alekhine Defense_g,Amar Opening_g,Amazon Attack_g,Anderssen Opening_g,Barnes Defense_g,Benko Gambit_g,Benko Gambit Accepted_g,Benko Gambit Declined_g,Benoni Defense_g,Bird Opening_g,...,Torre Attack_w,Trompowsky Attack_w,Van Geet Opening_w,Van't Kruijs Opening_w,Vienna Game_w,Wade Defense_w,Ware Defense_w,Ware Opening_w,Yusupov-Rubinstein System_w,Zukertort Opening_w
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,0,0,0,0,0,0,0,0,3,0,...,4,0,0,0,0,0,0,0,0,0
-LEXX-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-chessnoob-,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
-sayen23-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
0203,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
zzulu,3,0,0,0,1,0,0,0,0,0,...,0,0,0,0,14,0,0,0,0,0
zzxc,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzz___zzz,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23922 entries, -ArtanS- to zzzombie
Columns: 258 entries, Alekhine Defense_g to Zukertort Opening_w
dtypes: int64(258)
memory usage: 47.3+ MB


In [26]:
player_opening_wins_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23922 entries, -ArtanS- to zzzombie
Columns: 129 entries, Alekhine Defense to Zukertort Opening
dtypes: int64(129)
memory usage: 23.7+ MB


In [27]:
player_opening_games_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23924 entries, -ArtanS- to zzzombie
Columns: 129 entries, Alekhine Defense to Zukertort Opening
dtypes: int64(129)
memory usage: 23.7+ MB


In [28]:
for opening in all_openings:
    merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']

merged_df.drop([col for col in merged_df if '_g' in col or '_w' in col], axis=1, inplace=True)

  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']
  merged_df[opening] = merged_df[opening + '_w'] / merged_df[opening + '_g']

In [29]:
merged_df = merged_df[sorted(merged_df.columns)].fillna(0)
merged_df

Opening,Alekhine Defense,Amar Opening,Amazon Attack,Anderssen Opening,Barnes Defense,Benko Gambit,Benko Gambit Accepted,Benko Gambit Declined,Benoni Defense,Bird Opening,...,Torre Attack,Trompowsky Attack,Van Geet Opening,Van't Kruijs Opening,Vienna Game,Wade Defense,Ware Defense,Ware Opening,Yusupov-Rubinstein System,Zukertort Opening
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,...,0.8,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-LEXX-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-chessnoob-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-sayen23-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,1.0
0203,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,1.0,0.000000,0.0,1.0,0.0,0.0,0.0
zzulu,0.333333,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.823529,0.0,0.0,0.0,0.0,0.0
zzxc,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
zzz___zzz,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [30]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23922 entries, -ArtanS- to zzzombie
Columns: 129 entries, Alekhine Defense to Zukertort Opening
dtypes: float64(129)
memory usage: 23.7+ MB


In [31]:
float_columns = merged_df.select_dtypes(include=['float64']).columns

for col in float_columns:
    merged_df[col] = pd.to_numeric(merged_df[col], downcast='float')

merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23922 entries, -ArtanS- to zzzombie
Columns: 129 entries, Alekhine Defense to Zukertort Opening
dtypes: float32(129)
memory usage: 12.0+ MB


In [32]:
merged_df

Opening,Alekhine Defense,Amar Opening,Amazon Attack,Anderssen Opening,Barnes Defense,Benko Gambit,Benko Gambit Accepted,Benko Gambit Declined,Benoni Defense,Bird Opening,...,Torre Attack,Trompowsky Attack,Van Geet Opening,Van't Kruijs Opening,Vienna Game,Wade Defense,Ware Defense,Ware Opening,Yusupov-Rubinstein System,Zukertort Opening
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,...,0.8,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-LEXX-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-chessnoob-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-sayen23-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,1.0
0203,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,1.0,0.000000,0.0,1.0,0.0,0.0,0.0
zzulu,0.333333,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.823529,0.0,0.0,0.0,0.0,0.0
zzxc,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
zzz___zzz,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [34]:
engineered_path = './lichess_2016-09_engineered_df.feather'
merged_df.to_feather(engineered_path)

In [35]:
test_df = pd.read_feather(engineered_path)
test_df

Opening,Alekhine Defense,Amar Opening,Amazon Attack,Anderssen Opening,Barnes Defense,Benko Gambit,Benko Gambit Accepted,Benko Gambit Declined,Benoni Defense,Bird Opening,...,Torre Attack,Trompowsky Attack,Van Geet Opening,Van't Kruijs Opening,Vienna Game,Wade Defense,Ware Defense,Ware Opening,Yusupov-Rubinstein System,Zukertort Opening
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,...,0.8,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-LEXX-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-chessnoob-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
-sayen23-,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,1.0
0203,0.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,1.0,0.000000,0.0,1.0,0.0,0.0,0.0
zzulu,0.333333,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.823529,0.0,0.0,0.0,0.0,0.0
zzxc,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
zzz___zzz,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
