In [1]:
import math
import numpy
import pandas as pd
from eco import simplify_eco, eco_decode


# Feather file holding the Lichess 2016-09 games (file name suggests it was
# already filtered to "valid" games upstream — TODO confirm).
final_path = './lichess_2016-09_valid_games.feather'

# Load the whole file into memory; the trailing len(df) shows the row count.
df = pd.read_feather(final_path)
len(df)

3744304

In [2]:
# Keep only the games where both players' rating changes are present.
rating_diffs_present = df[['WhiteRatingDiff', 'BlackRatingDiff']].notna().all(axis=1)
df = df[rating_diffs_present]
len(df)

3742370

In [3]:
# Show column dtypes and memory usage of the frame at this stage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3742370 entries, 2 to 6813118
Data columns (total 14 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   Event            object
 1   Site             object
 2   White            object
 3   Black            object
 4   Result           object
 5   WhiteElo         object
 6   BlackElo         object
 7   WhiteRatingDiff  object
 8   BlackRatingDiff  object
 9   ECO              object
 10  Opening          object
 11  TimeControl      object
 12  Termination      object
 13  Moves            object
dtypes: object(14)
memory usage: 428.3+ MB


In [4]:
# Exclude rows whose Opening is '?' (presumably the placeholder for an
# unidentified opening — verify against the data source).
has_known_opening = df['Opening'].ne('?')
df = df.loc[has_known_opening]
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3740561 entries, 2 to 6813118
Data columns (total 14 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   Event            object
 1   Site             object
 2   White            object
 3   Black            object
 4   Result           object
 5   WhiteElo         object
 6   BlackElo         object
 7   WhiteRatingDiff  object
 8   BlackRatingDiff  object
 9   ECO              object
 10  Opening          object
 11  TimeControl      object
 12  Termination      object
 13  Moves            object
dtypes: object(14)
memory usage: 428.1+ MB


In [5]:
# Columns the rest of this notebook never reads.
unused_columns = [
    'Event', 'Site', 'Black', 'BlackRatingDiff',
    'Opening', 'TimeControl', 'Termination', 'Moves',
]
df = df.drop(columns=unused_columns)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3740561 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           object
 2   WhiteElo         object
 3   BlackElo         object
 4   WhiteRatingDiff  object
 5   ECO              object
dtypes: object(6)
memory usage: 199.8+ MB


In [6]:
# Encode the game result from White's point of view:
# White win -> 1, White loss -> -1, draw -> 0.
result_codes = {'1-0': 1, '0-1': -1, '1/2-1/2': 0}

# Fail loudly on any value outside the mapping. `.replace` would silently leave
# such values as strings, and the later `Result != 0` / `Result == 1` filters
# would then count them as games that White did not win.
unexpected_results = df.loc[~df['Result'].isin(list(result_codes)), 'Result'].unique()
if len(unexpected_results) > 0:
    raise ValueError(f'Unexpected Result values: {unexpected_results.tolist()}')

# `.map` plus an explicit dtype avoids relying on `.replace` silently downcasting
# an object column to int (deprecated in recent pandas), and `.assign` avoids
# writing into a frame that was produced by boolean filtering.
df = df.assign(Result=df['Result'].map(result_codes).astype('int64'))
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3740561 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int64 
 2   WhiteElo         object
 3   BlackElo         object
 4   WhiteRatingDiff  object
 5   ECO              object
dtypes: int64(1), object(5)
memory usage: 199.8+ MB


  df['Result'] = df['Result'].replace({'1-0': 1, '0-1': -1, '1/2-1/2': 0})


In [7]:
# Both Elo columns are loaded as object dtype (see the info() output above);
# convert them to integers in a single astype call.
df = df.astype({'WhiteElo': int, 'BlackElo': int})
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3740561 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int64 
 2   WhiteElo         int64 
 3   BlackElo         int64 
 4   WhiteRatingDiff  object
 5   ECO              object
dtypes: int64(3), object(3)
memory usage: 199.8+ MB


In [8]:
# WhiteRatingDiff is also loaded as object dtype; convert it to integer.
df = df.astype({'WhiteRatingDiff': int})
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3740561 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int64 
 2   WhiteElo         int64 
 3   BlackElo         int64 
 4   WhiteRatingDiff  int64 
 5   ECO              object
dtypes: int64(4), object(2)
memory usage: 199.8+ MB


In [9]:
# Shrink every int64 column to the smallest integer dtype that holds its values.
int_columns = df.select_dtypes(include=['int64']).columns

df = df.assign(
    **{name: pd.to_numeric(df[name], downcast='integer') for name in int_columns}
)

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3740561 entries, 2 to 6813118
Data columns (total 6 columns):
 #   Column           Dtype 
---  ------           ----- 
 0   White            object
 1   Result           int8  
 2   WhiteElo         int16 
 3   BlackElo         int16 
 4   WhiteRatingDiff  int16 
 5   ECO              object
dtypes: int16(3), int8(1), object(2)
memory usage: 110.6+ MB


In [10]:
# Number of games each player has as White, most active first.
df['White'].value_counts()

White
bobificher      999
valmasia3       912
chaabanesami    886
r2d227          826
philippe941     783
               ... 
Medwalid          1
rozzilla          1
naterko           1
altoz             1
kamburmen         1
Name: count, Length: 115112, dtype: int64

### Drop games from players with fewer than 50 games played as White. Too little data per player will hurt clustering performance.

In [11]:
# Total number of games that belong to players with fewer than 50 games as White.
games_played_per = df['White'].value_counts()
lt_fifty_games_played = games_played_per.loc[games_played_per.lt(50)]
lt_fifty_games_played.sum()

1117296

In [12]:
# Games per player, counted on the White side only.
games_played_per = df['White'].value_counts()

# Names of players below the 50-game threshold.
lt_fifty_games_played_player = games_played_per.index[(games_played_per < 50).to_numpy()]

# Keep every game whose White player is NOT in the low-volume set.
is_low_volume = df['White'].isin(lt_fifty_games_played_player)
df_filtered = df.loc[~is_low_volume]
df_filtered['White'].value_counts()

White
bobificher        999
valmasia3         912
chaabanesami      886
r2d227            826
philippe941       783
                 ... 
JANISH             50
leshaleshevich     50
sonder1515         50
SZAMAN             50
TheUnknownOnex     50
Name: count, Length: 23918, dtype: int64

In [13]:
# Continue with the player-filtered games; `df` now refers to the same object as df_filtered.
df = df_filtered

In [14]:
# Distribution of the raw ECO codes before they are simplified.
df['ECO'].value_counts()

ECO
A00    131066
B01    122158
C00    119281
D00    107405
C41     98804
        ...  
A78         3
E96         2
B66         1
A79         1
E03         1
Name: count, Length: 492, dtype: int64

In [15]:
# Rating gap from White's perspective (positive = White is higher rated).
# `.assign` returns a fresh frame, so the filtered slice is never written to.
df = df.assign(EloDiff=df['WhiteElo'] - df['BlackElo'])

In [16]:
# The raw rating columns are dropped now that EloDiff has been derived.
rating_columns = ['WhiteElo', 'BlackElo', 'WhiteRatingDiff']
df = df.drop(columns=rating_columns)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2623265 entries, 2 to 6813118
Data columns (total 4 columns):
 #   Column   Dtype 
---  ------   ----- 
 0   White    object
 1   Result   int8  
 2   ECO      object
 3   EloDiff  int16 
dtypes: int16(1), int8(1), object(2)
memory usage: 67.5+ MB


In [17]:
# Remove draws (Result == 0); only decisive games remain.
is_decisive = df['Result'].ne(0)
df = df.loc[is_decisive]
len(df)

2505775

In [18]:
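# Disabled experiment: grouping by the opening's family name instead of ECO code.
# NOTE: the 'Opening' column is dropped earlier in this notebook, so it must be
# kept in that drop step before this cell can be re-enabled.
# df['Opening'] = df['Opening'].str.split(':').str[0].str.rstrip()
# df['Opening'] = df['Opening'].str.split(',').str[0].str.rstrip()
# df['Opening'] = df['Opening'].str.split('#').str[0].str.rstrip()
# pd.set_option('display.max_rows', None)
# df['Opening'].value_counts()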

In [19]:
# pd.reset_option('display.max_rows')

In [20]:
# Map each ECO code through the project's simplify_eco helper. Per the value
# counts shown before and after this cell, this reduces the number of distinct codes.
df = df.assign(ECO=df['ECO'].apply(simplify_eco))
df['ECO'].value_counts()

ECO
B20    316910
C00    189933
A00    125577
B01    116889
A40    106005
        ...  
E10      2041
E01      1271
D70      1204
E11      1080
A47       559
Name: count, Length: 64, dtype: int64

In [21]:
# How many simplified ECO codes have fewer than 300 games?
games_played_per = df['ECO'].value_counts()

lt_300_games_played = games_played_per.loc[games_played_per.lt(300)].index
len(lt_300_games_played)

0

In [22]:
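# Disabled: the check above found no ECO group with fewer than 300 games in this
# run, so there is nothing to filter. Re-enable if that count becomes non-zero.
# df_filtered = df[~df['ECO'].isin(lt_300_games_played)]
# df_filtered['ECO'].value_counts()
# df = df_filtered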

In [23]:
# Row count going into the pivot steps.
len(df)

2505775

In [24]:
# Games per (White player, ECO code); combinations that never occur become 0.
player_opening_games_df = df.pivot_table(
    index='White',
    columns='ECO',
    aggfunc='size',
    fill_value=0,
)
player_opening_games_df

ECO,A00,A01,A02,A04,A10,A40,A42,A43,A45,A47,...,D50,D70,D80,E00,E01,E10,E11,E12,E20,E60
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,1,0,0,0,0,25,0,3,5,0,...,3,0,1,0,0,0,0,0,2,1
-LEXX-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-chessnoob-,0,0,0,0,1,18,1,0,0,0,...,1,0,0,2,0,0,0,0,0,9
-sayen23-,6,0,0,1,0,9,0,0,2,0,...,0,0,0,0,0,0,0,0,0,0
0203,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzulu,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzxc,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzz___zzz,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Subset of games that White won (Result == 1).
white_won = df['Result'].eq(1)
temp_df = df.loc[white_won]
len(temp_df)

1315522

In [26]:
# Wins per (White player, ECO code), built from the wins-only subset.
# Players or openings with no wins at all are absent from this table.
player_opening_wins_df = temp_df.pivot_table(
    index='White',
    columns='ECO',
    aggfunc='size',
    fill_value=0,
)
player_opening_wins_df

ECO,A00,A01,A02,A04,A10,A40,A42,A43,A45,A47,...,D50,D70,D80,E00,E01,E10,E11,E12,E20,E60
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,1,0,0,0,0,19,0,3,4,0,...,0,0,0,0,0,0,0,0,1,1
-LEXX-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-chessnoob-,0,0,0,0,1,5,0,0,0,0,...,1,0,0,1,0,0,0,0,0,1
-sayen23-,3,0,0,1,0,4,0,0,2,0,...,0,0,0,0,0,0,0,0,0,0
0203,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzulu,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzxc,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzz___zzz,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# Every opening that appears in the games table; reused when computing win rates.
all_openings = set(player_opening_games_df.columns)

# Put the games table in a canonical (sorted) column order.
player_opening_games_df = player_opening_games_df[sorted(player_opening_games_df.columns)]

# Align the wins table to the games table on BOTH axes. The wins pivot only
# contains players/openings with at least one win, so without this step:
#   * openings nobody won with would be missing as columns, and
#   * players with games but zero wins would be missing as rows and then be
#     silently dropped by an index merge (pandas merges default to how='inner').
# Missing cells mean "no wins", hence fill_value=0.
player_opening_wins_df = player_opening_wins_df.reindex(
    index=player_opening_games_df.index,
    columns=player_opening_games_df.columns,
    fill_value=0,
)

# Side-by-side table: '<ECO>_g' = games played, '<ECO>_w' = games won.
# how='left' makes it explicit that the games table defines the set of players.
merged_df = player_opening_games_df.merge(
    player_opening_wins_df,
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('_g', '_w'),
)
merged_df

ECO,A00_g,A01_g,A02_g,A04_g,A10_g,A40_g,A42_g,A43_g,A45_g,A47_g,...,D50_w,D70_w,D80_w,E00_w,E01_w,E10_w,E11_w,E12_w,E20_w,E60_w
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,1,0,0,0,0,25,0,3,5,0,...,0,0,0,0,0,0,0,0,1,1
-LEXX-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-chessnoob-,0,0,0,0,1,18,1,0,0,0,...,1,0,0,1,0,0,0,0,0,1
-sayen23-,6,0,0,1,0,9,0,0,2,0,...,0,0,0,0,0,0,0,0,0,0
0203,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzulu,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzxc,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzz___zzz,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# Check the shape and dtypes of the merged table (one '_g' and one '_w' column per opening).
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23916 entries, -ArtanS- to zzzombie
Columns: 128 entries, A00_g to E60_w
dtypes: int64(128)
memory usage: 23.5+ MB


In [29]:
# Win rate per (player, opening) = wins / games, computed for all openings at once.
eco_codes = sorted(all_openings)
eco_axis = pd.Index(eco_codes, name=merged_df.columns.name)

# Select the count columns by their exact suffixed names rather than by a
# substring test such as `'_g' in col`, which would also match any real column
# label that happens to contain '_g' or '_w'.
games_played = merged_df[[f'{eco}_g' for eco in eco_codes]].set_axis(eco_axis, axis=1)
games_won = merged_df[[f'{eco}_w' for eco in eco_codes]].set_axis(eco_axis, axis=1)

# 0 wins / 0 games yields NaN here (the player never played that opening).
merged_df = games_won / games_played

In [30]:
# Order the opening columns alphabetically, then replace NaN with 0.
# NOTE(review): NaN comes from 0 games / 0 wins, so after this fill "never played"
# is indistinguishable from "played and always lost" — confirm this is intended.
merged_df = merged_df.sort_index(axis=1).fillna(0)
merged_df

ECO,A00,A01,A02,A04,A10,A40,A42,A43,A45,A47,...,D50,D70,D80,E00,E01,E10,E11,E12,E20,E60
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,1.0,0.0,0.0,0.0,0.0,0.760000,0.0,1.0,0.8,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.000000
-LEXX-,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
-chessnoob-,0.0,0.0,0.0,0.0,1.0,0.277778,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.111111
-sayen23-,0.5,0.0,0.0,1.0,0.0,0.444444,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
0203,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zztopillo,1.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
zzulu,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
zzxc,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
zzz___zzz,0.0,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


In [31]:
# Downcast the float64 win-rate columns to the smallest float dtype pandas allows.
float_columns = merged_df.select_dtypes(include=['float64']).columns

merged_df = merged_df.assign(
    **{name: pd.to_numeric(merged_df[name], downcast='float') for name in float_columns}
)

In [32]:
# Preview the first few players' win rates after the downcast.
merged_df.head()

ECO,A00,A01,A02,A04,A10,A40,A42,A43,A45,A47,...,D50,D70,D80,E00,E01,E10,E11,E12,E20,E60
White,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-ArtanS-,1.0,0.0,0.0,0.0,0.0,0.76,0.0,1.0,0.8,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0
-LEXX-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-chessnoob-,0.0,0.0,0.0,0.0,1.0,0.277778,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.111111
-sayen23-,0.5,0.0,0.0,1.0,0.0,0.444444,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0203,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Flip the table so openings are rows and players are columns.
# Caution: re-running this cell on its own flips the table back again.
merged_df = merged_df.transpose()
merged_df

White,-ArtanS-,-LEXX-,-chessnoob-,-sayen23-,0203,1--,10H01,1111112222,1taurus,234234,...,zzalimm,zzchess,zzippy,zzoora,zzpp,zztopillo,zzulu,zzxc,zzz___zzz,zzzombie
ECO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00,1.0,0.0,0.000000,0.5,0.0,0.0,0.0,0.0,0.0,0.318584,...,0.0,0.0,1.0,0.0,0.000000,1.0,0.0,0.0,0.0,0.0
A01,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
A02,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.500000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
A04,0.0,0.0,0.000000,1.0,0.0,0.0,0.0,0.0,0.0,1.000000,...,0.0,0.0,0.0,0.5,0.800000,0.0,0.0,0.0,0.0,0.0
A10,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,1.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
E10,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0
E11,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
E12,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
E20,0.5,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0


In [34]:
# Persist the opening-by-player win-rate table for later steps.
# NOTE(review): the frame has a named, non-default index ('ECO'); writing that
# to Feather presumably needs a recent pandas — confirm the pinned version.
engineered_path = './lichess_2016-09_engineered_df_eco.feather'
merged_df.to_feather(engineered_path)

In [35]:
# Read the file straight back and display it as a visual round-trip check.
test_df = pd.read_feather(engineered_path)
test_df

White,-ArtanS-,-LEXX-,-chessnoob-,-sayen23-,0203,1--,10H01,1111112222,1taurus,234234,...,zzalimm,zzchess,zzippy,zzoora,zzpp,zztopillo,zzulu,zzxc,zzz___zzz,zzzombie
ECO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00,1.0,0.0,0.000000,0.5,0.0,0.0,0.0,0.0,0.0,0.318584,...,0.0,0.0,1.0,0.0,0.000000,1.0,0.0,0.0,0.0,0.0
A01,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
A02,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.500000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
A04,0.0,0.0,0.000000,1.0,0.0,0.0,0.0,0.0,0.0,1.000000,...,0.0,0.0,0.0,0.5,0.800000,0.0,0.0,0.0,0.0,0.0
A10,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,1.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
E10,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0
E11,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
E12,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
E20,0.5,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0


In [36]:
# The row labels of the saved table: the simplified ECO codes used as features.
merged_df.index

Index(['A00', 'A01', 'A02', 'A04', 'A10', 'A40', 'A42', 'A43', 'A45', 'A47',
       'A48', 'A50', 'A51', 'A53', 'A56', 'A57', 'A60', 'A80', 'B00', 'B01',
       'B02', 'B06', 'B07', 'B10', 'B20', 'C00', 'C20', 'C21', 'C23', 'C25',
       'C30', 'C40', 'C41', 'C42', 'C44', 'C45', 'C46', 'C47', 'C50', 'C51',
       'C53', 'C55', 'C60', 'D00', 'D01', 'D02', 'D03', 'D04', 'D06', 'D07',
       'D10', 'D20', 'D30', 'D43', 'D50', 'D70', 'D80', 'E00', 'E01', 'E10',
       'E11', 'E12', 'E20', 'E60'],
      dtype='object', name='ECO')