# **Exploring Hikaru Dataset Metadata**

## 0. Analysis setup

In [12]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('../Core')
sys.path.append('../Utils')

# Import data classes
from metadata import MetaData
from movedata import MoveData

# Import cleaner classes
from data_cleaning import MetaDataCleaner

# Set plot style: these are global, notebook-wide defaults that apply to every
# figure created after this cell runs.
# - seaborn 'whitegrid' style for all axes
# - default figure size of 12 x 6
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Load move data and metadata

In [13]:
# joblib is only needed for this load step. pandas (pd) is already imported in
# the setup cell at the top of the notebook, so it is not re-imported here.
from joblib import load

# Load the moves dictionary.
# NOTE(security): joblib files are pickle-based, and unpickling can execute
# arbitrary code -- only load this file if it comes from a trusted source.
hikaru_mv_dict = load('../Data/hikaru_moves_dictionary.joblib')

# Load the raw metadata dataframe, using the Game_ID column as the index
hikaru_meta_df_raw = pd.read_csv('../Data/hikaru_metadata_dataframe.csv', index_col='Game_ID')

# Cleaned metadata; this is the frame the exploration cells below work on
hikaru_meta_df = MetaDataCleaner.clean_metadata(hikaru_meta_df_raw)

## 2. Quick data check

### I. Metadata

In [14]:
# Spot-check three consecutive rows (positions 9700-9702) of the cleaned metadata
display(hikaru_meta_df.iloc[9700:9703])

Unnamed: 0_level_0,Event,Site,Date,Round,White,Black,Result,CurrentPosition,Timezone,ECO,...,TimeControl,Termination,StartTime,EndDate,EndTime,Link,SetUp,FEN,Variant,Tournament
Game_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9701,Live Chess,Chess.com,NaT,,Hikaru,Keranke,1-0,3r2k1/p2P1p1p/6p1/4q3/8/1P3P2/P2Q2PP/3R2K1 b - -,UTC,A01,...,60,Hikaru won by resignation,0 days 16:32:22,NaT,0 days 16:33:32,https://www.chess.com/game/live/1628869910,,,,
9702,Live Chess,Chess.com,NaT,,Keranke,Hikaru,0-1,r1q1k1nr/ppp1pp2/3p2p1/1P5p/2P5/2bP1PP1/P4PKP/...,UTC,A10,...,60,Hikaru won by resignation,0 days 16:33:41,NaT,0 days 16:34:06,https://www.chess.com/game/live/1628871326,,,,
9703,Live Chess,Chess.com,NaT,,Hikaru,Keranke,1-0,r3r3/p1B2kpp/1p6/3P1p2/2P1n3/1P2P1PP/P1R3K1/R7...,UTC,A01,...,60,Hikaru won by resignation,0 days 16:34:16,NaT,0 days 16:35:17,https://www.chess.com/game/live/1628871923,,,,


In [15]:
# Column-by-column summary (non-null counts and dtypes) of the cleaned metadata.
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
hikaru_meta_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 50146 entries, 1 to 50146
Data columns (total 25 columns):
 #   Column           Non-Null Count  Dtype          
---  ------           --------------  -----          
 0   Event            50130 non-null  string         
 1   Site             50130 non-null  string         
 2   Date             0 non-null      datetime64[ns] 
 3   Round            0 non-null      Int64          
 4   White            50130 non-null  string         
 5   Black            50130 non-null  string         
 6   Result           50146 non-null  string         
 7   CurrentPosition  50130 non-null  string         
 8   Timezone         50130 non-null  string         
 9   ECO              49676 non-null  string         
 10  ECOUrl           49676 non-null  string         
 11  UTCDate          0 non-null      datetime64[ns] 
 12  UTCTime          50130 non-null  timedelta64[ns]
 13  WhiteElo         50130 non-null  Int64          
 14  BlackElo         50130 non-

## 3. Metadata Column Exploration

### I. Event

In [25]:
# Number of distinct Event labels in the cleaned metadata.
# The f-strings use double quotes on the outside so the single-quoted column
# key inside the braces also parses on Python versions older than 3.12.
print(f"Unique event values: {hikaru_meta_df['Event'].nunique()}")

# Seven most frequent Event labels with their game counts
print(f"Event value counts: \n{hikaru_meta_df['Event'].value_counts().head(7)}")

Unique event values: 20
Event value counts: 
Event
Live Chess                       48063
Live Chess - Odds Chess            777
Live Chess - Chess960              704
Let's Play!                        174
Live Chess - Three-Check           167
Live Chess - Crazyhouse            140
Live Chess - King of the Hill       91
Name: count, dtype: Int64
