### Create a DataFrame of game information that will be used to test my chess implementation

In [1]:
from constants import *
from notation_interpreter import NotationInterpreter

import pandas as pd
from tqdm import tqdm

##### Reading games that were originally in CSV format:

In [2]:
# Load the raw game records from the CSV file into a DataFrame.
games_df = pd.read_csv('game_data/games.csv')

In [3]:
# List the available columns, then preview the first few move strings.
print("All columns:", games_df.columns)
games_df['moves'].head()

All columns: Index(['id', 'rated', 'created_at', 'last_move_at', 'turns', 'victory_status',
       'winner', 'increment_code', 'white_id', 'white_rating', 'black_id',
       'black_rating', 'moves', 'opening_eco', 'opening_name', 'opening_ply'],
      dtype='object')


0    d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...
1    d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...
2    e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...
3    d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...
4    e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...
Name: moves, dtype: object

I only care about the 'moves' column; none of the other columns are used below.

In [4]:
# Inspect the complete move string of a single game (positional row 25).
games_df.iloc[25]['moves']

'd4 d5 Nc3 Nf6 Bf4 Bf5 Nf3 e6 e3 Be7 Bb5+ c6 Bd3 Bxd3 Qxd3 O-O Bg5 Ng4 Bxe7 Qxe7 O-O Qg5 h3 Qf6 hxg4 Qh6 Na4 b5 Nc5 Kh8 Nb7 g6 Nd6 Na6 Ne5 Kg8 Ndxf7 Rxf7 Nxf7 Kxf7 f4 g5 fxg5+ Kg7 gxh6+ Kxh6 Rf6+ Kg7 g5 h6 Qg6+ Kh8 Qxh6+ Kg8 Rxe6 Kf7 Qf6+ Kg8 Re8+ Rxe8 Qg6+ Kf8 Qh6+ Kg8 g6 Nb4 Rf1 Nxc2 Qh7#'

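As the sample above shows, a 'moves' string is just space-separated moves in algebraic notation (no move numbers, comments, or result markers), so I don't need to worry about it containing things that aren't moves.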

#### Parsing moves:

In [5]:
# Single interpreter instance shared by every parse_moves_from_row call.
notation_buddy = NotationInterpreter()

def parse_moves_from_row(moves):
    """Parse one game's move string into a list of parsed moves.

    The string is split on whitespace; every token is handed to
    ``notation_buddy.parse_move`` together with its zero-based position in
    the game, and the results are returned in playing order.
    """
    return [
        notation_buddy.parse_move(token, ply)
        for ply, token in enumerate(moves.split())
    ]

def create_move_df(game_df):
    """Flatten every game in ``game_df`` into a single per-move DataFrame.

    ``game_df`` must have a 'moves' column holding one move string per game.
    Each parsed move is tagged with a 'game_id' entry set to the index label
    of the row it came from, and all moves are collected, game by game and in
    playing order, into the rows of the returned DataFrame.
    """
    all_moves = []
    labelled_games = tqdm(game_df['moves'].items())

    for game_id, move_string in labelled_games:
        # Tag and collect in one pass; the parsed move is updated in place so
        # 'game_id' is added after the keys produced by the parser.
        for parsed_move in parse_moves_from_row(move_string):
            parsed_move['game_id'] = game_id
            all_moves.append(parsed_move)

    return pd.DataFrame(all_moves)

In [6]:
# Parse every game in games_df into one flat, per-move DataFrame.
move_df = create_move_df(games_df)



20058it [00:06, 3243.22it/s]


In [7]:
# Show the per-move DataFrame for a visual sanity check.
move_df

Unnamed: 0,piece_type,piece_indicator,to_square,is_take,is_check,is_checkmate,is_promotion,is_king_side_castle,is_queen_side_castle,turn,team,game_id
0,p,"(None, None)","(4, 4)",False,False,False,False,False,False,0,white,0
1,p,"(None, None)","(4, 5)",False,False,False,False,False,False,1,black,0
2,p,"(None, None)","(3, 4)",False,False,False,False,False,False,2,white,0
3,p,"(None, None)","(3, 6)",False,False,False,False,False,False,3,black,0
4,p,"(3, None)","(4, 5)",True,False,False,False,False,False,4,white,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1212822,q,"(None, None)","(1, 6)",False,True,False,False,False,False,73,black,20057
1212823,k,"(None, None)","(4, 7)",False,False,False,False,False,False,74,white,20057
1212824,q,"(None, None)","(2, 5)",False,True,False,False,False,False,75,black,20057
1212825,k,"(None, None)","(4, 8)",False,False,False,False,False,False,76,white,20057


#### Looks good

In [14]:
# Persist the per-move DataFrame as a pickle file (the path has no extension).
# NOTE(review): assumes the test_data/ directory already exists — confirm.
pd.to_pickle(move_df, 'test_data/game_info')