# **Setup**

### **Libraries**

In [51]:
import re

import pandas as pd
from tqdm.auto import tqdm

### **Data**

In [57]:
# Read the processed games CSV and keep only the columns used in this notebook.
games_df = pd.read_csv('../data/processed_2019_06_df.csv').loc[
    :, ['user_name', 'user_elo', 'opening_code', 'opening_name', 'pgn']
]
games_df.head(3)

Unnamed: 0,user_name,user_elo,opening_code,opening_name,pgn
0,bkrrhanife2,1772,D02,Queen's Pawn Game: London System,1. d4 { [%eval 0.25] [%clk 0:10:00] } 1... d5 ...
1,Chessbullets,1828,B08,"Pirc Defense: Classical Variation, Schlechter ...",1. e4 { [%eval 0.24] [%clk 0:10:00] } 1... g6 ...
2,Tweaker,1407,C21,Danish Gambit,1. e4 { [%eval 0.24] [%clk 0:00:30] } 1... e5 ...


In [61]:
# Number of games (rows) loaded.
games_df.shape[0]

97255

# **Code**

### **Utils**

In [28]:
def get_substring(start_char, end_char, text):
    '''
    Helper function for unpack_moves()

    Searches `text` for the leftmost place where `start_char` is followed by
    one or more characters (as few as possible, never spanning a newline)
    and then by `end_char`, and returns the characters in between.

    `start_char` and `end_char` are treated as literal text: they are escaped
    before being placed in the pattern, so delimiters such as '[', '(' or '.'
    match themselves instead of being interpreted as regex syntax.
    `start_char` may be the empty string, in which case the match can begin
    anywhere in `text`.

    Returns the captured string, or None when there is no match (including
    the case where nothing sits between the two delimiters).
    '''
    # Escape the delimiters so regex metacharacters in them are matched
    # literally; only the capture group in the middle is real regex syntax.
    pattern = re.escape(start_char) + '(.+?)' + re.escape(end_char)
    match = re.search(pattern, text)

    if match is None:
        return None
    return match.group(1)

### **Data Exploration**

#### Unpacking PGN 

In [17]:
# Take one game's PGN text and see what splitting on '. ' produces.
sample_pgn = games_df['pgn'].iloc[5]
moves = sample_pgn.split('. ')
moves[:5]

['1',
 'e4 { [%eval 0.24] [%clk 0:03:00] } 1..',
 'c5 { [%eval 0.2] [%clk 0:03:00] } 2',
 'Nf3 { [%eval 0.21] [%clk 0:02:58] } 2..',
 'Nc6 { [%eval 0.08] [%clk 0:02:59] } 3']

In [20]:
# Look at the final chunks of the split; in this sample the last chunk also
# carries the game result and a trailing newline after the closing brace.
moves[-5:]

['Qh1+ { [%eval -68.09] [%clk 0:00:34] } 36',
 'Rh2 { [%eval -27.75] [%clk 0:00:15] } 36..',
 'Qf1+ { [%eval -15.03] [%clk 0:00:32] } 37',
 'Rg2 { [%eval -13.73] [%clk 0:00:12] } 37..',
 'Re2 { [%eval -12.83] [%clk 0:00:27] } 0-1\n']

In [48]:
def unpack_moves(pgn_str):
    '''
    Returns a list of tuples, one per chunk of pgn_str after splitting on
    '. ', where each tuple is (move, evaluation at move).

    move       -- the text in front of the first ' {' in the chunk, or None
                  when the chunk has no ' {' annotation.
    evaluation -- the text between 'eval ' and '] ' converted to float, or
                  None when the chunk has no such annotation. When that text
                  contains '#', its first character is dropped before the
                  conversion, so '#-3' becomes -3.0 (presumably a mate-in-N
                  score -- confirm against the data source). The result is
                  then indistinguishable from an ordinary evaluation of the
                  same magnitude.

    Raises ValueError if the evaluation text cannot be parsed as a float.
    '''
    # The chunk in front of the first '. ' is dropped; in the sample PGN it
    # is just the leading move number.
    move_chunks = pgn_str.split('. ')[1:]
    unpacked_moves = []
    for move_str in move_chunks:
        move = get_substring('', ' {', move_str)
        raw_eval = get_substring('eval ', '] ', move_str)

        if raw_eval is None:
            evaluation = None
        elif '#' in raw_eval:
            # Strip the leading marker character and keep the signed number.
            evaluation = float(raw_eval[1:])
        else:
            evaluation = float(raw_eval)

        unpacked_moves.append((move, evaluation))

    return unpacked_moves

In [49]:
# Try the parser on the sample game before running it over the whole frame.
sample_unpacked_moves = unpack_moves(sample_pgn)
sample_unpacked_moves[0:5]

[('e4', 0.24), ('c5', 0.2), ('Nf3', 0.21), ('Nc6', 0.08), ('Bb5', 0.0)]

#### Applying to Pandas DF 

In [64]:
# unpack_moves only reads the PGN text, so apply it to that single column
# rather than constructing a full row object per game with axis=1.
games_df['unpacked_moves'] = games_df['pgn'].apply(unpack_moves)
games_df[['unpacked_moves']].head(3)

Unnamed: 0,unpacked_moves
0,"[(d4, 0.25), (d5, 0.25), (Bf4, 0.0), (Nf6, 0.0..."
1,"[(e4, 0.24), (g6, 0.46), (d4, 0.28), (Bg7, 0.3..."
2,"[(e4, 0.24), (e5, 0.37), (d4, -0.11), (exd4, 0..."
