# **Setup**

### **Libraries**

In [15]:
import pandas as pd
from tqdm.auto import tqdm 
import re 

### **Data**

In [4]:
games_df = pd.read_csv('../data/processed_2019_06_df.csv')
games_df = games_df[['user_name', 'user_elo', 'opening_code', 'opening_name', 'pgn']]

print('Number of Rows :', len(games_df))

games_df.head(3)

Number of Rows : 97255


Unnamed: 0,user_name,user_elo,opening_code,opening_name,pgn
0,bkrrhanife2,1772,D02,Queen's Pawn Game: London System,1. d4 { [%eval 0.25] [%clk 0:10:00] } 1... d5 ...
1,Chessbullets,1828,B08,"Pirc Defense: Classical Variation, Schlechter ...",1. e4 { [%eval 0.24] [%clk 0:10:00] } 1... g6 ...
2,Tweaker,1407,C21,Danish Gambit,1. e4 { [%eval 0.24] [%clk 0:00:30] } 1... e5 ...


# **Code**

### **Utils**

In [16]:
def get_substring(start_char, end_char, text):
    '''
    Helper function for unpack_game()
    '''

    search_string = start_char + '(.+?)' + end_char
    result = re.search(search_string, text)

    if result:
        return result.group(1)
    else:
        return None 

### **Data Exploration**

#### Unpacking PGN 

In [17]:
sample_pgn = games_df.iloc[5]['pgn']
moves = sample_pgn.split('. ')
moves[0:5]

['1',
 'e4 { [%eval 0.24] [%clk 0:03:00] } 1..',
 'c5 { [%eval 0.2] [%clk 0:03:00] } 2',
 'Nf3 { [%eval 0.21] [%clk 0:02:58] } 2..',
 'Nc6 { [%eval 0.08] [%clk 0:02:59] } 3']

In [18]:
moves[-5:]

['Qh1+ { [%eval -68.09] [%clk 0:00:34] } 36',
 'Rh2 { [%eval -27.75] [%clk 0:00:15] } 36..',
 'Qf1+ { [%eval -15.03] [%clk 0:00:32] } 37',
 'Rg2 { [%eval -13.73] [%clk 0:00:12] } 37..',
 'Re2 { [%eval -12.83] [%clk 0:00:27] } 0-1\n']

In [19]:
def unpack_moves(pgn_str):
    ''' 
    Returns a list of tuples, where each tuples is 
    (move, evaluation at move)
    '''
    moves = pgn_str.split('. ')[1:]
    unpacked_moves = []
    for move_str in moves:
        move = get_substring('', ' {', move_str)
        eval = get_substring('eval ', '] ', move_str)

        if eval == None:
            pass
        elif '#' in eval:
            eval = float(eval[1:])
        else:
            eval = float(eval)
        
        unpacked_moves.append((move, eval))

    return unpacked_moves

In [42]:
sample_unpacked_moves = unpack_moves(sample_pgn)
sample_unpacked_moves[:5]

[('e4', 0.24), ('c5', 0.2), ('Nf3', 0.21), ('Nc6', 0.08), ('Bb5', 0.0)]

#### Feature Extraction

For a single tuple

In [43]:
def get_square(move_str):
    for i, c in enumerate(move_str):
        if c.isdigit():
            return move_str[i-1:i+1]
    
    return '!' 

def get_move_attributes(move_tuple):
    move = move_tuple[0]
    if move == None:
        return ('!', 0, 0, 0)

    capture = int('x' in move)
    check = int(('+' in move) or ('#' in move))
    pawn_density = 0 

    piece = move[0]
    if piece not in ['K', 'Q', 'R', 'B', 'N']:
        piece = 'P'

    square = get_square(move)

    central_squares = ['c3', 'c4', 'c5', 'c6', 
                       'd3', 'd4', 'd5', 'd6',  
                       'e3', 'e4', 'e5', 'e6',
                       'f3', 'f4', 'f5', 'f6']

    if piece == 'P' and square in central_squares:
        pawn_density += 1

    return (piece, capture, check, pawn_density)

In [44]:
get_move_attributes(('exe4+', 0.1))

('P', 1, 1, 1)

For a list of tuples

In [56]:
def get_move_attributes_game(move_lst):
    piece_movements = {'K': 0, 'Q': 0, 'R': 0, 'B': 0, 'N': 0, 'P': 0}
    captures = 0
    checks = 0
    pawn_density = 0

    attributes_5 = [0,0,0,0,0,0,0,0,0]
    attributes_10 = [0,0,0,0,0,0,0,0,0]
    attributes_15 = [0,0,0,0,0,0,0,0,0]
    attributes_final = []

    for move_num, move in enumerate(move_lst):
        piece, capture, check, pawn_density = get_move_attributes(move)

        if piece == '!':
            continue 

        piece_movements[piece] += 1
        captures += capture
        checks += check
        pawn_density += pawn_density

        if move_num == 4:
            attributes_5 = [piece_movements[piece] for piece in piece_movements.keys()]
            attributes_5 += [captures, checks, pawn_density]
        if move_num == 9:
            attributes_10 = [piece_movements[piece] for piece in piece_movements.keys()]
            attributes_10 += [captures, checks, pawn_density]
        if move_num == 14:
            attributes_15 = [piece_movements[piece] for piece in piece_movements.keys()]
            attributes_15 += [captures, checks, pawn_density]

    attributes_final = [piece_movements[piece] for piece in piece_movements.keys()]
    attributes_final += [captures, checks, pawn_density]

    return attributes_5, attributes_10, attributes_15, attributes_final

In [57]:
get_move_attributes_game(sample_unpacked_moves)

([0, 0, 0, 1, 2, 2, 0, 0, 0],
 [0, 0, 0, 3, 2, 5, 2, 0, 0],
 [0, 1, 0, 3, 2, 9, 4, 0, 0],
 [2, 19, 16, 11, 7, 19, 16, 4, 0])

#### Applying to Pandas DF 

Unpack Moves

In [30]:
games_df['unpacked_moves'] = games_df.apply(lambda row: unpack_moves(row['pgn']), axis = 1)
games_df[['unpacked_moves']].head(3)

Unnamed: 0,unpacked_moves
0,"[(d4, 0.25), (d5, 0.25), (Bf4, 0.0), (Nf6, 0.0..."
1,"[(e4, 0.24), (g6, 0.46), (d4, 0.28), (Bg7, 0.3..."
2,"[(e4, 0.24), (e5, 0.37), (d4, -0.11), (exd4, 0..."


Get move attributes

In [58]:
games_df['game_attributes'] = games_df.apply(lambda row: get_move_attributes_game(row['unpacked_moves']), axis = 1)
games_df[['game_attributes']].head(3)

Unnamed: 0,game_attributes
0,"([0, 0, 0, 1, 2, 2, 0, 0, 0], [0, 0, 1, 2, 2, ..."
1,"([0, 0, 0, 1, 1, 3, 0, 0, 0], [0, 0, 0, 1, 4, ..."
2,"([0, 0, 0, 0, 0, 5, 1, 0, 2], [0, 2, 0, 1, 0, ..."


#### Featurized Dataframe

In [63]:
games_df['move5'], games_df['move10'], games_df['move15'], games_df['final'] = zip(*games_df['game_attributes'])
games_df['move5_K'], games_df['move5_Q'], games_df['move5_R'], games_df['move5_B'], games_df['move5_N'], games_df['move5_P'], games_df['move5_captures'], games_df['move5_checks'], games_df['move5_pawn_density'] = zip(*games_df['move5'])
games_df['move10_K'], games_df['move10_Q'], games_df['move10_R'], games_df['move10_B'], games_df['move10_N'], games_df['move10_P'], games_df['move10_captures'], games_df['move10_checks'], games_df['move10_pawn_density'] = zip(*games_df['move10'])
games_df['move15_K'], games_df['move15_Q'], games_df['move15_R'], games_df['move15_B'], games_df['move15_N'], games_df['move15_P'], games_df['move15_captures'], games_df['move15_checks'], games_df['move15_pawn_density'] = zip(*games_df['move15'])

feature_df = games_df[['user_name', 'user_elo', 'opening_code', 'opening_name', 'move5_K', 'move5_Q', 'move5_R', 'move5_B', 'move5_N', 'move5_P', 'move5_captures', 'move5_checks', 'move5_pawn_density', 'move10_K', 'move10_Q', 'move10_R', 'move10_B', 'move10_N', 'move10_P', 'move10_captures', 'move10_checks', 'move10_pawn_density', 'move15_K', 'move15_Q', 'move15_R', 'move15_B', 'move15_N', 'move15_P', 'move15_captures', 'move15_checks', 'move15_pawn_density']]
feature_df.head(3)

Unnamed: 0,user_name,user_elo,opening_code,opening_name,move5_K,move5_Q,move5_R,move5_B,move5_N,move5_P,...,move10_pawn_density,move15_K,move15_Q,move15_R,move15_B,move15_N,move15_P,move15_captures,move15_checks,move15_pawn_density
0,bkrrhanife2,1772,D02,Queen's Pawn Game: London System,0,0,0,1,2,2,...,0,0,3,2,3,2,5,4,1,0
1,Chessbullets,1828,B08,"Pirc Defense: Classical Variation, Schlechter ...",0,0,0,1,1,3,...,0,0,0,0,2,4,9,1,0,2
2,Tweaker,1407,C21,Danish Gambit,0,0,0,0,0,5,...,2,0,3,0,2,1,9,2,1,0


#### Write to CSV 

In [68]:
feature_df.to_csv('../data/feature_df.csv', index = False)