In [1]:
import pandas as pd
import numpy as np

### Read Data

In [2]:
# Load the raw positions; the path is relative to the notebook's working directory.
data = pd.read_csv("chessData.csv")

In [3]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,FEN,Evaluation
0,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,-10
1,rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBN...,56
2,rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBN...,-9
3,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKB...,52
4,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPPN1PPP/R1BQK...,-26


In [4]:
# Show one complete FEN string, since the head() preview truncates the column.
data["FEN"][0]

'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1'

In [48]:
def rank_to_vars(rank):
    """Expand one FEN rank string into a flat list with one entry per square.

    Non-numeric characters (piece letters) are kept unchanged. A numeric
    character ``n`` is a run-length count and becomes ``n`` consecutive
    ``None`` entries.

    Parameters
    ----------
    rank : str
        One "/"-separated segment of the FEN board field, e.g. ``"4P3"``.

    Returns
    -------
    list
        Piece letters and ``None`` placeholders, in the order they appear.
    """
    squares = []
    for symbol in rank:
        if symbol.isnumeric():
            # A digit stands for that many empty squares in a row.
            squares.extend([None] * int(symbol))
        else:
            squares.append(symbol)
    return squares
    
    
def board_to_vars(board, var_names):
    """Turn the FEN board field into a labelled Series with one value per square.

    The board is split on "/" into ranks, each rank is expanded with
    ``rank_to_vars``, and the resulting rows are flattened in the order the
    ranks appear in the string.

    Parameters
    ----------
    board : str
        First field of a FEN string (ranks separated by "/").
    var_names : sequence
        Labels used as the Series index; must match the number of squares.

    Returns
    -------
    pandas.Series
        Piece letters / ``None`` values indexed by ``var_names``.
    """
    grid = np.array([rank_to_vars(rank_text) for rank_text in board.split("/")])
    return pd.Series(grid.flatten(), index=var_names)
    
def turn_parsing(turn):
    """Encode the FEN side-to-move field as a one-element Series.

    Parameters
    ----------
    turn : str
        Side-to-move field of a FEN string.

    Returns
    -------
    pandas.Series
        Single entry labelled ``"WhiteToMove"``: 1 when ``turn`` is ``'w'``,
        0 for any other value.
    """
    if turn == 'w':
        flag = 1
    else:
        flag = 0
    return pd.Series(np.array([flag]), index=["WhiteToMove"])

def castling_rights_parsing(castling_rights):
    """Encode the FEN castling field as four 0/1 flags.

    Parameters
    ----------
    castling_rights : str
        Castling field of a FEN string: ``'-'`` or any combination of the
        letters ``K``, ``Q``, ``k``, ``q``.

    Returns
    -------
    pandas.Series
        Flags labelled ``WhiteCanCastleKS``, ``WhiteCanCastleQS``,
        ``BlackCanCastleKS``, ``BlackCanCastleQS`` (in that order).

    Raises
    ------
    KeyError
        If the field contains a character other than ``K``, ``Q``, ``k``, ``q``.
    """
    castling_var_names = ['WhiteCanCastleKS', 'WhiteCanCastleQS', 'BlackCanCastleKS', 'BlackCanCastleQS']

    # '-' means neither side may castle: every flag stays 0.
    if castling_rights == '-':
        return pd.Series([0, 0, 0, 0], index=castling_var_names)

    # Position of each castling letter within the flag vector.
    flag_position = dict(zip('KQkq', range(4)))
    flags = [0] * 4
    for symbol in castling_rights:
        flags[flag_position[symbol]] = 1

    return pd.Series(np.array(flags), index=castling_var_names)

def enpassant_square_parsing(enpassant_square):
    """Encode the FEN en-passant field as a single square index.

    The index is ``row * 8 + col`` where ``col`` is 0 for file "a" through 7
    for file "h" and ``row`` is ``8 - rank_number``, so "a8" maps to 0 and
    "h1" maps to 63.

    Parameters
    ----------
    enpassant_square : str
        En-passant field of a FEN string: ``'-'`` or a square such as ``"e3"``.

    Returns
    -------
    pandas.Series
        Single entry labelled ``"EnPassantSquare"`` holding the index, or
        ``None`` when the field is ``'-'``.
    """
    if enpassant_square == '-':
        return pd.Series([None], index=['EnPassantSquare'])

    # Map file letters a..h to column numbers 0..7.
    file_columns = {letter: position for position, letter in enumerate("abcdefgh")}
    column = file_columns[enpassant_square[0]]
    row_from_top = 8 - int(enpassant_square[1])

    return pd.Series([row_from_top * 8 + column], index=['EnPassantSquare'])
    
    
    
def fen_to_vars(fen):
    """Convert a full FEN string into one flat Series of features.

    The result concatenates, in order: 64 board squares (``Sq_0`` ..
    ``Sq_63``), ``WhiteToMove``, the four castling flags,
    ``EnPassantSquare`` and ``MoveCounter`` (the fullmove number).
    The halfmove clock (fifth FEN field) is not included in the output.

    Parameters
    ----------
    fen : str
        FEN string with six whitespace-separated fields.

    Returns
    -------
    pandas.Series
        71 labelled values describing the position.

    Raises
    ------
    IndexError
        If ``fen`` has fewer than six fields.
    """
    # split() with no argument ignores leading/trailing whitespace and treats
    # runs of whitespace as one separator, so a stray extra space no longer
    # shifts every field by one position.
    fen_pieces = fen.split()
    board = fen_pieces[0]
    turn = fen_pieces[1]
    castling_rights = fen_pieces[2]
    enpassant_square = fen_pieces[3]
    # fen_pieces[4] is the halfmove clock; it is intentionally left out of the features.
    fullmove_counter = fen_pieces[5]

    board_var_names = ["Sq_{}".format(i) for i in range(64)]
    vars_board = board_to_vars(board, board_var_names)
    whitetoMove = turn_parsing(turn)
    castling_vars = castling_rights_parsing(castling_rights)
    enpassant_square_var = enpassant_square_parsing(enpassant_square)
    move_counter_var = pd.Series([int(fullmove_counter)], index=["MoveCounter"])

    return pd.concat([vars_board, whitetoMove, castling_vars, enpassant_square_var, move_counter_var])
    
    
    
    
    

In [49]:
# Smoke-test the parser on the first FEN of the dataset (same string as data["FEN"][0]).
fen = 'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1'
# NOTE(review): the four names below are not read by the fen_to_vars(fen) call
# in this cell; they look like leftover inputs for trying the helpers one at a
# time. Confirm nothing later depends on them before removing.
board = 'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR'
rank = '4P3'
castling_rights = 'Kkq'

enpassant_square = "e3"
fen_to_vars(fen)

Sq_0                   r
Sq_1                   n
Sq_2                   b
Sq_3                   q
Sq_4                   k
                    ... 
WhiteCanCastleQS       1
BlackCanCastleKS       1
BlackCanCastleQS       1
EnPassantSquare     None
MoveCounter            1
Length: 71, dtype: object

In [50]:
# Row/column count of the raw frame; the row count is how many FENs get parsed.
data.shape

(12958035, 2)

In [51]:
# Parse every FEN into feature columns. Series.apply calls fen_to_vars once per
# row in Python, and data.shape reports 12,958,035 rows, so this cell is expensive.
# NOTE(review): consider validating on a small slice first, e.g. data['FEN'].head(1000).
data_prepared = data['FEN'].apply(fen_to_vars)

In [None]:
# Attach the target column to the parsed features; the assignment aligns on the
# row index, which data_prepared inherits from data['FEN'].
data_prepared["Evaluation"] = data["Evaluation"]