# 1 Preliminaries
## 1.1 Import
Import required libraries.

In [69]:
import os
import math
import numpy as np
from keras import callbacks, models, layers
from xgboost import XGBRegressor
import pandas as pd
import chess.pgn

## 1.2 Load Data
Load the pgn file and create DataFrame. It should have three columns:
- Color
- Board
- Move

In [70]:
def board_to_dataframe(board):
    piece_to_value = {
        chess.PAWN: 1,
        chess.KNIGHT: 2,
        chess.BISHOP: 3,
        chess.ROOK: 4,
        chess.QUEEN: 5,
        chess.KING: 6,
    }
    res = {}
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        
        if piece:
            value = piece_to_value.get(piece.piece_type, 0)
            if piece.color == chess.BLACK:
                value = -value
            res[square] = value
        else:
            res[square] = 0
    return res

def space_to_int(space):
    key = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8}
    return key[space[0]]*10+int(space[1])

def read_data(file):
    res = []
    while True:
        try:
            game = chess.pgn.read_game(file)
            
            if game is None:
                break
                
            board = game.board()
            
            color = "white" if game.headers["White"] == file.name[:-4] else "black"
            
            for move in game.mainline_moves():
                if(color == "white" and board.turn == chess.WHITE) or (color == "black" and board.turn == chess.BLACK):
                    thing = board_to_dataframe(board)
                    thing['color'] = color
                    thing['start'] = space_to_int(str(move)[:2])
                    thing['end'] = space_to_int(str(move)[2:])
                    res.append(thing)
                board.push(move)
        except Exception as e:
            print(e)
    return res

data = []
# directory = 'data'
# for filename in os.listdir(directory):
#     f = os.path.join(directory, filename)
#     
#     if os.path.isfile(f) and filename[-3:] == "pgn":
#         with open(f, 'r') as pgn:
#             print(f)
#             data.append(read_data(pgn))
      
with open('data/damnsaltythatsport.pgn', 'r') as pgn:
    data.extend(read_data(pgn))
            
df = pd.DataFrame(data)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,57,58,59,60,61,62,63,color,start,end
0,4,2,3,5,6,3,2,4,1,1,...,-2,-3,-5,-6,-3,-2,-4,black,78,66
1,4,2,3,5,6,3,2,4,1,1,...,-2,-3,-5,-6,-3,0,-4,black,57,56
2,4,0,3,5,6,3,2,4,1,1,...,-2,-3,-5,-6,-3,0,-4,black,68,57
3,4,0,3,5,6,3,2,4,1,1,...,-2,-3,-5,-6,0,0,-4,black,58,78
4,4,0,3,5,6,0,2,4,1,1,...,-2,-3,-5,0,-4,-6,0,black,47,46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2669,0,0,0,0,0,4,0,0,1,1,...,0,0,0,-6,0,0,-4,black,58,78
2670,0,0,0,0,0,4,0,0,1,1,...,0,0,0,0,-4,-6,0,black,27,25
2671,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,-4,-6,0,black,68,58
2672,0,0,0,0,0,0,0,0,1,1,...,0,0,0,-4,0,-6,0,black,44,71


## 1.3 Visualize Data
Visualize the board to make sure we're chilling.


In [71]:
def board_to_val(board):
    char_to_piece = {
        '-1': 'p', '-2': 'r', '-3': 'n', '-4': 'b', '-5': 'q', '-6': 'k',
        '1': 'P', '2': 'R', '3': 'N', '4': 'B', '5': 'Q', '6': 'K',
        '0': '.'
    }
    res = []
    for row in board:
        row_res = []
        for val in row:
            row_res.append(char_to_piece[val])
        res.append(row_res)
    return np.array(res)

def int_to_space(space):
    key = {1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h'}
    return key[math.floor(space/10)]+str(space%10)

# for i in range(5):
#     print(board_to_val(df['board'][i]))
#     print(int_to_space(df['start'][i]) + int_to_space(df['end'][i]))
#     print('\n')

## 1.4 Clean Data
Clean data.

In [72]:
df['color'] = df['color'].apply(lambda x: 1 if x == 'white' else 0)
# df['move'] = df['move'].astype('category')
df.dtypes

0        int64
1        int64
2        int64
3        int64
4        int64
         ...  
62       int64
63       int64
color    int64
start    int64
end      int64
Length: 67, dtype: object

## 1.5 Split Data

In [73]:
df_train = df.sample(frac=0.8)
df_valid = df.drop(df_train.index)

X_train = df_train.drop(['color', 'start', 'end'], axis=1)
X_valid = df_valid.drop(['color', 'start', 'end'], axis=1)
y_train = df_train[['start', 'end']]
y_valid = df_valid[['start', 'end']]

print(X_train.head())
print(y_train.head())

      0   1   2   3   4   5   6   7   8   9   ...  54  55  56  57  58  59  60  \
1449   4   0   3   4   0   0   0   6   1   1  ...  -1  -1   0   0   0  -4   0   
332    0   0   0   0   0   0   0   0   0  -2  ...  -1  -1   0   0   0   0   0   
2102   4   0   0   0   0   4   6   0   1   0  ...  -1   0   0  -4   0  -5   0   
2113   0   0   0   0   0   4   0   0   1   0  ...  -1   0   0   0   0  -2   0   
1254   0   0  -4   0   0   0   0   0   0   0  ...   0   4   0   0   0   0   0   

      61  62  63  
1449  -4  -6   0  
332    0   0   0  
2102  -6   0  -4  
2113  -4  -6   0  
1254   0   0   0  

[5 rows x 64 columns]
      start  end
1449     48   41
332      22   14
2102     68   78
2113     57   55
1254     57   46


# 2 Train Model
## 2.1 Create Model

In [74]:
# early_stopping = callbacks.EarlyStopping(
#     min_delta=0.001,  # minimium amount of change to count as an improvement
#     patience=20,  # how many epochs to wait before stopping
#     restore_best_weights=True,
# )

model = XGBRegressor(n_estimators=500, learning_rate=0.05, n_jobs=4, early_stopping_rounds=5)

# Compile the model
# model.compile(optimizer='adam', 
#               loss={'start_square': 'categorical_crossentropy', 'end_square': 'categorical_crossentropy'},
#               metrics=['accuracy'])


## 2.2 Fit Model

In [76]:
# history = model.fit(
#     X_train, y_train,
#     validation_data=(X_valid, y_valid),
#     batch_size=1,
#     epochs=500,
#     callbacks=[early_stopping],
#     verbose=0,
# )

model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)

# history_df = pd.DataFrame(history.history)
# history_df.loc[:, ['loss', 'val_loss']].plot()

# model.save('deeplearningmodel.h5')