In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import chess

In [None]:
# Layout of the feature vector built for one position.
# Only the per-square piece slots are active: 64 squares * 12 slots each.
# The e.p. / castling / side-to-move entries named in the string below are
# commented out further down and are NOT included in the current vector.
"""
12 types of pieces per 64 squares, 
#16 squares to encode e.p. moves
#4 castling rules
#1 for side to move (0 for white, 1 for black)
"""

squares = 64  # number of board squares
features_per_square = 12  # one-hot slots reserved for each square
#ep_square_features = 16
#castling_rights_features = 4
#side_to_move_features = 1

# Length of the encoded vector; with the extra terms disabled this is 64 * 12 = 768.
total_number_of_features = squares * features_per_square #\
#                           + ep_square_features \
#                           + castling_rights_features \
#                           + side_to_move_features

In [None]:
def read_dataset(filepath):
    """Load the CSV at ``filepath`` with pandas and return the resulting DataFrame."""
    return pd.read_csv(filepath)

In [None]:
def chess_piece_index(piece, board):
    """Return the slot of ``piece`` inside a square's feature group.

    The base slot is ``piece.piece_type - 1``. Pieces whose colour differs
    from ``board.turn`` (i.e. not the side to move) are shifted up by 6, so
    the side to move always occupies the lower six slots.
    """
    colour_offset = 6 if piece.color != board.turn else 0
    return int(piece.piece_type) - 1 + colour_offset

In [None]:
def en_passant_square_index(square):
    """Map an en-passant square to a compact slot.

    The square's file is the base slot; squares whose rank index is not 2
    are shifted by 8, so the two possible e.p. ranks use disjoint slot ranges.
    """
    file_index = chess.square_file(square)
    on_rank_two = chess.square_rank(square) == 2
    return file_index if on_rank_two else file_index + 8

In [None]:
def row_from_fen(fen):
    """Encode the position given by ``fen`` as a flat boolean vector.

    The vector has ``total_number_of_features`` entries. For every occupied
    square, the entry at ``square * features_per_square + chess_piece_index(...)``
    is set; every other entry stays False.

    NOTE(review): piece slots are relative to the side to move, but squares
    are not mirrored -- confirm this matches how the evaluations are signed.
    """
    board = chess.Board(fen)
    encoded = np.zeros(total_number_of_features, dtype=bool)

    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is None:
            continue
        slot = int(square) * features_per_square + chess_piece_index(piece, board)
        encoded[slot] = True

    return encoded

In [None]:
def transform_evaluation(eval):
    """Convert a raw engine evaluation into a training target in [-1, 1].

    Accepted inputs (a leading BOM and surrounding whitespace are ignored):

    * centipawn scores such as ``"56"``, ``"+56"``, ``"-10"`` or a plain int;
    * forced mates such as ``"#+3"`` / ``"#-3"``. An unsigned mate
      (``"#12"``) is read as positive.

    A mate in 1 is worth ``checkmate_score`` and each further move costs
    ``loss_per_move_to_checkmate``. The magnitude is floored at one such
    step so that a very long mate can never flip to the losing side's sign.

    Args:
        eval: raw evaluation. The name shadows the builtin but is kept so
            that existing keyword callers continue to work.

    Returns:
        The score clipped to +/- ``checkmate_score`` and divided by
        ``checkmate_score`` (a numpy float).

    Raises:
        ValueError: if the value is empty or not a valid integer / mate score.
    """
    checkmate_score = 10000
    loss_per_move_to_checkmate = 100

    # str() lets integer-typed CSV columns through; the BOM can only appear
    # on the very first value read from a file.
    text = str(eval).strip().lstrip('\ufeff').strip()

    if text.startswith('#'):
        mate = text[1:]
        # Take the sign from the character itself so "#-0" stays negative.
        sign = -1 if mate.startswith('-') else 1
        if mate.startswith(('+', '-')):
            mate = mate[1:]
        moves_to_checkmate = abs(int(mate))

        magnitude = checkmate_score - (moves_to_checkmate - 1) * loss_per_move_to_checkmate
        # Without this floor a mate in more than 100 moves would go negative.
        magnitude = max(magnitude, loss_per_move_to_checkmate)
        evaluation = sign * magnitude
    else:
        evaluation = int(text)

    # Normalize with the same constant that is used for clipping.
    evaluation = np.clip(evaluation, -checkmate_score, checkmate_score)
    return evaluation / float(checkmate_score)

In [None]:
def bytes_list_feature(value):
    """Wrap a single bytes ``value`` in a ``tf.train.Feature`` holding a one-element BytesList."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

In [None]:
def float32_feature(value):
    """Wrap a single float ``value`` in a ``tf.train.Feature`` holding a one-element FloatList."""
    payload = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=payload)

In [None]:
def position_example(position, y):
    """Build a ``tf.train.Example`` for one training sample.

    ``position`` is stored as a bytes feature under the key 'feature' and
    ``y`` as a float feature under the key 'y'.
    """
    fields = {
        'feature': bytes_list_feature(position),
        'y': float32_feature(y),
    }
    return tf.train.Example(features=tf.train.Features(feature=fields))

In [None]:
def parse_dataset(record):
    """Parse one serialized example produced by ``position_example``.

    The previous version asked for 'f0'..'fN' features of dtype ``tf.byte``.
    That dtype does not exist, and the records actually hold a single bytes
    feature named 'feature' plus a float named 'y', so it could never work.

    Args:
        record: a scalar string tensor holding one serialized ``tf.train.Example``.

    Returns:
        A ``(features, output)`` pair of dicts. ``features['feature']`` is a
        uint8 tensor of shape ``[1, total_number_of_features]`` (0/1 values)
        and ``output['y']`` is a float32 tensor of shape ``[1]``. The leading
        dimension of 1 comes from parsing ``[record]`` as a batch of one.
    """
    spec = {
        'feature': tf.io.FixedLenFeature([], tf.string),
        'y': tf.io.FixedLenFeature([], tf.float32),
    }
    # Parse once; both the input and the target come from the same record.
    parsed = tf.io.parse_example([record], features=spec)

    # The position was stored as the raw bytes of a boolean numpy array,
    # i.e. one byte per feature, so decode it back byte by byte.
    position = tf.io.decode_raw(parsed['feature'], tf.uint8)
    # Pin the static width so downstream layers know the input size.
    position = tf.reshape(position, [-1, total_number_of_features])

    features = {'feature': position}
    output = {'y': parsed['y']}
    return features, output

In [None]:
# Source CSV; the conversion loop reads its "FEN" and "Evaluation" columns.
# NOTE(review): the path is specific to one machine -- consider making it configurable.
filename = "~/university/SlowChessEngine/nn/chessData.csv"
# Loads the whole file into memory as a DataFrame.
df = read_dataset(filename)

In [None]:
# Convert the DataFrame into TFRecord shards of at most `batch_size` rows each.
batch_size = 10000000
batch_index = 1  # number used in the next shard's file name

# Shards are cut by row position, so this works for any DataFrame index
# and writes nothing (instead of failing) when the frame is empty.
for shard_start in range(0, len(df), batch_size):
    shard = df.iloc[shard_start:shard_start + batch_size]
    shard_path = f'/home/mikhail/university/SlowChessEngine/nn/chess_dataset{batch_index}.tfrecord'
    batch_index += 1

    # The context manager closes the file even if a row fails to convert.
    with tf.io.TFRecordWriter(shard_path) as writer:
        # Iterate the two columns together instead of doing a chained
        # df[col][index] lookup for every row.
        for fen, raw_evaluation in zip(shard["FEN"], shard["Evaluation"]):
            evaluation = transform_evaluation(raw_evaluation)
            features = row_from_fen(fen).tobytes()
            ex = position_example(features, evaluation)
            writer.write(ex.SerializeToString())
