## Data Preprocessing for Chess Evaluation Function

In [1]:
!pip install python-chess --upgrade python-chess

Requirement already up-to-date: python-chess in /usr/local/lib/python3.6/dist-packages (0.31.2)


In [0]:
import chess
import chess.pgn
import csv
import numpy as np
from google.colab import drive

In [0]:
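# Uncomment to mount Google Drive; the cells below read from and write to paths under /content/drive.
#drive.mount('/content/drive')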

In [4]:
# Number of games to process; read by both get_scores() and get_boards().
# With enough RAM, this can be raised to 50,000 games (about 4 million categorized positions).
gamecount = 1500

# Convert board to bitmap for NN
def board_to_bitboard(board):
    """Encode a board as a list of 12-element rows, one per square.

    Squares 0..63 are visited in order and ``str(board.piece_at(square))``
    selects the row that is appended:

    * lowercase symbols ``p, n, b, r, q, k`` put ``-1`` in slots 0..5,
    * uppercase symbols ``K, Q, R, B, N, P`` put ``+1`` in slots 6..11,
    * ``'None'`` (empty square) yields twelve zeros.

    Any other string contributes no row at all.

    Args:
        board: object exposing ``piece_at(square)``.

    Returns:
        list[list[int]]: the encoded rows, in square order.
    """
    # symbol -> (slot index, value stored in that slot)
    slot_for_symbol = {
        'p': (0, -1), 'n': (1, -1), 'b': (2, -1),
        'r': (3, -1), 'q': (4, -1), 'k': (5, -1),
        'K': (6, 1), 'Q': (7, 1), 'R': (8, 1),
        'B': (9, 1), 'N': (10, 1), 'P': (11, 1),
    }

    encoded = []
    for square in range(64):
        symbol = str(board.piece_at(square))
        row = [0] * 12
        if symbol != 'None':
            if symbol not in slot_for_symbol:
                # Unrecognised symbol: no row is emitted for this square.
                continue
            slot, value = slot_for_symbol[symbol]
            row[slot] = value
        encoded.append(row)
    return encoded


# stockfish scores
def get_scores(path='/content/drive/My Drive/Colab Notebooks/Chess/data/stockfish.csv', game_limit=None):
    """Read per-move scores from a CSV file and clip them to [-10, 10].

    The second column of every row is split on single spaces. Rows whose
    split yields exactly two tokens are reported as corrupted and skipped
    (they contribute neither scores nor a length entry). All other rows
    append their tokens to one flat list and their token count to
    ``gamelengths``.

    The very first collected token is dropped before conversion
    (presumably the header cell of the CSV -- TODO confirm against the
    data file). Tokens equal to ``'NA'`` or ``''`` become 0; every other
    token is parsed with ``int`` and clipped to the range [-10, 10].

    Args:
        path: CSV file to read. Defaults to the notebook's Drive location.
        game_limit: reading stops after the first kept row that brings the
            number of rows consumed (kept or skipped) above this value.
            ``None`` (default) uses the module-level ``gamecount``.

    Returns:
        tuple: ``(dfscores, gamelengths)`` where ``dfscores`` is a list of
        one-element lists ``[score]`` and ``gamelengths`` is the list of
        per-row token counts followed by 100 trailing zeros.

    Raises:
        ValueError: if a token is neither ``'NA'``, ``''`` nor an integer.
    """
    if game_limit is None:
        game_limit = gamecount  # notebook-wide setting

    tokens = []
    gamelengths = []
    with open(path, newline='') as rawdata:
        rows_seen = 0
        for row in csv.reader(rawdata, delimiter=','):
            row_tokens = row[1].split(" ")
            if len(row_tokens) == 2:
                print("Skipping game no. " + str(rows_seen) + ": corrupted")
                rows_seen += 1
                continue
            tokens.extend(row_tokens)
            gamelengths.append(len(row_tokens))
            rows_seen += 1
            if rows_seen > game_limit:
                break

    dfscores = []
    # The token at index 0 is intentionally left out, as in the original loop.
    for token in tokens[1:]:
        value = 0 if token in ('NA', '') else int(token)
        dfscores.append([max(-10, min(10, value))])

    # Trailing zeros keep later gamelengths[i + 1] lookups in range even
    # when skipped rows make this list shorter than the number of games.
    gamelengths.extend([0] * 100)

    return dfscores, gamelengths


# Bitmap representation of each position
def get_boards(gamelengths, path="/content/drive/My Drive/Colab Notebooks/Chess/data/data.pgn"):
    """Encode every position reached in the first ``gamecount`` PGN games.

    Each game's mainline is replayed move by move and the board after
    every move is encoded with ``board_to_bitboard``. A game whose final
    ``fullmove_number`` is still 1 is reported as corrupted and the
    positions it contributed are discarded.

    Args:
        gamelengths: kept for backward compatibility with existing callers;
            it is no longer consulted. Positions of a skipped game are
            removed by remembering where that game started in the output
            list, which stays correct even when ``gamelengths`` is out of
            step with the PGN file.
        path: PGN file to read. Defaults to the notebook's Drive location.

    Returns:
        list: one encoded board per retained position, in file order.
    """
    boards = []

    # The context manager closes the PGN file even if parsing fails.
    with open(path) as pgn:
        for i in range(gamecount):
            game = chess.pgn.read_game(pgn)
            if game is None:
                # read_game returns None at end of file: fewer games than gamecount.
                break

            board = game.board()
            game_start = len(boards)  # first index belonging to this game

            for move in game.mainline_moves():
                board.push(move)
                boards.append(board_to_bitboard(board))

            # Skip corrupted games
            length = board.fullmove_number * 2 - 2
            if length == 0:
                print("Skipping game no. " + str(i+1) + ": corrupted")
                del boards[game_start:]
                continue

            if i % 100 == 0:
                print("Up to game no. " + str(i) + " saved")

    return boards

# get_scores() returns two lists of different lengths. Unpack them directly:
# wrapping the pair in np.asarray would request a ragged array, which
# NumPy 1.24 and later reject with a ValueError.
scores, gamelengths = get_scores()
scores = np.asarray(scores)
positions = np.asarray(get_boards(gamelengths))

# np.save appends the ".npy" extension to each target path.
print("Saving scores of len: " + str(len(scores)) + " to data/scores.npy")
np.save("/content/drive/My Drive/Colab Notebooks/Chess/data/scores", scores)
print("Saving bitmaps of len: " + str(len(positions)) + " to data/positions.npy")
np.save("/content/drive/My Drive/Colab Notebooks/Chess/data/positions", positions)

print("Data saved successfully")

Skipping game no. 625: corrupted
Up to game no. 0 saved
Up to game no. 100 saved
Up to game no. 200 saved
Up to game no. 300 saved
Up to game no. 400 saved
Up to game no. 500 saved
Up to game no. 600 saved
Skipping game no. 625: corrupted
Up to game no. 700 saved
Up to game no. 800 saved
Up to game no. 900 saved
Up to game no. 1000 saved
Up to game no. 1100 saved
Up to game no. 1200 saved
Up to game no. 1300 saved
Up to game no. 1400 saved
Saving scores of len: 122241 to data/scores.npy
Saving bitmaps of len: 122241 to data/positions.npy
Data saved successfully
