## Data Preprocessing for Chess Evaluation Function

In [1]:
# Install (or upgrade to the latest) python-chess, which provides the `chess`
# and `chess.pgn` modules imported in the next cell. The package only needs to
# be named once on the command line.
!pip install --upgrade python-chess

Requirement already up-to-date: python-chess in /usr/local/lib/python3.6/dist-packages (0.31.2)


In [2]:
import chess
import chess.pgn
import csv
import numpy as np
from google.colab import drive
import gc
import math
# Mount Google Drive at /content/drive; every data path used in the cells
# below (stockfish.csv, data.pgn, the saved .npy files) lives under this mount.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Run configuration, read as globals by get_scores(), get_boards() and the
# final save cell.
# NOTE(review): the original note here spoke of saving all positions and
# categorized scores from 50,000 games (about 4 million positions) given
# enough RAM. With the values below a single run covers 5,000 games and writes
# files suffixed with `numb` -- presumably one chunk of that larger job;
# TODO confirm.
gamerange = [45000, 50000]  # games gamerange[0]+1 .. gamerange[1] (1-based numbering) are processed
numb = 10  # suffix of the output files: data/scores<numb>.npy and data/positions<numb>.npy
winning_threshold = 15  # score > threshold -> label 1, score < -threshold -> label -1, otherwise 0

In [4]:
# Convert board to bitmap for NN
def board_to_bitboard(board):
    """Encode a board as a list of six-element rows, one per square.

    Squares 0..63 are looked up in index order with ``board.piece_at`` and the
    result is converted to its string form. Every row has one slot per piece
    type, ordered pawn, knight, bishop, rook, queen, king. The slot of the
    piece on the square holds 1 for an upper-case symbol and -1 for a
    lower-case symbol; an empty square (string form ``'None'``) is an all-zero
    row. A square whose string form is anything else contributes no row.

    Args:
        board: object exposing ``piece_at(square_index)``.

    Returns:
        list[list[int]]: the rows in square order (64 rows for a regular board).
    """
    # Piece symbol -> (slot index, stored value).
    encoding = {
        'p': (0, -1), 'n': (1, -1), 'b': (2, -1),
        'r': (3, -1), 'q': (4, -1), 'k': (5, -1),
        'P': (0, 1), 'N': (1, 1), 'B': (2, 1),
        'R': (3, 1), 'Q': (4, 1), 'K': (5, 1),
    }

    rows = []
    for square in range(64):
        symbol = str(board.piece_at(square))
        row = [0, 0, 0, 0, 0, 0]
        if symbol in encoding:
            slot, value = encoding[symbol]
            row[slot] = value
        elif symbol != 'None':
            # Neither a known piece symbol nor an empty square: emit nothing.
            continue
        rows.append(row)

    return rows

In [5]:
# Stockfish score parser
def get_scores(csv_path=None, game_range=None, threshold=None):
    """Read per-move Stockfish scores for a slice of games and label them -1/0/1.

    CSV rows are numbered from 0 at the first row of the file; rows with
    ``game_range[0] < i <= game_range[1]`` are used. (Assumes the file starts
    with a header row, so row ``i`` is game ``i`` -- TODO confirm against the
    data file.) The second column of each row is split on single spaces into
    score tokens.

    A row whose score field splits into exactly two tokens is treated as
    corrupted: it is reported, its row number is recorded, and its scores are
    dropped.

    Each remaining token becomes one label:
      * ``'NA'`` or an empty token counts as a score of 0,
      * score > threshold   ->  1,
      * score < -threshold  -> -1,
      * otherwise           ->  0.

    Args:
        csv_path: CSV file to read. Defaults to the stockfish.csv on the
            mounted Drive.
        game_range: ``[first, last]`` pair. Defaults to the notebook-level
            ``gamerange``.
        threshold: labelling threshold. Defaults to the notebook-level
            ``winning_threshold``.

    Returns:
        tuple: ``(labels, corrupted)`` where ``labels`` is a list of
        one-element lists ``[label]`` and ``corrupted`` lists the skipped row
        numbers in file order followed by a trailing ``0`` sentinel.

    Raises:
        ValueError: if a token is neither ``'NA'``, empty, nor an integer.
    """
    if csv_path is None:
        csv_path = '/content/drive/My Drive/Colab Notebooks/Chess/data/stockfish.csv'
    if game_range is None:
        game_range = gamerange
    if threshold is None:
        threshold = winning_threshold
    first, last = game_range[0], game_range[1]

    raw_scores = []
    corrupted = []
    with open(csv_path, newline='') as rawdata:
        for i, row in enumerate(csv.reader(rawdata, delimiter=',')):
            # Test the upper bound before anything else so that a corrupted
            # final game cannot bypass it and let the loop run through the
            # rest of the file.
            if i > last:
                break
            if i <= first:
                continue

            tokens = row[1].split(" ")
            if len(tokens) == 2:
                print("Skipping game no. " + str(i) + ": corrupted")
                corrupted.append(i)
                continue
            raw_scores.extend(tokens)

    dfscores = []
    for token in raw_scores:
        value = 0 if token in ('NA', '') else int(token)
        if value > threshold:
            label = 1
        elif value < -threshold:
            label = -1
        else:
            label = 0
        dfscores.append([label])

    # Trailing sentinel: part of the return contract, so a caller that walks
    # the list with an index never reads past the end (no game is numbered 0).
    corrupted.append(0)

    return dfscores, corrupted

In [6]:
# Bitmap representation of each position
def get_boards(corrupted, pgn_path=None, game_range=None):
    """Encode every position of a slice of PGN games with board_to_bitboard.

    Games are read sequentially from the PGN file and indexed from 0. Games
    with ``game_range[0] <= i < game_range[1]`` are used; earlier games are
    read only to advance the file. For each used game the mainline moves are
    replayed and the position after every move is encoded and appended, so
    the starting position itself is not included.

    Args:
        corrupted: iterable of 1-based game numbers to skip (game number is
            ``i + 1``). Entries that match no game, such as a ``0`` sentinel,
            are harmless.
        pgn_path: PGN file to read. Defaults to the data.pgn on the mounted
            Drive.
        game_range: ``[first, last]`` pair. Defaults to the notebook-level
            ``gamerange``.

    Returns:
        list: one ``board_to_bitboard`` encoding per position, in game and
        move order.
    """
    if pgn_path is None:
        pgn_path = "/content/drive/My Drive/Colab Notebooks/Chess/data/data.pgn"
    if game_range is None:
        game_range = gamerange
    first, last = game_range[0], game_range[1]

    # Set membership works for empty, unsorted or sentinel-terminated input,
    # unlike walking the list with a running index.
    corrupted_games = set(corrupted)

    boards = []
    # Context manager so the PGN handle is closed even if parsing fails.
    with open(pgn_path) as pgn:
        for i in range(last):
            game = chess.pgn.read_game(pgn)
            if game is None:
                # read_game returns None once the file is exhausted.
                print("PGN ended after " + str(i) + " games; stopping early")
                break
            if i < first:
                continue

            if i + 1 in corrupted_games:
                print("Skipping game no. " + str(i+1) + ": corrupted")
            else:
                board = game.board()
                for move in game.mainline_moves():
                    board.push(move)
                    boards.append(board_to_bitboard(board))

            if i % 100 == 0:
                print("Up to game no. " + str(i) + " saved")

    return boards

In [7]:
# get_scores() returns a (labels, corrupted) pair of lists with different
# lengths. Unpack the pair first and convert only the labels: passing the pair
# itself to np.asarray asks NumPy to build a ragged array, which is deprecated
# since NumPy 1.19 and raises ValueError from NumPy 1.24 on.
scores, corrupted = get_scores()
scores = np.asarray(scores)
positions = np.asarray(get_boards(corrupted))

print("Saving scores of len: " + str(len(scores)) + " to data/scores" + str(numb) + ".npy")
np.save("/content/drive/My Drive/Colab Notebooks/Chess/data/scores" + str(numb), scores)
# Drop the reference to the label array before the larger save so it can be freed.
scores = []
gc.collect()
print("Saving bitmaps of len: " + str(len(positions)) + " to data/positions" + str(numb) + ".npy")
np.save("/content/drive/My Drive/Colab Notebooks/Chess/data/positions" + str(numb), positions)
gc.collect()

print("Data saved successfully")

Skipping game no. 45455: corrupted
Skipping game no. 46212: corrupted
Skipping game no. 47760: corrupted
Skipping game no. 49380: corrupted
Skipping game no. 49761: corrupted
Skipping game no. 49970: corrupted
Up to game no. 45000 saved
Up to game no. 45100 saved
Up to game no. 45200 saved
Up to game no. 45300 saved
Up to game no. 45400 saved
Skipping game no. 45455: corrupted
Up to game no. 45500 saved
Up to game no. 45600 saved
Up to game no. 45700 saved
Up to game no. 45800 saved
Up to game no. 45900 saved
Up to game no. 46000 saved
Up to game no. 46100 saved
Up to game no. 46200 saved
Skipping game no. 46212: corrupted
Up to game no. 46300 saved
Up to game no. 46400 saved
Up to game no. 46500 saved
Up to game no. 46600 saved
Up to game no. 46700 saved
Up to game no. 46800 saved
Up to game no. 46900 saved
Up to game no. 47000 saved
Up to game no. 47100 saved
Up to game no. 47200 saved
Up to game no. 47300 saved
Up to game no. 47400 saved
Up to game no. 47500 saved
Up to game no. 476