In [1]:
!pip install kaggle



In [13]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"devshah2021","key":"7ca39cb5f414dd2979e6b8aa38f5f8dd"}'}

In [14]:
!mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [15]:
! cp kaggle.json ~/.kaggle/

In [16]:
! chmod 600 ~/.kaggle/kaggle.json

In [17]:
! kaggle datasets download arevel/chess-games

Downloading chess-games.zip to /content
100% 1.45G/1.45G [01:04<00:00, 27.8MB/s]
100% 1.45G/1.45G [01:04<00:00, 23.9MB/s]


In [18]:
!pip install chess -q

In [19]:
letter_2_num = {'a': 0, 'b': 1, 'c': 2, 'd':3, 'e':4, 'f':5, 'g':6, 'h':7}

In [20]:
num_2_letter = {0: 'a', 1:'b', 2:'c', 3:'d', 4:'e', 5:'f', 6:'g', 7:'h'}

we will represent the pawns as 1 or -1 depending on whether or not it's black or white. this helps us turn a chess-board to a matrix

In [21]:
import numpy as np
import re

In [22]:
def create_rep_layer(board, type):
  s = str(board) #get string of board
  s = re.sub(f'[^{type}{type.upper()} \n]', '.', s) #first we replace everything except our desired piece with a period
  s = re.sub(f'{type}', '-1', s) #replace black with -1
  s = re.sub(f' {type.upper()}', '1', s) #replace whites with 1
  s = re.sub(f'\.', '0', s) #replace the dots with 0s
  board_mat = []
  for row in s.split('\n'):
    row = row.split(' ')
    row = [int(x) for x in row] #replace string numbers with integers
    board_mat.append(row) # add to matrix
  return np.array(board_mat) #return numpy matrix

In [23]:
def board2rep(board):
  pieces = ['p', 'r', 'n', 'b', 'q', 'k'] ## chess pieces
  layers = []
  for piece in pieces:
    layers.append(create_rep_layer(board, piece)) #create feature map for each type of piece
  board_rep = np.stack(layers) #create a 3d tensor (which we will give to the CNN)
  return board_rep

In [24]:
def move_2_rp(move, board):
  board.push_san(move).uci() # converts the board into UCI format
  # for ex: d4e5 => this implies take the piece in position d4 and move to e5
  move = str(board.pop()) #

  from_output_layer = np.zeros((8,8))
  from_row = 8 - int(move[1])
  from_column = letter_2_num[move[0]]
  from_output_layer[from_row, from_column] = 1

  to_output_layer = np.zeros((8,8))
  to_row = 8 - int(move[3])
  tow_column = letter_2_num[move[2]]
  to_output_layer[to_row, tow_column] = 1

  return np.stack([from_output_layer, to_output_layer])


In [25]:
def create_move_list(s):
  return re.sub('\d*\. ', '',s).split(' ')[:-1]
  # this will give us a list of moves which we can loop through
  # and convert into matrix representation

In [26]:
import pandas as pd

In [27]:
!unzip /content/chess-games.zip

Archive:  /content/chess-games.zip
  inflating: chess_games.csv         


In [29]:
chess_data_raw = pd.read_csv('/content/chess_games.csv', usecols=['AN','WhiteElo'])

In [30]:
chess_data = chess_data_raw[chess_data_raw['WhiteElo']>2000]

In [36]:
chess_data = chess_data[['AN']]
chess_data = chess_data[-chess_data['AN'].str.contains('{')] # filters out the stuff that contains "{"

In [37]:
chess_data = chess_data[chess_data['AN'].str.len()>20] # filter out the short games

In [38]:
print(chess_data.shape[0])

883376


In [41]:
import chess

In [42]:
import torch
from torch.utils.data import Dataset

class ChessDataset(Dataset):
  def __init__(self, games):
    super(ChessDataset, self).__init__()
    self.games = games

  def __len__(self):
    return 40_000

  def __getitm__(self, index):
    game_i = np.random.randint(self.games.shape[0])
    random_game = chess_data['AN'].values[game_i] # pick a random game
    moves = create_move_list(random_game)
    game_state_i = np.random.randint(len(moves)-1)  # pick a random move from the game
    next_move = moves[game_state_i]
    moves = moves[:game_state_i]
    board = chess.Board()
    for move in moves:
      board.push_san(move)
    x = board2rep(board) # convert to matrix
    y = move_2_rp(next_move, board) # convert to matrix
    if game_state_i % 2 == 1: # if the move index is even (black's turn)
      x *= -1  # then we multiply board matrix by -1
      # this way the CNN will always know to play the pieces that are represented by positive values
    return x,y


In [44]:
from torch.utils.data import DataLoader

data_train = ChessDataset(chess_data['AN'])
data_train_loader = DataLoader(data_train, batch_size=32,shuffle=True,drop_last=True)

# drop_last will drop the last mini_batch if there aren't enough examples for the right size