<a href="https://colab.research.google.com/github/nankivel/capstone/blob/main/training_data_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install chess

Collecting chess
  Downloading chess-1.10.0-py3-none-any.whl (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: chess
Successfully installed chess-1.10.0


In [2]:
pip install zstandard

Collecting zstandard
  Downloading zstandard-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: zstandard
Successfully installed zstandard-0.22.0


In [3]:
import os
import datetime
import chess
import chess.engine
import random
import numpy as np
import pydot
from tqdm import tqdm
import io
import json
import graphviz
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display, SVG
import zstandard

In [4]:
# Mount Google Drive at /content/drive/; the input and output files used below live under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:
# Install the Stockfish engine; the stockfish() helper launches it from /usr/games/stockfish.
!apt-get install -y stockfish

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Suggested packages:
  polyglot xboard | scid
The following NEW packages will be installed:
  stockfish
0 upgraded, 1 newly installed, 0 to remove and 39 not upgraded.
Need to get 24.8 MB of archives.
After this operation, 47.4 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 stockfish amd64 14.1-1 [24.8 MB]
Fetched 24.8 MB in 2s (12.3 MB/s)
Selecting previously unselected package stockfish.
(Reading database ... 121753 files and directories currently installed.)
Preparing to unpack .../stockfish_14.1-1_amd64.deb ...
Unpacking stockfish (14.1-1) ...
Setting up stockfish (14.1-1) ...
Processing triggers for man-db (2.10.2-1) ...


In [6]:
# Helper functions

def stockfish(board, depth, engine=None):
  """Return the engine's evaluation of `board` in centipawns, from White's point of view.

  Args:
    board: position to analyse (a chess.Board).
    depth: search depth, passed to the engine as chess.engine.Limit(depth=depth).
    engine: optional, already-running engine exposing `analyse(board, limit)`
      (e.g. a chess.engine.SimpleEngine). When given it is used as-is and left
      open, so a caller scoring many positions does not pay for starting a new
      Stockfish process on every call. When None (the default) a temporary
      engine is started from /usr/games/stockfish and shut down before returning.

  Returns:
    The centipawn score as an int, or None when the engine reports a forced
    mate (Score.score() is called without a mate_score, so mates map to None).
  """
  limit = chess.engine.Limit(depth=depth)
  if engine is not None:
    # Caller owns the engine's lifetime; do not close it here.
    return engine.analyse(board, limit)['score'].white().score()
  with chess.engine.SimpleEngine.popen_uci("/usr/games/stockfish") as sf:
    return sf.analyse(board, limit)['score'].white().score()

def board_encoder(board):
  """Encode a board as an 8x8x15 int8 array of 0/1 feature planes.

  Planes 0-11 mark piece locations (R, N, B, Q, K, P, then r, n, b, q, k, p);
  plane 12 marks the destination squares of White's legal moves, plane 13 the
  destination squares of Black's legal moves, and plane 14 is filled with 1
  when White is to move (0 otherwise). Row 0 of the array is the first rank
  field of the FEN string. `board.turn` is switched temporarily to enumerate
  each side's moves and is set back to its original value before returning.
  """
  plane_of = {symbol: plane for plane, symbol in enumerate("RNBQKPrnbqkp")}
  planes = np.zeros((8, 8, 15), dtype=np.int8)
  fen_fields = board.fen().split(' ')

  # Piece planes: walk every rank string, skipping over runs of empty squares.
  for row, rank_text in enumerate(fen_fields[0].split('/')):
    col = 0
    for symbol in rank_text:
      if symbol.isnumeric():
        col += int(symbol)
      else:
        planes[row, col, plane_of[symbol]] = 1
        col += 1

  # Planes 12 (white) and 13 (black): destination squares of each side's legal moves.
  side_to_move = board.turn
  for colour, plane in ((chess.WHITE, 12), (chess.BLACK, 13)):
    board.turn = colour
    for move in board.legal_moves:
      rank_index, file_index = divmod(move.to_square, 8)
      planes[7 - rank_index, file_index, plane] = 1
  board.turn = side_to_move

  # Plane 14: all ones when white is to move; it stays all zeros for black.
  if fen_fields[1] == 'w':
    planes[:, :, 14] = 1
  return planes

In [7]:
def read_pgn_zst(file_path, output_prefix="/content/drive/MyDrive/lichess_db_eval.", chunk_size=1000000):
  """Split a zstd-compressed JSON-lines file into numbered JSON chunk files.

  Each non-blank input line is parsed as one JSON record. Records are numbered
  from 1 in file order and collected into a dict keyed by that number; every
  `chunk_size` records the dict is written to `<output_prefix><k>.json`
  (k = 1, 2, ...) and emptied. Any records left over after the last full
  chunk are written to one further file, so the tail of the input is not lost.

  Args:
    file_path: path of the .zst-compressed JSON-lines input.
    output_prefix: path prefix of the chunk files; the chunk number and
      ".json" are appended to it.
    chunk_size: number of records per chunk file.

  Returns:
    None. Prints the total number of records read.
  """
  def write_chunk(chunk_number, records):
    # One JSON object per file: {record_number: record, ...}
    with open(output_prefix + str(chunk_number) + ".json", 'w') as output:
      json.dump(records, output)

  pgn_data = {}
  counter = 0
  with open(file_path, 'rb') as f:
    decompressor = zstandard.ZstdDecompressor()
    with decompressor.stream_reader(f) as reader:
      text_stream = io.TextIOWrapper(reader, encoding='utf-8')
      for line in text_stream:
        if not line.strip():
          # Tolerate blank lines (e.g. a trailing newline) instead of failing in json.loads.
          continue
        counter += 1
        pgn_data[counter] = json.loads(line)
        if counter % chunk_size == 0:
          write_chunk(counter // chunk_size, pgn_data)
          pgn_data = {}
  if pgn_data:
    # Final partial chunk: fewer than chunk_size records remained.
    write_chunk(counter // chunk_size + 1, pgn_data)
  print(f"Loaded {counter} FEN positions")

In [8]:
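# One-off preprocessing step, left disabled: uncomment to split the compressed Lichess evaluation file into JSON chunk files on Drive.
# read_pgn_zst("/content/drive/MyDrive/lichess_db_eval.jsonl.zst")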

In [9]:
# Load chunk 1 of the files written by read_pgn_zst: a dict mapping the record
# number (a string key after the JSON round-trip) to the parsed record.
with open("/content/drive/MyDrive/lichess_db_eval.1.json", 'r') as json_file:
  lichessdata = json.load(json_file)

In [10]:
# Sanity check: number of records loaded from the chunk.
len(lichessdata)

1000000

In [11]:
# Smoke test for board_encoder on a few hand-picked FEN strings.
fens = ['rn1q1rk1/pbp2ppp/1p1bp3/8/3Pp3/1P2PN2/PBPN1PPP/R2Q1RK1 w - -',
        '3R4/p4pkp/3p2p1/2pP4/3brP2/P5PP/P2B4/7K b - -',
        'rn3rk1/1b3ppp/p3pn2/1p6/1P6/1BB1PN2/1P3PPP/3RK2R w K -',
        'r1bqk2r/2ppbpp1/p1n3np/1p6/3PP3/1B3N2/PP3PPP/RNBQ1RK1 w kq -',
        'r4rk1/pp1b1ppp/2n1p3/2qp4/8/2PBP3/PP1N1PPP/R2QK2R w Q f'
        ]

boards = []
for fen in fens:
  try:
    board = chess.Board(fen=fen)
  except ValueError:
    # chess.Board raises ValueError for a malformed FEN (the last sample has 'f'
    # as its en passant field); skip it. Catching only ValueError keeps
    # KeyboardInterrupt and unrelated errors visible.
    continue
  boards.append(board_encoder(board))

In [12]:
# Number of sample FENs that parsed and were encoded.
len(boards)

4

In [None]:
# Label every loaded position with a depth-5 Stockfish evaluation and
# checkpoint the encoded boards (X) and scores (y) to Drive in chunks.
CHUNK_SIZE = 100000  # positions processed per saved .npz file

def _save_dataset_chunk(chunk_number, boards, values):
  """Write one chunk of encoded boards and their scores to a numbered .npz file on Drive."""
  np.savez("/content/drive/MyDrive/dataset_lichess"+str(chunk_number)+".Mar21.npz",
           X=np.array(boards, dtype=np.int8),
           y=np.array(values, dtype=np.int16))

dataset_board = []
dataset_v = []
counter = 0
for game in tqdm(lichessdata):
  counter += 1
  try:
    board = chess.Board(fen=lichessdata[game]['fen'])
    v = stockfish(board, 5)
  except Exception:
    # Best effort: skip positions that fail to parse or analyse. Catching
    # Exception rather than using a bare except lets a kernel interrupt stop
    # the run, and falling through (instead of `continue`) means a failure on
    # a chunk-boundary position no longer skips the checkpoint below.
    v = None
  if v is not None:
    dataset_board.append(board_encoder(board))
    dataset_v.append(v)
  if counter % CHUNK_SIZE == 0:
    _save_dataset_chunk(counter // CHUNK_SIZE, dataset_board, dataset_v)
    dataset_board = []
    dataset_v = []

# Save whatever was collected after the last full chunk so the tail is not lost.
if dataset_board:
  _save_dataset_chunk(counter // CHUNK_SIZE + 1, dataset_board, dataset_v)
  dataset_board = []
  dataset_v = []

  0%|          | 486/1000000 [01:01<31:28:33,  8.82it/s]