Mount Google Drive

In [1]:
# Mount Google Drive at /content/drive; the PGN files read later in this notebook live
# under /content/drive/MyDrive. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install python-chess
!pip install stockfish
!apt-get update && apt-get install -y stockfish # stockfish engine binary

Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147775 sha256=19af29927650f09f8a75aee69f52a71a45bfe14bb1f4de696f3485eaa39f3b56
  Stored in directory: /root/.cache/pip/wheels/83/1f/4e/8f4300f7dd554eb8de70ddfed96e94d3d030ace10c5b53d447
Successfully built chess
Installing collected packages: chess, python-chess
Successfully installed chess-1.11.2 python-chess-1.999
Collecting stockfish
  Downloading stockfish-3.28.0-py3-none-any.whl.metadata (12 kB)
Downl

Sanity check: open one PGN file and print its first game

In [None]:
import chess.pgn

# Sanity check: read the first game of one PGN file and print its headers and moves.
pgn_file_path = "/content/drive/MyDrive/chess_games/Fischer.pgn"

try:
  # `with` closes the handle even if parsing raises part-way through; the explicit
  # encoding matches the one used by the preprocessing loop further down.
  with open(pgn_file_path, encoding="utf-8") as pgn:
    first_game = chess.pgn.read_game(pgn)
except FileNotFoundError:
  print(f"file not found: {pgn_file_path}")
else:
  # read_game() returns None when there is no game left to read.
  if first_game is not None:
    print("read successful! first game: ")
    print(f"white {first_game.headers['White']}")
    print(f"black {first_game.headers['Black']}")
    print(f"result {first_game.headers['Result']}")

    print("\n =======MOVES: ")
    board = first_game.board()
    for move in first_game.mainline_moves():
      # SAN must be computed before the move is pushed: it depends on the position
      # the move is played from.
      print(board.san(move))
      board.push(move)
  else:
    print("cant read the game from the file")

read successful! first game: 
white Thomason, J.
black Fischer, Robert James
result 0-1

d4
Nf6
c4
g6
Nc3
Bg7
e4
d6
Nf3
O-O
Bd3
Bg4
O-O
Nc6
Be3
Nd7
Be2
Bxf3
Bxf3
e5
d5
Ne7
Be2
f5
f4
h6
Bd3
Kh7
Qe2
fxe4
Nxe4
Nf5
Bd2
exf4
Bxf4
Ne5
Bc2
Nd4
Qd2
Nxc4
Qf2
Rxf4
Qxf4
Ne2+
Kh1
Nxf4


Preprocessing

In [3]:
import chess.pgn
import torch
import os
import time
from stockfish import Stockfish

# config
DRIVE_BASE_PATH = "/content/drive/MyDrive/chess_games/" # games folder path

# PGN files (file names relative to DRIVE_BASE_PATH) that process_data() iterates over.
# Only the uncommented entries are processed.
# NOTE(review): when enabling more entries, make sure every line ends with a comma;
# adjacent string literals without a comma are silently concatenated into one name.
pgn_files = [ # get pgns
    # "Carlsen.pgn",
    # "Caruana.pgn",
    # "Firouzja.pgn",
    # "Fischer.pgn",
    # "Karpov.pgn",
    "Kasparov.pgn"
]

# Where process_data() writes the dataset with torch.save.
# NOTE(review): the file name is hard-coded to "kasparov" and does not follow pgn_files;
# change both together, otherwise a run with a different list overwrites this file.
OUTPUT_FILE_PATH = os.path.join(DRIVE_BASE_PATH, "stockfished_dataset_kasparov.pt") # output path

# Location of the engine binary passed to the Stockfish wrapper
# (presumably where the apt package installs it — confirm on the target machine).
stockfish_path = "/usr/games/stockfish"
try:
  # depth and "Skill Level" are handed straight to the `stockfish` wrapper;
  # get_parameters() is printed so the effective engine options are visible in the log.
  stockfish = Stockfish(path=stockfish_path, depth=7, parameters={"Skill Level":20})
  print(f"stockfish engine initialized... {stockfish_path}")
  print(f"stockfish params: {stockfish.get_parameters()}")
except Exception as e:
  # Any failure leaves `stockfish` as None; process_data() checks for that and returns early.
  print(f"ERROR stockfish init not succ. {e}")
  stockfish = None

stockfish engine initialized... /usr/games/stockfish
stockfish params: {'Debug Log File': '', 'Contempt': 0, 'Min Split Depth': 0, 'Ponder': 'false', 'MultiPV': 1, 'Skill Level': 20, 'Move Overhead': 10, 'Minimum Thinking Time': 20, 'Slow Mover': 100, 'UCI_Chess960': 'false', 'UCI_LimitStrength': 'false', 'UCI_Elo': 1350, 'Threads': 1, 'Hash': 16}


In [4]:
# piece type -> plane index within one colour's group of six planes
# (pawn, knight, bishop, rook, queen, king -> 0..5)
piece_map = {
    piece_type: plane
    for plane, piece_type in enumerate(
        (chess.PAWN, chess.KNIGHT, chess.BISHOP, chess.ROOK, chess.QUEEN, chess.KING)
    )
}

# result_map = {
#     "1-0": 1.0, # white
#     "0-1": -1.0, # black
#     "1/2-1/2": 0.0 # draw
# }

In [5]:
def vectorize_board(board):
  """Encode a python-chess board as a (12, 8, 8) float32 tensor of 0/1 planes.

  Planes 0-5 hold the white pieces and planes 6-11 the black pieces, each group
  ordered by `piece_map`. The second axis is the rank index and the third the
  file index of the square; an occupied square is marked with 1.0.
  """
  planes = torch.zeros(12, 8, 8, dtype=torch.float32)

  for square in chess.SQUARES:
    occupant = board.piece_at(square)
    if not occupant:
      continue  # empty square, nothing to mark

    # black pieces live in the second group of six planes
    colour_offset = 6 if occupant.color == chess.BLACK else 0
    plane = piece_map[occupant.piece_type] + colour_offset
    planes[plane, chess.square_rank(square), chess.square_file(square)] = 1.0
  return planes

# converting result string to a tensor
# def get_label(result_str):
#   label = result_map.get(result_str)
#   if label is not None:
#     return torch.tensor([label], dtype=torch.float32)
#   return None

def get_stockfish_label(board, sf_instance, mate_score_cp=10000, cp_scale=600.0):
  """Return a Stockfish value label for `board`, seen from White's side.

  The position is sent to the engine as FEN, evaluated, and squashed with
  tanh(score / cp_scale) so the label lies in [-1, 1] (positive = good for White).

  Args:
    board: python-chess board to evaluate (only `fen()` and `turn` are used).
    sf_instance: `stockfish.Stockfish` wrapper, or a falsy value if no engine is available.
    mate_score_cp: centipawn value substituted for a forced mate.
    cp_scale: divisor applied to the centipawn score before tanh.

  Returns:
    A float32 tensor of shape (1,), or None when there is no engine, the engine
    reports an unknown evaluation type, or the evaluation raises.
  """
  if not sf_instance:
    return None

  try:
    sf_instance.set_fen_position(board.fen())
    evaluation = sf_instance.get_evaluation()

    # The `stockfish` wrapper documents get_evaluation() as already White-relative
    # ("positive is advantage white, negative is advantage black"). The score must
    # therefore NOT be negated again when Black is to move; doing so inverts the label
    # of every Black-to-move position.
    eval_type = evaluation.get('type')
    if eval_type == 'cp':
      score_cp = evaluation['value']
    elif eval_type == 'mate':
      mate_value = evaluation['value']
      if mate_value > 0:
        score_cp = mate_score_cp       # White mates
      elif mate_value < 0:
        score_cp = -mate_score_cp      # Black mates
      else:
        # "mate 0" carries no sign: the side to move is the one already checkmated.
        score_cp = -mate_score_cp if board.turn == chess.WHITE else mate_score_cp
    else:
      # Bail out explicitly instead of letting `None / cp_scale` raise a TypeError below.
      print(f"wtf is this eval type: {eval_type}")
      return None

    normalized_score = torch.tanh(torch.tensor(score_cp / cp_scale)).item()

    return torch.tensor([normalized_score], dtype=torch.float32)

  except Exception as e:
    # Best effort: a single failed evaluation must not abort a long preprocessing run.
    print(f"ERROR cant get eval {e}, fen: {board.fen()}")
    import traceback
    traceback.print_exc()
    return None

Processing Loop

In [6]:
# main process
def process_data(progress_every=50):
  """Label every mainline position of the configured PGN files and save the dataset.

  For each file in `pgn_files` (resolved against DRIVE_BASE_PATH) the games are read
  one by one; after every mainline move the resulting position is evaluated with
  get_stockfish_label() and, if a label came back, encoded with vectorize_board().
  Missing files are reported and skipped. A game that raises is reported and the loop
  moves on to the next read.

  The result is written with torch.save to OUTPUT_FILE_PATH as a tuple
  (list of board tensors, list of label tensors).

  Args:
    progress_every: print a progress line after this many games of the current file;
      a falsy value disables progress output.

  Returns:
    None. Returns early (without saving) if the global `stockfish` engine is not set.
  """
  # `stockfish` is only read here, so no `global` declaration is needed.
  if not stockfish:
    print("cant reach stockfish engine, bye!")
    return

  all_board_tensors = []
  all_labels = []
  game_count = 0
  position_count = 0
  start_time = time.time()

  print("STARTING DATA PREPROCESSING... with STOCKFISH...")

  for pgn_filename in pgn_files:
    file_path = os.path.join(DRIVE_BASE_PATH, pgn_filename)
    if not os.path.exists(file_path):
      print(f"ERROR! file not found: {file_path}")
      continue

    print(f"\nprocessing file: {pgn_filename}")
    processed_in_file = 0 # counter for games in pgn

    # `with` guarantees the handle is released even on KeyboardInterrupt during a long run.
    with open(file_path, encoding="utf-8") as pgn:
      while True:
        try:
          game = chess.pgn.read_game(pgn)
          if game is None: # end of file
            break

          game_count += 1
          board = game.board()

          for move in game.mainline_moves(): # loop every move of the game
            board.push(move)

            # positions are labelled after the move has been played
            label = get_stockfish_label(board, stockfish)

            if label is not None:
              all_board_tensors.append(vectorize_board(board))
              all_labels.append(label)
              position_count += 1

          processed_in_file += 1
          if progress_every and processed_in_file % progress_every == 0:
            elapsed = time.time() - start_time
            print(f">>processed {processed_in_file} games from {pgn_filename}..."
                  f"\n>>>>total pos: {position_count}, elapsed {elapsed:.0f}s")

        except Exception as e:
          # Best effort: positions collected before the failure are kept.
          print(f"ERROR! reading a game {e}, skipping to the next game")
          continue
    print(f"finished processing {pgn_filename}")

  end_time = time.time()
  print("\n==PREPROCESSING COMPLETE")
  print(f"games processed: {game_count}")
  print(f"total Pos: {position_count}")
  print(f"total time: {end_time - start_time:.2f} seconds")

  print("===")
  print("\nsaving dataset")

  try:
    torch.save((all_board_tensors, all_labels), OUTPUT_FILE_PATH)
    print(f"save successfull, path: {OUTPUT_FILE_PATH}")
  except Exception as e:
    print(f"ERROR! saving dataset. {e}")

In [7]:
process_data()

STARTING DATA PREPROCESSING... with STOCKFISH...

processing file: Kasparov.pgn
>>processed 50 games from Kasparov.pgn...
>>>>total pos: 3522, elapsed 31s
>>processed 100 games from Kasparov.pgn...
>>>>total pos: 7247, elapsed 65s
>>processed 150 games from Kasparov.pgn...
>>>>total pos: 10895, elapsed 95s
>>processed 200 games from Kasparov.pgn...
>>>>total pos: 14373, elapsed 124s
>>processed 250 games from Kasparov.pgn...
>>>>total pos: 17716, elapsed 152s
>>processed 300 games from Kasparov.pgn...
>>>>total pos: 21303, elapsed 185s
>>processed 350 games from Kasparov.pgn...
>>>>total pos: 24901, elapsed 217s
>>processed 400 games from Kasparov.pgn...
>>>>total pos: 28766, elapsed 250s
>>processed 450 games from Kasparov.pgn...
>>>>total pos: 32694, elapsed 282s
>>processed 500 games from Kasparov.pgn...
>>>>total pos: 36871, elapsed 318s
>>processed 550 games from Kasparov.pgn...
>>>>total pos: 40668, elapsed 351s
>>processed 600 games from Kasparov.pgn...
>>>>total pos: 44401, ela