## Encode chess position

In [1]:
import sys
sys.path.insert(1, "lib/")
from dataset_utils import nb_channels, encode_position, store_many_hdf5
from ScoreGetter import ScoreGetter

import pandas as pd
import numpy as np
from tqdm import tqdm
from multiprocessing import current_process

In [4]:
score_getter = ScoreGetter("bin/lc0", evalc="eval", depth1="go depth 1")
score_getter.go_depth1("r1bqkbnr/p1pp1Qpp/1pn5/4p3/2B1P3/8/PPPP1PPP/RNB1K1NR b KQkq - 0 4")
score_getter.get_score("r1bqkbnr/p1pp1Qpp/1pn5/4p3/2B1P3/8/PPPP1PPP/RNB1K1NR b KQkq - 0 4")

[1m[31m       _
|   _ | |
|_ |_ |_|[0m v0.28.0+git.dirty built Nov 25 2021
Found pb network file: bin/752187.pb.gz
Creating backend [cudnn-auto]...
Switching to [cudnn-fp16]...
CUDA Runtime version: 11.5.0
Cudnn version: 8.3.1
Latest version of CUDA supported by the driver: 11.5.0
GPU: NVIDIA GeForce RTX 2080
GPU memory: 7.79218 GiB
GPU clock frequency: 1815 MHz
GPU compute capability: 7.5
bestmove a1a1
EFOEIFEJFOEJF
Final evaluation +-0.000000


ValueError: could not convert string to float: '+-0.000000'

We load a dataset containing millions of unique chess position represented by the [FEN](https://fr.wikipedia.org/wiki/Notation_Forsyth-Edwards) notation.

In [None]:
df = pd.read_csv("E:/IA/Chess/Datasets/fen_dataset.csv")
df = df.sample(frac=1)

We create our train/test dataset.

Each position is encoded as an image of $8\times8$ "pixels" with $15$ channels
($12$ representing each chess pieces, $1$ for the actual player, $1$
for the en-passant square and $1$ for the castling rights). Along this encoded position, we store
the score given by Stockfish.

In [None]:
score_getter = ScoreGetter("bin/stockfish.exe", evalc="eval", depth1="go depth 1")

nb_position = 150000
position = np.zeros((nb_position, 8, 8, nb_channels), dtype=np.float32)
scores = np.zeros(nb_position, dtype=np.float32)

# Status bar configuration
current = current_process()
pos = current._identity[0]-1 if len(current._identity) > 0 else 0
pbar = tqdm(total=nb_position, desc="Encoding", position=pos)

count = 0
for i in range(nb_position):
  pbar.update(1)
  fen = df["board"][i]
  try:
    scores[i] = score_getter.get_score(fen)
    position[i] = encode_position(fen)
    count += 1
  except Exception as e:
    if str(e) == "[Errno 32] Broken pipe":
        score_getter.restart()
    continue
scores = scores[:count].reshape(-1, 1)
position = position[:count]
print(f"Number of positions: {position.shape[0]}")

Encoding: 100%|█████████▉| 149997/150000 [40:50<00:00, 61.02it/s]

Number of positions: 149371


We save the resulting dataset using HDF5.

In [None]:
directory = "E:/IA/Chess/Datasets/SE_ResNet/"
store_many_hdf5(position, scores, directory, tag="_test")

Encoding: 100%|██████████| 150000/150000 [41:03<00:00, 61.02it/s]