In [1]:
from pgn2gif import chess
import numpy as np
from openTSNE import TSNE
from openTSNE.callbacks import ErrorApproximations
from matplotlib import pyplot as plt
from scipy import interpolate
import re
import pandas as pd
from custom_chess_utils.utils import *
from umap.parametric_umap import ParametricUMAP
from matplotlib import pyplot as plt
# --- Notebook configuration -------------------------------------------------
import os

# Single seed shared by NumPy's global RNG and the projection further down.
seed = 0
np.random.seed(seed)

# Path to the lichess PGN dump. The default is the original author's local
# path; set the CHESS_GAMES_PATH environment variable to point the notebook at
# a different copy without editing this cell.
chess_games_path = os.environ.get(
    'CHESS_GAMES_PATH',
    '/mnt/d/Work/CG Institute/chess/lichess data/lichess_db_standard_rated_2021-08.pgn',
)

# Folder that receives one PGN file per extracted game.
pgn_folder = 'games'

# Output CSV for the embedding; the seed is part of the file name so runs with
# different seeds do not overwrite each other.
umap_path = 'lichess_param_umap_seed{}.csv'.format(seed)

2022-01-13 11:39:28.413030: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-13 11:39:28.413109: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


### Data loading and preprocessing

#### Extract individual games from PGN file

In [2]:
# Split the PGN dump into individual games using the project helper.
# lines_to_read=800000 is passed through to the helper; presumably it bounds
# how much of the source file is read -- confirm against the helper.
split_games = extract_individual_games_from_pgn(
    chess_games_path,
    pgn_folder,
    lines_to_read=800000,
)

#### Filter out games that lack evaluation scores (eval) or clock times (clk)

In [3]:
# Replace split_games with whatever subset the project helper returns (per the
# section header: games that carry both eval and clk data), then print how
# many games remain.
split_games = get_games_with_eval_and_clk(split_games)
print(len(split_games))

2228


#### Keep only the first n games

In [4]:
# Upper bound on the number of games carried through the rest of the notebook.
n_games = 250
# Keep the first n_games entries. NOTE(review): if split_games is a plain
# sequence, slicing yields fewer than n_games items when fewer are available,
# so the printed length is the authoritative count.
split_games = split_games[:n_games]
print(len(split_games))

250


#### store individual games as PGN files

In [5]:
# Hand the retained games to the project helper together with the target folder.
store_games_as_pgn(split_games, pgn_folder)

# Build one path per game that was actually kept. The list is sized from
# len(split_games) rather than n_games, so that when fewer than n_games games
# survived filtering we do not list files that were never written.
# NOTE(review): assumes store_games_as_pgn names its files game-00001.pgn,
# game-00002.pgn, ... in list order -- confirm against the helper.
game_paths = [
    '{}/game-{:05d}.pgn'.format(pgn_folder, game_number)
    for game_number in range(1, len(split_games) + 1)
]

#### Loading games from individual PGN files

In [6]:
# Collect metadata for the stored games. The helper returns five values: the
# paths it kept, the metadata, the eval and clk series, and the metadata keys.
(
    game_paths_checked,
    metadata,
    metadata_evals,
    metadata_clks,
    md_keys,
) = get_metadata_from_pgns(game_paths)

# Show which metadata fields are available.
print(md_keys)

['BlackElo', 'TimeControl', 'Round', 'ECO', 'White', 'Termination', 'UTCTime', 'Result', 'WhiteElo', 'Opening', 'Site', 'Event', 'Black', 'UTCDate', 'Date']


In [7]:
# Convert the checked PGN files into per-game matrices plus the parsed games.
# first_moves_filter=None is passed explicitly; presumably this disables
# filtering by first moves -- confirm against the helper.
game_matrices, games_pgn = game_matrices_from_pgn(
    pgn_folder,
    game_paths_checked,
    first_moves_filter=None,
)

# Sanity check: number of games converted.
print(len(game_matrices))

250


#### Keep opening moves only
This uses the list of openings that corresponds to the lichess dataset: https://github.com/niklasf/chess-openings

In [8]:
# Build the ECO openings table via the project helper. 'ECOs' is the argument
# it receives -- presumably the local folder holding the chess-openings data
# linked above; verify against the helper.
eco_df = get_eco_df('ECOs')

#### add ECO category (A,B,C,D,E,F) to metadata

In [9]:
# Rebind metadata to the helper's result (per the section header, this adds
# the ECO category to the metadata).
metadata = create_opening_categories_feature(metadata)

#### Filter out games whose opening does not exist in our ECO dataset

In [10]:
# Pass every per-game collection through the ECO filter and rebind all five to
# the helper's results, so they are always updated together.
(
    game_matrices,
    metadata,
    metadata_evals,
    metadata_clks,
    games_pgn,
) = filter_unknown_ecos(
    eco_df,
    game_matrices,
    metadata,
    metadata_evals,
    metadata_clks,
    games_pgn,
)

#### Use the ECO dataframe to determine the number of moves n in each game's opening, then cut off each game after n moves

In [11]:
# Rebind game_matrices to the helper's result (per the section header, each
# game is truncated to its opening moves using eco_df and metadata).
game_matrices = cut_off_games_after_opening(game_matrices, eco_df, metadata)

#### get captured pieces throughout games and turns

In [12]:
# The helper returns two values; only the first (captures) is used below and
# the second is deliberately discarded.
captures, _ = get_captures(game_matrices, games_pgn)

#### concatenate games into final data that will be projected

In [13]:
# Join all per-game arrays end to end along the first axis to form the single
# array that is handed to the projection.
final_data = np.concatenate(game_matrices, axis=0)

### Calculating Embeddings / Projections and Writing to Files

#### project using umap

In [14]:
# Parametric UMAP reducer using the notebook-wide seed, with verbose logging on.
# NOTE(review): the config cell seeds only NumPy's global RNG; if bit-for-bit
# reproducibility matters, confirm whether the TensorFlow/Keras backend needs
# its own seed as well.
reducer = ParametricUMAP(
    random_state=seed,
    verbose=True,
)

In [15]:
# Fit the reducer on the concatenated data and obtain its embedding in one
# call; the training log below is produced by this line.
umap_embedding = reducer.fit_transform(final_data)

ParametricUMAP(optimizer=<keras.optimizer_v2.adam.Adam object at 0x7f2522f4b8e0>)
Construct fuzzy simplicial set
Thu Jan 13 11:39:45 2022 Finding Nearest Neighbors
Thu Jan 13 11:39:48 2022 Finished Nearest Neighbor Search
Thu Jan 13 11:39:50 2022 Construct embedding


2022-01-13 11:39:50.953759: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-01-13 11:39:50.953815: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-01-13 11:39:50.953839: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (DESKTOP-857SMRC): /proc/driver/nvidia/version does not exist


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Thu Jan 13 11:39:55 2022 Finished embedding


#### Reshape the resulting embedding into shape (games, states, 2)

In [16]:
# Regroup the flat embedding per game, using game_matrices as the reference
# for how the rows were concatenated (see the section header for the intended
# layout).
umap_embedding_split = reshape_embedding(umap_embedding, game_matrices)

#### Create the resulting CSV file from all metadata and the embedding

In [18]:
# Write the embedding together with the metadata, captures, evals and clks to
# umap_path via the project helper.
write_csv(
    umap_path,
    md_keys,
    umap_embedding_split,
    game_matrices,
    metadata,
    captures,
    metadata_evals,
    metadata_clks,
)

['BlackElo', 'TimeControl', 'Round', 'ECO', 'White', 'Termination', 'UTCTime', 'Result', 'WhiteElo', 'Opening', 'Site', 'Event', 'Black', 'UTCDate', 'Date']
