In [1]:
# df_players
# name, elo, state (H/m), move time avg (not possible yet), acpl game avg
# df_games
# p1_ID, p2_ID, winner_ID, date, opening, p1_ELO, p2_ELO
# df_moves
# player_ID, game_ID, time, eval, fen/move, comment

In [2]:
import pandas as pd
import numpy as np

import chess
import chess.pgn
from stockfish import Stockfish
import os

import uuid        # for id generation
import shortuuid

%load_ext autoreload
%autoreload 2

In [3]:
#stockfish = Stockfish(
#    'stockfish_14_linux_x64/stockfish_14_linux_x64/stockfish_14_x64', 
#    parameters={"Threads": 2, 'Min Split Depth': 26, 'Ponder':True}
#)
#stockfish.set_elo_rating(2600)
#stockfish.set_skill_level(30)

In [4]:
%%time

players = {
    'White':[],
    'White_Elo': [],
    'Black': [],
    'Black_Elo': [],
    'WhiteIsComp':[],
}

games = {
    'Game_ID': [],
    'Date' : [],
    'White':[],  # Dummy ID
    'White_Elo': [],
    'Black': [],  # Dummy ID
    'Black_Elo': [],
    'ECO': [],
    'Result': [],
}

moves_log_dict = {
    'Game_ID': [],
    'FEN_moves': [],
    'Bitmap_moves': [],
    #'cpl': [],
    'WhiteIsComp': [],
    'turn': [],
    'Castling_right': [],
    'EP_option': [],
    'Pseudo_EP_option': [],
    'Halfmove_clock': []
    #'Result': [],
}

# Set list of Pieces
PIECES = [chess.Piece.from_symbol('P'),
         chess.Piece.from_symbol('N'),
         chess.Piece.from_symbol('B'),
         chess.Piece.from_symbol('R'),
         chess.Piece.from_symbol('Q'),
         chess.Piece.from_symbol('K'),
         chess.Piece.from_symbol('p'),
         chess.Piece.from_symbol('n'),
         chess.Piece.from_symbol('b'),
         chess.Piece.from_symbol('r'),
         chess.Piece.from_symbol('q'),
         chess.Piece.from_symbol('k')]

# read file
pgn = open("data/Fics_data_pc_data.pgn", encoding='UTF-8')  # always a Comp vs Player
game_counter = 0

while True:  # keep reading games
    try:
        game = chess.pgn.read_game(pgn)
        board = game.board()
        moves = list(game.mainline_moves())
        
        # Player
        players['White_Elo'].append(game.headers['WhiteElo'])
        players['Black_Elo'].append(game.headers['BlackElo'])
        players['White'].append(game.headers['White'])
        players['Black'].append(game.headers['Black'])
        players['WhiteIsComp'].append(game.headers.get('WhiteIsComp', 'No'))
        
        # Games
        games['Game_ID'].append(game.headers['FICSGamesDBGameNo'])
        games['White'].append(game.headers['White'])  # dummy ID
        games['Black'].append(game.headers['Black'])  # dummy ID
        games['White_Elo'].append(game.headers['WhiteElo'])
        games['Black_Elo'].append(game.headers['BlackElo'])
        games['ECO'].append(game.headers['ECO'])
        games['Result'].append(game.headers['Result'])
        games['Date'].append(game.headers['Date'])
        
        # MOVE CYCLE
        white = True
        for move in moves:
            board.push(move)
            #fen_pos.append(board.fen())
            #stockfish.set_fen_position(board.fen())  # load stockfish with current FEN for eval
            #cpl = stockfish.get_evaluation()['value']/100
            
            moves_log_dict['Game_ID'].append(game.headers['FICSGamesDBGameNo'])
            moves_log_dict['FEN_moves'].append(board.fen())
            
            #Generate bitmap representation of FENs
            bitmap_board_dict = {}
            positions = board.piece_map()

            for piece in PIECES:
                bitmap_board = {}
                for position in positions:
                    if positions[position] == piece: 
                        bitmap_board[position] = 1
                    else:
                        bitmap_board[position] = 0
                bitmap_board_dict[str(piece)] = bitmap_board            
            
            moves_log_dict['Bitmap_moves'].append(bitmap_board_dict)
            
            #moves_log_dict['cpl'].append(cpl)
            
            #Turn color and castling availablity
            moves_log_dict['WhiteIsComp'].append(game.headers.get('WhiteIsComp', 'No'))
            if white:
                moves_log_dict['turn'].append('white')
                moves_log_dict['Castling_right'].append(int(board.has_castling_rights(chess.WHITE)))
                white = False
            else:
                moves_log_dict['turn'].append('black')
                moves_log_dict['Castling_right'].append(int(board.has_castling_rights(chess.BLACK)))
                white = True
                
            #(Pseudo) en passant opportunity
            moves_log_dict['EP_option'].append(int(board.has_legal_en_passant()))
            moves_log_dict['Pseudo_EP_option'].append(int(board.has_pseudo_legal_en_passant()))
            
            #Halfmove clock
            moves_log_dict['Halfmove_clock'].append(board.halfmove_clock)
                
        game_counter += 1
        if game_counter == 50:  # number of games to read
            break
    except AttributeError:  # no further games to read
        print('No further games to load.')
        break

print(f'{game_counter} games read.')
#TODO takes ~1 sec to process 5 games, too slow.

50 games read.
CPU times: user 1.12 s, sys: 26.9 ms, total: 1.15 s
Wall time: 1.15 s


## Players

In [5]:
df_players = pd.DataFrame(players)
#TODO player ID
df_players.head()

Unnamed: 0,White,White_Elo,Black,Black_Elo,WhiteIsComp
0,forlat,1970,Geforce,2204,Yes
1,Geforce,2201,forlat,1973,No
2,forlat,1976,Geforce,2198,Yes
3,Geforce,2211,forlat,1963,No
4,forlat,1958,Geforce,2216,Yes


In [6]:
#generates unique IDs from int
def id_generator(id):
    return uuid.uuid4().int

In [7]:
#generates unique IDs containing digits and characters
def short_id_gen(id):
    return shortuuid.ShortUUID().random(length=15)

In [8]:
def game_id(input_df):
    '''generates IDs for df_games'''
    input_df['Game_ID'] = input_df['Game_ID'].apply(short_id_gen)
    df_games = input_df
    return df_games

In [9]:
players_id = pd.DataFrame({'Players': [], 'Player_ID' : []})

def players_id_list(input_df, players_id):
    #extract black and white columns
    black = list(input_df["Black"]) 
    white = list(input_df["White"])
    
    #merge uniqe values from both columns:
    bw_merged = pd.DataFrame(list(set(black + white)), columns=["Players"])
    
    # Player_ID filled with NaNs:
    players_id = players_id.merge(bw_merged, how="outer", left_on=["Players"], right_on=["Players"])
    
    # NaNs replaced with generated IDs
    nans_to_ids = players_id["Player_ID"].fillna(players_id["Player_ID"].apply(id_generator))
    
    #inserting missing IDs to players_id
    players_id["Player_ID"] = nans_to_ids 
    return players_id

In [10]:
players_id = players_id_list(df_players, players_id)

In [11]:
df_dummy = pd.DataFrame({'White' : ["12345", 'DummyName', "1234", "forlat", "Geforce"], "Black" : ['DummyName', "12345", "Dummy", "Geforce", "Bambi"]})

In [12]:
players_id = players_id_list(df_dummy, players_id)

In [13]:
players_id

Unnamed: 0,Player_ID,Players
0,291142699377756093256487506370396219218,Ruvarashe
1,114722629602007041134467691333475683178,chessloon
2,273833712805872301521825141142914753971,Ghannoum
3,48310492917619571390552171320757215805,foggydew
4,326435641966809457708135553238022036280,JMM
5,13327804801251372375971755631012946842,nakshatra
6,310181198934092652974826006265725011278,Notarious
7,155552242241224829058952519077916404462,RamMiguel
8,4262472346154295111845090029420246804,Genobear
9,260608558447694261845486056275578962189,Arsyah


In [14]:
def assign_player_id(input_df): # returns a df with 2 new columns and assigned player ID
    #print("List od IDs has been generated:")
    #print(players_unique)
    m_white = input_df.merge(players_id, left_on=["White"], right_on=['Players'])   #
    m_white['White_ID'] = m_white['Player_ID']
    m_white.drop(columns=['Players', "Player_ID"], inplace=True)
    m_bw = m_white.merge(players_id, left_on=["Black"], right_on=['Players'])
    m_bw['Black_ID'] = m_bw['Player_ID']
    m_bw.drop(columns=['Players', "Player_ID"], inplace=True)
    df_players = m_bw
    return df_players

In [15]:
assign_player_id(df_players)

Unnamed: 0,White,White_Elo,Black,Black_Elo,WhiteIsComp,White_ID,Black_ID
0,forlat,1970,Geforce,2204,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
1,forlat,1976,Geforce,2198,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
2,forlat,1958,Geforce,2216,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
3,forlat,1964,Geforce,2210,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
4,forlat,1970,Geforce,2204,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
5,forlat,1976,Geforce,2198,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
6,forlat,1976,Geforce,2194,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
7,forlat,1984,Geforce,2186,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
8,forlat,1976,Geforce,2186,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658
9,forlat,1984,Geforce,2178,Yes,14583382987580554146999788201903199483,243183283348732788289261580904796788658


In [16]:
#set.union(set(df_players['White']),(set(df_players['Black'])))

## Games

In [17]:
df_games = pd.DataFrame(games)
#TODO date to datetime

# def game_id(input_df):
#     '''generates IDs for df_games'''
#     input_df["old_ID"] = input_df["Game_ID"]
#     input_df['Game_ID'] = input_df['Game_ID'].apply(short_id_gen)
#     return df_games

def old_and_new_game_ids():
    df_games["old_ID"] = df_games["Game_ID"]
    df_games["Game_ID"] = df_games['Game_ID'].apply(short_id_gen)
    
    return df_games


#     data = [df_games["Game_ID"], df_games["old_ID"]]
#     headers = ["Game_ID", "old_ID"]
#     df3 = pd.concat(data, axis=1, keys=headers)
#     return df3

# def game_ids(df_games):
#     old_and_new_ids()
    
#     # merge df_games with players_id
#     df_games = df_games.merge(players_id, left_on='White', right_on='Players')
#     df_games = df_games.merge(players_id, left_on='Black', right_on='Players')
#     df_games.drop(columns=['Players_x', 'Players_y'], inplace=True) #optionally drop White, Black columns
#     df_games.rename(columns = {'Game_ID_y' : 'Game_ID', 'Player_ID_x': 'White_ID', 'Player_ID_y': 'Black_ID'}, inplace=True)
    
#     return df_games

# def merging_b_w_ids(df_games):
#     '''returns a df_games with additional White_ID, Black_ID'''
#     df_games["old_ID"] = df_games["Game_ID"]
#     df_games["Game_ID"] = game_id(df_games)
    
#     df_games = df_games.merge(players_id, left_on='White', right_on='Players')
#     df_games = df_games.merge(players_id, left_on='Black', right_on='Players')
#     df_games.drop(columns=['Players_x', 'Players_y'], inplace=True) #optionally drop White, Black columns
#     df_games.rename(columns = {'Game_ID_y' : 'Game_ID', 'Player_ID_x': 'White_ID', 'Player_ID_y': 'Black_ID'}, inplace=True)
#     return df_games

In [18]:
old_and_new_game_ids()

Unnamed: 0,Game_ID,Date,White,White_Elo,Black,Black_Elo,ECO,Result,old_ID
0,7v6th7ohGMbR8WQ,2021.01.31,forlat,1970,Geforce,2204,A01,0-1,490373548
1,7khF2jrwRhTvTJa,2021.01.31,Geforce,2201,forlat,1973,B00,1-0,490373541
2,76cnJvfkuZ3ufcB,2021.01.31,forlat,1976,Geforce,2198,A20,0-1,490373526
3,AZ2qAGUoow6TQyh,2021.01.31,Geforce,2211,forlat,1963,B10,0-1,490373517
4,d7X86fxavRwUwwd,2021.01.31,forlat,1958,Geforce,2216,D30,1/2-1/2,490373487
5,3fJ7NZAa3JppwRG,2021.01.31,exeComp,2717,Ruvarashe,1976,B92,1-0,490373455
6,9TyJ7dzcLxdvVZN,2021.01.31,Geforce,2213,forlat,1961,B00,1-0,490373444
7,AMCmsa5aEHTZJqu,2021.01.31,forlat,1964,Geforce,2210,C28,0-1,490373432
8,8MjDert5M3NcJnN,2021.01.31,Ruvarashe,1976,exeComp,2717,C11,0-1,490373410
9,6bddesENmzkiUbM,2021.01.31,Geforce,2207,forlat,1967,B10,1-0,490373400


## Moves

In [19]:
df_moves = pd.DataFrame(moves_log_dict)

In [42]:
def move_id(df_moves, df_games):  
    data = [df_games["Game_ID"], df_games["old_ID"]]
    headers = ["Game_ID", "old_ID"]
    df = pd.concat(data, axis=1, keys=headers)
    merging = df_moves.merge(df, how="left", left_on="Game_ID", right_on="old_ID")
    merging.drop(columns=["Game_ID_x", "old_ID"], inplace=True)
    merging.rename(columns = {'Game_ID_y' : 'Game_ID'}, inplace = True)
    merging.insert(0, "Moves_ID", merging.apply(lambda row: f"{row.Game_ID }-{row.turn}-{row.Halfmove_clock}", axis=1))
    df_moves = merging
    return df_moves

In [43]:
move_id(df_moves, df_games)

Unnamed: 0,Moves_ID,FEN_moves,Bitmap_moves,WhiteIsComp,turn,Castling_right,EP_option,Pseudo_EP_option,Halfmove_clock,Game_ID
0,7v6th7ohGMbR8WQ-white-0,rnbqkbnr/pppppppp/8/8/8/1P6/P1PPPPPP/RNBQKBNR ...,"{'P': {63: 0, 62: 0, 61: 0, 60: 0, 59: 0, 58: ...",Yes,white,1,0,0,0,7v6th7ohGMbR8WQ
1,7v6th7ohGMbR8WQ-black-0,rnbqkbnr/pppp1ppp/8/4p3/8/1P6/P1PPPPPP/RNBQKBN...,"{'P': {63: 0, 62: 0, 61: 0, 60: 0, 59: 0, 58: ...",Yes,black,1,0,0,0,7v6th7ohGMbR8WQ
2,7v6th7ohGMbR8WQ-white-1,rnbqkbnr/pppp1ppp/8/4p3/8/1PN5/P1PPPPPP/R1BQKB...,"{'P': {63: 0, 62: 0, 61: 0, 60: 0, 59: 0, 58: ...",Yes,white,1,0,0,1,7v6th7ohGMbR8WQ
3,7v6th7ohGMbR8WQ-black-2,r1bqkbnr/pppp1ppp/2n5/4p3/8/1PN5/P1PPPPPP/R1BQ...,"{'P': {63: 0, 62: 0, 61: 0, 60: 0, 59: 0, 58: ...",Yes,black,1,0,0,2,7v6th7ohGMbR8WQ
4,7v6th7ohGMbR8WQ-white-0,r1bqkbnr/pppp1ppp/2n5/4p3/8/1PN1P3/P1PP1PPP/R1...,"{'P': {63: 0, 62: 0, 61: 0, 60: 0, 59: 0, 58: ...",Yes,white,1,0,0,0,7v6th7ohGMbR8WQ
...,...,...,...,...,...,...,...,...,...,...
2672,6gNdNSxyAnvsdxc-white-1,8/5pk1/4p2p/6p1/p2K2PP/3B2P1/1r6/8 b - - 1 41,"{'P': {54: 0, 53: 0, 47: 0, 44: 0, 38: 0, 31: ...",Yes,white,0,0,0,1,6gNdNSxyAnvsdxc
2673,6gNdNSxyAnvsdxc-black-0,8/5pk1/4p2p/6p1/3K2PP/p2B2P1/1r6/8 w - - 0 42,"{'P': {54: 0, 53: 0, 47: 0, 44: 0, 38: 0, 31: ...",Yes,black,0,0,0,0,6gNdNSxyAnvsdxc
2674,6gNdNSxyAnvsdxc-white-1,8/5pk1/4p2p/6p1/6PP/p1KB2P1/1r6/8 b - - 1 42,"{'P': {54: 0, 53: 0, 47: 0, 44: 0, 38: 0, 31: ...",Yes,white,0,0,0,1,6gNdNSxyAnvsdxc
2675,6gNdNSxyAnvsdxc-black-2,8/5pk1/1r2p2p/6p1/6PP/p1KB2P1/8/8 w - - 2 43,"{'P': {54: 0, 53: 0, 47: 0, 44: 0, 41: 0, 38: ...",Yes,black,0,0,0,2,6gNdNSxyAnvsdxc


In [22]:
pgn2 = open("../raw_data/Fics_data_pc_data.pgn", encoding='UTF-8')  # always a Comp vs Player

game_string_list = []
game_counter = 0

while True: 
    
    game = chess.pgn.read_game(pgn2)
    game_string = str(game)
    game_string_list.append(game_string)
    
    game_counter += 1
    if game_counter == 50:  # number of games to read        
        break

print(game_string_list[15])

[Event "FICS rated blitz game"]
[Site "FICS freechess.org"]
[Date "2021.01.31"]
[Round "?"]
[White "Geforce"]
[Black "forlat"]
[Result "1-0"]
[BlackClock "0:03:00.000"]
[BlackElo "1986"]
[BlackIsComp "Yes"]
[BlackRD "0.0"]
[ECO "B00"]
[FICSGamesDBGameNo "490373183"]
[PlyCount "41"]
[Time "22:38:00"]
[TimeControl "180+0"]
[WhiteClock "0:03:00.000"]
[WhiteElo "2194"]
[WhiteRD "0.0"]

1. e4 { [%emt 0.0] } 1... Nc6 { [%emt 0.0] } 2. Nf3 { [%emt 0.581] } 2... e6 { [%emt 0.214] } 3. Nc3 { [%emt 0.561] } 3... Be7 { [%emt 1.164] } 4. d4 { [%emt 2.316] } 4... b6 { [%emt 1.506] } 5. Bd3 { [%emt 0.643] } 5... Nh6 { [%emt 0.951] } 6. a3 { [%emt 1.101] } 6... O-O { [%emt 0.914] } 7. h4 { [%emt 1.955] } 7... Bf6 { [%emt 1.153] } 8. e5 { [%emt 0.306] } 8... Be7 { [%emt 1.599] } 9. Bxh6 { [%emt 0.4] } 9... gxh6 { [%emt 1.764] } 10. Qd2 { [%emt 0.1] } 10... Bb7 { [%emt 1.391] } 11. Qxh6 { [%emt 0.831] } 11... f5 { [%emt 1.205] } 12. exf6 { [%emt 1.531] } 12... Rf7 { [%emt 1.797] } 13. Ng5 { [%emt 0.382

In [23]:
game.headers["Time"]

'14:24:00'

# search

In [24]:
def search_df(df, column, value):
    try:
        index = np.where(df[column] == value)
        df = df.iloc[index]
        if len(df) == 0:
            return f'No games found for {value}.'
        return df
    except KeyError as e:
        print(e, f'not found.')

In [25]:
search_df(df_moves, 'ECO', 'A01')

'ECO' not found.


In [26]:
move1 = chess.Move.from_uci(uci="g8f6")
move1

Move.from_uci('g8f6')

In [27]:
# move = df_moves["FEN_moves"]
# board = chess.Board(move)
# board

In [28]:
#board.lan(move=move1)

In [29]:
positions = board.piece_map()

In [30]:
PIECES = list(set((val) for val in positions.values()))
PIECES

[Piece.from_symbol('P'),
 Piece.from_symbol('B'),
 Piece.from_symbol('K'),
 Piece.from_symbol('p'),
 Piece.from_symbol('r'),
 Piece.from_symbol('k')]

In [31]:
SQUARES = [i for i in range(64)]

In [32]:
for square in SQUARES:
    print(square)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63


In [33]:
54 in positions.keys()

True

In [34]:
bitmap_board_dict = {}

for piece in PIECES:
    bitmap_board = {}
    for square in SQUARES:
        if square in positions.keys():
            if positions[square] == piece: 
                bitmap_board[square] = 1
            else:
                bitmap_board[square] = 0
        else:
            bitmap_board[square] = 0
    bitmap_board_dict[str(piece)] = bitmap_board

In [35]:
bitmap_board_dict

{'P': {0: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  13: 0,
  14: 0,
  15: 0,
  16: 0,
  17: 0,
  18: 0,
  19: 0,
  20: 0,
  21: 0,
  22: 1,
  23: 0,
  24: 0,
  25: 0,
  26: 0,
  27: 0,
  28: 0,
  29: 0,
  30: 1,
  31: 1,
  32: 0,
  33: 0,
  34: 0,
  35: 0,
  36: 0,
  37: 0,
  38: 0,
  39: 0,
  40: 0,
  41: 0,
  42: 0,
  43: 0,
  44: 0,
  45: 0,
  46: 0,
  47: 0,
  48: 0,
  49: 0,
  50: 0,
  51: 0,
  52: 0,
  53: 0,
  54: 0,
  55: 0,
  56: 0,
  57: 0,
  58: 0,
  59: 0,
  60: 0,
  61: 0,
  62: 0,
  63: 0},
 'B': {0: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  13: 0,
  14: 0,
  15: 0,
  16: 0,
  17: 0,
  18: 0,
  19: 0,
  20: 0,
  21: 0,
  22: 0,
  23: 0,
  24: 0,
  25: 0,
  26: 1,
  27: 0,
  28: 0,
  29: 0,
  30: 0,
  31: 0,
  32: 0,
  33: 0,
  34: 0,
  35: 0,
  36: 0,
  37: 0,
  38: 0,
  39: 0,
  40: 0,
  41: 0,
  42: 0,
  43: 0,
  44: 0,
  45: 0,
  46: 0,
  47: 0,
 

In [36]:
board.has_legal_en_passant()

False

In [37]:
board.has_pseudo_legal_en_passant()

False

In [38]:
board.halfmove_clock

3

# Data import with package

In [39]:
player_df2, game_df2, move_df2 = data.ChessData().import_data(data_path='../raw_data/Fics_data_pc_data.pgn', 
                                                              import_lim=50)


NameError: name 'data' is not defined

In [None]:
move_df2

## Binary vector representation for board

In [None]:
df_test = pd.DataFrame(move_df2['Bitmap_moves'][0])

In [None]:
df_test

In [None]:
dict_wide = {}

for index1, i in enumerate(df_test.columns):
    for index2, j in enumerate(df_test.index):
        dict_wide[str(i)+str(j)] = [df_test.iloc[index2, index1]]

In [None]:
dict_wide

In [None]:
pd.DataFrame(dict_wide)

In [None]:
from cc_detector.move import binary_board_df

In [None]:
df_wide = binary_board_df(move_df2)
#very inefficient so far (takes very long, probably needs to be redone differently)

In [None]:
df_wide