In [1]:
# Planned tables and their columns:
# df_players: name, elo, state (H/m), move time avg (not possible yet), acpl game avg
# df_games:   p1_ID, p2_ID, winner_ID, date, opening, p1_ELO, p2_ELO
# df_moves:   player_ID, game_ID, time, eval, fen/move, comment

In [2]:
import pandas as pd
import numpy as np
import uuid        # for id generation
import shortuuid

import chess.pgn
from stockfish import Stockfish

In [3]:
# stockfish = Stockfish(
#     'stockfish_14_linux_x64/stockfish_14_linux_x64/stockfish_14_x64', 
#     parameters={"Threads": 2, 'Min Split Depth': 26, 'Ponder':True}
# )
# stockfish.set_elo_rating(2600)
# stockfish.set_skill_level(30)

In [4]:
%%time

# Column-oriented accumulators: every key maps to a list that receives
# exactly one entry per game read, so each dict can be handed to pd.DataFrame.
players = {
    'White':[],
    'White_Elo': [],
    'Black': [],
    'Black_Elo': [],
    'WhiteIsComp':[],
}

games = {
    'Game_ID': [],
    'Date' : [],
    'White':[],  # Dummy ID
    'White_Elo': [],
    'Black': [],  # Dummy ID
    'Black_Elo': [],
    'ECO': [],
    'Result': [],
}

moves_log_dict = {
    'Game_ID': [],
    'White': [],  # Dummy ID
    'Black': [],  # Dummy ID
    'ECO': [],
    'FEN_moves': [],
    #'cpl': [],
    'WhiteIsComp': [],
    'Result': [],
}

MAX_GAMES = 55  # number of games to read
game_counter = 0

# read file; the context manager closes the handle even if parsing fails
with open("data/Fics_data_pc_data.pgn", encoding='UTF-8') as pgn:  # always a Comp vs Player
    while game_counter < MAX_GAMES:  # keep reading games
        game = chess.pgn.read_game(pgn)
        if game is None:  # no further games to read
            print('No further games to load.')
            break

        headers = game.headers
        board = game.board()
        moves = list(game.mainline_moves())
        # The header is optional, so fall back to 'No' when it is absent.
        white_is_comp = headers.get('WhiteIsComp', 'No')

        # Player
        players['White_Elo'].append(headers['WhiteElo'])
        players['Black_Elo'].append(headers['BlackElo'])
        players['White'].append(headers['White'])
        players['Black'].append(headers['Black'])
        players['WhiteIsComp'].append(white_is_comp)

        # Games
        games['Game_ID'].append(headers['FICSGamesDBGameNo'])
        games['White'].append(headers['White'])  # dummy ID
        games['Black'].append(headers['Black'])  # dummy ID
        games['White_Elo'].append(headers['WhiteElo'])
        games['Black_Elo'].append(headers['BlackElo'])
        games['ECO'].append(headers['ECO'])
        games['Result'].append(headers['Result'])
        games['Date'].append(headers['Date'])

        # Moves
        fen_pos = []
        acpl = []

        # The FICS game number serves as a dummy ID; it is specific to this dataset.
        moves_log_dict['Game_ID'].append(headers['FICSGamesDBGameNo'])
        moves_log_dict['White'].append(headers['White'])  # dummy ID
        moves_log_dict['Black'].append(headers['Black'])  # dummy ID
        moves_log_dict['ECO'].append(headers['ECO'])
        moves_log_dict['WhiteIsComp'].append(white_is_comp)
        moves_log_dict['Result'].append(headers['Result'])

        # MOVE CYCLE: replay the main line and record the FEN after every move
        for move in moves:
            board.push(move)
            fen_pos.append(board.fen())
            #stockfish.set_fen_position(board.fen())  # load stockfish with current FEN for eval
#             cpl = stockfish.get_evaluation()['value']/100
#             acpl.append(cpl)

        moves_log_dict['FEN_moves'].append(fen_pos)
        #moves_log_dict['cpl'].append(acpl)

        game_counter += 1

print(f'{game_counter} games read.')
#TODO takes ~1 sec to process 5 games, too slow.

55 games read.
CPU times: user 528 ms, sys: 1.93 ms, total: 530 ms
Wall time: 528 ms


In [5]:
def id_generator(id):
    """Return a fresh random 128-bit integer ID (a version-4 UUID as an int).

    The ``id`` argument is ignored; accepting one positional argument lets the
    function be passed directly to ``Series.apply``.
    """
    fresh_uuid = uuid.uuid4()
    return fresh_uuid.int

In [6]:
def short_id_gen(id):
    """Return a random 15-character ID made of digits and letters.

    The ``id`` argument is ignored; accepting one positional argument lets the
    function be passed directly to ``Series.apply``.
    """
    generator = shortuuid.ShortUUID()
    return generator.random(length=15)

## df_players

In [7]:
# One row per game read: both player names, their ratings and the WhiteIsComp header.
df_players = pd.DataFrame(data=players)
df_players.head(n=5)

Unnamed: 0,White,White_Elo,Black,Black_Elo,WhiteIsComp
0,forlat,1970,Geforce,2204,Yes
1,Geforce,2201,forlat,1973,No
2,forlat,1976,Geforce,2198,Yes
3,Geforce,2211,forlat,1963,No
4,forlat,1958,Geforce,2216,Yes


In [8]:
#pgn = open("data/Fics_data_pc_data.pgn", encoding='UTF-8') 

In [9]:
#game = chess.pgn.read_game(pgn)

In [10]:
# def players_id_list(input_df, c_black, c_white): # c_white, c_black - columns of respective df, must be a string
#     black = input_df[c_black]
#     white = input_df[c_white]
#     players_names = list(set(list(black) + list(white)))
#     df_pl = {'Players': players_names}
#     players_id = pd.DataFrame(df_pl)
#     players_id['Player_ID'] = players_id['Players'].apply(id_generator)
#     return players_id

#download list? input_df.to_csv("/notebooks/data/players_id")

In [11]:
# Registry of known players (name -> generated ID); it starts empty and is grown by players_id_list.
players_id = pd.DataFrame({'Players': [], 'Player_ID' : []})

def players_id_list(input_df, players_id):
    """Return the player registry extended with every player found in ``input_df``.

    Parameters
    ----------
    input_df : DataFrame with "White" and "Black" columns holding player names.
    players_id : DataFrame with "Players" and "Player_ID" columns (may be empty).

    Returns
    -------
    A new DataFrame with the same two columns. Players already present keep
    their ID; players seen for the first time receive one from ``id_generator``.
    The ``players_id`` frame passed in is not modified.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # yield a different row order on every kernel restart.
    names = list(dict.fromkeys(list(input_df["Black"]) + list(input_df["White"])))
    seen_players = pd.DataFrame(names, columns=["Players"])

    # Outer join: known players keep their row, unseen ones appear with a NaN ID.
    registry = players_id.merge(seen_players, how="outer", on="Players")

    # Keep the column as object dtype so the 128-bit integer IDs are stored exactly.
    current_ids = registry["Player_ID"].astype(object)
    missing = current_ids.isna()

    # Generate IDs only for the rows that actually lack one.
    new_ids = registry.loc[missing, "Players"].apply(id_generator)
    registry["Player_ID"] = current_ids.fillna(new_ids)
    return registry

In [12]:
players_id = players_id_list(df_players, players_id)

In [13]:
players_id

Unnamed: 0,Player_ID,Players
0,37750120470796774940946021828330853189,JMM
1,47859571126933762240860894077230029103,Notarious
2,39079014072281629375993158042235458802,Geforce
3,116481846209014659875171940727396015342,chessloon
4,94583294762982280763994511429366506669,chesspickle
5,183476485039464053473278531727945236866,Genobear
6,262689058280760320197527459797488794941,sussexman
7,282277557686169518203908625900360828870,exeComp
8,161089932012460008026532780226205483283,Ghannoum
9,51606036498656115262770652016839208777,forlat


In [14]:
# Hand-made fixture for exercising the ID helpers.
df_dummy = pd.DataFrame(
    {
        'White': ["12345", 'DummyName', "1234", "forlat", "Geforce"],
        'Black': ['DummyName', "12345", "Dummy", "Geforce", "Bambi"],
    }
)

In [15]:
df_dummy

Unnamed: 0,White,Black
0,12345,DummyName
1,DummyName,12345
2,1234,Dummy
3,forlat,Geforce
4,Geforce,Bambi


In [16]:
players_id  = players_id_list(df_dummy, players_id)

In [17]:
def assign_player_id(input_df, id_table=None):
    """Return a copy of ``input_df`` with ``White_ID`` and ``Black_ID`` columns added.

    Parameters
    ----------
    input_df : DataFrame with "White" and "Black" columns holding player names.
    id_table : optional DataFrame with "Players" and "Player_ID" columns.
        When omitted, the notebook-level ``players_id`` registry is used.

    Returns
    -------
    A new DataFrame; ``input_df`` itself is not modified. Every input row is
    kept: a player missing from the registry gets a NaN ID instead of the whole
    game being dropped.
    """
    if id_table is None:
        id_table = players_id  # notebook-level registry, looked up at call time

    lookup = id_table[["Players", "Player_ID"]]
    result = input_df
    for colour in ("White", "Black"):
        # Left join so that games with an unregistered player are not lost.
        result = result.merge(lookup, how="left", left_on=colour, right_on="Players")
        result[f"{colour}_ID"] = result["Player_ID"]
        # Drop the helper columns so the next join does not collide with them.
        result = result.drop(columns=["Players", "Player_ID"])
    return result

In [18]:
assign_player_id(df_dummy)

Unnamed: 0,White,Black,White_ID,Black_ID
0,12345,DummyName,202948767092482358032406826310261639367,135237781651802102581240063477170928970
1,DummyName,12345,135237781651802102581240063477170928970,202948767092482358032406826310261639367
2,1234,Dummy,68773919589610040228499065559604370551,195399587339169746430623563080857449006
3,forlat,Geforce,51606036498656115262770652016839208777,39079014072281629375993158042235458802
4,Geforce,Bambi,39079014072281629375993158042235458802,285356832458736435958496485441421273138


In [19]:
# Fix it: the registry is not updated here yet.
def update_and_assign_player_IDs(input_df, players_id):
    """Return ``input_df`` with player ID columns attached via ``assign_player_id``.

    Currently a thin wrapper: the ``players_id`` argument is accepted but not
    used, and no new IDs are generated in this function.
    """
    return assign_player_id(input_df)

In [20]:
update_and_assign_player_IDs(df_dummy, players_id)

Unnamed: 0,White,Black,White_ID,Black_ID
0,12345,DummyName,202948767092482358032406826310261639367,135237781651802102581240063477170928970
1,DummyName,12345,135237781651802102581240063477170928970,202948767092482358032406826310261639367
2,1234,Dummy,68773919589610040228499065559604370551,195399587339169746430623563080857449006
3,forlat,Geforce,51606036498656115262770652016839208777,39079014072281629375993158042235458802
4,Geforce,Bambi,39079014072281629375993158042235458802,285356832458736435958496485441421273138


## df_games

In [27]:
# Swap each FICS game number for a freshly generated 15-character short ID.
df_games = pd.DataFrame(games).assign(
    Game_ID=lambda frame: frame['Game_ID'].apply(short_id_gen)
)
##TODO date to datetime
df_games

Unnamed: 0,Game_ID,Date,White,White_Elo,Black,Black_Elo,ECO,Result
0,49BnG7d2JMS5aU6,2021.01.31,forlat,1970,Geforce,2204,A01,0-1
1,9fCcy5SUFPoGi6w,2021.01.31,Geforce,2201,forlat,1973,B00,1-0
2,BMiEYhPxfrxALfa,2021.01.31,forlat,1976,Geforce,2198,A20,0-1
3,3KqgiKQQAGRupp3,2021.01.31,Geforce,2211,forlat,1963,B10,0-1
4,9Crv9n2VKgzenwL,2021.01.31,forlat,1958,Geforce,2216,D30,1/2-1/2
5,BSXf8sWb32eH6dz,2021.01.31,exeComp,2717,Ruvarashe,1976,B92,1-0
6,3isqbDhJVo2aiAi,2021.01.31,Geforce,2213,forlat,1961,B00,1-0
7,3yQSkgCSFuDLhMw,2021.01.31,forlat,1964,Geforce,2210,C28,0-1
8,6erAD3c375HX8A6,2021.01.31,Ruvarashe,1976,exeComp,2717,C11,0-1
9,3edxpyjoJsZjA5a,2021.01.31,Geforce,2207,forlat,1967,B10,1-0


In [None]:
df_games.apply

In [22]:
# NOTE(review): `game` is not defined in this cell; it is whatever the PGN loading loop
# assigned last — confirm that this is the game whose "Time" header is wanted.
timestamp = game.headers["Time"]

In [29]:
df_games

Unnamed: 0,Game_ID,Date,White,White_Elo,Black,Black_Elo,ECO,Result,Move_ID
0,49BnG7d2JMS5aU6,2021.01.31,forlat,1970,Geforce,2204,A01,0-1,49BnG7d2JMS5aU6-2021.01.31-forlat-Geforce
1,9fCcy5SUFPoGi6w,2021.01.31,Geforce,2201,forlat,1973,B00,1-0,9fCcy5SUFPoGi6w-2021.01.31-Geforce-forlat
2,BMiEYhPxfrxALfa,2021.01.31,forlat,1976,Geforce,2198,A20,0-1,BMiEYhPxfrxALfa-2021.01.31-forlat-Geforce
3,3KqgiKQQAGRupp3,2021.01.31,Geforce,2211,forlat,1963,B10,0-1,3KqgiKQQAGRupp3-2021.01.31-Geforce-forlat
4,9Crv9n2VKgzenwL,2021.01.31,forlat,1958,Geforce,2216,D30,1/2-1/2,9Crv9n2VKgzenwL-2021.01.31-forlat-Geforce
5,BSXf8sWb32eH6dz,2021.01.31,exeComp,2717,Ruvarashe,1976,B92,1-0,BSXf8sWb32eH6dz-2021.01.31-exeComp-Ruvarashe
6,3isqbDhJVo2aiAi,2021.01.31,Geforce,2213,forlat,1961,B00,1-0,3isqbDhJVo2aiAi-2021.01.31-Geforce-forlat
7,3yQSkgCSFuDLhMw,2021.01.31,forlat,1964,Geforce,2210,C28,0-1,3yQSkgCSFuDLhMw-2021.01.31-forlat-Geforce
8,6erAD3c375HX8A6,2021.01.31,Ruvarashe,1976,exeComp,2717,C11,0-1,6erAD3c375HX8A6-2021.01.31-Ruvarashe-exeComp
9,3edxpyjoJsZjA5a,2021.01.31,Geforce,2207,forlat,1967,B10,1-0,3edxpyjoJsZjA5a-2021.01.31-Geforce-forlat


## df_moves

In [30]:
#TODO moves and cpl as features when loading to DB
# Game_ID is taken from df_games (aligned on the row index) so both frames share the generated IDs.
df_moves = pd.DataFrame(moves_log_dict).assign(Game_ID=df_games['Game_ID'])
df_moves.head(n=20)

Unnamed: 0,Game_ID,White,Black,ECO,FEN_moves,WhiteIsComp,Result
0,49BnG7d2JMS5aU6,forlat,Geforce,A01,[rnbqkbnr/pppppppp/8/8/8/1P6/P1PPPPPP/RNBQKBNR...,Yes,0-1
1,9fCcy5SUFPoGi6w,Geforce,forlat,B00,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,No,1-0
2,BMiEYhPxfrxALfa,forlat,Geforce,A20,[rnbqkbnr/pppppppp/8/8/2P5/8/PP1PPPPP/RNBQKBNR...,Yes,0-1
3,3KqgiKQQAGRupp3,Geforce,forlat,B10,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,No,0-1
4,9Crv9n2VKgzenwL,forlat,Geforce,D30,[rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR...,Yes,1/2-1/2
5,BSXf8sWb32eH6dz,exeComp,Ruvarashe,B92,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,Yes,1-0
6,3isqbDhJVo2aiAi,Geforce,forlat,B00,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,No,1-0
7,3yQSkgCSFuDLhMw,forlat,Geforce,C28,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,Yes,0-1
8,6erAD3c375HX8A6,Ruvarashe,exeComp,C11,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,No,0-1
9,3edxpyjoJsZjA5a,Geforce,forlat,B10,[rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR...,No,1-0


In [35]:
# Move_ID is "<Game_ID>-<White>-<Black>", built row by row.
df_moves['Move_ID'] = [
    f"{game_id}-{white}-{black}"
    for game_id, white, black in zip(df_moves['Game_ID'], df_moves['White'], df_moves['Black'])
]

In [43]:
df_moves['FEN_moves'][7]

['rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1',
 'rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2',
 'rnbqkbnr/pppp1ppp/8/4p3/2B1P3/8/PPPP1PPP/RNBQK1NR b KQkq - 1 2',
 'r1bqkbnr/pppp1ppp/2n5/4p3/2B1P3/8/PPPP1PPP/RNBQK1NR w KQkq - 2 3',
 'r1bqkbnr/pppp1ppp/2n5/4p3/2B1P3/2N5/PPPP1PPP/R1BQK1NR b KQkq - 3 3',
 'r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/2N5/PPPP1PPP/R1BQK1NR w KQkq - 4 4',
 'r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/2NP4/PPP2PPP/R1BQK1NR b KQkq - 0 4',
 'r1bqk2r/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP4/PPP2PPP/R1BQK1NR w KQkq - 1 5',
 'r1bqk2r/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP3N/PPP2PPP/R1BQK2R b KQkq - 2 5',
 'r1bqk2r/pppp1pp1/2n2n1p/2b1p3/2B1P3/2NP3N/PPP2PPP/R1BQK2R w KQkq - 0 6',
 'r1bqk2r/pppp1pp1/2n2n1p/2b1p3/2B1P3/2NP3N/PPP2PPP/R1BQ1RK1 b kq - 1 6',
 'r1bqk2r/ppp2pp1/2np1n1p/2b1p3/2B1P3/2NP3N/PPP2PPP/R1BQ1RK1 w kq - 0 7',
 'r1bqk2r/ppp2pp1/2np1n1p/2b1p3/N1B1P3/3P3N/PPP2PPP/R1BQ1RK1 b kq - 1 7',
 'r2qk2r/ppp2pp1/2np1n1p/2b1p3/N1B1P3/3P3b/PPP2PPP/R1BQ1RK1 w kq - 0 8',
 

# search

In [26]:
def search_df(df, column, value):
    """Look up the rows of ``df`` whose ``column`` equals ``value``.

    Returns
    -------
    DataFrame
        The matching rows, when there is at least one.
    str
        The message ``'No games found for <value>.'`` when nothing matches.
    None
        When ``column`` does not exist; the KeyError is printed, not raised.
    """
    try:
        is_match = df[column] == value
        # np.where yields the positions of the True entries, hence iloc.
        hits = df.iloc[np.where(is_match)]
    except KeyError as e:
        print(e, f'not found.')
        return None

    if len(hits) > 0:
        return hits
    return f'No games found for {value}.'