In [156]:
%reset -f
%cd /home/assaf/jupyter/chess


import chess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import pickle

def get_np_board(board_):
    matrix = np.zeros((8, 8), dtype=np.int8)
    piece_values = {
        chess.PAWN: 1,
        chess.KNIGHT: 2,
        chess.BISHOP: 3,
        chess.ROOK: 4,
        chess.QUEEN: 5,
        chess.KING: 6,
    }
    for square, piece in board.piece_map().items():
        row = 7 - chess.square_rank(square)
        col = chess.square_file(square)
        value = piece_values[piece.piece_type]
        matrix[row, col] = value if piece.color == chess.WHITE else -value    
    return matrix

/home/assaf/jupyter/chess


In [157]:

# !rm -fr all_states_ds.pkl

df = pd.read_csv("games.csv")

try:
    with open("all_states_ds.pkl", "rb") as f:
        states_df = pickle.load(f) 
    print('loaded states and pickle from disk successfully')

except Exception:
    print('cannot load states from disk. calculating')
    states = []
    sog = []
    for i in range(len(df)):
        board = chess.Board()
        thisgame = df.loc[i]
        if thisgame['victory_status'] == 'outoftime':
            continue

        sog.append(len(states))

        game_san = thisgame.moves.split()
        for j, step_san in enumerate(game_san):
            move = board.parse_san(step_san)  # Converts SAN to Move
            board.push(move)  # Make the move on the board

            if j<20:
                continue
            state = []
            state.append(i)
            state.append(j+1)
            state.append(thisgame['turns']) 
            state.append(thisgame['victory_status'])  
            state.append(thisgame['winner'])
            state.append(thisgame['white_rating'])
            state.append(thisgame['black_rating'])
            state.append([board.has_queenside_castling_rights(chess.WHITE), board.has_kingside_castling_rights(chess.WHITE)])
            state.append([board.has_queenside_castling_rights(chess.BLACK), board.has_kingside_castling_rights(chess.BLACK)])   
            state.append(get_np_board(board))

            # state.append(str(board).replace(" ", "").replace("\n", "/"))
            # state.append(str(board).replace(" ", "").replace("\n", ""))
            states.append(state)

        # print (i, len(states))
    states_df = pd.DataFrame(states, columns=[ "gameid","turn", "turns", 'victory_status', 'winner', \
                                            'white_rating', 'black_rating',\
                                            'white_castling', 'black_castling', \
                                            'matrix', ])     
    with open("all_states_ds.pkl", "wb") as f:
        pickle.dump(states_df, f)
print (f'dataset length is {len(states_df)} ' )

sog = [sog[i] for i in range(len(sog)) if sog[i] != sog[i-1]]
ttsplit = sog[len(sog)*9 // 10]
#ttsplit = 1080598
train = states_df.iloc[:ttsplit]
test = states_df.iloc[ttsplit:]
display(train.head())
test = test.reset_index(drop=True)
display(test.head())

with open("train_states_ds.pkl", "wb") as f:
    pickle.dump(train, f)
with open("test_states_ds.pkl", "wb") as f:
    pickle.dump(test, f)

loaded states and pickle from disk successfully
dataset length is 735842 


NameError: name 'sog' is not defined

In [180]:
result_str = {
    "1-0": 'white',
    "0-1": 'black',
    "1/2-1/2": 'draw'
}

In [200]:
from pathlib import Path

states = []
sog = []
gameid = 0
for p in Path("data/v0").glob("*.pkl"):
    print(p.resolve())
    with open(p.resolve(), "rb") as f:
        gamelist = pickle.load(f) 
        
        for i in range(len(gamelist)):
            board = chess.Board()
            thisgame = gamelist[i][0]

            sog.append(len(states))
            
            for j, movestr in enumerate(thisgame):
                # print (movestr)
                move = board.parse_uci(movestr)  

                board.push(move)  # Make the move on the board

                if j<20:
                    continue
                state = []
                state.append(gameid)
                state.append(j+1)
                state.append(len(thisgame)) 
                state.append(0)  
                state.append(result_str[gamelist[i][1]])
                state.append(0)
                state.append(0)
                state.append([board.has_queenside_castling_rights(chess.WHITE), board.has_kingside_castling_rights(chess.WHITE)])
                state.append([board.has_queenside_castling_rights(chess.BLACK), board.has_kingside_castling_rights(chess.BLACK)])   
                state.append(get_np_board(board))

                # state.append(str(board).replace(" ", "").replace("\n", "/"))
                # state.append(str(board).replace(" ", "").replace("\n", ""))
                states.append(state)

            gameid+=1    

    states_df = pd.DataFrame(states, columns=[ "gameid","turn", "turns", 'victory_status', 'winner', \
                                            'white_rating', 'black_rating',\
                                            'white_castling', 'black_castling', \
                                            'matrix', ])     


/home/assaf/jupyter/chess/data/v0/20260108-012002-931583_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-895759_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260108-012027-683548_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260108-011810-662767_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260108-012008-779362_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-892931_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260108-012444-218857_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-895610_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260108-011706-890671_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-892334_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-957322_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-912440_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-901028_slow.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-890649_fast.pkl
/home/assaf/jupyter/chess/data/v0/20260107-205007-891141_fast.pkl
/home/assa

In [197]:
gamelist[i][1]

'1/2-1/2'

In [None]:

dirname = 'v0'

# !rm -fr all_states_ds.pkl

df = pd.read_csv("games.csv")

try:
    with open("all_states_ds.pkl", "rb") as f:
        states_df = pickle.load(f) 
    print('loaded states and pickle from disk successfully')

except Exception:
    print('cannot load states from disk. calculating')
    states = []
    sog = []
    for i in range(len(df)):
        board = chess.Board()
        thisgame = df.loc[i]
        if thisgame['victory_status'] == 'outoftime':
            continue

        sog.append(len(states))

        game_san = thisgame.moves.split()
        for j, step_san in enumerate(game_san):
            move = board.parse_san(step_san)  # Converts SAN to Move
            board.push(move)  # Make the move on the board

            if j<20:
                continue
            state = []
            state.append(i)
            state.append(j+1)
            state.append(thisgame['turns']) 
            state.append(thisgame['victory_status'])  
            state.append(thisgame['winner'])
            state.append(thisgame['white_rating'])
            state.append(thisgame['black_rating'])
            state.append([board.has_queenside_castling_rights(chess.WHITE), board.has_kingside_castling_rights(chess.WHITE)])
            state.append([board.has_queenside_castling_rights(chess.BLACK), board.has_kingside_castling_rights(chess.BLACK)])   
            state.append(get_np_board(board))

            # state.append(str(board).replace(" ", "").replace("\n", "/"))
            # state.append(str(board).replace(" ", "").replace("\n", ""))
            states.append(state)

        # print (i, len(states))
    states_df = pd.DataFrame(states, columns=[ "gameid","turn", "turns", 'victory_status', 'winner', \
                                            'white_rating', 'black_rating',\
                                            'white_castling', 'black_castling', \
                                            'matrix', ])     
    with open("all_states_ds.pkl", "wb") as f:
        pickle.dump(states_df, f)
print (f'dataset length is {len(states_df)} ' )

sog = [sog[i] for i in range(len(sog)) if sog[i] != sog[i-1]]
ttsplit = sog[len(sog)*9 // 10]
#ttsplit = 1080598
train = states_df.iloc[:ttsplit]
test = states_df.iloc[ttsplit:]
display(train.head())
test = test.reset_index(drop=True)
display(test.head())

with open("train_states_ds.pkl", "wb") as f:
    pickle.dump(train, f)
with open("test_states_ds.pkl", "wb") as f:
    pickle.dump(test, f)

Unnamed: 0,gameid,turn,turns,victory_status,winner,white_rating,black_rating,white_castling,black_castling,matrix
0,2,21,61,mate,white,1496,1500,"[True, True]","[False, True]","[[0, 0, -3, -5, -6, -3, -2, -4], [0, 0, 0, 0, ..."
