# **Data Ingestion Test**

It is November 14th, 2025 at 5:47 PM, and I have written the `download_user()` function and the `MoveData` and `MetaData` classes. I'm testing the data ingestion workflow using these components.

In [1]:
# Dependencies
import os
import shutil
import sys

import pandas as pd

# Make the project root importable before pulling in project modules.
sys.path.append('../..')

from Utils.download_user import download_user
from Core.metadata import MetaData
from Core.movedata import MoveData

In [2]:
# Fetch the player's game archives up to the given end month and save them
# as a single PGN file.
# NOTE(review): later cells read the file from ../../Data/PGN/, so
# download_user presumably writes `output` there - confirm.
username, output_file = "BKChessMaster2", 'ingestion_test.pgn'
download_user(username, end='2023-01', output=output_file)

Fetching archives for: bkchessmaster2
Filtering archives in selected date range...
Selected 1 archives:
  → https://api.chess.com/pub/player/bkchessmaster2/games/2022/12

Downloading 1 archives...
  → Downloading 2022/12

Saving PGN...
Done.


In [None]:
# Initialize the MetaData parser on the downloaded PGN file.
# NOTE(review): assumes download_user() placed `output_file` in ../../Data/PGN - confirm.
meta_parser = MetaData(f'../../Data/PGN/{output_file}')

# Convert the parsed games to a DataFrame.
df_meta = meta_parser.to_dataframe()

# Save as CSV. Create the target folder first so this cell also runs on a
# fresh checkout where Data/Raw does not exist yet (to_csv does not create
# missing directories).
meta_csv_path = "../../Data/Raw/ing_test_meta.csv"
os.makedirs(os.path.dirname(meta_csv_path), exist_ok=True)
df_meta.to_csv(meta_csv_path)

# Preview the first rows.
display(df_meta.head())

Unnamed: 0_level_0,Event,Site,Date,Round,White,Black,Result,CurrentPosition,Timezone,ECO,...,UTCDate,UTCTime,WhiteElo,BlackElo,TimeControl,Termination,StartTime,EndDate,EndTime,Link
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Live Chess,Chess.com,2022.12.17,-,BuddyR0ck,BKChessMaster2,1-0,r2qk2r/2n3b1/2p2N2/p3p2b/1p1P4/4P3/P2N1P1P/R1B...,UTC,D00,...,2022.12.17,02:19:27,695,252,180,BuddyR0ck won on time,02:19:27,2022.12.17,02:25:15,https://www.chess.com/game/live/64982452151
2,Live Chess,Chess.com,2022.12.15,-,BKChessMaster2,BuddyR0ck,0-1,r4r2/pp1bnp1k/7p/2b4P/P3P2R/1PN5/3K1PP1/n4B2 w...,UTC,A00,...,2022.12.15,05:26:40,281,672,180,BuddyR0ck won on time,05:26:40,2022.12.15,05:32:26,https://www.chess.com/game/live/64821109161
3,Live Chess,Chess.com,2022.12.15,-,BuddyR0ck,BKChessMaster2,1-0,rnbqk1nr/1p1p1Qpp/2p5/p3p3/1bB1P3/2N5/PPPP1PPP...,UTC,C25,...,2022.12.15,05:25:29,636,328,180,BuddyR0ck won by checkmate,05:25:29,2022.12.15,05:26:12,https://www.chess.com/game/live/64821094989
4,Live Chess,Chess.com,2022.12.15,-,BKChessMaster2,BuddyR0ck,0-1,r3k1nr/ppp1bRpp/3p4/4p3/P3K2P/1bPPR3/3q2P1/8 w...,UTC,A00,...,2022.12.15,05:20:19,432,568,180,BuddyR0ck won on time,05:20:19,2022.12.15,05:25:01,https://www.chess.com/game/live/64820550035


In [None]:
# Initialize the MoveData parser on the downloaded PGN file.
# NOTE(review): assumes download_user() placed `output_file` in ../../Data/PGN - confirm.
move_parser = MoveData(f'../../Data/PGN/{output_file}')

# Convert the parsed moves to a DataFrame.
df_moves = move_parser.to_dataframe()

# Save as CSV. Create the target folder first so this cell also runs on a
# fresh checkout where Data/Raw does not exist yet (to_csv does not create
# missing directories).
# NOTE(review): to_csv writes the row index by default, so the positional
# index ends up as an unnamed first column in the file; pass index=False if
# downstream readers do not need it - confirm before changing.
moves_csv_path = "../../Data/Raw/ing_test_moves.csv"
os.makedirs(os.path.dirname(moves_csv_path), exist_ok=True)
df_moves.to_csv(moves_csv_path)

# Preview the first rows.
display(df_moves.head())

Unnamed: 0,game_id,ply,color,move,clock,eval,uci,fen
0,1,1,white,d4,0:03:00,,d2d4,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...
1,1,2,black,d5,0:03:00,,d7d5,rnbqkbnr/ppp1pppp/8/3p4/3P4/8/PPP1PPPP/RNBQKBN...
2,1,3,white,e3,0:02:58.3,,e2e3,rnbqkbnr/ppp1pppp/8/3p4/3P4/4P3/PPP2PPP/RNBQKB...
3,1,4,black,e6,0:02:51.9,,e7e6,rnbqkbnr/ppp2ppp/4p3/3p4/3P4/4P3/PPP2PPP/RNBQK...
4,1,5,white,Bb5+,0:02:55.7,,f1b5,rnbqkbnr/ppp2ppp/4p3/1B1p4/3P4/4P3/PPP2PPP/RNB...


In this part I am testing data cleaning and adding Stockfish evaluations; eventually this will all be done in an automated workflow.

In [5]:
# ---------------------------------------------------------------
# Cleaning stage: metadata first, then moves.
# (add_engine_eval is only imported here; it is called in a later cell.)
# ---------------------------------------------------------------

from Utils.clean_meta import clean_metadata
from Utils.clean_moves import clean_moves
from Utils.add_eval import add_engine_eval

# Step 1 - metadata: clean the raw frame into a new name, then preview it.
df_meta_clean = clean_metadata(df_meta)
print("MetaData cleaned:")
meta_preview = df_meta_clean.head()
display(meta_preview)

# Step 2 - moves: same pattern for the per-ply frame.
df_moves_clean = clean_moves(df_moves)
print("MoveData cleaned:")
moves_preview = df_moves_clean.head()
display(moves_preview)

MetaData cleaned:


Unnamed: 0_level_0,Event,Site,Date,Round,White,Black,Result,CurrentPosition,Timezone,ECO,...,UTCDate,UTCTime,WhiteElo,BlackElo,TimeControl,Termination,StartTime,EndDate,EndTime,Link
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Live Chess,Chess.com,2022-12-17,-,BuddyR0ck,BKChessMaster2,1-0,r2qk2r/2n3b1/2p2N2/p3p2b/1p1P4/4P3/P2N1P1P/R1B...,UTC,D00,...,2022.12.17,02:19:27,695,252,180,BuddyR0ck won on time,02:19:27,2022.12.17,02:25:15,https://www.chess.com/game/live/64982452151
2,Live Chess,Chess.com,2022-12-15,-,BKChessMaster2,BuddyR0ck,0-1,r4r2/pp1bnp1k/7p/2b4P/P3P2R/1PN5/3K1PP1/n4B2 w...,UTC,A00,...,2022.12.15,05:26:40,281,672,180,BuddyR0ck won on time,05:26:40,2022.12.15,05:32:26,https://www.chess.com/game/live/64821109161
3,Live Chess,Chess.com,2022-12-15,-,BuddyR0ck,BKChessMaster2,1-0,rnbqk1nr/1p1p1Qpp/2p5/p3p3/1bB1P3/2N5/PPPP1PPP...,UTC,C25,...,2022.12.15,05:25:29,636,328,180,BuddyR0ck won by checkmate,05:25:29,2022.12.15,05:26:12,https://www.chess.com/game/live/64821094989
4,Live Chess,Chess.com,2022-12-15,-,BKChessMaster2,BuddyR0ck,0-1,r3k1nr/ppp1bRpp/3p4/4p3/P3K2P/1bPPR3/3q2P1/8 w...,UTC,A00,...,2022.12.15,05:20:19,432,568,180,BuddyR0ck won on time,05:20:19,2022.12.15,05:25:01,https://www.chess.com/game/live/64820550035


MoveData cleaned:


Unnamed: 0,game_id,ply,color,move,clock,eval,uci,fen
0,1,1,white,d4,180.0,,d2d4,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...
1,1,2,black,d5,180.0,,d7d5,rnbqkbnr/ppp1pppp/8/3p4/3P4/8/PPP1PPPP/RNBQKBN...
2,1,3,white,e3,178.3,,e2e3,rnbqkbnr/ppp1pppp/8/3p4/3P4/4P3/PPP2PPP/RNBQKB...
3,1,4,black,e6,171.9,,e7e6,rnbqkbnr/ppp2ppp/4p3/3p4/3P4/4P3/PPP2PPP/RNBQK...
4,1,5,white,Bb5+,175.7,,f1b5,rnbqkbnr/ppp2ppp/4p3/1B1p4/3P4/4P3/PPP2PPP/RNB...


In [6]:
# Locate the Stockfish engine instead of hard-coding one platform's path.
# Candidates are tried in order:
#   1. the STOCKFISH_PATH environment variable (set this on Windows/macOS),
#   2. the Linux/Docker default install location,
#   3. any `stockfish` executable found on PATH.
stockfish_candidates = [
    os.environ.get("STOCKFISH_PATH"),
    "/usr/games/stockfish",
    "stockfish",
]
# shutil.which returns None for anything that is not an existing executable,
# so the first non-None result is the first usable engine binary.
stockfish_matches = (shutil.which(c) for c in stockfish_candidates if c)
stockfish_path = next((m for m in stockfish_matches if m), None)
if stockfish_path is None:
    # Fail fast with an actionable message rather than an opaque engine error.
    raise FileNotFoundError(
        "Stockfish executable not found. Install Stockfish or set the "
        "STOCKFISH_PATH environment variable to the engine binary."
    )

# Search depth passed to add_engine_eval; lower it for faster test runs.
STOCKFISH_DEPTH = 15

# 3️⃣ Add Stockfish evaluation
# NOTE: Can take a while if many moves
# NOTE(review): the last recorded run of this cell ended in a bare
# NotImplementedError. The cause is not visible from this notebook: it may be
# raised by add_engine_eval itself, or (if the engine is launched through
# asyncio on Windows) by an event loop without subprocess support - confirm.
df_moves_eval = add_engine_eval(
    df_moves_clean,
    engine_path=stockfish_path,
    depth=STOCKFISH_DEPTH,
)
print("MoveData with Stockfish eval:")
display(df_moves_eval.head())

NotImplementedError: 