In [None]:
# prepare_data.py
import chess, chess.pgn
import csv
import sys
from tqdm import tqdm

def split_pgn_to_csv(pgn_path, train_csv, val_csv, flush_every=100):
    """Convert a PGN file into train/validation CSVs of ``FEN,value`` rows.

    Games are read one at a time from ``pgn_path``.  For each game, the
    position reached after every mainline move is written as one row whose
    ``value`` is derived from the game's ``Result`` header: ``1.0`` for
    ``"1-0"``, ``-1.0`` for ``"0-1"`` and ``0.0`` for a draw
    (``"1/2-1/2"`` or ``"½-½"``).  The position before the first move is not
    written.  Games with any other result are counted but produce no rows.

    Every 10th game (1-based index over all games read, including skipped
    ones) goes to ``val_csv``; all others go to ``train_csv``.  Both files
    are overwritten and start with a ``FEN,value`` header row.

    Args:
        pgn_path: Path of the UTF-8 encoded PGN file to read.
        train_csv: Path of the training CSV to create.
        val_csv: Path of the validation CSV to create.
        flush_every: Buffered rows are written out each time the running
            game count is a multiple of this number.

    Returns:
        None.  A one-line summary is printed when processing finishes.
    """
    # Unknown results (e.g. "*" for unfinished games) are absent on purpose:
    # .get() then yields None and the game contributes no rows.
    result_values = {"1-0": 1.0, "0-1": -1.0, "1/2-1/2": 0.0, "½-½": 0.0}
    header = ["FEN", "value"]
    game_idx = 0

    # The with-statement guarantees that all three files and the progress bar
    # are closed even if parsing or writing raises part-way through.
    with open(pgn_path, encoding="utf-8") as pgn, \
            open(train_csv, "w", newline="", encoding="utf-8") as f_train, \
            open(val_csv, "w", newline="", encoding="utf-8") as f_val, \
            tqdm(desc="Games", unit="game") as pbar:
        w_train = csv.writer(f_train)
        w_val = csv.writer(f_val)
        w_train.writerow(header)
        w_val.writerow(header)

        train_buf = []
        val_buf = []

        while True:
            game = chess.pgn.read_game(pgn)
            if game is None:  # end of the PGN stream
                break
            game_idx += 1
            pbar.update(1)

            gval = result_values.get(game.headers.get("Result", ""))
            if gval is not None:
                target_buf = val_buf if game_idx % 10 == 0 else train_buf
                board = game.board()
                for move in game.mainline_moves():
                    board.push(move)
                    target_buf.append([board.fen(), gval])

            # Checked for every game, skipped ones included, so a skipped
            # game landing on a flush boundary cannot postpone the flush.
            if game_idx % flush_every == 0:
                _flush_rows(w_train, train_buf)
                _flush_rows(w_val, val_buf)
                pbar.set_postfix(flushed_games=game_idx)

        # Write whatever is left after the last full flush interval.
        _flush_rows(w_train, train_buf)
        _flush_rows(w_val, val_buf)

    print(f"Done. Processed {game_idx} games → {train_csv} & {val_csv}")


def _flush_rows(writer, rows):
    """Write the buffered ``rows`` through ``writer`` and empty the buffer."""
    if rows:
        writer.writerows(rows)
        rows.clear()


In [3]:
# Run the split on the local Lichess PGN dump (2014-07, judging by the file
# name); the two CSVs are written to the current working directory.
split_pgn_to_csv(
    pgn_path="C:/Users/forbe/Downloads/lichess_db_standard_rated_2014-07.pgn/lichess_db_standard_rated_2014-07.pgn",
    train_csv="train.csv",
    val_csv="val.csv",
)

Games: 1048440game [58:46, 297.34game/s, flushed_games=1048400]

Done. Processed 1048440 games → train.csv & val.csv



