# In [None]:
import chess.pgn
import pandas as pd

# Parse every game in the PGN file into one flat record (dict) per game.
games_data = []

# The context manager guarantees the file handle is closed once parsing
# finishes, even if read_game() raises part-way through the file.
with open("games_1990.pgn") as pgn_file:
    while True:
        # read_game() returns None when there are no more games to read.
        game = chess.pgn.read_game(pgn_file)
        if game is None:
            break

        headers = game.headers
        # Key order is kept as-is because it determines the column order of
        # the DataFrame built from these records.
        games_data.append({
            "White": headers.get("White"),
            "Black": headers.get("Black"),
            "Result": headers.get("Result"),
            "Event": headers.get("Event"),
            "Date": headers.get("Date"),
            # Main line only, stored as space-separated UCI move strings.
            "Moves": " ".join(move.uci() for move in game.mainline_moves()),
            # Ratings are kept as raw header text here; a missing tag yields None.
            "WhiteElo": headers.get("WhiteElo"),
            "BlackElo": headers.get("BlackElo"),
        })

# Build a table with one row per parsed game (columns follow the dict keys).
df = pd.DataFrame(games_data)

# Quick sanity check: preview the first 20 games, then report the total count.
preview = df.head(20)
print(preview)
print('Number of rows:', df.shape[0])

# Data cleaning
# Drop the row labelled 0, i.e. the first game parsed. This happens
# unconditionally.
# NOTE(review): confirm the first record really is unwanted for this file.
df = df.drop(index=0)

# Remove rows whose "Moves" text contains "kampflos" or "een"; missing values
# are treated as non-matching (na=False).
unwanted = df["Moves"].str.contains("kampflos|een", na=False)
# Take an explicit copy of the filtered rows so the column assignments below
# write to an independent frame rather than a slice of the original (avoids
# SettingWithCopyWarning on pandas versions without copy-on-write).
df = df.loc[~unwanted].copy()

# Parse "Date" as YYYY.MM.DD; values that do not match become NaT.
df["Date"] = pd.to_datetime(df["Date"], format="%Y.%m.%d", errors="coerce")

# Substitute 1990-01-01 as the default for dates that could not be parsed.
df["Date"] = df["Date"].fillna(pd.to_datetime("1990-01-01"))

# Convert the rating columns to numbers; missing or non-numeric entries
# become NaN.
df["WhiteElo"] = pd.to_numeric(df["WhiteElo"], errors="coerce")
df["BlackElo"] = pd.to_numeric(df["BlackElo"], errors="coerce")

# Show the column types after cleaning for verification.
print(df.dtypes)

# Write the cleaned table to CSV without the index column.
df.to_csv("games_1990_with_ELO_try.csv", index=False)
