In [None]:
import pandas as pd
import glob
import os

def combine_csv_files_with_unique_gameid(input_directory, output_file):
    """
    Combines all analyzed CSV files in the specified directory into a single CSV file,
    adjusting the GameID to ensure uniqueness across all files.

    Parameters:
    - input_directory (str): The directory containing the CSV files to combine.
    - output_file (str): The path to the output CSV file.
    """
    # Define patterns to match your file names
    patterns = [
        #os.path.join(input_directory, "twic*_analyzed.csv"),
        os.path.join(input_directory, "twic*_*_processed.csv"),
    ]

    # Collect all matching files
    csv_files = []
    for pattern in patterns:
        matched_files = glob.glob(pattern)
        csv_files.extend(matched_files)

    # Remove duplicates (if any)
    csv_files = list(set(csv_files))

    # Sort files by name to maintain order
    csv_files.sort()

    # Initialize variables
    df_list = []
    total_games_processed = 0  # Keeps track of the total number of games processed so far

    for file in csv_files:
        try:
            df = pd.read_csv(file)

            # Check if 'GameID' column exists
            if 'GameID' not in df.columns:
                print(f"'GameID' column not found in {file}. Skipping file.")
                continue

            # Get the number of unique games in this file
            unique_game_ids = df['GameID'].nunique()

            # Adjust the GameID by adding total_games_processed
            df['GameID'] += total_games_processed

            # Update total_games_processed
            total_games_processed += unique_game_ids

            df_list.append(df)
            print(f"Processed {file}: {unique_game_ids} games.")
        except Exception as e:
            print(f"Failed to read {file}: {e}")

    # Concatenate all dataframes
    if df_list:
        combined_df = pd.concat(df_list, ignore_index=True)
        combined_df.to_csv(output_file, index=False)
        print(f"Combined {len(df_list)} files into {output_file} with {total_games_processed} unique games.")
    else:
        print("No files were combined.")

# Example usage:
input_directory = "../Cleaned_Analyzed_Games"  # Replace with your directory containing the CSV files
output_file = "../huge_analyzed_games/combined_analyzed_games.csv"
os.makedirs("../huge_analyzed_games/", exist_ok=True)

combine_csv_files_with_unique_gameid(input_directory, output_file)


Processed ../Cleaned_Analyzed_Games/twic1200_16_processed.csv: 2930 games.
Processed ../Cleaned_Analyzed_Games/twic1201_16_processed.csv: 2077 games.
Processed ../Cleaned_Analyzed_Games/twic1202_16_processed.csv: 2874 games.
Processed ../Cleaned_Analyzed_Games/twic1203_16_processed.csv: 2872 games.
Processed ../Cleaned_Analyzed_Games/twic1204_16_processed.csv: 2416 games.
Processed ../Cleaned_Analyzed_Games/twic1205_16_processed.csv: 3284 games.
Processed ../Cleaned_Analyzed_Games/twic1206_16_processed.csv: 5665 games.
Processed ../Cleaned_Analyzed_Games/twic1207_16_processed.csv: 1117 games.
Processed ../Cleaned_Analyzed_Games/twic1208_16_processed.csv: 7811 games.
Processed ../Cleaned_Analyzed_Games/twic1209_16_processed.csv: 3786 games.
Processed ../Cleaned_Analyzed_Games/twic1210_16_processed.csv: 2174 games.
Processed ../Cleaned_Analyzed_Games/twic1211_16_processed.csv: 2123 games.
Processed ../Cleaned_Analyzed_Games/twic1212_16_processed.csv: 3372 games.
Processed ../Cleaned_Anal

  df = pd.read_csv(file)


Processed ../Cleaned_Analyzed_Games/twic1275_15_processed.csv: 4379 games.
Processed ../Cleaned_Analyzed_Games/twic1276_15_processed.csv: 5400 games.
Processed ../Cleaned_Analyzed_Games/twic1277_15_processed.csv: 2309 games.
Processed ../Cleaned_Analyzed_Games/twic1278_15_processed.csv: 4385 games.
Processed ../Cleaned_Analyzed_Games/twic1279_15_processed.csv: 2470 games.
Processed ../Cleaned_Analyzed_Games/twic1350_16_processed.csv: 5501 games.
Processed ../Cleaned_Analyzed_Games/twic1351_16_processed.csv: 5537 games.
Processed ../Cleaned_Analyzed_Games/twic1352_16_processed.csv: 5616 games.
Processed ../Cleaned_Analyzed_Games/twic1353_16_processed.csv: 2952 games.
Processed ../Cleaned_Analyzed_Games/twic1354_16_processed.csv: 5361 games.
Processed ../Cleaned_Analyzed_Games/twic1355_16_processed.csv: 3646 games.
Processed ../Cleaned_Analyzed_Games/twic1356_16_processed.csv: 4911 games.
Processed ../Cleaned_Analyzed_Games/twic1357_16_processed.csv: 3478 games.
Processed ../Cleaned_Anal

  df = pd.read_csv(file)


Processed ../Cleaned_Analyzed_Games/twic1375_16_processed.csv: 6058 games.
Processed ../Cleaned_Analyzed_Games/twic1376_16_processed.csv: 5804 games.
Processed ../Cleaned_Analyzed_Games/twic1377_16_processed.csv: 5390 games.
Processed ../Cleaned_Analyzed_Games/twic1378_16_processed.csv: 5771 games.
Processed ../Cleaned_Analyzed_Games/twic1379_16_processed.csv: 8402 games.
Processed ../Cleaned_Analyzed_Games/twic1380_16_processed.csv: 5268 games.
Processed ../Cleaned_Analyzed_Games/twic1381_16_processed.csv: 3282 games.
Processed ../Cleaned_Analyzed_Games/twic1382_16_processed.csv: 3155 games.
Processed ../Cleaned_Analyzed_Games/twic1383_16_processed.csv: 3491 games.
Processed ../Cleaned_Analyzed_Games/twic1384_16_processed.csv: 8464 games.
Processed ../Cleaned_Analyzed_Games/twic1385_16_processed.csv: 3413 games.
Processed ../Cleaned_Analyzed_Games/twic1386_16_processed.csv: 7008 games.
Processed ../Cleaned_Analyzed_Games/twic1387_16_processed.csv: 7549 games.
Processed ../Cleaned_Anal

  df = pd.read_csv(file)


Processed ../Cleaned_Analyzed_Games/twic921_15_processed.csv: 1614 games.
Processed ../Cleaned_Analyzed_Games/twic922_15_processed.csv: 2462 games.
Processed ../Cleaned_Analyzed_Games/twic923_15_processed.csv: 6254 games.
Processed ../Cleaned_Analyzed_Games/twic924_15_processed.csv: 4294 games.
Processed ../Cleaned_Analyzed_Games/twic935_15_processed.csv: 1358 games.
Processed ../Cleaned_Analyzed_Games/twic936_15_processed.csv: 2227 games.
Processed ../Cleaned_Analyzed_Games/twic937_15_processed.csv: 3095 games.
Processed ../Cleaned_Analyzed_Games/twic938_15_processed.csv: 1274 games.
Processed ../Cleaned_Analyzed_Games/twic939_15_processed.csv: 3897 games.
Processed ../Cleaned_Analyzed_Games/twic940_15_processed.csv: 2174 games.
Processed ../Cleaned_Analyzed_Games/twic941_15_processed.csv: 1987 games.
Processed ../Cleaned_Analyzed_Games/twic942_15_processed.csv: 2472 games.
Processed ../Cleaned_Analyzed_Games/twic943_15_processed.csv: 4240 games.
Processed ../Cleaned_Analyzed_Games/tw

OSError: Cannot save file into a non-existent directory: '../huge_analyzed_games'

In [1]:
import os

def combine_pgn_files(input_directory, output_file, start_num=920, end_num=1560):
    """
    Combines multiple PGN files into a single PGN file.

    Parameters:
    - input_directory (str): Directory where the PGN files are located.
    - output_file (str): Path to the combined PGN output file.
    - start_num (int): Starting number in the file names.
    - end_num (int): Ending number in the file names.
    """
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for num in range(start_num, end_num + 1):
            filename = os.path.join(input_directory, f"twic{num}.pgn")
            if os.path.exists(filename):
                try:
                    with open(filename, 'r', encoding='utf-8') as infile:
                        contents = infile.read()
                        outfile.write(contents)
                        # Ensure separation between games from different files
                        outfile.write('\n\n')
                    print(f"Added {filename}")
                except Exception as e:
                    print(f"Failed to read {filename}: {e}")
            else:
                print(f"File {filename} does not exist. Skipping.")
    print(f"Combined PGN files into {output_file}")

# Example usage:
input_directory = "../utf8_games"  # Replace with the path to your PGN files
output_file = "../combined_games.pgn"          # The output file where combined PGNs will be saved
combine_pgn_files(input_directory, output_file)


Added ../utf8_games\twic920.pgn
Added ../utf8_games\twic921.pgn
Added ../utf8_games\twic922.pgn
Added ../utf8_games\twic923.pgn
Added ../utf8_games\twic924.pgn
Added ../utf8_games\twic925.pgn
Added ../utf8_games\twic926.pgn
Added ../utf8_games\twic927.pgn
Added ../utf8_games\twic928.pgn
Added ../utf8_games\twic929.pgn
Added ../utf8_games\twic930.pgn
Added ../utf8_games\twic931.pgn
Added ../utf8_games\twic932.pgn
Added ../utf8_games\twic933.pgn
Added ../utf8_games\twic934.pgn
Added ../utf8_games\twic935.pgn
Added ../utf8_games\twic936.pgn
Added ../utf8_games\twic937.pgn
Added ../utf8_games\twic938.pgn
Added ../utf8_games\twic939.pgn
Added ../utf8_games\twic940.pgn
Added ../utf8_games\twic941.pgn
Added ../utf8_games\twic942.pgn
Added ../utf8_games\twic943.pgn
Added ../utf8_games\twic944.pgn
Added ../utf8_games\twic945.pgn
Added ../utf8_games\twic946.pgn
Added ../utf8_games\twic947.pgn
Added ../utf8_games\twic948.pgn
Added ../utf8_games\twic949.pgn
Added ../utf8_games\twic950.pgn
Added ..

In [3]:
def count_games_in_pgn_file(pgn_file):
    count = 0
    with open(pgn_file, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith('[Event '):
                count += 1
    print(f"Total number of games in {pgn_file}: {count}")
    return count

# Example usage:
pgn_file = "../combined_games.pgn"
count_games_in_pgn_file(pgn_file)


Total number of games in ../combined_games.pgn: 2806708


2806708