### Combining Dataframes

Author: Justin
Date: 07/30/2025

This notebook simply helped me combine the data from each season together to be more easily worked as I wrote
other scripts. You don't need to run this notebook, but it is here for reference.

In [11]:
import os
import pandas as pd

In [12]:
def combine_csvs_for_season(season_path, league):
    """Combine all CSV files in a season directory into a single DataFrame"""
    csv_files = [f for f in os.listdir(season_path) if f.endswith('_pbp.csv') and f.startswith(league)]
    season_df = pd.DataFrame()

    # Read each team-season CSV file and append to the full season DataFrame
    for csv_file in csv_files:
        team_season_csv_path = os.path.join(season_path, csv_file)
        team_season_df = pd.read_csv(team_season_csv_path)

        # Only concat rows with GAME_ID that doesn't already exist in the season_df
        if not season_df.empty:
            team_season_df = team_season_df[~team_season_df['GAME_ID'].isin(season_df['GAME_ID'])]

        season_df = pd.concat([season_df, team_season_df], ignore_index=True)

    # Save the combined DataFrame to a new CSV file
    combined_csv_path = os.path.join(season_path, f'combined_{os.path.basename(season_path)}.csv')
    season_df.to_csv(combined_csv_path, index=False)
    print(f"Combined data for season: {os.path.basename(season_path)} saved to {combined_csv_path}")
    return season_df

In [13]:
def combine_all_seasons(league_dir, league):
    """Combine all CSV files for each season in the league directory"""
    season_paths = [os.path.join(league_dir, season_dir) for season_dir in os.listdir(league_dir) if os.path.isdir(os.path.join(league_dir, season_dir))]
    for season_path in season_paths:
        print(f"Combining data for season: {os.path.basename(season_path)}")
        combine_csvs_for_season(season_path, league)

In [14]:
CHOSEN_LEAGUE = 'wnba'
PBP_PATH = f'../pbp_data/{CHOSEN_LEAGUE}/'

# combine_csvs_for_season(f'{PBP_PATH}/1996-97', CHOSEN_LEAGUE) # For one season
combine_all_seasons(PBP_PATH, CHOSEN_LEAGUE) # For all seasons

Combining data for season: 1997
Combined data for season: 1997 saved to ../pbp_data/wnba/1997\combined_1997.csv
Combining data for season: 1998
Combined data for season: 1998 saved to ../pbp_data/wnba/1998\combined_1998.csv
Combining data for season: 1999
Combined data for season: 1999 saved to ../pbp_data/wnba/1999\combined_1999.csv
Combining data for season: 2000
Combined data for season: 2000 saved to ../pbp_data/wnba/2000\combined_2000.csv
Combining data for season: 2001
Combined data for season: 2001 saved to ../pbp_data/wnba/2001\combined_2001.csv
Combining data for season: 2002
Combined data for season: 2002 saved to ../pbp_data/wnba/2002\combined_2002.csv
Combining data for season: 2003
Combined data for season: 2003 saved to ../pbp_data/wnba/2003\combined_2003.csv
Combining data for season: 2012
Combined data for season: 2012 saved to ../pbp_data/wnba/2012\combined_2012.csv
Combining data for season: 2013
Combined data for season: 2013 saved to ../pbp_data/wnba/2013\combined_2

  team_season_df = pd.read_csv(team_season_csv_path)


Combined data for season: 2017 saved to ../pbp_data/wnba/2017\combined_2017.csv
Combining data for season: 2018


  team_season_df = pd.read_csv(team_season_csv_path)
  team_season_df = pd.read_csv(team_season_csv_path)
  team_season_df = pd.read_csv(team_season_csv_path)


Combined data for season: 2018 saved to ../pbp_data/wnba/2018\combined_2018.csv
Combining data for season: 2019
Combined data for season: 2019 saved to ../pbp_data/wnba/2019\combined_2019.csv
Combining data for season: 2020
Combined data for season: 2020 saved to ../pbp_data/wnba/2020\combined_2020.csv
Combining data for season: 2021
Combined data for season: 2021 saved to ../pbp_data/wnba/2021\combined_2021.csv
Combining data for season: 2022
Combined data for season: 2022 saved to ../pbp_data/wnba/2022\combined_2022.csv
Combining data for season: 2023
Combined data for season: 2023 saved to ../pbp_data/wnba/2023\combined_2023.csv
Combining data for season: 2024
Combined data for season: 2024 saved to ../pbp_data/wnba/2024\combined_2024.csv
