In [1]:
import pandas as pd
import numpy as np
import os

relative_filepath = "NBA_Data" + os.sep + "csv" + os.sep + "game.csv"
game_df = pd.read_csv(relative_filepath)

# Show all column names
print(game_df.columns)

Index(['season_id', 'team_id_home', 'team_abbreviation_home', 'team_name_home',
       'game_id', 'game_date', 'matchup_home', 'wl_home', 'min', 'fgm_home',
       'fga_home', 'fg_pct_home', 'fg3m_home', 'fg3a_home', 'fg3_pct_home',
       'ftm_home', 'fta_home', 'ft_pct_home', 'oreb_home', 'dreb_home',
       'reb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home', 'pf_home',
       'pts_home', 'plus_minus_home', 'video_available_home', 'team_id_away',
       'team_abbreviation_away', 'team_name_away', 'matchup_away', 'wl_away',
       'fgm_away', 'fga_away', 'fg_pct_away', 'fg3m_away', 'fg3a_away',
       'fg3_pct_away', 'ftm_away', 'fta_away', 'ft_pct_away', 'oreb_away',
       'dreb_away', 'reb_away', 'ast_away', 'stl_away', 'blk_away', 'tov_away',
       'pf_away', 'pts_away', 'plus_minus_away', 'video_available_away',
       'season_type'],
      dtype='object')


### Drop all null values in data that we will train on ###

In [2]:
filter_out_null_stats_df = game_df.dropna(subset=['fg3m_home', 'fg3a_home', 'fg3_pct_home', 'fg3m_away', 'fg3a_away', 'fg3_pct_away', 'oreb_home', 'dreb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home', 'pf_home', 'pts_home', 'oreb_away', 'dreb_away', 'ast_away', 'stl_away', 'blk_away', 'tov_away', 'pf_away', 'pts_away'])

### Remove All star and pre-season games, will not count those in our model ###

In [3]:
# Now let us find season type to see what we have to remove

season_types = filter_out_null_stats_df['season_type'].unique()
print(season_types)
print(len(season_types))

filtered_df = filter_out_null_stats_df.loc[~( (filter_out_null_stats_df['season_type'] == 'Pre Season') | (filter_out_null_stats_df['season_type'] == 'All Star') | (filter_out_null_stats_df['season_type'] == 'All-Star'))]

print("After filtering...")
season_types = filtered_df['season_type'].unique()
print(season_types)
print(len(season_types))

filtered_df = filtered_df.reset_index()

['Playoffs' 'All-Star' 'All Star' 'Regular Season' 'Pre Season']
5
After filtering...
['Playoffs' 'Regular Season']
2


In [4]:
print(filtered_df.columns)
print(filtered_df['season_type'].unique())

Index(['index', 'season_id', 'team_id_home', 'team_abbreviation_home',
       'team_name_home', 'game_id', 'game_date', 'matchup_home', 'wl_home',
       'min', 'fgm_home', 'fga_home', 'fg_pct_home', 'fg3m_home', 'fg3a_home',
       'fg3_pct_home', 'ftm_home', 'fta_home', 'ft_pct_home', 'oreb_home',
       'dreb_home', 'reb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home',
       'pf_home', 'pts_home', 'plus_minus_home', 'video_available_home',
       'team_id_away', 'team_abbreviation_away', 'team_name_away',
       'matchup_away', 'wl_away', 'fgm_away', 'fga_away', 'fg_pct_away',
       'fg3m_away', 'fg3a_away', 'fg3_pct_away', 'ftm_away', 'fta_away',
       'ft_pct_away', 'oreb_away', 'dreb_away', 'reb_away', 'ast_away',
       'stl_away', 'blk_away', 'tov_away', 'pf_away', 'pts_away',
       'plus_minus_away', 'video_available_away', 'season_type'],
      dtype='object')
['Playoffs' 'Regular Season']


### Update all outdated team names to new ones ###

In [5]:
## Convert old team abbreviations to new
name_abbreviation = filtered_df.groupby(['team_name_home', 'team_abbreviation_home']).agg({'game_id': 'count'}).reset_index()
id_abbreviation = filtered_df.groupby(['team_id_home', 'team_abbreviation_home']).agg({'game_id': 'count'}).reset_index()
id_abbreviation_away = filtered_df.groupby(['team_id_away', 'team_abbreviation_away']).agg({'game_id': 'count'}).reset_index()

abbreviation_correction_mapping_dict = {
    'CHH' : 'CHA',
    'GOS' : 'GSW',
    'NJN' : 'BKN',
    'NOH' : 'NOP',
    'NOK' : 'NOP',
    'PHL' : 'PHI',
    'SEA' : 'OKC',
    'SAN' : 'SAS',
    'UTH' : 'UTA',
    'VAN' : 'MEM'
}

id_abbreviation['team_abbreviation_home'] = id_abbreviation['team_abbreviation_home'].replace(abbreviation_correction_mapping_dict)
id_abbreviation = id_abbreviation.groupby(['team_id_home', 'team_abbreviation_home']).sum('game_id').reset_index()

id_abbreviation_away['team_abbreviation_away'] = id_abbreviation_away['team_abbreviation_away'].replace(abbreviation_correction_mapping_dict)
id_abbreviation_away = id_abbreviation_away.groupby(['team_id_away', 'team_abbreviation_away']).sum('game_id').reset_index()

# print(id_abbreviation)
# print(id_abbreviation_away)

teamID_abbreviation_df = id_abbreviation.drop('game_id', axis=1)
teamID_abbreviation_df = teamID_abbreviation_df.set_index('team_id_home')
teamID_abbreviation_dict = teamID_abbreviation_df.to_dict(orient='index')
teamID_abbreviation_dict = { k: v['team_abbreviation_home'] for k, v in teamID_abbreviation_dict.items() }
teamID_newID_dict = { real_team_id : new_team_id  for new_team_id, (real_team_id, team_abbrev) in enumerate(teamID_abbreviation_dict.items()) }

print(teamID_abbreviation_dict)
print(teamID_newID_dict)

teamID_mappings = {'teamID_abbreviation_dict' : teamID_abbreviation_dict, 'teamID_newID_dict': teamID_newID_dict}


{1610612737: 'ATL', 1610612738: 'BOS', 1610612739: 'CLE', 1610612740: 'NOP', 1610612741: 'CHI', 1610612742: 'DAL', 1610612743: 'DEN', 1610612744: 'GSW', 1610612745: 'HOU', 1610612746: 'LAC', 1610612747: 'LAL', 1610612748: 'MIA', 1610612749: 'MIL', 1610612750: 'MIN', 1610612751: 'BKN', 1610612752: 'NYK', 1610612753: 'ORL', 1610612754: 'IND', 1610612755: 'PHI', 1610612756: 'PHX', 1610612757: 'POR', 1610612758: 'SAC', 1610612759: 'SAS', 1610612760: 'OKC', 1610612761: 'TOR', 1610612762: 'UTA', 1610612763: 'MEM', 1610612764: 'WAS', 1610612765: 'DET', 1610612766: 'CHA'}
{1610612737: 0, 1610612738: 1, 1610612739: 2, 1610612740: 3, 1610612741: 4, 1610612742: 5, 1610612743: 6, 1610612744: 7, 1610612745: 8, 1610612746: 9, 1610612747: 10, 1610612748: 11, 1610612749: 12, 1610612750: 13, 1610612751: 14, 1610612752: 15, 1610612753: 16, 1610612754: 17, 1610612755: 18, 1610612756: 19, 1610612757: 20, 1610612758: 21, 1610612759: 22, 1610612760: 23, 1610612761: 24, 1610612762: 25, 1610612763: 26, 161061

### Calculate Rest Days ###

In [6]:
# Now let us calculate the rest days ##

# First we will order the games by game date, and change all season_id to season year
filtered_df['game_date'] = pd.to_datetime(filtered_df['game_date'])
filtered_df = filtered_df.sort_values(by='game_date').reset_index(drop=True)

# Just get season id, game date, and season type dataframe
ordered_season_df = filtered_df['season_id'].drop_duplicates().to_frame().reset_index(drop=True)

# Remove first digit from season id, so that all season id just becomes the year of which the season is played
ordered_season_df['new_season_id'] = ordered_season_df['season_id'].astype(str).str[1:].astype(int)
ordered_season_dict = dict(zip(ordered_season_df['season_id'], ordered_season_df['new_season_id']))
filtered_df['new_season_id'] = filtered_df['season_id'].replace(ordered_season_dict)

In [7]:
# Create dataframe with just team, game date,home/away status, season_id
game_hometeams_df = filtered_df[['new_season_id', 'game_date' , 'team_id_home' ]]
game_hometeams_df.rename(columns ={'team_id_home' :'team_id'}, inplace=True)
game_hometeams_df.loc[:,'team_status'] = 'home'

game_awayteams_df = filtered_df[['new_season_id', 'game_date' , 'team_id_away' ]]
game_awayteams_df.rename(columns ={'team_id_away' :'team_id'}, inplace=True)
game_awayteams_df.loc[:, 'team_status'] = 'away'

games_df = pd.concat([game_hometeams_df, game_awayteams_df]).reset_index(drop=True)
games_df.sort_values(by=['team_id', 'new_season_id',  'game_date'], inplace=True, ignore_index=True)
games_df['previous_game_date'] = games_df.groupby(by=['team_id', 'new_season_id'])[['game_date']].shift(1)

games_df['rest_days'] = (games_df.loc[:,'game_date'] - games_df.loc[:,'previous_game_date']).dt.days

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  game_hometeams_df.rename(columns ={'team_id_home' :'team_id'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  game_hometeams_df.loc[:,'team_status'] = 'home'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  game_awayteams_df.rename(columns ={'team_id_away' :'team_id'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value in

In [8]:
home_games_df = games_df.loc[games_df['team_status'] == 'home'].reset_index(drop=True)
away_games_df = games_df.loc[games_df['team_status'] == 'away'].reset_index(drop=True)

home_games_df.rename(columns={'rest_days' : 'rest_days_home'}, inplace=True)
away_games_df.rename(columns={'rest_days' : 'rest_days_away'}, inplace=True)
home_games_df.rename(columns={'team_id' : 'team_id_home'}, inplace=True)
away_games_df.rename(columns={'team_id' : 'team_id_away'}, inplace=True)

home_games_df = home_games_df.drop(columns=['team_status', 'previous_game_date'])
away_games_df = away_games_df.drop(columns=['team_status', 'previous_game_date'])

In [9]:
pd.set_option('display.max_columns', None)
filtered_df = filtered_df.merge(right=home_games_df, how='left', left_on=['new_season_id', 'game_date', 'team_id_home'], right_on=['new_season_id', 'game_date', 'team_id_home'])
filtered_df = filtered_df.merge(right=away_games_df, how='left' , left_on=['new_season_id', 'game_date', 'team_id_away'], right_on=['new_season_id', 'game_date', 'team_id_away'])

filtered_df[['rest_days_home', 'rest_days_away']] = filtered_df[['rest_days_home', 'rest_days_away']].fillna(0)

In [10]:
print(filtered_df.columns)

Index(['index', 'season_id', 'team_id_home', 'team_abbreviation_home',
       'team_name_home', 'game_id', 'game_date', 'matchup_home', 'wl_home',
       'min', 'fgm_home', 'fga_home', 'fg_pct_home', 'fg3m_home', 'fg3a_home',
       'fg3_pct_home', 'ftm_home', 'fta_home', 'ft_pct_home', 'oreb_home',
       'dreb_home', 'reb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home',
       'pf_home', 'pts_home', 'plus_minus_home', 'video_available_home',
       'team_id_away', 'team_abbreviation_away', 'team_name_away',
       'matchup_away', 'wl_away', 'fgm_away', 'fga_away', 'fg_pct_away',
       'fg3m_away', 'fg3a_away', 'fg3_pct_away', 'ftm_away', 'fta_away',
       'ft_pct_away', 'oreb_away', 'dreb_away', 'reb_away', 'ast_away',
       'stl_away', 'blk_away', 'tov_away', 'pf_away', 'pts_away',
       'plus_minus_away', 'video_available_away', 'season_type',
       'new_season_id', 'rest_days_home', 'rest_days_away'],
      dtype='object')


### Remove unnecessary colums ###

In [11]:
filtered_df = filtered_df[['new_season_id', 'game_id', 'game_date', 
    'season_id', 'team_id_home', 'wl_home',
    'fgm_home', 'fga_home', 'fg_pct_home', 'fg3m_home', 'fg3a_home',
    'fg3_pct_home', 'ftm_home', 'fta_home', 'ft_pct_home', 'oreb_home',
    'dreb_home', 'reb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home',
    'pf_home', 'pts_home', 'rest_days_home',
    'team_id_away', 'wl_away', 'fgm_away', 'fga_away', 'fg_pct_away',
    'fg3m_away', 'fg3a_away', 'fg3_pct_away', 'ftm_away', 'fta_away',
    'ft_pct_away', 'oreb_away', 'dreb_away', 'reb_away', 'ast_away',
    'stl_away', 'blk_away', 'tov_away', 
    'pf_away', 'pts_away', 'rest_days_away',
    'season_type']]

In [12]:
print(filtered_df.columns)

Index(['new_season_id', 'game_id', 'game_date', 'season_id', 'team_id_home',
       'wl_home', 'fgm_home', 'fga_home', 'fg_pct_home', 'fg3m_home',
       'fg3a_home', 'fg3_pct_home', 'ftm_home', 'fta_home', 'ft_pct_home',
       'oreb_home', 'dreb_home', 'reb_home', 'ast_home', 'stl_home',
       'blk_home', 'tov_home', 'pf_home', 'pts_home', 'rest_days_home',
       'team_id_away', 'wl_away', 'fgm_away', 'fga_away', 'fg_pct_away',
       'fg3m_away', 'fg3a_away', 'fg3_pct_away', 'ftm_away', 'fta_away',
       'ft_pct_away', 'oreb_away', 'dreb_away', 'reb_away', 'ast_away',
       'stl_away', 'blk_away', 'tov_away', 'pf_away', 'pts_away',
       'rest_days_away', 'season_type'],
      dtype='object')


In [13]:
print(filtered_df.head(5))

   new_season_id   game_id  game_date  season_id  team_id_home wl_home  \
0           1979  47900044 1980-05-07      41979    1610612747       L   
1           1979  47900045 1980-05-10      41979    1610612755       L   
2           1979  47900048 1980-05-16      41979    1610612755       L   
3           1980  48000048 1981-05-05      41980    1610612738       W   
4           1980  48000049 1981-05-07      41980    1610612738       L   

   fgm_home  fga_home  fg_pct_home  fg3m_home  fg3a_home  fg3_pct_home  \
0      48.0      95.0        0.505        0.0        1.0          0.00   
1      45.0      93.0        0.484        1.0        4.0          0.25   
2      47.0      89.0        0.528        0.0        6.0          0.00   
3      41.0      95.0        0.432        0.0        1.0          0.00   
4      41.0      82.0        0.500        0.0        3.0          0.00   

   ftm_home  fta_home  ft_pct_home  oreb_home  dreb_home  reb_home  ast_home  \
0       8.0      12.0        0

### Replace Win/Loss columns with int (1 for win, 0 for loss). Replace season type with int (1 for playoff, 0 for regular season) ###

In [14]:
filtered_df['wl_home'] = filtered_df['wl_home'].replace({'W': 1, 'L': 0})
filtered_df['wl_away'] = filtered_df['wl_away'].replace({'W': 1, 'L': 0})
filtered_df['season_type'] = filtered_df['season_type'].replace({'Regular Season': 0, 'Playoffs': 1})

  filtered_df['wl_home'] = filtered_df['wl_home'].replace({'W': 1, 'L': 0})
  filtered_df['wl_away'] = filtered_df['wl_away'].replace({'W': 1, 'L': 0})
  filtered_df['season_type'] = filtered_df['season_type'].replace({'Regular Season': 0, 'Playoffs': 1})


In [15]:
# filtered_df[['team_id_home', 'team_id_away']] = filtered_df[['team_id_home', 'team_id_away']].replace(teamID_newID_dict)

print(filtered_df.head(5))

   new_season_id   game_id  game_date  season_id  team_id_home  wl_home  \
0           1979  47900044 1980-05-07      41979    1610612747        0   
1           1979  47900045 1980-05-10      41979    1610612755        0   
2           1979  47900048 1980-05-16      41979    1610612755        0   
3           1980  48000048 1981-05-05      41980    1610612738        1   
4           1980  48000049 1981-05-07      41980    1610612738        0   

   fgm_home  fga_home  fg_pct_home  fg3m_home  fg3a_home  fg3_pct_home  \
0      48.0      95.0        0.505        0.0        1.0          0.00   
1      45.0      93.0        0.484        1.0        4.0          0.25   
2      47.0      89.0        0.528        0.0        6.0          0.00   
3      41.0      95.0        0.432        0.0        1.0          0.00   
4      41.0      82.0        0.500        0.0        3.0          0.00   

   ftm_home  fta_home  ft_pct_home  oreb_home  dreb_home  reb_home  ast_home  \
0       8.0      12.0   

### Convert data to team centric data (change perspective from home vs away to primary team vs opposing team):  each sample will have a is-home flag on primary team ###

In [16]:
# Create subject centric dataframe from home team perspective
home_team_subject_centric_df = filtered_df
new_columns = {
        col: col.replace('_home', '_primary').replace('_away', '_opposing')
        for col in home_team_subject_centric_df.columns
    }
home_team_subject_centric_df = home_team_subject_centric_df.rename(columns=new_columns)

# Create subject centric dataframe from away team perspective
away_team_subject_centric_df = filtered_df
new_columns = {
        col: col.replace('_home', '_opposing').replace('_away', '_primary')
        for col in away_team_subject_centric_df.columns
    }
away_team_subject_centric_df = away_team_subject_centric_df.rename(columns=new_columns)

# Add is_home flag
home_team_subject_centric_df[['subject_is_home']] = 1
away_team_subject_centric_df[['subject_is_home']] = 0

# Reorder columns for consistency
away_team_subject_centric_df = away_team_subject_centric_df[home_team_subject_centric_df.columns]

# Concatenate home and away team subject centric dataframes
subject_centric_df = pd.concat([home_team_subject_centric_df, away_team_subject_centric_df], ignore_index=True)
subject_centric_df = subject_centric_df.sort_values(by=['team_id_primary', 'season_id', 'game_date']).reset_index(drop=True)

print(subject_centric_df.head(5))

   new_season_id   game_id  game_date  season_id  team_id_primary  wl_primary  \
0           1985  28500005 1985-10-25      21985       1610612737           0   
1           1985  28500013 1985-10-26      21985       1610612737           0   
2           1985  28500030 1985-10-29      21985       1610612737           1   
3           1985  28500045 1985-11-01      21985       1610612737           0   
4           1985  28500060 1985-11-05      21985       1610612737           0   

   fgm_primary  fga_primary  fg_pct_primary  fg3m_primary  fg3a_primary  \
0         41.0         92.0           0.446           0.0           3.0   
1         27.0         70.0           0.386           0.0           2.0   
2         37.0         86.0           0.430           1.0           3.0   
3         41.0         90.0           0.456           1.0           6.0   
4         41.0         81.0           0.506           0.0           2.0   

   fg3_pct_primary  ftm_primary  fta_primary  ft_pct_primary  

### Drop season_id, we will use new_season_id for dataset creation. Encode team_id to smaller number for training sake ###

In [17]:
subject_centric_df = subject_centric_df.drop(columns=['season_id', 'game_id', 'wl_opposing'])

subject_centric_df[['team_id_primary', 'team_id_opposing']] = subject_centric_df[['team_id_primary', 'team_id_opposing']].replace(teamID_newID_dict)

print(subject_centric_df.head(5))

   new_season_id  game_date  team_id_primary  wl_primary  fgm_primary  \
0           1985 1985-10-25                0           0         41.0   
1           1985 1985-10-26                0           0         27.0   
2           1985 1985-10-29                0           1         37.0   
3           1985 1985-11-01                0           0         41.0   
4           1985 1985-11-05                0           0         41.0   

   fga_primary  fg_pct_primary  fg3m_primary  fg3a_primary  fg3_pct_primary  \
0         92.0           0.446           0.0           3.0            0.000   
1         70.0           0.386           0.0           2.0            0.000   
2         86.0           0.430           1.0           3.0            0.333   
3         90.0           0.456           1.0           6.0            0.167   
4         81.0           0.506           0.0           2.0            0.000   

   ftm_primary  fta_primary  ft_pct_primary  oreb_primary  dreb_primary  \
0          

### Create Temporal Dataset ###

In [18]:
# Function to get the last n games of a team

def get_last_n_games(df_team_games, team_id, current_date, season_id, window_size=5):
    past_games = df_team_games[
        (df_team_games['team_id_primary'] == team_id) &
        (df_team_games['new_season_id'] == season_id) &
        (df_team_games['game_date'] < current_date)
    ].sort_values('game_date', ascending=False).head(window_size)

    return past_games.sort_values('game_date') if len(past_games) == window_size else None

In [19]:
from tqdm.notebook import tqdm

# Create temporal data

# For storing input and labels
X_primary = []
X_opposing = []
y = []

# Set window size
windowsize = 5

# Loop through each game
for _, row in tqdm(subject_centric_df.iterrows(), total=len(subject_centric_df)):
    team_id_primary = row['team_id_primary']
    team_id_opposing = row['team_id_opposing']
    season_id = row['new_season_id']
    current_date = row['game_date']
    
    # Get history for both teams
    primary_history = get_last_n_games(subject_centric_df, team_id_primary, current_date, season_id, window_size=windowsize)
    opposing_history = get_last_n_games(subject_centric_df, team_id_opposing, current_date, season_id, window_size=windowsize)
    
    # Only use this sample if both teams have enough games
    if primary_history is not None and opposing_history is not None:
        # print(primary_history.drop(columns=['game_date', 'new_season_id', 'wl_primary']).columns)
        # Flatten stats of both teams (excluding team_id, date, label columns)
        features_primary = primary_history.drop(columns=['game_date', 'new_season_id', 'wl_primary']).values
        features_opposing = opposing_history.drop(columns=['game_date', 'new_season_id', 'wl_primary']).values
        labels = np.array([row['wl_primary']])

        # print(features_primary.shape)
        # print(features_opposing.shape)
        # print(labels.shape)

        features_primary = np.array(features_primary)
        features_opposing = np.array(features_opposing)
        
        # Append to dataset
        X_primary.append(features_primary)
        X_opposing.append(features_opposing)
        y.append(labels)

print("Number of samples:", len(X_primary))
print("X_primary shape:", np.array(X_primary).shape)
print("X_opposing shape:", np.array(X_opposing).shape)
print("y shape:", np.array(y).shape)




  0%|          | 0/89098 [00:00<?, ?it/s]

Number of samples: 83178
X_primary shape: (83178, 5, 42)
X_opposing shape: (83178, 5, 42)
y shape: (83178, 1)


### Make sure columns and shapes line up ###

In [21]:
X_primary = np.array(X_primary)
X_opposing = np.array(X_opposing)
y = np.array(y)

print("X_primary shape:", X_primary.shape)
print("X_opposing shape:", X_opposing.shape)
print("y shape:", y.shape)

X_primary shape: (83178, 5, 42)
X_opposing shape: (83178, 5, 42)
y shape: (83178, 1)


In [22]:
print(len(list(subject_centric_df.drop(columns=['game_date', 'new_season_id', 'wl_primary']).columns)))

42


### Package dataset and save into pickle file ###

In [24]:
import pickle
import numpy as np

data_bundle = {
    "X_primary": X_primary,
    "X_opposing": X_opposing,
    "y": y,
    "X_cols": list(subject_centric_df.drop(columns=['game_date', 'new_season_id', 'wl_primary']).columns),
    "y_col": 'wl_primary',
    "windowsize" : windowsize, 
    "note": (
        "Temporal dataset for time sequence modeling. "
        "Each sample has a shape of [window_size x num_features] for both primary and opposing teams. "
        "Shapes: X_primary and X_opposing = [num_samples, window_size, num_features], "
        "y = [num_samples, 1]."
        )
}

# Save
with open("NBA_temporal_dataset.pkl", "wb") as f:
    pickle.dump(data_bundle, f)