In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests

In [2]:
# Load the raw game log from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load this file if it
# comes from a trusted source.
df = pd.read_pickle('raw_games_5yrs.pkl')

In [3]:
# Parse GAME_DATE into real datetimes so it can be compared and sorted chronologically.
df = df.assign(GAME_DATE=pd.to_datetime(df['GAME_DATE']))

In [4]:
# Keep only games played after the cutoff date, ordered newest first.
after_cutoff = df['GAME_DATE'] > "2022-09-01"
df = df.loc[after_cutoff].sort_values(by='GAME_DATE', ascending=False)

In [5]:
# games.groupby('TEAM_ID').apply(lambda x: x)

In [6]:
# Discard the old row labels and renumber rows from 0 after filtering/sorting.
df = df.reset_index(drop=True)

In [7]:
# Lookup table of distinct (TEAM_ID, TEAM_NAME) pairs.
team_key_columns = ['TEAM_ID', 'TEAM_NAME']
map_id_name = (
    df.loc[:, team_key_columns]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [8]:
map_id_name

Unnamed: 0,TEAM_ID,TEAM_NAME
0,1610612760,Oklahoma City Thunder
1,1610612754,Indiana Pacers
2,1610612764,Washington Wizards
3,1610612749,Milwaukee Bucks
4,1610612762,Utah Jazz
5,1610612746,LA Clippers
6,1610612751,Brooklyn Nets
7,1610612752,New York Knicks
8,1610612745,Houston Rockets
9,1610612753,Orlando Magic


In [9]:
df.dtypes.value_counts()

int64             13
float64            9
object             5
int32              1
datetime64[ns]     1
dtype: int64

In [10]:
# Number of distinct values in each object-typed column.
object_columns = df.select_dtypes(include='object')
feat_categorical_nunique = object_columns.nunique()

In [11]:
feat_categorical_nunique

SEASON_ID               2
TEAM_ABBREVIATION      30
TEAM_NAME              30
MATCHUP              1517
WL                      2
dtype: int64

In [12]:
# Order rows newest game first. Sorting HOME_TEAM in descending order places the
# home row (HOME_TEAM == 1) ahead of the away row (HOME_TEAM == 0) within each GAME_ID.
sort_keys = ['GAME_DATE', 'GAME_ID', 'HOME_TEAM']
df = df.sort_values(by=sort_keys, ascending=False)

In [13]:
# Count how many rows each GAME_ID has.
value_counts = df['GAME_ID'].value_counts()

In [14]:
value_counts

22200973    2
22200259    2
22200289    2
22200288    2
22200287    2
           ..
12200027    1
12200008    1
12200025    1
12200002    1
12200038    1
Name: GAME_ID, Length: 1040, dtype: int64

In [15]:
# GAME_IDs that do not have exactly two rows (one per team). Building one combined
# row per game needs exactly two rows for each GAME_ID, so both incomplete games
# (a single row) and duplicated games (three or more rows) are collected here.
unique_values = value_counts[value_counts != 2].index.tolist()

In [16]:
unique_values

[12200027, 12200008, 12200025, 12200002, 12200038]

In [17]:
# Drop every row whose GAME_ID was flagged in unique_values.
keep_row = ~df['GAME_ID'].isin(unique_values)
df = df.loc[keep_row]

In [18]:
# Rebuild a contiguous 0..n-1 index after rows were filtered out.
df = df.reset_index(drop=True)

In [19]:
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME_TEAM
0,22022,1610612764,WAS,Washington Wizards,22200973,2023-03-05,WAS vs. MIL,L,239,111,...,20.0,36.0,56.0,21,8.0,2,9,19,-6.0,1
1,22022,1610612749,MIL,Milwaukee Bucks,22200973,2023-03-05,MIL @ WAS,W,240,117,...,8.0,38.0,46.0,30,5.0,6,12,15,6.0,0
2,22022,1610612746,LAC,LA Clippers,22200970,2023-03-05,LAC vs. MEM,W,239,135,...,13.0,40.0,53.0,25,6.0,2,17,17,6.0,1
3,22022,1610612763,MEM,Memphis Grizzlies,22200970,2023-03-05,MEM @ LAC,L,240,129,...,2.0,24.0,26.0,32,13.0,3,8,23,-6.0,0
4,22022,1610612738,BOS,Boston Celtics,22200969,2023-03-05,BOS vs. NYK,L,289,129,...,17.0,37.0,54.0,27,8.0,5,16,28,-2.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2065,12022,1610612764,WAS,Washington Wizards,12200004,2022-10-02,WAS @ GSW,L,240,95,...,12.0,34.0,46.0,23,9.0,2,16,23,-9.0,0
2066,12022,1610612749,MIL,Milwaukee Bucks,12200003,2022-10-01,MIL vs. MEM,L,240,102,...,17.0,33.0,50.0,22,10.0,3,23,29,-5.0,1
2067,12022,1610612763,MEM,Memphis Grizzlies,12200003,2022-10-01,MEM @ MIL,W,239,107,...,10.0,31.0,41.0,25,13.0,4,22,19,5.0,0
2068,12022,1610612764,WAS,Washington Wizards,12200001,2022-09-30,WAS vs. GSW,L,240,87,...,7.0,37.0,44.0,20,12.0,10,14,27,-9.0,1


## PREPROCESS

In [20]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [21]:
# Scaler applied to the selected stat columns (MinMaxScaler maps each column
# to the [0, 1] range with its default settings).
scaler = MinMaxScaler()

In [22]:
# Numeric box-score columns that get rescaled.
selected_columns = [
    'FG_PCT', 'FG3_PCT', 'FT_PCT',
    'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF',
]

In [23]:
# Fit the scaler on the selected columns and overwrite them with the scaled values.
# NOTE(review): the scaler is fitted on every row of df; no train/test split is
# visible before this point — confirm this is intended.
scaled_values = scaler.fit_transform(df[selected_columns])
df[selected_columns] = scaled_values

In [24]:
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME_TEAM
0,22022,1610612764,WAS,Washington Wizards,22200973,2023-03-05,WAS vs. MIL,L,239,111,...,0.689655,0.454545,0.66,0.28125,0.40,0.105263,0.28,0.407407,-6.0,1
1,22022,1610612749,MIL,Milwaukee Bucks,22200973,2023-03-05,MIL @ WAS,W,240,117,...,0.275862,0.500000,0.46,0.56250,0.25,0.315789,0.40,0.259259,6.0,0
2,22022,1610612746,LAC,LA Clippers,22200970,2023-03-05,LAC vs. MEM,W,239,135,...,0.448276,0.545455,0.60,0.40625,0.30,0.105263,0.60,0.333333,6.0,1
3,22022,1610612763,MEM,Memphis Grizzlies,22200970,2023-03-05,MEM @ LAC,L,240,129,...,0.068966,0.181818,0.06,0.62500,0.65,0.157895,0.24,0.555556,-6.0,0
4,22022,1610612738,BOS,Boston Celtics,22200969,2023-03-05,BOS vs. NYK,L,289,129,...,0.586207,0.477273,0.62,0.46875,0.40,0.263158,0.56,0.740741,-2.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2065,12022,1610612764,WAS,Washington Wizards,12200004,2022-10-02,WAS @ GSW,L,240,95,...,0.413793,0.409091,0.46,0.34375,0.45,0.105263,0.56,0.555556,-9.0,0
2066,12022,1610612749,MIL,Milwaukee Bucks,12200003,2022-10-01,MIL vs. MEM,L,240,102,...,0.586207,0.386364,0.54,0.31250,0.50,0.157895,0.84,0.777778,-5.0,1
2067,12022,1610612763,MEM,Memphis Grizzlies,12200003,2022-10-01,MEM @ MIL,W,239,107,...,0.344828,0.340909,0.36,0.40625,0.65,0.210526,0.80,0.407407,5.0,0
2068,12022,1610612764,WAS,Washington Wizards,12200001,2022-09-30,WAS vs. GSW,L,240,87,...,0.241379,0.477273,0.42,0.25000,0.60,0.526316,0.48,0.703704,-9.0,1


In [25]:
# One-hot encode TEAM_ABBREVIATION into one dense indicator column per team.
# scikit-learn renamed the `sparse` keyword to `sparse_output` in 1.2 and removed the
# old name in 1.4, so try the current keyword first and fall back for older versions.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)

# Fit and encode in a single pass, then attach the indicator columns under the
# names reported by the encoder.
team_indicators = ohe.fit_transform(df[['TEAM_ABBREVIATION']])
df[ohe.get_feature_names_out()] = team_indicators



In [26]:
# The raw abbreviation is redundant now that its one-hot columns exist.
df = df.drop(columns=["TEAM_ABBREVIATION"])

In [27]:
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,...,TEAM_ABBREVIATION_OKC,TEAM_ABBREVIATION_ORL,TEAM_ABBREVIATION_PHI,TEAM_ABBREVIATION_PHX,TEAM_ABBREVIATION_POR,TEAM_ABBREVIATION_SAC,TEAM_ABBREVIATION_SAS,TEAM_ABBREVIATION_TOR,TEAM_ABBREVIATION_UTA,TEAM_ABBREVIATION_WAS
0,22022,1610612764,Washington Wizards,22200973,2023-03-05,WAS vs. MIL,L,239,111,46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,22022,1610612749,Milwaukee Bucks,22200973,2023-03-05,MIL @ WAS,W,240,117,39,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22022,1610612746,LA Clippers,22200970,2023-03-05,LAC vs. MEM,W,239,135,45,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22022,1610612763,Memphis Grizzlies,22200970,2023-03-05,MEM @ LAC,L,240,129,50,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,22022,1610612738,Boston Celtics,22200969,2023-03-05,BOS vs. NYK,L,289,129,43,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2065,12022,1610612764,Washington Wizards,12200004,2022-10-02,WAS @ GSW,L,240,95,30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2066,12022,1610612749,Milwaukee Bucks,12200003,2022-10-01,MIL vs. MEM,L,240,102,37,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2067,12022,1610612763,Memphis Grizzlies,12200003,2022-10-01,MEM @ MIL,W,239,107,38,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2068,12022,1610612764,Washington Wizards,12200001,2022-09-30,WAS vs. GSW,L,240,87,31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## COMBINING ROWS

In [28]:
# Columns removed before the per-team rows are split and recombined per game.
columns_to_discard = [
    'SEASON_ID', 'WL', 'MIN', 'MATCHUP', 'PTS', 'TEAM_ID', 'TEAM_NAME',
    'FTM', 'FTA', 'FGM', 'FGA', 'FG3M', 'FG3A', 'HOME_TEAM',
]
df_in_process = df.drop(columns=columns_to_discard)

In [29]:
# Select the home-team rows using the explicit HOME_TEAM flag instead of row position,
# so the split stays correct even if the row order changes or a game is missing a row.
# df still carries HOME_TEAM and shares its index with df_in_process (which was
# derived from df by dropping columns only).
is_home_row = df['HOME_TEAM'].eq(1)
home_df = df_in_process.loc[is_home_row].reset_index(drop=True)

In [30]:
# Select the away-team rows using the explicit HOME_TEAM flag instead of row position,
# so the split stays correct even if the row order changes or a game is missing a row.
# df still carries HOME_TEAM and shares its index with df_in_process (which was
# derived from df by dropping columns only).
is_away_row = df['HOME_TEAM'].eq(0)
away_df = df_in_process.loc[is_away_row].reset_index(drop=True)

In [31]:
# Tag every home-side column with an "_h" suffix so it stays distinguishable
# once home and away rows are joined.
new_column_names = {name: name + '_h' for name in home_df.columns}
home_df = home_df.rename(columns=new_column_names)

In [32]:
# Tag every away-side column with an "_a" suffix so it stays distinguishable
# once home and away rows are joined.
new_column_names = {name: name + '_a' for name in away_df.columns}
away_df = away_df.rename(columns=new_column_names)

In [33]:
home_df.columns

Index(['GAME_ID_h', 'GAME_DATE_h', 'FG_PCT_h', 'FG3_PCT_h', 'FT_PCT_h',
       'OREB_h', 'DREB_h', 'REB_h', 'AST_h', 'STL_h', 'BLK_h', 'TOV_h', 'PF_h',
       'PLUS_MINUS_h', 'TEAM_ABBREVIATION_ATL_h', 'TEAM_ABBREVIATION_BKN_h',
       'TEAM_ABBREVIATION_BOS_h', 'TEAM_ABBREVIATION_CHA_h',
       'TEAM_ABBREVIATION_CHI_h', 'TEAM_ABBREVIATION_CLE_h',
       'TEAM_ABBREVIATION_DAL_h', 'TEAM_ABBREVIATION_DEN_h',
       'TEAM_ABBREVIATION_DET_h', 'TEAM_ABBREVIATION_GSW_h',
       'TEAM_ABBREVIATION_HOU_h', 'TEAM_ABBREVIATION_IND_h',
       'TEAM_ABBREVIATION_LAC_h', 'TEAM_ABBREVIATION_LAL_h',
       'TEAM_ABBREVIATION_MEM_h', 'TEAM_ABBREVIATION_MIA_h',
       'TEAM_ABBREVIATION_MIL_h', 'TEAM_ABBREVIATION_MIN_h',
       'TEAM_ABBREVIATION_NOP_h', 'TEAM_ABBREVIATION_NYK_h',
       'TEAM_ABBREVIATION_OKC_h', 'TEAM_ABBREVIATION_ORL_h',
       'TEAM_ABBREVIATION_PHI_h', 'TEAM_ABBREVIATION_PHX_h',
       'TEAM_ABBREVIATION_POR_h', 'TEAM_ABBREVIATION_SAC_h',
       'TEAM_ABBREVIATION_SAS_h', 'TEAM

In [34]:
away_df.columns

Index(['GAME_ID_a', 'GAME_DATE_a', 'FG_PCT_a', 'FG3_PCT_a', 'FT_PCT_a',
       'OREB_a', 'DREB_a', 'REB_a', 'AST_a', 'STL_a', 'BLK_a', 'TOV_a', 'PF_a',
       'PLUS_MINUS_a', 'TEAM_ABBREVIATION_ATL_a', 'TEAM_ABBREVIATION_BKN_a',
       'TEAM_ABBREVIATION_BOS_a', 'TEAM_ABBREVIATION_CHA_a',
       'TEAM_ABBREVIATION_CHI_a', 'TEAM_ABBREVIATION_CLE_a',
       'TEAM_ABBREVIATION_DAL_a', 'TEAM_ABBREVIATION_DEN_a',
       'TEAM_ABBREVIATION_DET_a', 'TEAM_ABBREVIATION_GSW_a',
       'TEAM_ABBREVIATION_HOU_a', 'TEAM_ABBREVIATION_IND_a',
       'TEAM_ABBREVIATION_LAC_a', 'TEAM_ABBREVIATION_LAL_a',
       'TEAM_ABBREVIATION_MEM_a', 'TEAM_ABBREVIATION_MIA_a',
       'TEAM_ABBREVIATION_MIL_a', 'TEAM_ABBREVIATION_MIN_a',
       'TEAM_ABBREVIATION_NOP_a', 'TEAM_ABBREVIATION_NYK_a',
       'TEAM_ABBREVIATION_OKC_a', 'TEAM_ABBREVIATION_ORL_a',
       'TEAM_ABBREVIATION_PHI_a', 'TEAM_ABBREVIATION_PHX_a',
       'TEAM_ABBREVIATION_POR_a', 'TEAM_ABBREVIATION_SAC_a',
       'TEAM_ABBREVIATION_SAS_a', 'TEAM

In [35]:
# Restore the un-suffixed GAME_ID name so it can serve as the join key.
home_df = home_df.rename({'GAME_ID_h': 'GAME_ID'}, axis='columns')

In [36]:
# Restore the un-suffixed GAME_ID name so it can serve as the join key.
away_df = away_df.rename({'GAME_ID_a': 'GAME_ID'}, axis='columns')

In [37]:
# NOTE(review): this rename is a no-op. Every away_df column was suffixed with '_a',
# so there is no 'GAME_DATE_h' key here, and DataFrame.rename ignores missing keys
# by default. It was probably meant for home_df (or for 'GAME_DATE_a') — confirm the
# intent before changing it, since the frames built from home_df currently carry
# the column as 'GAME_DATE_h'.
away_df = away_df.rename(columns={'GAME_DATE_h': 'GAME_DATE'})

In [38]:
# Remove the away-side plus/minus and game date columns from away_df.
away_only_extras = ['PLUS_MINUS_a', 'GAME_DATE_a']
away_df = away_df.drop(columns=away_only_extras)

In [39]:
home_df

Unnamed: 0,GAME_ID,GAME_DATE_h,FG_PCT_h,FG3_PCT_h,FT_PCT_h,OREB_h,DREB_h,REB_h,AST_h,STL_h,...,TEAM_ABBREVIATION_OKC_h,TEAM_ABBREVIATION_ORL_h,TEAM_ABBREVIATION_PHI_h,TEAM_ABBREVIATION_PHX_h,TEAM_ABBREVIATION_POR_h,TEAM_ABBREVIATION_SAC_h,TEAM_ABBREVIATION_SAS_h,TEAM_ABBREVIATION_TOR_h,TEAM_ABBREVIATION_UTA_h,TEAM_ABBREVIATION_WAS_h
0,22200973,2023-03-05,0.347578,0.290807,0.881429,0.689655,0.454545,0.66,0.28125,0.40,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,22200970,2023-03-05,0.641026,0.583490,0.918571,0.448276,0.545455,0.60,0.40625,0.30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22200969,2023-03-05,0.256410,0.474672,0.828571,0.586207,0.477273,0.62,0.46875,0.40,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22200968,2023-03-05,0.575499,0.515947,0.740000,0.379310,0.340909,0.38,0.37500,0.45,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,22200967,2023-03-05,0.877493,0.823640,0.802857,0.206897,0.500000,0.42,0.53125,0.50,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1030,12200006,2022-10-02,0.509972,0.510319,0.524286,0.448276,0.727273,0.76,0.25000,0.50,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1031,12200005,2022-10-02,0.760684,0.684803,0.714286,0.275862,0.727273,0.66,0.90625,0.20,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1032,12200004,2022-10-02,0.438746,0.575985,0.524286,0.379310,0.454545,0.48,0.53125,0.30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1033,12200003,2022-10-01,0.358974,0.465291,0.591429,0.586207,0.386364,0.54,0.31250,0.50,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
away_df

Unnamed: 0,GAME_ID,FG_PCT_a,FG3_PCT_a,FT_PCT_a,OREB_a,DREB_a,REB_a,AST_a,STL_a,BLK_a,...,TEAM_ABBREVIATION_OKC_a,TEAM_ABBREVIATION_ORL_a,TEAM_ABBREVIATION_PHI_a,TEAM_ABBREVIATION_PHX_a,TEAM_ABBREVIATION_POR_a,TEAM_ABBREVIATION_SAC_a,TEAM_ABBREVIATION_SAS_a,TEAM_ABBREVIATION_TOR_a,TEAM_ABBREVIATION_UTA_a,TEAM_ABBREVIATION_WAS_a
0,22200973,0.455840,0.649156,0.675714,0.275862,0.500000,0.46,0.56250,0.25,0.315789,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,22200970,0.680912,0.799250,0.618571,0.068966,0.181818,0.06,0.62500,0.65,0.157895,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22200969,0.544160,0.493433,0.554286,0.379310,0.613636,0.62,0.18750,0.35,0.210526,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22200968,0.527066,0.363977,0.735714,0.413793,0.250000,0.32,0.56250,0.25,0.421053,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,22200967,0.450142,0.414634,0.455714,0.448276,0.136364,0.24,0.34375,0.25,0.210526,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1030,12200006,0.054131,0.275797,0.411429,0.413793,0.409091,0.46,0.28125,0.55,0.263158,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1031,12200005,0.122507,0.091932,0.490000,0.551724,0.250000,0.40,0.03125,0.65,0.105263,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1032,12200004,0.139601,0.260788,0.857143,0.413793,0.409091,0.46,0.34375,0.45,0.105263,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1033,12200003,0.541311,0.476548,0.605714,0.344828,0.340909,0.36,0.40625,0.65,0.210526,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
# Join each game's home and away rows into a single wide row keyed by GAME_ID.
# validate='one_to_one' makes pandas raise a MergeError if a GAME_ID appears more
# than once on either side, instead of silently multiplying rows in the result.
combined_df = pd.merge(home_df, away_df, on='GAME_ID', how='inner', validate='one_to_one')
combined_df

Unnamed: 0,GAME_ID,GAME_DATE_h,FG_PCT_h,FG3_PCT_h,FT_PCT_h,OREB_h,DREB_h,REB_h,AST_h,STL_h,...,TEAM_ABBREVIATION_OKC_a,TEAM_ABBREVIATION_ORL_a,TEAM_ABBREVIATION_PHI_a,TEAM_ABBREVIATION_PHX_a,TEAM_ABBREVIATION_POR_a,TEAM_ABBREVIATION_SAC_a,TEAM_ABBREVIATION_SAS_a,TEAM_ABBREVIATION_TOR_a,TEAM_ABBREVIATION_UTA_a,TEAM_ABBREVIATION_WAS_a
0,22200973,2023-03-05,0.347578,0.290807,0.881429,0.689655,0.454545,0.66,0.28125,0.40,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,22200970,2023-03-05,0.641026,0.583490,0.918571,0.448276,0.545455,0.60,0.40625,0.30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22200969,2023-03-05,0.256410,0.474672,0.828571,0.586207,0.477273,0.62,0.46875,0.40,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22200968,2023-03-05,0.575499,0.515947,0.740000,0.379310,0.340909,0.38,0.37500,0.45,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,22200967,2023-03-05,0.877493,0.823640,0.802857,0.206897,0.500000,0.42,0.53125,0.50,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1030,12200006,2022-10-02,0.509972,0.510319,0.524286,0.448276,0.727273,0.76,0.25000,0.50,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1031,12200005,2022-10-02,0.760684,0.684803,0.714286,0.275862,0.727273,0.66,0.90625,0.20,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1032,12200004,2022-10-02,0.438746,0.575985,0.524286,0.379310,0.454545,0.48,0.53125,0.30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1033,12200003,2022-10-01,0.358974,0.465291,0.591429,0.586207,0.386364,0.54,0.31250,0.50,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
combined_df.columns

Index(['GAME_ID', 'GAME_DATE_h', 'FG_PCT_h', 'FG3_PCT_h', 'FT_PCT_h', 'OREB_h',
       'DREB_h', 'REB_h', 'AST_h', 'STL_h', 'BLK_h', 'TOV_h', 'PF_h',
       'PLUS_MINUS_h', 'TEAM_ABBREVIATION_ATL_h', 'TEAM_ABBREVIATION_BKN_h',
       'TEAM_ABBREVIATION_BOS_h', 'TEAM_ABBREVIATION_CHA_h',
       'TEAM_ABBREVIATION_CHI_h', 'TEAM_ABBREVIATION_CLE_h',
       'TEAM_ABBREVIATION_DAL_h', 'TEAM_ABBREVIATION_DEN_h',
       'TEAM_ABBREVIATION_DET_h', 'TEAM_ABBREVIATION_GSW_h',
       'TEAM_ABBREVIATION_HOU_h', 'TEAM_ABBREVIATION_IND_h',
       'TEAM_ABBREVIATION_LAC_h', 'TEAM_ABBREVIATION_LAL_h',
       'TEAM_ABBREVIATION_MEM_h', 'TEAM_ABBREVIATION_MIA_h',
       'TEAM_ABBREVIATION_MIL_h', 'TEAM_ABBREVIATION_MIN_h',
       'TEAM_ABBREVIATION_NOP_h', 'TEAM_ABBREVIATION_NYK_h',
       'TEAM_ABBREVIATION_OKC_h', 'TEAM_ABBREVIATION_ORL_h',
       'TEAM_ABBREVIATION_PHI_h', 'TEAM_ABBREVIATION_PHX_h',
       'TEAM_ABBREVIATION_POR_h', 'TEAM_ABBREVIATION_SAC_h',
       'TEAM_ABBREVIATION_SAS_h', 'TEAM_A

## X, y

In [49]:
# Target: the home side's plus/minus. Features: every other combined column.
target_column = 'PLUS_MINUS_h'
y = combined_df[target_column]
X = combined_df.drop(columns=[target_column])

In [45]:
# X.drop(columns=['SEASON_ID', 'WL', 'MIN', 'MATCHUP', 'PTS', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'FTM', 'FTA', 'FGM', 'FGA', 'FG3M', 'FG3A'], inplace=True)

In [50]:
X.columns

Index(['GAME_ID', 'GAME_DATE_h', 'FG_PCT_h', 'FG3_PCT_h', 'FT_PCT_h', 'OREB_h',
       'DREB_h', 'REB_h', 'AST_h', 'STL_h', 'BLK_h', 'TOV_h', 'PF_h',
       'TEAM_ABBREVIATION_ATL_h', 'TEAM_ABBREVIATION_BKN_h',
       'TEAM_ABBREVIATION_BOS_h', 'TEAM_ABBREVIATION_CHA_h',
       'TEAM_ABBREVIATION_CHI_h', 'TEAM_ABBREVIATION_CLE_h',
       'TEAM_ABBREVIATION_DAL_h', 'TEAM_ABBREVIATION_DEN_h',
       'TEAM_ABBREVIATION_DET_h', 'TEAM_ABBREVIATION_GSW_h',
       'TEAM_ABBREVIATION_HOU_h', 'TEAM_ABBREVIATION_IND_h',
       'TEAM_ABBREVIATION_LAC_h', 'TEAM_ABBREVIATION_LAL_h',
       'TEAM_ABBREVIATION_MEM_h', 'TEAM_ABBREVIATION_MIA_h',
       'TEAM_ABBREVIATION_MIL_h', 'TEAM_ABBREVIATION_MIN_h',
       'TEAM_ABBREVIATION_NOP_h', 'TEAM_ABBREVIATION_NYK_h',
       'TEAM_ABBREVIATION_OKC_h', 'TEAM_ABBREVIATION_ORL_h',
       'TEAM_ABBREVIATION_PHI_h', 'TEAM_ABBREVIATION_PHX_h',
       'TEAM_ABBREVIATION_POR_h', 'TEAM_ABBREVIATION_SAC_h',
       'TEAM_ABBREVIATION_SAS_h', 'TEAM_ABBREVIATION_TOR_

In [57]:
# Write the feature frame to disk as a pickle.
X.to_pickle('X_basic_df_2022-09-01.pkl')

In [58]:
# Write the target series to disk as a pickle.
y.to_pickle('y_basic_df_2022-09-01.pkl')