In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
# Load the raw 2023-24 GSW game log; the path is relative to the notebook's working directory.
gsw_df = pd.read_csv('data/GSW_2023-24.txt')

In [8]:
# Preview the raw frame; note that row 0 carries the real column labels (Rk, Gtm, Date, ...).
gsw_df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Score,Score.1,Score.2,Score.3,Team,...,Opponent.11,Opponent.12,Opponent.13,Opponent.14,Opponent.15,Opponent.16,Opponent.17,Opponent.18,Opponent.19,Opponent.20
0,Rk,Gtm,Date,,Opp,Rslt,Tm,Opp,OT,FG,...,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF
1,1,1,2023-10-24,,PHO,L,104,108,,36,...,17,.765,17,43,60,23,5,7,19,22
2,2,2,2023-10-27,@,SAC,W,122,114,,48,...,29,.690,10,31,41,24,10,3,14,16
3,3,3,2023-10-29,@,HOU,W,106,95,,35,...,19,.947,10,32,42,22,7,4,7,20
4,4,4,2023-10-30,@,NOP,W,130,102,,51,...,26,.615,14,25,39,23,8,3,11,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,79,79,2024-04-09,@,LAL,W,134,120,,47,...,24,.833,10,30,40,31,8,3,8,14
80,80,80,2024-04-11,@,POR,W,100,92,,36,...,17,.824,20,29,49,22,9,3,13,18
81,81,81,2024-04-12,,NOP,L,109,114,,40,...,16,.625,5,33,38,24,11,5,9,13
82,82,82,2024-04-14,,UTA,W,123,116,,45,...,16,.625,14,34,48,22,6,5,20,20


In [10]:
# Check the dtype and non-null count of every column in the raw frame.
gsw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 51 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unnamed: 0   83 non-null     object
 1   Unnamed: 1   83 non-null     object
 2   Unnamed: 2   83 non-null     object
 3   Unnamed: 3   41 non-null     object
 4   Unnamed: 4   83 non-null     object
 5   Score        84 non-null     object
 6   Score.1      84 non-null     object
 7   Score.2      84 non-null     object
 8   Score.3      6 non-null      object
 9   Team         84 non-null     object
 10  Team.1       84 non-null     object
 11  Team.2       84 non-null     object
 12  Team.3       84 non-null     object
 13  Team.4       84 non-null     object
 14  Team.5       84 non-null     object
 15  Team.6       84 non-null     object
 16  Team.7       84 non-null     object
 17  Team.8       84 non-null     object
 18  Team.9       84 non-null     object
 19  Team.10      84 non-null     ob

### Glossary For Me
- The first two columns, `Rk` (rank) and `Gtm`, count the games played: there should be 82 games in the regular season, plus a potential 22 games in the playoffs. GSW did not qualify for the playoffs in the 2023-24 season.
- `@` symbol indicates that GSW played at the opponent's home court
- `FG` = Field Goals
- `FGA` = Field Goal Attempts
- `FG%` = Field Goal Percentage (FG/FGA)
- `eFG%` = Effective Field Goal Percentage (This statistic adjusts for the fact that a 3-point field goal is worth one more point than a 2-point field goal.)
- `ORB` = Offensive Rebounds
- `DRB` = Defensive Rebounds
- `TRB` = Total Rebounds
- `PF` = Personal Fouls

NOTE: All columns are loaded as strings (pandas `object` dtype), so the numeric columns must be converted before analysis.

### Predicting Golden State Warriors Game Outcomes (2023–24)
This notebook uses post-game statistics to predict whether the Golden State Warriors (GSW) won a game during the 2023–24 NBA season. The goal is to build a simple classification model that learns from historical performance.

In [14]:
# Remove the Rk / Gtm / Date columns ('Unnamed: 0' .. 'Unnamed: 2'), then drop
# row 0 (it holds the real column labels, not a game) and row 83 (the last row
# of the raw frame).
col_to_drop = ['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2']

gsw_df = gsw_df.drop(columns = col_to_drop).drop(index = [0, 83])

In [16]:
# List the remaining auto-generated column names so they can be mapped to descriptive ones.
print(gsw_df.columns)

Index(['Unnamed: 3', 'Unnamed: 4', 'Score', 'Score.1', 'Score.2', 'Score.3',
       'Team', 'Team.1', 'Team.2', 'Team.3', 'Team.4', 'Team.5', 'Team.6',
       'Team.7', 'Team.8', 'Team.9', 'Team.10', 'Team.11', 'Team.12',
       'Team.13', 'Team.14', 'Team.15', 'Team.16', 'Team.17', 'Team.18',
       'Team.19', 'Team.20', 'Opponent', 'Opponent.1', 'Opponent.2',
       'Opponent.3', 'Opponent.4', 'Opponent.5', 'Opponent.6', 'Opponent.7',
       'Opponent.8', 'Opponent.9', 'Opponent.10', 'Opponent.11', 'Opponent.12',
       'Opponent.13', 'Opponent.14', 'Opponent.15', 'Opponent.16',
       'Opponent.17', 'Opponent.18', 'Opponent.19', 'Opponent.20'],
      dtype='object')


In [18]:
# Renaming columns

# Map the auto-generated header names to descriptive ones. The raw export
# repeats the same 21 box-score stats under 'Team', 'Team.1', ... 'Team.20' and
# again under 'Opponent', 'Opponent.1', ... 'Opponent.20', so those entries are
# generated from a single stat list instead of being written out by hand.
rename_dict = {
    'Unnamed: 3': 'location',
    'Unnamed: 4': 'opponent',
    'Score': 'result',
    'Score.1': 'gsw_total_pts',
    'Score.2': 'opp_total_pts',
    'Score.3': 'OT',
    **{
        # pandas leaves the first duplicate header bare and suffixes the rest with '.N'
        (raw_prefix if pos == 0 else f'{raw_prefix}.{pos}'): f'{stat}_{side}'
        for raw_prefix, side in (('Team', 'gsw'), ('Opponent', 'opp'))
        for pos, stat in enumerate((
            'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%',
            'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
        ))
    },
}

In [20]:
# Apply the mapping; rename() returns a new frame, and by default keys that are not present in the columns are ignored.
gsw_df = gsw_df.rename(columns = rename_dict)

In [22]:
# Recoding values (Location)
# '@' marks a game played on the opponent's court and is recoded to 'away';
# a missing value is recoded to 'home'. The column is addressed by name and
# recoded with vectorised operations, so the result does not depend on
# 'location' being the first column and no row-by-row loop is needed.
gsw_df['location'] = gsw_df['location'].replace('@', 'away').fillna('home')

In [24]:
# Confirm the renamed columns and the home/away recoding.
gsw_df

Unnamed: 0,location,opponent,result,gsw_total_pts,opp_total_pts,OT,FG_gsw,FGA_gsw,FG%_gsw,3P_gsw,...,FTA_opp,FT%_opp,ORB_opp,DRB_opp,TRB_opp,AST_opp,STL_opp,BLK_opp,TOV_opp,PF_opp
1,home,PHO,L,104,108,,36,101,.356,10,...,17,.765,17,43,60,23,5,7,19,22
2,away,SAC,W,122,114,,48,87,.552,14,...,29,.690,10,31,41,24,10,3,14,16
3,away,HOU,W,106,95,,35,81,.432,18,...,19,.947,10,32,42,22,7,4,7,20
4,away,NOP,W,130,102,,51,98,.520,15,...,26,.615,14,25,39,23,8,3,11,18
5,home,SAC,W,102,101,,39,81,.481,11,...,23,.783,14,34,48,24,8,4,15,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,home,UTA,W,118,110,,49,92,.533,14,...,21,.810,6,23,29,21,4,6,11,9
79,away,LAL,W,134,120,,47,80,.588,26,...,24,.833,10,30,40,31,8,3,8,14
80,away,POR,W,100,92,,36,77,.468,12,...,17,.824,20,29,49,22,9,3,13,18
81,home,NOP,L,109,114,,40,86,.465,13,...,16,.625,5,33,38,24,11,5,9,13


In [26]:
# Inspect the distinct OT values before recoding the missing ones.
gsw_df['OT'].unique()

array([nan, 'OT', '2OT'], dtype=object)

In [28]:
# Recoding values (OT)
# Rows with no OT marker (NaN) are labelled 'no_OT'; existing markers such as 'OT' and '2OT' are left unchanged.

gsw_df['OT'] = gsw_df['OT'].fillna('no_OT')

In [30]:
# Convert the shooting-percentage columns (FG%, 3P%, 2P%, eFG%, FT% for both
# teams -- the only columns whose names contain '%') from strings to floats.
# astype() with a per-column mapping is used instead of assigning through
# .iloc[:, cols]: in recent pandas that assignment writes the converted values
# into the existing object-dtype columns, so the dtype would stay `object`.
pct_cols = [col for col in gsw_df.columns if '%' in col]
gsw_df = gsw_df.astype({col: float for col in pct_cols})

In [32]:
# Positions of the columns that must NOT be cast to int: the text columns
# (location, opponent, result, OT) and the ten percentage columns.
non_float = [0, 1, 2, 5, 8, 11, 14, 15, 18, 29, 32, 35, 36, 39]

# Every remaining column is cast to int. The positions are resolved to column
# labels once, and astype() is given a per-column mapping so the columns really
# become integer dtype; assigning through .iloc[:, cols] would write the values
# into the existing object-dtype columns and leave the dtype as `object`.
count_cols = [col for pos, col in enumerate(gsw_df.columns) if pos not in non_float]
gsw_df = gsw_df.astype({col: int for col in count_cols})

In [34]:
# Keep the cleaned data under its own name; copy() makes it independent of later changes to gsw_df.
gsw_cleaned = gsw_df.copy()

In [36]:
# Final look at the cleaned data before it is written out.
gsw_cleaned

Unnamed: 0,location,opponent,result,gsw_total_pts,opp_total_pts,OT,FG_gsw,FGA_gsw,FG%_gsw,3P_gsw,...,FTA_opp,FT%_opp,ORB_opp,DRB_opp,TRB_opp,AST_opp,STL_opp,BLK_opp,TOV_opp,PF_opp
1,home,PHO,L,104,108,no_OT,36,101,0.356,10,...,17,0.765,17,43,60,23,5,7,19,22
2,away,SAC,W,122,114,no_OT,48,87,0.552,14,...,29,0.69,10,31,41,24,10,3,14,16
3,away,HOU,W,106,95,no_OT,35,81,0.432,18,...,19,0.947,10,32,42,22,7,4,7,20
4,away,NOP,W,130,102,no_OT,51,98,0.52,15,...,26,0.615,14,25,39,23,8,3,11,18
5,home,SAC,W,102,101,no_OT,39,81,0.481,11,...,23,0.783,14,34,48,24,8,4,15,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,home,UTA,W,118,110,no_OT,49,92,0.533,14,...,21,0.81,6,23,29,21,4,6,11,9
79,away,LAL,W,134,120,no_OT,47,80,0.588,26,...,24,0.833,10,30,40,31,8,3,8,14
80,away,POR,W,100,92,no_OT,36,77,0.468,12,...,17,0.824,20,29,49,22,9,3,13,18
81,home,NOP,L,109,114,no_OT,40,86,0.465,13,...,16,0.625,5,33,38,24,11,5,9,13


In [None]:
# Write the cleaned data to CSV without the index column. The file is named
# after the 2023-24 season, matching the input file 'data/GSW_2023-24.txt'.
# NOTE(review): anything that reads the old 'GSW_2023-23_cleaned.csv' name must be updated.
gsw_cleaned.to_csv('GSW_2023-24_cleaned.csv', index = False)