### Import Dependencies

In [1]:
import pandas as pd
import os
import numpy as np
from pprint import pprint
from sqlalchemy import create_engine, inspect
import psycopg2

## Process Stats Data

In [2]:
# Load the six raw team-stat CSVs in one pass. The "reg_" files and the
# "post_" files each come as an off / def / adv trio (judging by the file
# names: offense, defense and advanced stats).
(
    reg_off_df,
    reg_def_df,
    reg_adv_df,
    post_off_df,
    post_def_df,
    post_adv_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/nba_team_stats_data/nba_off.csv',
        'Data/nba_team_stats_data/nba_def.csv',
        'Data/nba_team_stats_data/nba_adv_stats.csv',
        'Data/nba_team_stats_data/nba_post_off.csv',
        'Data/nba_team_stats_data/nba_post_def.csv',
        'Data/nba_team_stats_data/nba_post_adv_stats.csv',
    )
)

In [3]:
# Strip the columns that are not needed: the 'Rk' column from every table,
# plus the unnamed columns (and, for the regular season, 'Arena' and
# 'Attend.') from the advanced-stats tables.
reg_off_filtered_df = reg_off_df.drop(['Rk'], axis=1)
reg_def_filtered_df = reg_def_df.drop(['Rk'], axis=1)
reg_adv_filtered_df = reg_adv_df.drop(
    ['Rk', 'Unnamed: 17', 'Unnamed: 27', 'Arena', 'Attend.'],
    axis=1,
)
post_off_filtered_df = post_off_df.drop(['Rk'], axis=1)
post_def_filtered_df = post_def_df.drop(['Rk'], axis=1)
post_adv_filtered_df = post_adv_df.drop(
    ['Rk', 'Unnamed: 15', 'Unnamed: 20'],
    axis=1,
)

In [4]:
# Check column names if so desired

# reg_off_filtered_df.columns
# reg_def_filtered_df.columns
# reg_adv_filtered_df.columns
# post_off_filtered_df.columns
# post_def_filtered_df.columns
# post_adv_filtered_df.columns

### Process Regular Season and Post Season Aggregate Tables

In [5]:
# Lower-cased CSV headers -> replacement names. Every key contains '%', '/',
# '.' or starts with a digit; the replacements are plain snake_case
# (presumably so they can be used as SQL column names).
regular_season_column_map = {
    'off_fg%': 'off_fg_percent',
    'off_3p%': 'off_3p_percent',
    'off_2p%': 'off_2p_percent',
    'off_ft%': 'off_ft_percent',
    'def_fg%': 'def_fg_percent',
    'def_3p%': 'def_3p_percent',
    'def_2p%': 'def_2p_percent',
    'def_ft%': 'def_ft_percent',
    'w/l%': 'w_l_percent',
    'ts%': 'ts_percent',
    'off_efg%': 'off_efg_percent',
    'off_tov%': 'off_tov_percent',
    'off_orb%': 'off_orb_percent',
    'off_ft/fga': 'off_ft_fga',
    'def_efg%': 'def_efg_percent',
    'def_tov%': 'def_tov_percent',
    'def_drb%': 'def_drb_percent',
    'def_ft/fga': 'def_ft_fga',
    '3par': 'three_par',
    'attend./g': 'attend_g',
}

# Read the compiled regular-season table, discard the 'Unnamed: 0' column,
# lower-case every header, then apply the rename map above.
regular_season_df = (
    pd.read_csv('Data/nba_team_stats_data/nba_reg_compiled_stats.csv')
    .drop(columns=['Unnamed: 0'])
    .rename(columns=str.lower)
    .rename(columns=regular_season_column_map)
)

# regular_season_df.columns

In [6]:
# Lower-cased CSV headers -> replacement names. Every key contains '%', '/',
# '.' or starts with a digit; the replacements are plain snake_case
# (presumably so they can be used as SQL column names).
post_season_column_map = {
    'off_fg%': 'off_fg_percent',
    'off_3p%': 'off_3p_percent',
    'off_2p%': 'off_2p_percent',
    'off_ft%': 'off_ft_percent',
    'def_fg%': 'def_fg_percent',
    'def_3p%': 'def_3p_percent',
    'def_2p%': 'def_2p_percent',
    'def_ft%': 'def_ft_percent',
    'w/l%': 'w_l_percent',
    'ts%': 'ts_percent',
    'off_efg%': 'off_efg_percent',
    'off_tov%': 'off_tov_percent',
    'off_orb%': 'off_orb_percent',
    'off_ft/fga': 'off_ft_fga',
    'def_efg%': 'def_efg_percent',
    'def_tov%': 'def_tov_percent',
    'def_drb%': 'def_drb_percent',
    'def_ft/fga': 'def_ft_fga',
    '3par': 'three_par',
    'attend./g': 'attend_g',
}

# Read the compiled post-season table, discard the 'Unnamed: 0' column,
# lower-case every header, then apply the rename map above.
post_season_df = (
    pd.read_csv('Data/nba_team_stats_data/nba_post_compiled_stats.csv')
    .drop(columns=['Unnamed: 0'])
    .rename(columns=str.lower)
    .rename(columns=post_season_column_map)
)
# post_season_df.columns

### Create Table for Team Names and Abbreviations

In [7]:
# One-column frame of team names taken from the post-season stats table,
# with the column relabelled from 'team' to 'team_name'.
teams_df = post_season_df[['team']].rename(columns={'team': 'team_name'})
teams_df.head()

Unnamed: 0,team_name
0,Golden State Warriors
1,Memphis Grizzlies
2,Denver Nuggets
3,New Orleans Pelicans
4,Minnesota Timberwolves


In [8]:
# Team names, in the row order of the regular-season stats table.
team_names_series = regular_season_df["team"]

# Abbreviations are matched to team names purely by row position, so this
# list must stay in the same order as the rows of regular_season_df.
team_abbv = [
    "MIN", "MEM", "MIL", "CHA", "PHX", "ATL", "UTA", "SAS", "BKN", "DEN",
    "LAL", "BOS", "CHI", "IND", "GSW", "SAC", "MIA", "PHI", "HOU", "TOR",
    "NOP", "WAS", "LAC", "DAL", "CLE", "NYK", "POR", "DET", "ORL", "OKC",
]
team_abbv_series = pd.Series(team_abbv)

# Building a DataFrame from a dict of Series aligns them on index labels and
# fills gaps with NaN. Fail loudly instead of silently producing missing or
# mis-paired rows when the stats table and the list no longer line up.
if not team_names_series.index.equals(team_abbv_series.index):
    raise ValueError(
        "team_abbv does not line up with regular_season_df['team']: "
        f"{len(team_abbv_series)} abbreviations vs {len(team_names_series)} team rows"
    )

frame = {'team_name': team_names_series, 'abbreviation': team_abbv_series}

name_abbv = pd.DataFrame(frame)

name_abbv

Unnamed: 0,team_name,abbreviation
0,Minnesota Timberwolves,MIN
1,Memphis Grizzlies,MEM
2,Milwaukee Bucks,MIL
3,Charlotte Hornets,CHA
4,Phoenix Suns,PHX
5,Atlanta Hawks,ATL
6,Utah Jazz,UTA
7,San Antonio Spurs,SAS
8,Brooklyn Nets,BKN
9,Denver Nuggets,DEN


### Process Regular Season Match Data Scraped from NBA Website

In [9]:
# Load the scraped regular-season rows and discard the 'Unnamed: 0' column
# (presumably an index saved when the CSV was written).
df = pd.read_csv('Data/Scraped/NBA_Data_Reg_Season_2022.csv').drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,team,min,pts,fgm,fga,threepm,threepa,ftm,fta,oreb,dreb,reb,ast,stl,blk,tov,pf,point_diff
0,BOS,240,139,54,99,18,48,13,13,14,42,56,34,5,2,15,20,29
1,CLE,240,133,51,94,19,38,12,17,10,38,48,39,5,5,12,26,18
2,MIL,240,115,39,88,12,30,25,32,8,33,41,27,7,2,12,14,-18
3,ATL,240,130,45,87,21,38,19,24,13,37,50,29,4,4,13,19,16
4,WAS,240,108,42,94,8,23,16,26,17,26,43,25,12,3,11,16,-16


In [10]:
# Determine if game is win or loss
def winloss(row):
    """Return 'L' when the row's 'point_diff' is negative, otherwise 'W'.

    `row` is anything indexable by the key 'point_diff' (e.g. a DataFrame
    row passed in by `DataFrame.apply(..., axis=1)`).
    """
    return 'L' if row['point_diff'] < 0 else 'W'

In [11]:
# Determine if game is away or home
def type(row):
    """Return 'vs' (home) when row['new'] is even, otherwise '@' (away).

    NOTE(review): this name shadows Python's builtin `type` for the rest of
    the notebook; it is kept because a later cell calls it by this name.
    """
    is_even = row['new'] % 2 == 0
    return 'vs' if is_even else '@'  # 'vs' = Home, '@' = Away

In [12]:
# def team(row):
#     if row['new'] % 2 == 0:
#         return row['new'] - 1
#     return row['new'] + 1

In [13]:
# Label every row 'W' or 'L' by running winloss over it row by row
df['w_l'] = df.apply(winloss, axis=1)

# Pull the new column out and re-insert it at position 1 so the layout
# matches the other tables
df.insert(1, 'w_l', df.pop('w_l'))

In [14]:
# Copy the index into a helper column 'new'; the notebook's own `type`
# helper (not the builtin) reads it to label each row 'vs' or '@'
df['new'] = df.index
df['type'] = df.apply(type, axis=1)

# Pull the new column out and re-insert it at position 1 so the layout
# matches the other tables
df.insert(1, 'type', df.pop('type'))

### Process Match Data

In [15]:
# Load the regular-season match table and discard the 'Unnamed: 0' column
regular_matches_df = (
    pd.read_csv('Data/NBA_Data_Reg_Season_2022_plus.csv')
    .drop(columns=['Unnamed: 0'])
)
regular_matches_df.head()

Unnamed: 0,team,type,opponent,w_l,min,pts,fgm,fga,threepm,threepa,...,fta,oreb,dreb,reb,ast,stl,blk,tov,pf,point_diff
0,MIN,vs,CHI,L,240,120,46,91,11,31,...,22,9,23,32,30,7,9,13,23,-4
1,CHI,@,MIN,W,240,124,44,83,10,21,...,33,16,32,48,22,9,3,23,22,4
2,PHI,vs,DET,W,240,118,46,88,5,25,...,23,10,32,42,25,13,6,11,23,12
3,DET,@,PHI,L,240,106,38,83,11,34,...,29,15,27,42,26,4,4,20,16,-12
4,MEM,vs,BOS,L,240,110,39,102,15,47,...,27,19,26,45,27,11,6,10,16,-29


In [16]:
# Load the playoff match table and discard the 'Unnamed: 0' column
post_matches_df = (
    pd.read_csv('Data/NBA_Data_Playoffs_2022_plus.csv')
    .drop(columns=['Unnamed: 0'])
)
# post_matches_df.head()

# Create Engine and Send Tables to SQL

In [17]:
# Connection details in the form "user:password@host:port/database".
# Set the NBA_DB_CONNECTION environment variable to supply real credentials
# without editing (or committing) them in the notebook; the fallback is the
# local development database this notebook was written against.
rds_connection_string = os.environ.get(
    "NBA_DB_CONNECTION",
    "postgres:postgres@localhost:5432/group_2_project_4",
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [18]:
# Inspect the database behind `engine` and list the tables it currently
# holds (this cell runs before any of the to_sql loads below)
insp = inspect(engine)
insp.get_table_names()

['matches']

In [19]:
# Load the team name/abbreviation table. 'replace' rebuilds the table on
# every run; with 'append', re-running the notebook would insert all rows
# again and leave duplicates in the database.
name_abbv.to_sql(name='teams', con=engine, if_exists='replace', index=False)

30

In [20]:
# Read the table back from the database to confirm the load
pd.read_sql_query(sql='select * from teams', con=engine)

Unnamed: 0,team_name,abbreviation
0,Minnesota Timberwolves,MIN
1,Memphis Grizzlies,MEM
2,Milwaukee Bucks,MIL
3,Charlotte Hornets,CHA
4,Phoenix Suns,PHX
5,Atlanta Hawks,ATL
6,Utah Jazz,UTA
7,San Antonio Spurs,SAS
8,Brooklyn Nets,BKN
9,Denver Nuggets,DEN


In [21]:
# Load the regular-season aggregate stats. 'replace' rebuilds the table on
# every run; with 'append', re-running the notebook would insert all rows
# again and leave duplicates in the database.
regular_season_df.to_sql(name='reg_season_stats', con=engine, if_exists='replace', index=False)

Unnamed: 0,team,mp,off_fg,off_fga,off_fg_percent,off_3p,off_3pa,off_3p_percent,off_2p,off_2pa,...,ts_percent,off_efg_percent,off_tov_percent,off_orb_percent,off_ft_fga,def_efg_percent,def_tov_percent,def_drb_percent,def_ft_fga,attend_g
0,Minnesota Timberwolves,241.2,41.6,91.0,0.457,14.8,41.3,0.358,26.8,49.7,...,0.573,0.539,12.4,24.4,0.198,0.535,14.2,74.9,0.227,16028
1,Memphis Grizzlies,241.2,43.5,94.4,0.461,11.5,32.7,0.353,32.0,61.7,...,0.553,0.522,11.2,30.0,0.18,0.523,13.3,77.8,0.195,15775
2,Milwaukee Bucks,240.9,41.8,89.4,0.468,14.1,38.4,0.366,27.8,51.0,...,0.58,0.546,11.9,23.0,0.199,0.536,11.6,78.6,0.165,17453
3,Charlotte Hornets,242.4,42.8,91.4,0.468,13.9,38.2,0.365,28.8,53.3,...,0.572,0.544,11.6,23.3,0.173,0.544,13.1,74.8,0.187,17092
4,Phoenix Suns,240.6,43.7,90.1,0.485,11.6,31.9,0.364,32.1,58.2,...,0.581,0.549,11.6,22.3,0.176,0.51,13.0,77.1,0.195,16175
5,Atlanta Hawks,240.3,41.5,88.3,0.47,12.9,34.4,0.374,28.6,53.9,...,0.581,0.543,10.8,23.0,0.205,0.543,11.5,76.9,0.177,16408
6,Utah Jazz,240.6,40.6,86.2,0.471,14.5,40.3,0.36,26.0,45.8,...,0.589,0.555,12.7,25.4,0.208,0.521,10.9,78.3,0.164,18306
7,San Antonio Spurs,241.5,43.2,92.7,0.467,11.3,32.0,0.352,32.0,60.7,...,0.556,0.527,11.1,23.7,0.166,0.532,12.1,75.1,0.176,15014
8,Brooklyn Nets,240.9,42.0,88.4,0.475,11.5,31.7,0.361,30.5,56.7,...,0.576,0.54,12.5,23.9,0.198,0.521,11.7,75.1,0.201,17355
9,Denver Nuggets,241.5,41.7,86.3,0.483,12.7,35.9,0.353,29.0,50.4,...,0.59,0.556,13.2,21.9,0.194,0.537,11.7,78.3,0.188,16958


In [22]:
# Read the first rows back from the database to confirm the load
pd.read_sql_query(sql='select * from reg_season_stats', con=engine).head()

Unnamed: 0,team,mp,off_fg,off_fga,off_fg_percent,off_3p,off_3pa,off_3p_percent,off_2p,off_2pa,...,ts_percent,off_efg_percent,off_tov_percent,off_orb_percent,off_ft_fga,def_efg_percent,def_tov_percent,def_drb_percent,def_ft_fga,attend_g
0,Minnesota Timberwolves,241.2,41.6,91.0,0.457,14.8,41.3,0.358,26.8,49.7,...,0.573,0.539,12.4,24.4,0.198,0.535,14.2,74.9,0.227,16028
1,Memphis Grizzlies,241.2,43.5,94.4,0.461,11.5,32.7,0.353,32.0,61.7,...,0.553,0.522,11.2,30.0,0.18,0.523,13.3,77.8,0.195,15775
2,Milwaukee Bucks,240.9,41.8,89.4,0.468,14.1,38.4,0.366,27.8,51.0,...,0.58,0.546,11.9,23.0,0.199,0.536,11.6,78.6,0.165,17453
3,Charlotte Hornets,242.4,42.8,91.4,0.468,13.9,38.2,0.365,28.8,53.3,...,0.572,0.544,11.6,23.3,0.173,0.544,13.1,74.8,0.187,17092
4,Phoenix Suns,240.6,43.7,90.1,0.485,11.6,31.9,0.364,32.1,58.2,...,0.581,0.549,11.6,22.3,0.176,0.51,13.0,77.1,0.195,16175


In [23]:
# Load the post-season aggregate stats. 'replace' rebuilds the table on
# every run; with 'append', re-running the notebook would insert all rows
# again and leave duplicates in the database.
post_season_df.to_sql(name='post_season_stats', con=engine, if_exists='replace', index=False)

16

In [24]:
# Read the first rows back from the database to confirm the load
pd.read_sql_query(sql='select * from post_season_stats', con=engine).head()

Unnamed: 0,team,mp,off_fg,off_fga,off_fg_percent,off_3p,off_3pa,off_3p_percent,off_2p,off_2pa,...,three_par,ts_percent,off_efg_percent,off_tov_percent,off_orb_percent,off_ft_fga,def_efg_percent,def_tov_percent,def_drb_percent,def_ft_fga
0,Golden State Warriors,240.0,42.1,85.9,0.49,13.8,36.2,0.382,28.3,49.7,...,0.421,0.599,0.571,13.4,24.2,0.187,0.529,11.6,77.8,0.211
1,Memphis Grizzlies,240.0,39.8,91.3,0.435,13.1,35.8,0.365,26.7,55.5,...,0.392,0.545,0.507,11.9,25.8,0.218,0.53,15.6,77.7,0.211
2,Denver Nuggets,240.0,39.4,82.8,0.476,11.2,31.4,0.357,28.2,51.4,...,0.379,0.586,0.543,14.9,28.0,0.242,0.608,12.2,81.4,0.23
3,New Orleans Pelicans,240.0,39.0,84.3,0.462,9.3,26.3,0.354,29.7,58.0,...,0.312,0.566,0.518,13.0,35.0,0.267,0.575,10.7,80.3,0.193
4,Minnesota Timberwolves,240.0,36.3,82.0,0.443,13.8,35.7,0.388,22.5,46.3,...,0.435,0.579,0.527,15.8,17.1,0.276,0.519,13.6,72.5,0.268


In [25]:
# Load the regular-season match rows. 'replace' rebuilds the table on every
# run; with 'append', re-running the notebook would insert all rows again
# and leave duplicates in the database.
regular_matches_df.to_sql(name='reg_season_matches', con=engine, if_exists='replace', index=False)

460

In [26]:
# Read the first rows back from the database to confirm the load
pd.read_sql_query(sql='select * from reg_season_matches', con=engine).head()

Unnamed: 0,team,type,opponent,w_l,min,pts,fgm,fga,threepm,threepa,...,fta,oreb,dreb,reb,ast,stl,blk,tov,pf,point_diff
0,MIN,vs,CHI,L,240,120,46,91,11,31,...,22,9,23,32,30,7,9,13,23,-4
1,CHI,@,MIN,W,240,124,44,83,10,21,...,33,16,32,48,22,9,3,23,22,4
2,PHI,vs,DET,W,240,118,46,88,5,25,...,23,10,32,42,25,13,6,11,23,12
3,DET,@,PHI,L,240,106,38,83,11,34,...,29,15,27,42,26,4,4,20,16,-12
4,MEM,vs,BOS,L,240,110,39,102,15,47,...,27,19,26,45,27,11,6,10,16,-29


In [27]:
# Load the playoff match rows. 'replace' rebuilds the table on every run;
# with 'append', re-running the notebook would insert all rows again and
# leave duplicates in the database.
post_matches_df.to_sql(name='post_season_matches', con=engine, if_exists='replace', index=False)

178

In [28]:
# Read the first rows back from the database to confirm the load
pd.read_sql_query(sql='select * from post_season_matches', con=engine).head()

Unnamed: 0,team,type,opponent,w_l,min,pts,fgm,fga,threepm,threepa,...,fta,oreb,dreb,reb,ast,stl,blk,tov,pf,point_diff
0,GSW,vs,BOS,W,240,107,39,86,15,37,...,20,6,36,42,25,15,2,12,17,19
1,BOS,@,GSW,L,240,88,30,80,15,37,...,17,6,37,43,24,5,7,19,18,-19
2,GSW,vs,BOS,L,240,108,39,88,19,45,...,15,12,27,39,24,8,6,14,16,-12
3,BOS,@,GSW,W,240,120,43,85,21,41,...,16,7,32,39,33,7,6,13,13,12
4,MIA,vs,BOS,L,240,96,37,88,6,30,...,24,7,37,44,16,4,5,10,18,-4
