In [1]:
import pandas as pd

In [2]:
# Player df
# Raw player-level table; its TEAM / Season / AGE columns are used further down.
playerdf = pd.read_csv("./data/playerTradStats.csv")

# Tab-separated lookup table. Its "Acronym" column is relabelled "TEAM" so it
# can be merged against frames that carry team acronyms under that name.
acronyms = pd.read_csv("./data/abbreviations.csv", sep="\t")
acronyms = acronyms.rename(columns={"Acronym": "TEAM"})

# Legacy acronym -> current acronym.
name_dict = {
    "CHH": "CHA",
    "NOH": "NOP",
    "NJN": "BKN",
    "NOK": "NOP",
    "SEA": "OKC",
    "VAN": "MEM",
}
def rename_teams(old_name, dict):
    """Translate a legacy team label into its current label.

    Parameters
    ----------
    old_name : hashable
        The team label to look up.
    dict : mapping
        Alias table mapping legacy labels to current labels. The parameter
        name shadows the builtin; it is kept unchanged so existing callers
        (positional or keyword) continue to work.

    Returns
    -------
    The value stored under ``old_name`` in ``dict`` when it is a key there,
    otherwise ``old_name`` itself, unchanged.
    """
    # Look the label up in the mapping that was passed in. The previous body
    # read the module-level ``name_dict`` and silently ignored this argument,
    # so the result depended on whichever cell last rebound that global.
    aliases = dict
    return aliases.get(old_name, old_name)

# Bring legacy acronyms in the player table up to date.
playerdf["TEAM"] = playerdf["TEAM"].apply(rename_teams, args=(name_dict,))

# NBA statistics df
# Three team-level tables, each indexed by (TEAM, Season).
teamAdvdf = (
    pd.read_csv("./data/teamadvancedStats.csv")
    .set_index(["TEAM", "Season"])
)
teamOppdf = (
    pd.read_csv("./data/teamOpponentStats.csv")
    .set_index(["TEAM", "Season"])
)
teamTraddf = (
    pd.read_csv("./data/teamTradStats.csv")
    .set_index(["TEAM", "Season"])
)

# Age df
# Mean AGE of the player rows for every (Season, TEAM) pair.
teamAgedf = playerdf.groupby(["Season", "TEAM"])[["AGE"]].mean()

# Swap the acronym for the "Team" column of the lookup table: merge on the
# columns the two frames share, drop the acronym, then relabel and re-index.
teamAgedf = (
    teamAgedf.reset_index()
    .merge(acronyms)
    .drop(columns="TEAM")
    .rename(columns={"Team": "TEAM", "AGE": "AVG_PLAYER_AGE"})
    .set_index(["TEAM", "Season"])
)

# The player-level table is not referenced again in this cell.
del playerdf

In [3]:
# Awards variables
# Load the awards table, discard the player-level columns and align the team
# column name with the other frames.
teamAwardsdf = (
    pd.read_csv("./data/awards.csv")
    .drop(["Player", "Position"], axis=1)
    .rename({"Team": "TEAM"}, axis=1)
    .sort_values(["TEAM", "Season"])
)

# Keep the part of the season label before the first "-" and make it numeric.
teamAwardsdf["Season"] = pd.to_numeric(teamAwardsdf["Season"].str.split("-").str[0])

# .copy() so the column assignments below write to an independent frame
# instead of a filtered slice (avoids SettingWithCopyWarning).
teamAwardsdf = teamAwardsdf[teamAwardsdf["Season"].between(1996, 2022)].copy()

# Legacy / misspelled team names -> current full team names.
name_dict = {
    "Charlotte Bobcats": "Charlotte Hornets",
    "LA Clippers": "Los Angeles Clippers",
    "New Jersey Nets": "Brooklyn Nets",
    "New Orleans Hornets": "New Orleans Pelicans",
    "New Orleans/Oklahoma City Hornets": "New Orleans Pelicans",
    "Seattle SuperSonics": "Oklahoma City Thunder",
    "Vancouver Grizzlies": "Memphis Grizzlies",
    "Washington Bullets": "Washington Wizards",
    "Dallas Mavericks)": "Dallas Mavericks",
    "MIami Heat": "Miami Heat",
    "Seattle SuperSonic": "Oklahoma City Thunder",
    "Atlanta Hawks/Philadelphia 76ers": "Atlanta Hawks",
}

# Strip surrounding whitespace BEFORE the alias lookup. Previously the strip
# ran afterwards, so a padded legacy name (e.g. "New Jersey Nets ") missed its
# dictionary key and survived un-renamed.
teamAwardsdf["TEAM"] = (
    teamAwardsdf["TEAM"].str.strip().apply(rename_teams, args=(name_dict,))
)

# Categorical TEAM + observed=False makes the groupby below emit a row (with
# count 0) for team/season combinations that have no award rows.
teamAwardsdf["TEAM"] = pd.Categorical(teamAwardsdf["TEAM"])

# Count award rows per (TEAM, Season). Selecting "Award" explicitly keeps the
# result a single, known column regardless of what else the CSV contains.
no_of_awards_won = (
    teamAwardsdf.groupby(["TEAM", "Season"], observed=False)["Award"]
    .count()
    .rename("N_Awards_Won")
    .to_frame()
)

# 1 when at least one award was counted, else 0. Computed from the named
# column rather than flattening the whole frame, which only lined up when the
# frame had exactly one column.
no_of_awards_won["awardDummy"] = (no_of_awards_won["N_Awards_Won"] != 0).astype(int)

In [4]:
# Coach df
# The first CSV column is discarded by position
# (presumably a saved index column; TODO confirm).
coaches = pd.read_csv("./data/coachdf.csv")
coaches = coaches.iloc[:, 1:]

# Legacy / alternative acronyms -> the acronyms used by the lookup table.
name_dict = {
    "CHH": "CHA",
    "NOH": "NOP",
    "NJN": "BKN",
    "NOK": "NOP",
    "SEA": "OKC",
    "VAN": "MEM",
    "PHO": "PHX",
    "WSB": "WAS",
    "BRK": "BKN",
    "CHO": "CHA",
}
coaches["TEAM"] = coaches["TEAM"].apply(rename_teams, args=(name_dict,))

# Replace the acronym with the lookup table's "Team" column and index the
# result by (TEAM, Season), like the team-level stat frames.
coaches = (
    coaches.merge(acronyms, on="TEAM")
    .drop(columns="TEAM")
    .rename(columns={"Team": "TEAM"})
    .set_index(["TEAM", "Season"])
)

In [8]:
# Join the NBA statistics df
# Columns carried forward from the combined team-level tables.
variables_of_interest = [
    "W", "FGA", "3PA", "FTA", "OREB", "DREB", "AST", "TOV", "STL", "BLK",
    "BLKA", "PF", "PFD", "PACE", "POSS",
    "OPP_FGA", "OPP_3PA", "OPP_FTA", "OPP_OREB", "OPP_DREB", "OPP_AST",
    "OPP_TOV", "OPP_STL", "OPP_BLK", "OPP_BLKA",
]

# Each join only contributes columns the accumulated frame does not have yet,
# so no duplicate column labels are produced.
advCols = teamAdvdf.columns.difference(teamTraddf.columns)
finaldf = teamTraddf.join(teamAdvdf[advCols])

oppCols = teamOppdf.columns.difference(finaldf.columns)
finaldf = finaldf.join(teamOppdf[oppCols])

# Column labels containing a newline get "_" in its place.
finaldf.columns = finaldf.columns.str.replace("\n", "_")

# Narrow to the selected variables and expose TEAM / Season as columns.
finaldf = finaldf[variables_of_interest].reset_index()

# Legacy full team names -> current full team names.
name_dict = {
    "Charlotte Bobcats": "Charlotte Hornets",
    "LA Clippers": "Los Angeles Clippers",
    "New Jersey Nets": "Brooklyn Nets",
    "New Orleans Hornets": "New Orleans Pelicans",
    "New Orleans/Oklahoma City Hornets": "New Orleans Pelicans",
    "Seattle SuperSonics": "Oklahoma City Thunder",
    "Vancouver Grizzlies": "Memphis Grizzlies",
    "Washington Bullets": "Washington Wizards",
}
finaldf["TEAM"] = pd.Categorical(
    finaldf["TEAM"].apply(rename_teams, args=(name_dict,))
)

# Numeric seasons, restricted to those before 2023, then back to a sorted
# (TEAM, Season) index so the remaining frames can be joined on it.
finaldf["Season"] = pd.to_numeric(finaldf["Season"])
finaldf = finaldf.loc[finaldf["Season"] < 2023]
finaldf = finaldf.set_index(["TEAM", "Season"]).sort_index()

# Attach average player age, award counts and coach variables.
finaldf = finaldf.join(teamAgedf).join(no_of_awards_won).join(coaches)

# POSS is handled as text here: commas are removed before converting, and
# anything that still fails to parse becomes NaN.
finaldf["POSS"] = pd.to_numeric(finaldf["POSS"].str.replace(",", ""), errors="coerce")

# Two-point attempts = all field-goal attempts minus three-point attempts.
finaldf["2PA"] = finaldf["FGA"] - finaldf["3PA"]
finaldf["OPP_2PA"] = finaldf["OPP_FGA"] - finaldf["OPP_3PA"]

# Missing P_G values are replaced with 0.
finaldf["P_G"] = finaldf["P_G"].fillna(0)

finaldf.to_csv("./data/finaldf.csv")

In [609]:
# Display the column labels of the assembled finaldf table.
finaldf.columns

Index(['W', 'FGA', '3PA', 'FTA', 'OREB', 'DREB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PACE', 'POSS', 'OPP_FGA', 'OPP_3PA', 'OPP_FTA',
       'OPP_OREB', 'OPP_DREB', 'OPP_AST', 'OPP_TOV', 'OPP_STL', 'OPP_BLK',
       'OPP_BLKA', 'AVG_PLAYER_AGE', 'N_Awards_Won', 'awardDummy', 'Coach',
       'N_Seasons_TEAM', 'N_Seasons_Overall', 'RS_G', 'RS_W_Perc', 'P_G',
       'P_W_Perc', '2PA', 'OPP_2PA'],
      dtype='object')

In [612]:
import linearmodels as lm

# Regressors for the panel model of W.
exog = [
    '3PA', '2PA', 'FTA', 'OREB', 'DREB', 'AST', 'TOV', 'STL', 'BLK',
    'BLKA', 'PF', 'PFD', 'PACE', 'POSS',
    'OPP_2PA', 'OPP_3PA', 'OPP_FTA', 'OPP_OREB', 'OPP_DREB', 'OPP_AST',
    'OPP_TOV', 'OPP_STL', 'OPP_BLK', 'OPP_BLKA',
    'AVG_PLAYER_AGE',
    'N_Seasons_TEAM', 'N_Seasons_Overall', 'RS_G', 'RS_W_Perc', 'P_G',
    'P_W_Perc',
]

# Panel OLS of W on the regressors above, with both entity and time effects
# enabled. finaldf is indexed by (TEAM, Season).
# NOTE(review): in the summary shown below this cell the BLK standard error
# (3.88) is orders of magnitude larger than the other visible regressors;
# this may indicate near-collinearity, e.g. between BLK and OPP_BLKA. Confirm.
lm.PanelOLS(
    finaldf["W"],
    finaldf[exog],
    entity_effects=True,
    time_effects=True,
).fit()

  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  out = self._frame.groupby(level=level).count()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")


0,1,2,3
Dep. Variable:,W,R-squared:,0.9176
Estimator:,PanelOLS,R-squared (Between):,-0.1360
No. Observations:,913,R-squared (Within):,0.8929
Date:,"Tue, Mar 19 2024",R-squared (Overall):,-0.0561
Time:,23:38:03,Log-likelihood,-2387.4
Cov. Estimator:,Unadjusted,,
,,F-statistic:,296.91
Entities:,30,P-value,0.0000
Avg Obs:,30.433,Distribution:,"F(31,826)"
Min Obs:,22.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
3PA,0.0017,0.0054,0.3109,0.7560,-0.0089,0.0123
2PA,-0.0074,0.0054,-1.3746,0.1696,-0.0181,0.0032
FTA,0.0013,0.0027,0.4859,0.6271,-0.0041,0.0067
OREB,-0.0005,0.0063,-0.0841,0.9330,-0.0129,0.0118
DREB,0.0524,0.0024,21.814,0.0000,0.0477,0.0571
AST,0.0050,0.0013,3.9095,0.0001,0.0025,0.0075
TOV,-0.0662,0.0061,-10.904,0.0000,-0.0782,-0.0543
STL,0.0053,0.0044,1.1950,0.2324,-0.0034,0.0140
BLK,0.5427,3.8810,0.1398,0.8888,-7.0750,8.1605
