In [148]:
import pandas as pd

In [149]:
# Player df: per-player traditional stats (must provide TEAM, Season and AGE,
# which are the only columns this notebook reads from it).
playerdf = pd.read_csv("./data/playerTradStats.csv")

# Tab-separated abbreviation lookup. Its "Acronym" column is renamed to "TEAM"
# so it lines up with the TEAM code columns of the other tables when merging.
acronyms = pd.read_csv("./data/abbreviations.csv", sep="\t")
acronyms = acronyms.rename(columns={"Acronym": "TEAM"})

# Team codes found in the player data that are rewritten to another code
# before merging (left: code in the data, right: code to use instead).
name_dict = {
    "CHH": "CHA",
    "NOH": "NOP",
    "NJN": "BKN",
    "NOK": "NOP",
    "SEA": "OKC",
    "VAN": "MEM",
}
def rename_teams(old_name, dict):
    """Translate a team identifier through a rename mapping.

    Parameters
    ----------
    old_name : hashable
        Team identifier as it appears in the data.
    dict : mapping
        Lookup from identifiers that should be replaced to their replacement.
        The parameter name shadows the builtin ``dict``; it is kept unchanged
        so that existing callers (positional or keyword) keep working.

    Returns
    -------
    The mapped identifier when ``old_name`` is a key of ``dict``, otherwise
    ``old_name`` itself, unchanged.
    """
    # Look the name up in the mapping that was passed in. The previous
    # implementation ignored this argument and read the module-level
    # `name_dict` instead, so it only worked when callers happened to pass
    # that same global.
    return dict.get(old_name, old_name)

# Rewrite the player table's team codes through name_dict so they match the
# codes in the abbreviation table.
playerdf["TEAM"] = playerdf["TEAM"].apply(lambda code: rename_teams(code, name_dict))

# NBA statistics df: three team-level tables, each keyed by (TEAM, Season).
teamAdvdf = pd.read_csv("./data/teamadvancedStats.csv").set_index(["TEAM", "Season"])
teamOppdf = pd.read_csv("./data/teamOpponentStats.csv").set_index(["TEAM", "Season"])
teamTraddf = pd.read_csv("./data/teamTradStats.csv").set_index(["TEAM", "Season"])

# Age df: unweighted mean of AGE over the player rows of each (Season, TEAM).
teamAgedf = playerdf.groupby(["Season", "TEAM"])[["AGE"]].mean()

# Swap the team code for the "Team" column of the abbreviation table and
# re-key by (TEAM, Season) like the team-level tables above.
teamAgedf = (
    pd.merge(teamAgedf.reset_index(), acronyms)
    .drop(columns="TEAM")
    .rename(columns={"Team": "TEAM", "AGE": "AVG_PLAYER_AGE"})
    .set_index(["TEAM", "Season"])
)

# The player-level table is not needed once the age aggregate exists.
del playerdf

In [150]:
# Awards variables
teamAwardsdf = (
    pd.read_csv("./data/awards.csv")
    .drop(["Player", "Position"], axis=1)
    .rename({"Team": "TEAM"}, axis=1)
    .sort_values(["TEAM", "Season"])
)

# Season labels are split on "-" and only the first part is kept, then parsed
# as a number (presumably the starting year of a "YYYY-YY" label — confirm
# against awards.csv).
teamAwardsdf["Season"] = teamAwardsdf.Season.str.split("-").str[0]
teamAwardsdf.Season = pd.to_numeric(teamAwardsdf.Season)
teamAwardsdf = teamAwardsdf[teamAwardsdf.Season.between(1996, 2022)]

# Alternate spellings / typos of team names found in the awards file, mapped
# to the team name used in the rest of the notebook.
name_dict = {
    "Charlotte Bobcats": "Charlotte Hornets",
    "LA Clippers": "Los Angeles Clippers",
    "New Jersey Nets": "Brooklyn Nets",
    "New Orleans Hornets": "New Orleans Pelicans",
    "New Orleans/Oklahoma City Hornets": "New Orleans Pelicans",
    "Seattle SuperSonics": "Oklahoma City Thunder",
    "Vancouver Grizzlies": "Memphis Grizzlies",
    "Washington Bullets": "Washington Wizards",
    "Dallas Mavericks)": "Dallas Mavericks",
    "MIami Heat": "Miami Heat",
    "Seattle SuperSonic": "Oklahoma City Thunder",
    "Atlanta Hawks/Philadelphia 76ers": "Atlanta Hawks",
}

# Strip surrounding whitespace BEFORE the lookup: the keys above carry no
# padding, so a padded name would otherwise slip past the rename and end up
# as a separate (unmapped) team after stripping.
teamAwardsdf.TEAM = teamAwardsdf.TEAM.str.strip().apply(lambda x: rename_teams(x, name_dict))
teamAwardsdf.TEAM = pd.Categorical(teamAwardsdf.TEAM)

# Count award rows per (TEAM, Season). TEAM is categorical and
# observed=False, so combinations without any award row are kept with a
# count of 0 instead of being dropped.
no_of_awards_won = (
    teamAwardsdf.groupby(["TEAM", "Season"], observed=False)
    .count()
    .rename({"Award": "N_Awards_Won"}, axis=1)
)

# 1 if the team won at least one award that season, else 0. Built from the
# count column itself rather than from the flattened values of the whole
# frame, which only has the right length when the frame has a single column.
no_of_awards_won["awardDummy"] = (no_of_awards_won["N_Awards_Won"] != 0).astype(int)

In [151]:
# Coach df: the first column of the file is discarded (presumably a saved
# row index — confirm against coachdf.csv).
coaches = pd.read_csv("./data/coachdf.csv").iloc[:, 1:]

# Team codes used in the coach file that are rewritten before merging with
# the abbreviation table.
name_dict = {
    "CHH": "CHA",
    "NOH": "NOP",
    "NJN": "BKN",
    "NOK": "NOP",
    "SEA": "OKC",
    "VAN": "MEM",
    "PHO": "PHX",
    "WSB": "WAS",
    "BRK": "BKN",
    "CHO": "CHA",
}
coaches["TEAM"] = coaches["TEAM"].apply(lambda code: rename_teams(code, name_dict))

# Replace the team code by the "Team" column of the abbreviation table and
# key the frame by (TEAM, Season).
coaches = (
    pd.merge(coaches, acronyms, on="TEAM")
    .drop("TEAM", axis=1)
    .rename({"Team": "TEAM"}, axis=1)
    .set_index(["TEAM", "Season"])
)

# A team-season can have several coach rows; keep only the row(s) whose
# RS_G_Current equals the maximum for that team-season.
coachMaxGamesdf = (
    coaches.reset_index()
    .groupby(["TEAM", "Season"])["RS_G_Current"]
    .max()
    .reset_index()
)
coaches = pd.merge(
    coaches,
    coachMaxGamesdf,
    on=["TEAM", "Season", "RS_G_Current"],
).set_index(["TEAM", "Season"])

# Ratio of the coach's seasons with this team to their seasons overall.
coaches["Perc_Seasons_TEAM"] = coaches["N_Seasons_TEAM"] / coaches["N_Seasons_Overall"]

coaches = coaches[["Coach", "Perc_Seasons_TEAM", "N_Seasons_Overall", "RS_W_Perc_Overall", "P_W_Perc"]]

# Prefix every column except the first ("Coach") with "Coach_".
coaches = coaches.rename(columns={col: f"Coach_{col}" for col in coaches.columns[1:]})

In [152]:
# Join the NBA statistics df
variables_of_interest = [
    "W",
    "FGA", "3PA", "FGM", "AST", "TOV", "STL", "PF", "PFD",
    "OPP_FGA", "OPP_3PA", "OPP_FGM", "OPP_AST", "OPP_TOV", "OPP_STL",
]

# Start from the traditional table and add, from the advanced and opponent
# tables in turn, only the columns that are not already present.
advCols = teamAdvdf.columns.difference(teamTraddf.columns)
finaldf = teamTraddf.join(teamAdvdf[advCols])
oppCols = teamOppdf.columns.difference(finaldf.columns)
finaldf = finaldf.join(teamOppdf[oppCols])

# Newlines inside column labels become underscores before the columns of
# interest are selected by name.
finaldf.columns = finaldf.columns.str.replace("\n", "_")
finaldf = finaldf[variables_of_interest].reset_index()

# Team names in the statistics tables that are rewritten to the name used in
# the other tables.
name_dict = {
    "Charlotte Bobcats": "Charlotte Hornets",
    "LA Clippers": "Los Angeles Clippers",
    "New Jersey Nets": "Brooklyn Nets",
    "New Orleans Hornets": "New Orleans Pelicans",
    "New Orleans/Oklahoma City Hornets": "New Orleans Pelicans",
    "Seattle SuperSonics": "Oklahoma City Thunder",
    "Vancouver Grizzlies": "Memphis Grizzlies",
    "Washington Bullets": "Washington Wizards",
}
finaldf["TEAM"] = finaldf["TEAM"].apply(lambda team: rename_teams(team, name_dict))

# Categorical team, numeric season, seasons from 2023 on removed, then the
# frame is keyed and sorted by (TEAM, Season).
finaldf["TEAM"] = pd.Categorical(finaldf["TEAM"])
finaldf["Season"] = pd.to_numeric(finaldf["Season"])
finaldf = finaldf[finaldf["Season"] < 2023]
finaldf = finaldf.set_index(["TEAM", "Season"]).sort_index()

# Attach the age, awards and coach tables on the (TEAM, Season) index.
finaldf = finaldf.join(teamAgedf).join(no_of_awards_won).join(coaches)

# Two-point attempts are the field-goal attempts that were not three-point
# attempts, for the team and for its opponents.
finaldf["2PA"] = finaldf["FGA"] - finaldf["3PA"]
finaldf["OPP_2PA"] = finaldf["OPP_FGA"] - finaldf["OPP_3PA"]

Dependent: W

Playstyle:
* 3PA/FGA
* 2PA/FGA
* AST/FGM
* STL/OPP_TOV
* PF - PFD

Controls:
* OPP_3PA/OPP_FGA
* OPP_2PA/OPP_FGA
* OPP_AST/OPP_FGM
* OPP_STL/TOV
* AVG_PLAYER_AGE
* L(N_Awards_Won)
* Coach
* Coach_N_Seasons_TEAM/Coach_N_Seasons_Overall
* Coach_N_Seasons_Overall
* L(RS_W_Perc_Overall)
* L(P_W_Perc)

In [153]:
# Playstyle ratios: new column -> (numerator column, denominator column).
playstyle_ratios = {
    "Perc_3PA": ("3PA", "FGA"),
    "Perc_2PA": ("2PA", "FGA"),
    "Perc_AST": ("AST", "FGM"),
    "Perc_STL": ("STL", "OPP_TOV"),
}
for ratio_name, (numerator, denominator) in playstyle_ratios.items():
    finaldf[ratio_name] = finaldf[numerator] / finaldf[denominator]

# Fouls committed minus fouls drawn.
finaldf["PFminusPFD"] = finaldf["PF"] - finaldf["PFD"]

# The same ratios computed from the opponents' side, used as controls.
opponent_ratios = {
    "OPP_Perc_3PA": ("OPP_3PA", "OPP_FGA"),
    "OPP_Perc_2PA": ("OPP_2PA", "OPP_FGA"),
    "OPP_Perc_AST": ("OPP_AST", "OPP_FGM"),
    "OPP_Perc_STL": ("OPP_STL", "TOV"),
}
for ratio_name, (numerator, denominator) in opponent_ratios.items():
    finaldf[ratio_name] = finaldf[numerator] / finaldf[denominator]

# One-row lags within each team: new column -> column being lagged. Each value
# is taken from the previous row of the same TEAM (rows are in the frame's
# existing order), so the first row of every team is NaN.
lagged_columns = {
    "L1_N_Awards_Won": "N_Awards_Won",
    "L1_Coach_RS_W_Perc_Overall": "Coach_RS_W_Perc_Overall",
    "L1_Coach_P_W_Perc": "Coach_P_W_Perc",
}
for lag_name, source_name in lagged_columns.items():
    finaldf[lag_name] = finaldf.groupby("TEAM", observed=False)[source_name].shift()

In [154]:
# Columns kept for modelling, in output order: the dependent variable first,
# then playstyle measures, opponent controls, lagged controls and the
# remaining team/coach controls.
model_columns = [
    "W",
    "Perc_3PA",
    "Perc_2PA",
    "Perc_AST",
    "Perc_STL",
    "PFminusPFD",
    "OPP_Perc_3PA",
    "OPP_Perc_2PA",
    "OPP_Perc_AST",
    "OPP_Perc_STL",
    "L1_N_Awards_Won",
    "L1_Coach_RS_W_Perc_Overall",
    "L1_Coach_P_W_Perc",
    "AVG_PLAYER_AGE",
    "Coach",
    "Coach_N_Seasons_Overall",
    "Coach_Perc_Seasons_TEAM",
]
finaldf = finaldf[model_columns]

# Persist the modelling table; the (TEAM, Season) index is written as the
# leading columns of the file.
finaldf.to_csv("./data/finaldf.csv")

In [155]:
import linearmodels as lm

# Perc_2PA is (FGA - 3PA) / FGA, i.e. exactly 1 - Perc_3PA, and OPP_Perc_2PA is
# exactly 1 - OPP_Perc_3PA. Passing all four shares as regressors therefore
# makes them linearly dependent
# (Perc_3PA + Perc_2PA - OPP_Perc_3PA - OPP_Perc_2PA == 0 on every row), which
# is what triggers "exog does not have full column rank".
# Dropping the two 2PA shares and adding an explicit constant spans the same
# column space as the original specification, without the redundancy.
redundant_shares = ["Perc_2PA", "OPP_Perc_2PA"]

# Drop incomplete rows once so the dependent variable and the regressors are
# guaranteed to come from the same set of observations.
model_df = finaldf.dropna()
exog = model_df.drop(columns=["W"] + redundant_shares).assign(const=1.0)

lm.PanelOLS(model_df["W"], exog).fit()

ValueError: exog does not have full column rank. If you wish to proceed with model estimation irrespective of the numerical accuracy of coefficient estimates, you can set check_rank=False.