In [306]:
import pandas as pd
pd.set_option('display.max_columns', None)  # show all columns

games = pd.read_csv("../data/raw/all_games.csv")
games = games[(games["SEASON_ID"] >= 22010) & (games["SEASON_ID"] <= 22030)]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22025,1610612737,ATL,Atlanta Hawks,22501217,2025-12-14,ATL vs. PHI,W,240,120,43,99,0.434,17,33.0,0.515,17,21,0.81,18.0,39.0,57.0,33,8.0,6,16,23,3.0
1,22025,1610612737,ATL,Atlanta Hawks,22501210,2025-12-12,ATL @ DET,L,241,115,39,85,0.459,11,36.0,0.306,26,31,0.839,13.0,24.0,37.0,28,9.0,8,20,18,-27.0
2,22025,1610612737,ATL,Atlanta Hawks,22500351,2025-12-06,ATL @ WAS,W,241,131,52,98,0.531,13,39.0,0.333,14,14,1.0,15.0,30.0,45.0,36,13.0,4,12,16,15.0
3,22025,1610612737,ATL,Atlanta Hawks,22500340,2025-12-05,ATL vs. DEN,L,241,133,49,95,0.516,20,46.0,0.435,15,16,0.938,11.0,34.0,45.0,35,8.0,2,12,25,-1.0
4,22025,1610612737,ATL,Atlanta Hawks,22500327,2025-12-03,ATL vs. LAC,L,241,92,34,85,0.4,13,39.0,0.333,11,14,0.786,7.0,31.0,38.0,20,9.0,1,16,17,-23.0


In [307]:
games["HOME_TEAM"] = games["MATCHUP"].str.contains("vs").astype(int)
games["POSS"] = 0.96 * (games["FGA"] - games["OREB"] + games["TOV"] + 0.44 * games["FTA"])
games["OFF_RTG"] = 100 * (games["PTS"] / games["POSS"])
games["TS_PCT"] = games["PTS"] / (2 * (games["FGA"] + 0.44 * games["FTA"]))
games["EFG_PCT"] = (games["FGM"] + 0.5 * games["FG3M"]) / games["FGA"]
games["A_TO_RATIO"] = games["AST"] / games["TOV"]
games["TO_PCT"] = games["TOV"] / (games["FGA"] + 0.44 * games["FTA"] + games["TOV"])
games["FT_RATE"] = games["FTA"] / games["FGA"]
games["WIN_FLAG"]  = (games["WL"] == "W").astype(int)

games["GAME_DATE"] = pd.to_datetime(games["GAME_DATE"])
games = games.sort_values(["TEAM_ID", "GAME_DATE"])

games["REST_DAYS"] = games.groupby("TEAM_ID")["GAME_DATE"].diff().dt.days
games["REST_DAYS"] = games["REST_DAYS"].fillna(7)
games["REST_DAYS"] = games["REST_DAYS"].clip(lower=0,upper=7)

games["B2B"] = (games["REST_DAYS"] <= 1).astype(int)

games["WIN_FLAG_SHIFTED"] = games.groupby("TEAM_ID")["WIN_FLAG"].shift(1)
games["WIN_STREAK"] = 0
for team_id, group in games.groupby("TEAM_ID"):
    streak = 0
    indices = group.index
    
    for idx in indices:
        past_win = games.loc[idx, "WIN_FLAG_SHIFTED"]
        if past_win == 1:
            streak += 1
        else:
            streak = 0
        games.loc[idx, "WIN_STREAK"] = streak

games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME_TEAM,POSS,OFF_RTG,TS_PCT,EFG_PCT,A_TO_RATIO,TO_PCT,FT_RATE,WIN_FLAG,REST_DAYS,B2B,WIN_FLAG_SHIFTED,WIN_STREAK
1390,22010,1610612737,ATL,Atlanta Hawks,21000008,2010-10-27,ATL @ MEM,W,240,119,42,80,0.525,8,16.0,0.5,27,31,0.871,9.0,35.0,44.0,20,7.0,9,15,20,15.0,0,95.6544,124.406196,0.635412,0.575,1.333333,0.138071,0.3875,1,7.0,0,,0
1389,22010,1610612737,ATL,Atlanta Hawks,21000021,2010-10-29,ATL @ PHI,W,241,104,36,70,0.514,6,17.0,0.353,26,31,0.839,10.0,36.0,46.0,24,3.0,8,20,26,3.0,0,89.8944,115.6913,0.621712,0.557143,1.2,0.192976,0.442857,1,2.0,0,1.0,1
1388,22010,1610612737,ATL,Atlanta Hawks,21000031,2010-10-30,ATL vs. WAS,W,239,99,34,81,0.42,5,19.0,0.263,26,30,0.867,13.0,30.0,43.0,19,4.0,4,16,22,4.0,1,93.312,106.095679,0.525478,0.450617,1.1875,0.145191,0.37037,1,1.0,1,1.0,2
1387,22010,1610612737,ATL,Atlanta Hawks,21000047,2010-11-02,ATL @ CLE,W,242,100,37,84,0.44,5,17.0,0.294,21,30,0.7,14.0,29.0,43.0,26,7.0,7,7,17,12.0,0,86.592,115.484109,0.514403,0.470238,3.714286,0.067179,0.357143,1,3.0,0,1.0,3
1386,22010,1610612737,ATL,Atlanta Hawks,21000054,2010-11-03,ATL vs. DET,W,240,94,36,67,0.537,3,14.0,0.214,19,23,0.826,3.0,32.0,35.0,25,3.0,3,13,13,9.0,1,83.6352,112.392868,0.60944,0.559701,1.923077,0.144252,0.343284,1,1.0,1,1.0,4


In [308]:
games.sort_values(["TEAM_ID", "GAME_DATE"], inplace=True, ascending=[True, True])
# choose which columns to compute rolling averages for
rolling_features = [
    "OFF_RTG",
    "EFG_PCT",
    "TS_PCT",
    "TO_PCT",
    "FT_RATE",
    "REB",
    "A_TO_RATIO",
    "WIN_FLAG",
    "BLK",
    "STL",
    "OREB",
    "PF",
    "POSS",
    "PLUS_MINUS"
]

# how many previous games to use
window = 10

# create rollingâ€‘window features
for col in rolling_features:
    new_col = f"{col}_L{window}"
    games[new_col] = (
        games.groupby("TEAM_ID")[col]
        .transform(lambda s: s.shift().rolling(window, min_periods=1).mean())
    )

print([f"{c}_L{window}" for c in rolling_features])
games.head()

['OFF_RTG_L10', 'EFG_PCT_L10', 'TS_PCT_L10', 'TO_PCT_L10', 'FT_RATE_L10', 'REB_L10', 'A_TO_RATIO_L10', 'WIN_FLAG_L10', 'BLK_L10', 'STL_L10', 'OREB_L10', 'PF_L10', 'POSS_L10', 'PLUS_MINUS_L10']


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME_TEAM,POSS,OFF_RTG,TS_PCT,EFG_PCT,A_TO_RATIO,TO_PCT,FT_RATE,WIN_FLAG,REST_DAYS,B2B,WIN_FLAG_SHIFTED,WIN_STREAK,OFF_RTG_L10,EFG_PCT_L10,TS_PCT_L10,TO_PCT_L10,FT_RATE_L10,REB_L10,A_TO_RATIO_L10,WIN_FLAG_L10,BLK_L10,STL_L10,OREB_L10,PF_L10,POSS_L10,PLUS_MINUS_L10
1390,22010,1610612737,ATL,Atlanta Hawks,21000008,2010-10-27,ATL @ MEM,W,240,119,42,80,0.525,8,16.0,0.5,27,31,0.871,9.0,35.0,44.0,20,7.0,9,15,20,15.0,0,95.6544,124.406196,0.635412,0.575,1.333333,0.138071,0.3875,1,7.0,0,,0,,,,,,,,,,,,,,
1389,22010,1610612737,ATL,Atlanta Hawks,21000021,2010-10-29,ATL @ PHI,W,241,104,36,70,0.514,6,17.0,0.353,26,31,0.839,10.0,36.0,46.0,24,3.0,8,20,26,3.0,0,89.8944,115.6913,0.621712,0.557143,1.2,0.192976,0.442857,1,2.0,0,1.0,1,124.406196,0.575,0.635412,0.138071,0.3875,44.0,1.333333,1.0,9.0,7.0,9.0,20.0,95.6544,15.0
1388,22010,1610612737,ATL,Atlanta Hawks,21000031,2010-10-30,ATL vs. WAS,W,239,99,34,81,0.42,5,19.0,0.263,26,30,0.867,13.0,30.0,43.0,19,4.0,4,16,22,4.0,1,93.312,106.095679,0.525478,0.450617,1.1875,0.145191,0.37037,1,1.0,1,1.0,2,120.048748,0.566071,0.628562,0.165523,0.415179,45.0,1.266667,1.0,8.5,5.0,9.5,23.0,92.7744,9.0
1387,22010,1610612737,ATL,Atlanta Hawks,21000047,2010-11-02,ATL @ CLE,W,242,100,37,84,0.44,5,17.0,0.294,21,30,0.7,14.0,29.0,43.0,26,7.0,7,7,17,12.0,0,86.592,115.484109,0.514403,0.470238,3.714286,0.067179,0.357143,1,3.0,0,1.0,3,115.397725,0.527587,0.594201,0.158746,0.400243,44.333333,1.240278,1.0,7.0,4.666667,10.666667,22.666667,92.9536,7.333333
1386,22010,1610612737,ATL,Atlanta Hawks,21000054,2010-11-03,ATL vs. DET,W,240,94,36,67,0.537,3,14.0,0.214,19,23,0.826,3.0,32.0,35.0,25,3.0,3,13,13,9.0,1,83.6352,112.392868,0.60944,0.559701,1.923077,0.144252,0.343284,1,1.0,1,1.0,4,115.419321,0.51325,0.574251,0.135854,0.389468,44.0,1.85878,1.0,7.0,5.25,11.5,21.25,91.3632,8.5


In [309]:
opp_df = games.copy()
opp_df["HOME_TEAM"] = 1 - opp_df["HOME_TEAM"]

merged = games.merge(opp_df, on="GAME_ID", suffixes=("", "_OPP"))
merged = merged[merged["TEAM_ID"] != merged["TEAM_ID_OPP"]]

merged["DEF_RTG"] = 100 * (merged["PTS_OPP"] / merged["POSS_OPP"])
merged["NET_RTG"] = merged["OFF_RTG"] - merged["DEF_RTG"]
merged["DEF_RTG_OPP"] = 100 * (merged["PTS"] / merged["POSS"])
merged["NET_RTG_OPP"] = merged["OFF_RTG_OPP"] - merged["DEF_RTG_OPP"]
#merged["PACE"]      = 48 * (merged["POSS"] + merged["POSS_OPP"]) / (2 * merged["MIN"])
#FIX PACE CALCULATION
merged["TS_DIFF"]   = merged["TS_PCT"] - merged["TS_PCT_OPP"]
merged.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME_TEAM,POSS,OFF_RTG,TS_PCT,EFG_PCT,A_TO_RATIO,TO_PCT,FT_RATE,WIN_FLAG,REST_DAYS,B2B,WIN_FLAG_SHIFTED,WIN_STREAK,OFF_RTG_L10,EFG_PCT_L10,TS_PCT_L10,TO_PCT_L10,FT_RATE_L10,REB_L10,A_TO_RATIO_L10,WIN_FLAG_L10,BLK_L10,STL_L10,OREB_L10,PF_L10,POSS_L10,PLUS_MINUS_L10,SEASON_ID_OPP,TEAM_ID_OPP,TEAM_ABBREVIATION_OPP,TEAM_NAME_OPP,GAME_DATE_OPP,MATCHUP_OPP,WL_OPP,MIN_OPP,PTS_OPP,FGM_OPP,FGA_OPP,FG_PCT_OPP,FG3M_OPP,FG3A_OPP,FG3_PCT_OPP,FTM_OPP,FTA_OPP,FT_PCT_OPP,OREB_OPP,DREB_OPP,REB_OPP,AST_OPP,STL_OPP,BLK_OPP,TOV_OPP,PF_OPP,PLUS_MINUS_OPP,HOME_TEAM_OPP,POSS_OPP,OFF_RTG_OPP,TS_PCT_OPP,EFG_PCT_OPP,A_TO_RATIO_OPP,TO_PCT_OPP,FT_RATE_OPP,WIN_FLAG_OPP,REST_DAYS_OPP,B2B_OPP,WIN_FLAG_SHIFTED_OPP,WIN_STREAK_OPP,OFF_RTG_L10_OPP,EFG_PCT_L10_OPP,TS_PCT_L10_OPP,TO_PCT_L10_OPP,FT_RATE_L10_OPP,REB_L10_OPP,A_TO_RATIO_L10_OPP,WIN_FLAG_L10_OPP,BLK_L10_OPP,STL_L10_OPP,OREB_L10_OPP,PF_L10_OPP,POSS_L10_OPP,PLUS_MINUS_L10_OPP,DEF_RTG,NET_RTG,DEF_RTG_OPP,NET_RTG_OPP,TS_DIFF
1,22010,1610612737,ATL,Atlanta Hawks,21000008,2010-10-27,ATL @ MEM,W,240,119,42,80,0.525,8,16.0,0.5,27,31,0.871,9.0,35.0,44.0,20,7.0,9,15,20,15.0,0,95.6544,124.406196,0.635412,0.575,1.333333,0.138071,0.3875,1,7.0,0,,0,,,,,,,,,,,,,,,22010,1610612763,MEM,Memphis Grizzlies,2010-10-27,MEM vs. ATL,L,240,104,40,86,0.465,4,9.0,0.444,20,25,0.8,9.0,30.0,39.0,13,7.0,4,14,26,-15.0,0,97.92,106.20915,0.536082,0.488372,0.928571,0.126126,0.290698,0,7.0,0,,0,,,,,,,,,,,,,,,106.20915,18.197045,124.406196,-18.197045,0.09933
3,22010,1610612737,ATL,Atlanta Hawks,21000021,2010-10-29,ATL @ PHI,W,241,104,36,70,0.514,6,17.0,0.353,26,31,0.839,10.0,36.0,46.0,24,3.0,8,20,26,3.0,0,89.8944,115.6913,0.621712,0.557143,1.2,0.192976,0.442857,1,2.0,0,1.0,1,124.406196,0.575,0.635412,0.138071,0.3875,44.0,1.333333,1.0,9.0,7.0,9.0,20.0,95.6544,15.0,22010,1610612755,PHI,Philadelphia 76ers,2010-10-29,PHI vs. ATL,L,241,101,35,85,0.412,5,11.0,0.455,26,33,0.788,12.0,24.0,36.0,24,9.0,6,8,21,-3.0,0,91.6992,110.142728,0.507436,0.441176,3.0,0.074405,0.388235,0,2.0,0,0.0,0,96.573956,0.475904,0.495219,0.154083,0.13253,38.0,1.5,0.0,4.0,10.0,10.0,22.0,90.0864,-10.0,110.142728,5.548572,115.6913,-5.548572,0.114276
5,22010,1610612737,ATL,Atlanta Hawks,21000031,2010-10-30,ATL vs. WAS,W,239,99,34,81,0.42,5,19.0,0.263,26,30,0.867,13.0,30.0,43.0,19,4.0,4,16,22,4.0,1,93.312,106.095679,0.525478,0.450617,1.1875,0.145191,0.37037,1,1.0,1,1.0,2,120.048748,0.566071,0.628562,0.165523,0.415179,45.0,1.266667,1.0,8.5,5.0,9.5,23.0,92.7744,9.0,22010,1610612764,WAS,Washington Wizards,2010-10-30,WAS @ ATL,L,241,95,38,76,0.5,4,12.0,0.333,15,21,0.714,8.0,26.0,34.0,16,8.0,13,17,25,-4.0,1,90.4704,105.00672,0.55725,0.526316,0.941176,0.166275,0.276316,0,2.0,0,0.0,0,85.433136,0.397436,0.455044,0.12476,0.384615,25.0,1.307692,0.0,3.0,10.0,3.0,25.0,97.152,-29.0,105.00672,1.088959,106.095679,-1.088959,-0.031772
7,22010,1610612737,ATL,Atlanta Hawks,21000047,2010-11-02,ATL @ CLE,W,242,100,37,84,0.44,5,17.0,0.294,21,30,0.7,14.0,29.0,43.0,26,7.0,7,7,17,12.0,0,86.592,115.484109,0.514403,0.470238,3.714286,0.067179,0.357143,1,3.0,0,1.0,3,115.397725,0.527587,0.594201,0.158746,0.400243,44.333333,1.240278,1.0,7.0,4.666667,10.666667,22.666667,92.9536,7.333333,22010,1610612739,CLE,Cleveland Cavaliers,2010-11-02,CLE vs. ATL,L,239,88,32,74,0.432,5,17.0,0.294,19,26,0.731,10.0,34.0,44.0,15,2.0,4,14,22,-12.0,0,85.8624,102.489565,0.514981,0.466216,1.071429,0.140788,0.351351,0,3.0,0,0.0,0,100.567178,0.480335,0.515035,0.144711,0.246936,35.0,1.398689,0.333333,2.666667,7.333333,9.0,19.333333,92.928,-5.0,102.489565,12.994545,115.484109,-12.994545,-0.000578
9,22010,1610612737,ATL,Atlanta Hawks,21000054,2010-11-03,ATL vs. DET,W,240,94,36,67,0.537,3,14.0,0.214,19,23,0.826,3.0,32.0,35.0,25,3.0,3,13,13,9.0,1,83.6352,112.392868,0.60944,0.559701,1.923077,0.144252,0.343284,1,1.0,1,1.0,4,115.419321,0.51325,0.574251,0.135854,0.389468,44.0,1.85878,1.0,7.0,5.25,11.5,21.25,91.3632,8.5,22010,1610612765,DET,Detroit Pistons,2010-11-03,DET @ ATL,L,241,85,36,90,0.4,3,17.0,0.176,10,15,0.667,17.0,21.0,38.0,21,7.0,2,8,21,-9.0,1,84.096,101.074962,0.439959,0.416667,2.625,0.076482,0.166667,0,1.0,1,0.0,0,103.16268,0.470425,0.506566,0.131145,0.319995,40.5,1.507149,0.0,4.75,4.5,12.0,22.5,91.968,-9.25,101.074962,11.317906,112.392868,-11.317906,0.169481


In [310]:
merged.sort_values(["TEAM_ID", "GAME_DATE"], inplace=True, ascending=[True, True])
merged["DEF_RTG_L10"] = merged.groupby("TEAM_ID")["DEF_RTG"].transform(
    lambda s: s.shift().rolling(window).mean()
)
merged["DEF_RTG_L10_OPP"] = merged.groupby("TEAM_ID")["DEF_RTG_OPP"].transform(
    lambda s: s.shift().rolling(window).mean()
)
merged["NET_RTG_L10"] = merged["OFF_RTG_L10"] - merged["DEF_RTG_L10"]
merged["NET_RTG_L10_OPP"] = merged["OFF_RTG_L10_OPP"] - merged["DEF_RTG_L10_OPP"]
merged.sort_values(["TEAM_ID", "GAME_DATE"], inplace=True, ascending=[True, False])
merged.dropna(inplace=True)
merged

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME_TEAM,POSS,OFF_RTG,TS_PCT,EFG_PCT,A_TO_RATIO,TO_PCT,FT_RATE,WIN_FLAG,REST_DAYS,B2B,WIN_FLAG_SHIFTED,WIN_STREAK,OFF_RTG_L10,EFG_PCT_L10,TS_PCT_L10,TO_PCT_L10,FT_RATE_L10,REB_L10,A_TO_RATIO_L10,WIN_FLAG_L10,BLK_L10,STL_L10,OREB_L10,PF_L10,POSS_L10,PLUS_MINUS_L10,SEASON_ID_OPP,TEAM_ID_OPP,TEAM_ABBREVIATION_OPP,TEAM_NAME_OPP,GAME_DATE_OPP,MATCHUP_OPP,WL_OPP,MIN_OPP,PTS_OPP,FGM_OPP,FGA_OPP,FG_PCT_OPP,FG3M_OPP,FG3A_OPP,FG3_PCT_OPP,FTM_OPP,FTA_OPP,FT_PCT_OPP,OREB_OPP,DREB_OPP,REB_OPP,AST_OPP,STL_OPP,BLK_OPP,TOV_OPP,PF_OPP,PLUS_MINUS_OPP,HOME_TEAM_OPP,POSS_OPP,OFF_RTG_OPP,TS_PCT_OPP,EFG_PCT_OPP,A_TO_RATIO_OPP,TO_PCT_OPP,FT_RATE_OPP,WIN_FLAG_OPP,REST_DAYS_OPP,B2B_OPP,WIN_FLAG_SHIFTED_OPP,WIN_STREAK_OPP,OFF_RTG_L10_OPP,EFG_PCT_L10_OPP,TS_PCT_L10_OPP,TO_PCT_L10_OPP,FT_RATE_L10_OPP,REB_L10_OPP,A_TO_RATIO_L10_OPP,WIN_FLAG_L10_OPP,BLK_L10_OPP,STL_L10_OPP,OREB_L10_OPP,PF_L10_OPP,POSS_L10_OPP,PLUS_MINUS_L10_OPP,DEF_RTG,NET_RTG,DEF_RTG_OPP,NET_RTG_OPP,TS_DIFF,DEF_RTG_L10,DEF_RTG_L10_OPP,NET_RTG_L10,NET_RTG_L10_OPP
2431,22025,1610612737,ATL,Atlanta Hawks,22501217,2025-12-14,ATL vs. PHI,W,240,120,43,99,0.434,17,33.0,0.515,17,21,0.810,18.0,39.0,57.0,33,8.0,6,16,23,3.0,1,101.9904,117.658133,0.554324,0.520202,2.062500,0.128783,0.212121,1,2.0,0,0.0,0,118.065223,0.552028,0.582113,0.118280,0.219959,41.8,2.576264,0.5,4.8,9.1,10.7,19.3,100.20096,-2.1,22025,1610612755,PHI,Philadelphia 76ers,2025-12-14,PHI @ ATL,L,240,117,38,101,0.376,17,41.0,0.415,24,29,0.828,21.0,33.0,54.0,25,10.0,10,11,15,-3.0,1,99.6096,117.458558,0.514241,0.460396,2.272727,0.088169,0.287129,0,2.0,0,1.0,1,114.110054,0.512907,0.558838,0.117492,0.270578,46.8,2.040757,0.6,5.4,9.2,11.7,19.9,101.05728,0.3,117.458558,0.199574,117.658133,-0.199574,0.040083,119.908722,118.065223,-1.843499,-3.955170
2429,22025,1610612737,ATL,Atlanta Hawks,22501210,2025-12-12,ATL @ DET,L,241,115,39,85,0.459,11,36.0,0.306,26,31,0.839,13.0,24.0,37.0,28,9.0,8,20,18,-27.0,0,101.4144,113.396125,0.582928,0.523529,1.400000,0.168577,0.364706,0,6.0,0,1.0,1,119.503064,0.567491,0.591766,0.116139,0.198431,42.0,2.623764,0.5,4.6,8.9,10.0,19.9,99.92064,-0.3,22025,1610612765,DET,Detroit Pistons,2025-12-12,DET vs. ATL,W,240,142,53,91,0.582,12,29.0,0.414,24,29,0.828,10.0,30.0,40.0,34,16.0,6,12,22,27.0,0,101.5296,139.860691,0.684271,0.648352,2.833333,0.103663,0.318681,1,6.0,0,1.0,2,119.822394,0.555644,0.597163,0.139895,0.351831,45.8,1.650042,0.7,5.7,8.5,12.8,21.7,99.06048,3.8,139.860691,-26.464566,113.396125,26.464566,-0.101344,119.418334,119.503064,0.084730,0.319330
2427,22025,1610612737,ATL,Atlanta Hawks,22500351,2025-12-06,ATL @ WAS,W,241,131,52,98,0.531,13,39.0,0.333,14,14,1.000,15.0,30.0,45.0,36,13.0,4,12,16,15.0,0,97.1136,134.893568,0.628840,0.596939,3.000000,0.103306,0.142857,1,1.0,1,0.0,0,117.727228,0.564694,0.588711,0.121179,0.201386,41.0,2.506117,0.4,4.7,8.5,9.6,20.7,99.77088,-2.6,22025,1610612764,WAS,Washington Wizards,2025-12-06,WAS vs. ATL,L,240,116,41,80,0.513,17,38.0,0.447,17,19,0.895,4.0,22.0,26.0,29,6.0,5,20,18,-15.0,0,100.1856,115.785103,0.656406,0.618750,1.450000,0.184570,0.237500,0,2.0,0,0.0,0,113.221856,0.531049,0.563013,0.127056,0.244813,40.6,1.854216,0.2,5.6,7.5,10.8,20.8,97.68960,-16.3,115.785103,19.108465,134.893568,-19.108465,-0.027565,120.522454,117.727228,-2.795226,-4.505372
2425,22025,1610612737,ATL,Atlanta Hawks,22500340,2025-12-05,ATL vs. DEN,L,241,133,49,95,0.516,20,46.0,0.435,15,16,0.938,11.0,34.0,45.0,35,8.0,2,12,25,-1.0,1,98.9184,134.454257,0.651705,0.621053,2.916667,0.105226,0.168421,0,2.0,0,0.0,0,116.754443,0.563765,0.588422,0.125759,0.212779,39.9,2.420332,0.5,4.8,8.8,9.4,20.1,99.82080,-2.3,22025,1610612743,DEN,Denver Nuggets,2025-12-05,DEN @ ATL,W,239,134,47,89,0.528,16,37.0,0.432,24,30,0.800,9.0,33.0,42.0,28,9.0,6,12,15,1.0,1,100.9920,132.683777,0.655577,0.617978,2.333333,0.105079,0.337079,1,2.0,0,1.0,1,130.971657,0.607856,0.644188,0.117511,0.290103,43.2,2.475444,0.6,4.5,5.5,10.9,18.9,96.14976,4.3,132.683777,1.770480,134.454257,-1.770480,-0.003872,119.202490,116.754443,-2.448047,14.217214
2423,22025,1610612737,ATL,Atlanta Hawks,22500327,2025-12-03,ATL vs. LAC,L,241,92,34,85,0.400,13,39.0,0.333,11,14,0.786,7.0,31.0,38.0,20,9.0,1,16,17,-23.0,1,96.1536,95.680245,0.504607,0.476471,1.250000,0.149309,0.164706,0,2.0,0,0.0,0,120.075432,0.580563,0.604173,0.124660,0.220753,40.4,2.520332,0.6,5.3,9.7,9.6,20.2,100.44672,1.0,22025,1610612746,LAC,Los Angeles Clippers,2025-12-03,LAC @ ATL,W,241,115,44,89,0.494,10,32.0,0.313,17,20,0.850,13.0,41.0,54.0,29,12.0,1,12,18,23.0,1,92.9280,123.751722,0.587935,0.550562,2.416667,0.109290,0.224719,1,2.0,0,0.0,0,115.986233,0.527543,0.584499,0.125860,0.314915,41.8,1.756057,0.2,4.2,8.0,9.1,20.8,99.85152,-7.0,123.751722,-28.071477,95.680245,28.071477,-0.083327,118.251566,120.075432,1.823866,-4.089199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70702,22010,1610612766,CHA,Charlotte Hornets,21000209,2010-11-24,CHA vs. NYK,L,240,95,39,87,0.448,2,12.0,0.167,15,21,0.714,18.0,29.0,47.0,24,9.0,8,14,14,-4.0,1,88.5504,107.283536,0.493558,0.459770,1.714286,0.126996,0.241379,0,1.0,1,0.0,0,111.069970,0.535663,0.573737,0.156403,0.300608,41.2,1.660313,0.4,7.3,6.8,9.6,21.3,88.84224,0.8,22010,1610612752,NYK,New York Knicks,2010-11-24,NYK @ CHA,W,240,99,36,75,0.480,12,25.0,0.480,15,20,0.750,8.0,30.0,38.0,22,7.0,11,15,21,4.0,1,87.1680,113.573789,0.590692,0.560000,1.466667,0.151822,0.266667,1,1.0,1,1.0,4,111.845003,0.504341,0.569654,0.144176,0.411672,40.3,1.357413,0.4,7.3,8.3,10.5,23.2,96.79872,-3.6,113.573789,-6.290253,107.283536,6.290253,-0.097134,108.165424,111.069970,2.904546,0.775034
70700,22010,1610612766,CHA,Charlotte Hornets,21000206,2010-11-23,CHA @ NYK,L,239,107,44,90,0.489,6,15.0,0.400,13,18,0.722,15.0,27.0,42.0,19,13.0,3,18,22,-3.0,0,96.8832,110.442264,0.546364,0.522222,1.055556,0.155280,0.200000,0,3.0,0,1.0,1,110.188928,0.529968,0.570854,0.157182,0.312553,41.1,1.686008,0.5,7.3,6.2,9.2,20.9,87.51744,1.3,22010,1610612752,NYK,New York Knicks,2010-11-23,NYK vs. CHA,W,241,110,38,73,0.521,10,25.0,0.400,24,25,0.960,5.0,26.0,31.0,18,10.0,9,20,20,3.0,0,95.0400,115.740741,0.654762,0.589041,0.900000,0.192308,0.342466,1,3.0,0,1.0,3,111.946937,0.502908,0.563175,0.140885,0.398116,41.3,1.395190,0.4,7.5,8.4,11.3,23.3,96.88704,-1.8,115.740741,-5.298476,110.442264,5.298476,-0.108398,106.240717,110.188928,3.948211,1.758009
70698,22010,1610612766,CHA,Charlotte Hornets,21000182,2010-11-20,CHA vs. PHX,W,239,123,50,87,0.575,9,19.0,0.474,14,19,0.737,8.0,33.0,41.0,37,6.0,6,14,27,18.0,1,97.3056,126.405880,0.644924,0.626437,2.642857,0.128018,0.218391,1,1.0,1,0.0,0,108.070226,0.521231,0.565725,0.163920,0.326651,39.8,1.493944,0.4,7.2,6.2,8.9,21.0,86.15040,-1.5,22010,1610612756,PHX,Phoenix Suns,2010-11-20,PHX @ CHA,L,240,105,34,74,0.459,5,21.0,0.238,32,39,0.821,7.0,27.0,34.0,19,6.0,5,13,22,-18.0,1,93.2736,112.572046,0.575910,0.493243,1.461538,0.124808,0.527027,0,2.0,0,0.0,0,113.146360,0.525410,0.561990,0.138652,0.308877,39.7,1.546693,0.5,4.0,7.3,12.1,21.0,94.45248,-2.9,112.572046,13.833834,126.405880,-13.833834,0.069014,106.733436,108.070226,1.336790,5.076134
70696,22010,1610612766,CHA,Charlotte Hornets,21000175,2010-11-19,CHA @ MIA,L,240,87,33,71,0.465,4,17.0,0.235,17,22,0.773,2.0,36.0,38.0,21,8.0,1,14,23,-8.0,0,88.9728,97.782693,0.539167,0.492958,1.500000,0.147866,0.309859,0,4.0,0,1.0,1,109.752559,0.519078,0.568044,0.165051,0.359951,40.5,1.443944,0.4,7.8,5.7,10.2,20.8,86.06592,-1.0,22010,1610612748,MIA,Miami Heat,2010-11-19,MIA vs. CHA,W,240,95,34,80,0.425,5,17.0,0.294,22,27,0.815,10.0,34.0,44.0,14,6.0,3,12,18,8.0,0,90.1248,105.409388,0.516979,0.456250,1.166667,0.115518,0.337500,1,2.0,0,1.0,2,118.101186,0.532317,0.588119,0.116619,0.427107,41.8,1.906949,0.7,5.2,6.7,8.7,21.7,90.52800,12.9,105.409388,-7.626695,97.782693,7.626695,0.022188,108.024447,109.752559,1.728112,8.348627


In [311]:
print(merged.shape)
print(merged[["TEAM_ABBREVIATION","TEAM_ABBREVIATION_OPP"]].head(10))
print(merged.groupby("GAME_ID")["TEAM_ID"].nunique().value_counts())

(36248, 118)
     TEAM_ABBREVIATION TEAM_ABBREVIATION_OPP
2431               ATL                   PHI
2429               ATL                   DET
2427               ATL                   WAS
2425               ATL                   DEN
2423               ATL                   LAC
2421               ATL                   DET
2419               ATL                   PHI
2417               ATL                   CLE
2415               ATL                   WAS
2413               ATL                   CHA
TEAM_ID
2    18119
1       10
Name: count, dtype: int64


In [312]:
good_ids = (
    merged.groupby("GAME_ID")["TEAM_ID"]
    .nunique()
    .reset_index()
    .query("TEAM_ID == 2")["GAME_ID"]
)

merged = merged[merged["GAME_ID"].isin(good_ids)]

print(merged.shape)
print(merged.groupby("GAME_ID")["TEAM_ID"].nunique().value_counts())

(36238, 118)
TEAM_ID
2    18119
Name: count, dtype: int64


In [313]:
merged.to_csv("../data/processed/adv_nba_data.csv", index=False)