# üèÄ NBA 2024-2025: Slumps & Recoveries
## Notebook 04 - Exploratory Analysis
This notebook builds the core slump and recovery framework, using True Shooting Percentage (TS%) as the primary metric for measuring how NBA players slump, adjust, and recover during the 2024-2025 regular season.

In [2]:
# Configuration parameters
PLAYER_SD_MULTIPLIER = 0.5    # how many standard deviations below player's season-long TS% counts as a slump
MIN_ATTEMPTS = 6              # minimum shot attempts (FGA + 0.44*FTA) for a game to be included

In [3]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

___
## Load Data

In [5]:
# Load merged data
df = pd.read_csv(r"C:\Users\dylan\OneDrive\Documents\Portfolio_Projects\project10_NBA_2025_game_logs\03_python_outputs\01_cleaning\NBA_2024_25_game_logs_merged.csv")

In [6]:
# Ensure `game_date` is datetime
df["game_date"] = pd.to_datetime(df["game_date"])

___
## 1) Create Efficiency Metrics

In [8]:
# Calculate true shooting attempts
df["ts_attempts"] = df["field_goals_attempted"] + 0.44 * df["free_throws_attempted"]

# Calculate true shooting percentage (TS%)
df["ts_pct"] = 0.5 * df["points"] / df["ts_attempts"]

In [9]:
# Calculate points per 10 shots
df["p10s"] = (df["points"] / df["field_goals_attempted"]) * 10

___
## 2) Baselines
#### a) Player-Level Baselines

In [11]:
# Filter for only "active rows" where player actually entered the game
active_games = df[df["minutes_played"] > 0].copy()

In [12]:
# Compute each player's season totals
player_season_totals = (
    active_games.groupby("player_name", as_index=False)
    .agg(
        total_points=("points", "sum"),
        total_fga=("field_goals_attempted", "sum"),
        total_fta=("free_throws_attempted", "sum"),
        games_played=("game_date", "count")
    )
)

In [13]:
# Compute player's season-long TS%
player_season_totals["season_ts_pct"] = (
    0.5 * player_season_totals["total_points"] 
    / (player_season_totals["total_fga"] + 0.44 * player_season_totals["total_fta"])
)

In [14]:
# Compute player's season-long TS% standard deviation
std_dev = (
    active_games.groupby("player_name", as_index=False)
    .agg(
        std_ts_pct=("ts_pct", "std")
    )
)

# Merge std_dev into player_totals
player_season_totals = player_season_totals.merge(std_dev, on="player_name", how="left")

In [15]:
# Append season totals to every game row
df = df.merge(
    player_season_totals[["player_name", "season_ts_pct", "std_ts_pct"]],
    on="player_name",
    how="left"
)

In [16]:
# Merge `season_ts_pct` and `std_ts_pct` into active_games
active_games = active_games.merge(
    player_season_totals[[
        "player_name",
        "season_ts_pct",
        "std_ts_pct"
    ]],
    on="player_name",
    how="left"
)

In [17]:
# Preview totals
player_season_totals

Unnamed: 0,player_name,total_points,total_fga,total_fta,games_played,season_ts_pct,std_ts_pct
0,Aaron Wiggins,914.0,728.0,89.0,76,0.595704,0.196776
1,Alex Sarr,869.0,828.0,165.0,67,0.482456,0.170794
2,Alperen ≈ûeng√ºn,1451.0,1143.0,428.0,76,0.544948,0.130577
3,Amen Thompson,970.0,697.0,247.0,69,0.601976,0.159219
4,Andrew Wiggins,1078.0,839.0,257.0,60,0.566129,0.154442
...,...,...,...,...,...,...,...
112,Tyrese Maxey,1369.0,1091.0,289.0,52,0.561913,0.133143
113,Tyus Jones,829.0,683.0,57.0,81,0.585386,0.254237
114,Victor Wembanyama,1116.0,857.0,189.0,46,0.593516,0.115759
115,Zaccharie Risacher,942.0,779.0,149.0,75,0.557687,0.182698


#### b) League-Level Baselines

In [19]:
# League-level TS% baseline
league_avg_ts = active_games["ts_pct"].mean()
league_std_ts = active_games["ts_pct"].std()

print(f"League Average (TS%): {league_avg_ts.round(3)*100}%")
print(f"League Standard Deviation (TS%): {round(league_std_ts,3)}")

League Average (TS%): 58.3%
League Standard Deviation (TS%): 0.169


In [20]:
# Append these to every game row
active_games["league_avg_ts"] = league_avg_ts
active_games["league_std_ts"] = league_std_ts

___
## 3) Defining Slump & Recovery Flags

- When does a slump *start*? (This triggers a **slump**.)
- When does a slump *end*?   (This triggers a **recovery**.)

In [23]:
# --- Player-level slump start threshold ---
# TS% must fall below: the player's season-long TS% minus (SD multiplier x 1 standard deviation of game-to-game TS%)
active_games["slump_start_threshold"] = (
    active_games["season_ts_pct"] - PLAYER_SD_MULTIPLIER * active_games["std_ts_pct"]
)

In [24]:
# Slump start flag (TS% < player threshold)
active_games["is_slump_start"] = (
    active_games["ts_pct"] < active_games["slump_start_threshold"]
).astype(int)

In [25]:
# Recovery flag (TS% >= league-average TS%)
active_games["is_recovery"] = (
    active_games["ts_pct"] >= active_games["league_avg_ts"]
).astype(int)

___
## 4) Next-Game Recovery Status
#### a) Player-Level Next-Game Recoveries

In [27]:
# Sort chronologically for each player
active_games = active_games.sort_values(["player_name", "game_date"]).reset_index(drop=True)

In [28]:
# Create next-game recovery flag
active_games["next_game_is_recovery"] = (
    active_games.groupby("player_name")["is_recovery"].shift(-1).fillna(0)
)

In [29]:
# Next-game recovery conditions
active_games["next_game_recovery"] = (
    (active_games["is_slump_start"] == 1) &
    (active_games["next_game_is_recovery"] == 1)
).astype(int)

In [30]:
# Player-level summary
player_next_game_recovery = (
    active_games.groupby("player_name")
    .agg(
        total_slumps=("is_slump_start", "sum"),
        next_game_recoveries=("next_game_recovery", "sum")
    ).reset_index()
)

# Calculate next-game recovery rate (player-level)
player_next_game_recovery["next_game_recovery_rate"] = (
    player_next_game_recovery["next_game_recoveries"]
    / player_next_game_recovery["total_slumps"]
)
player_next_game_recovery.sort_values("next_game_recovery_rate", ascending=False).head(10)

Unnamed: 0,player_name,total_slumps,next_game_recoveries,next_game_recovery_rate
84,Nikola Jokiƒá,21,20,0.952381
39,Giannis Antetokounmpo,18,15,0.833333
46,Jalen Duren,22,18,0.818182
67,Kevin Durant,17,13,0.764706
3,Amen Thompson,21,16,0.761905
116,Zach LaVine,20,15,0.75
101,Shai Gilgeous-Alexander,24,18,0.75
96,Rudy Gobert,26,19,0.730769
52,Jarrett Allen,24,17,0.708333
111,Tyrese Haliburton,19,13,0.684211


#### b) League-Level Next-Game Recoveries

In [32]:
# Find total slumps
total_slumps = active_games["is_slump_start"].sum()

# Find total "next-game recoveries"
total_next_game_recoveries = active_games.loc[
    active_games["is_slump_start"] == 1, "next_game_recovery"
].sum()

# Calculate next-game recovery rate (How many slumps are immediately ended during the next game?)
next_game_recovery_rate = total_next_game_recoveries / total_slumps
print(f"Next Game Recovery Rate: {next_game_recovery_rate*100:.1f}%")

Next Game Recovery Rate: 48.6%


In [33]:
# League-level summary
league_next_game_recovery = pd.DataFrame({
    "total_slumps": [total_slumps],
    "next_game_recoveries": [total_next_game_recoveries],
    "next_game_recovery_rate": [next_game_recovery_rate]
})
league_next_game_recovery

Unnamed: 0,total_slumps,next_game_recoveries,next_game_recovery_rate
0,2504,1216,0.485623


___
## 5) Detecting and Measuring Slump Behavior
#### a) Slump Sequence & Recovery Detection

In [35]:
slump_records = []

# Loop through all players
for player, df_p in active_games.groupby("player_name"):
    df_p = df_p.reset_index(drop=True)

    # Compute gaps (in days) between consecutive active games
    df_p["days_since_last_game"] = df_p["game_date"].diff().dt.days.fillna(0)

    in_slump = False
    slump_start_idx = None

    # ---------------------------
    # Main Loop
    # ---------------------------
    for i in range(len(df_p)):

        # 1a) Gap check -- A gap of 14+ days terminates the current slump as "unrecovered"
        if in_slump and df_p.loc[i, "days_since_last_game"] >= 14:

            slump_final_idx = i - 1
            slump_rows = df_p.loc[slump_start_idx : slump_final_idx]

            if len(slump_rows) > 0:
                slump_records.append({
                    "player_name": player,
                    "slump_id": f"{player}_{slump_start_idx}",
                    "slump_length": len(slump_rows),
                    "games_to_recover": None,   # unrecovered slump
                    "start_date": slump_rows["game_date"].min(),
                    "end_date": slump_rows["game_date"].max(),
                    "recovered": False
                })

            in_slump = False
            slump_start_idx = None
            continue

        # 1b) Slump start -- TS% below player-level slump start threshold
        if not in_slump and df_p.loc[i, "is_slump_start"] == 1:
            in_slump = True
            slump_start_idx = i
            continue

        # 1c) Slump recovery -- TS% ‚â• league-average TS%
        if in_slump and df_p.loc[i, "is_recovery"] == 1:
                
                slump_final_idx = i - 1
                recovery_idx    = i
                slump_rows = df_p.loc[slump_start_idx : slump_final_idx]

                if len(slump_rows) > 0:
                    slump_records.append({
                        "player_name": player,
                        "slump_id": f"{player}_{slump_start_idx}",
                        "slump_length": len(slump_rows),
                        "games_to_recover": (recovery_idx - slump_start_idx),
                        "start_date": slump_rows["game_date"].min(),
                        "end_date": slump_rows["game_date"].max(),
                        "recovered": True
                    })
    
                    in_slump = False
                    slump_start_idx = None
                    continue

    # ---------------------------------------------
    # 2) Season-end slump -- ends as "unrecovered"
    # ---------------------------------------------
    if in_slump:
            slump_final_idx = len(df_p) - 1
            slump_rows = df_p.loc[slump_start_idx: slump_final_idx]

            if len(slump_rows) > 0:
                slump_records.append({
                    "player_name": player,
                    "slump_id": f"{player}_{slump_start_idx}",
                    "slump_length": len(slump_rows),
                    "games_to_recover": None,
                    "start_date": slump_rows["game_date"].min(),
                    "end_date": slump_rows["game_date"].max(),
                    "recovered": False
                })

# Slump summary DataFrame
slump_df = pd.DataFrame(slump_records)

In [36]:
# Merge reference values from active_games
slump_df = slump_df.merge(
    active_games[[
        "player_name", "team", "game_date",
        "ts_pct", "slump_start_threshold", "season_ts_pct", "league_avg_ts"
    ]],
    left_on=["player_name", "start_date"],
    right_on=["player_name", "game_date"],
    how="left"
)

# Drop game_date column
slump_df = slump_df.drop(columns=["game_date"])

#### b) Player-Level Slump Summary

In [38]:
# Filter "recovered" and "unrecovered" slumps
recovered_slumps   = slump_df[slump_df["recovered"] == True].copy()
unrecovered_slumps = slump_df[slump_df["recovered"] == False].copy()

In [39]:
# Player-level summary
player_slump_summary = slump_df.groupby("player_name").apply(
    lambda x: pd.Series({
        "total_slumps": len(x),
        "recovered_slumps": x["recovered"].sum(),
        "unrecovered_slumps": len(x) - x["recovered"].sum(),
        "avg_slump_length": x["slump_length"].mean(),
        "median_slump_length": x["slump_length"].median(),
        "max_slump_length": x["slump_length"].max(),
        "avg_games_to_recover": x.loc[x["recovered"]==True, "games_to_recover"].mean(),
        "median_games_to_recover": x.loc[x["recovered"]==True, "games_to_recover"].median(),
        "max_games_to_recover": x.loc[x["recovered"]==True, "games_to_recover"].max(),
        "slumps_per_82": (len(x) / 82) * 82
    })
).reset_index()
player_slump_summary.sort_values("avg_games_to_recover")[["player_name", "avg_games_to_recover","total_slumps"]].head(10)

  player_slump_summary = slump_df.groupby("player_name").apply(


Unnamed: 0,player_name,avg_games_to_recover,total_slumps
84,Nikola Jokiƒá,1.052632,19.0
67,Kevin Durant,1.166667,13.0
39,Giannis Antetokounmpo,1.2,15.0
46,Jalen Duren,1.222222,18.0
96,Rudy Gobert,1.263158,20.0
52,Jarrett Allen,1.3125,17.0
101,Shai Gilgeous-Alexander,1.333333,18.0
3,Amen Thompson,1.352941,17.0
86,Norman Powell,1.363636,13.0
116,Zach LaVine,1.411765,17.0


#### c) League-Level Slump Summary

In [41]:
# League-level summary
league_slump_summary = pd.DataFrame({
    "total_slumps_league": [len(slump_df)],
    "recovered_slumps_league": [slump_df["recovered"].sum()],
    "unrecovered_slumps_league": [len(slump_df) - slump_df["recovered"].sum()],
    "avg_slump_length_league": [slump_df["slump_length"].mean()],
    "median_slump_length_league": [slump_df["slump_length"].median()],
    "max_slump_length_league": [slump_df["slump_length"].max()],
    "avg_games_to_recover_league": [slump_df.loc[slump_df["recovered"] == True, "games_to_recover"].mean()],
    "median_games_to_recover_league": [slump_df.loc[slump_df["recovered"] == True, "games_to_recover"].median()],
    "max_games_to_recover_league": [slump_df.loc[slump_df["recovered"] == True, "games_to_recover"].max()]
})
league_slump_summary

Unnamed: 0,total_slumps_league,recovered_slumps_league,unrecovered_slumps_league,avg_slump_length_league,median_slump_length_league,max_slump_length_league,avg_games_to_recover_league,median_games_to_recover_league,max_games_to_recover_league
0,1567,1502,65,2.015954,1.0,13,2.023968,1.0,13.0


___
## 6) Shot Volume Adjustment (from slumps -> recoveries)
#### a) Shot Volume Adjustment

In [43]:
shot_records = []

active_lookup = active_games.set_index(["player_name", "game_date"])

for idx, row in slump_df.iterrows():
    # Only process recovered slumps (to get recovery games)
    if not row["recovered"]:
        continue

    player = row["player_name"]
    start_date = row["start_date"]
    end_date = row["end_date"]
    games_to_recover = row["games_to_recover"]

    # 1) Slump window = all days between start_date and end_date (inclusive)
    slump_window = active_games[
        (active_games["player_name"] == player) &
        (active_games["game_date"] >= start_date) &
        (active_games["game_date"] <= end_date)
    ]

    # If there are no slump games, then skip (this most likely won't happen)
    if slump_window.empty:
        continue

    # Average FGA during slump window
    slump_avg_fga = slump_window["field_goals_attempted"].mean()

    # 2) Recovery game = next game after slump end
    next_game = active_games[
        (active_games["player_name"] == player) &
        (active_games["game_date"] > end_date)
    ].sort_values("game_date").head(1)   # only want first game after slump end

    # If there is no next game (e.g., end of season), then skip
    if next_game.empty:
        continue

    recovery_fga = next_game["field_goals_attempted"].iloc[0]

    # 3) % change calculation
    pct_change = (recovery_fga - slump_avg_fga) / slump_avg_fga

    # Assign slump-level shot volume category
    if pct_change > 0:
        category = "Took More Shots"
    elif pct_change < 0:
        category = "Took Less Shots"
    else:
        category = "No Change"
    
    # Record final row
    shot_records.append({
        "player_name": player,
        "slump_id": row["slump_id"],
        "slump_length": row["slump_length"],
        "games_to_recover": games_to_recover,
        "slump_avg_fga": slump_avg_fga,
        "recovery_fga": recovery_fga,
        "pct_change_fga": pct_change,
        "shot_category": category
    })

# Final slump-level shot volume DataFrame
shot_volume_df = pd.DataFrame(shot_records)

  pct_change = (recovery_fga - slump_avg_fga) / slump_avg_fga


In [44]:
# Replace infinite pct_change_fga values with NaN
shot_volume_df["pct_change_fga"].replace([np.inf, -np.inf], np.nan, inplace=True)

# Preview slump-level results
shot_volume_df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  shot_volume_df["pct_change_fga"].replace([np.inf, -np.inf], np.nan, inplace=True)


Unnamed: 0,player_name,slump_id,slump_length,games_to_recover,slump_avg_fga,recovery_fga,pct_change_fga,shot_category
0,Aaron Wiggins,Aaron Wiggins_3,1,1.0,8.00,10.0,0.250000,Took More Shots
1,Aaron Wiggins,Aaron Wiggins_8,2,2.0,8.50,6.0,-0.294118,Took Less Shots
2,Aaron Wiggins,Aaron Wiggins_11,1,1.0,11.00,5.0,-0.545455,Took Less Shots
3,Aaron Wiggins,Aaron Wiggins_13,4,4.0,7.75,5.0,-0.354839,Took Less Shots
4,Aaron Wiggins,Aaron Wiggins_18,2,2.0,10.50,11.0,0.047619,Took More Shots
...,...,...,...,...,...,...,...,...
1497,Zach LaVine,Zach LaVine_53,1,1.0,16.00,22.0,0.375000,Took More Shots
1498,Zach LaVine,Zach LaVine_60,4,4.0,14.50,18.0,0.241379,Took More Shots
1499,Zach LaVine,Zach LaVine_65,1,1.0,8.00,15.0,0.875000,Took More Shots
1500,Zach LaVine,Zach LaVine_67,1,1.0,13.00,19.0,0.461538,Took More Shots


In [45]:
# Category-level averages (per player and category)
category_stats = (
    shot_volume_df
    .groupby(["player_name", "shot_category"])
    .agg(
        recovered_slump_count=("pct_change_fga", "count"),
        avg_pct_change_fga=("pct_change_fga", "mean"),
        avg_slump_avg_fga=("slump_avg_fga", "mean"),
        avg_recovery_fga=("recovery_fga", "mean")
    ).reset_index()
)
category_stats

Unnamed: 0,player_name,shot_category,recovered_slump_count,avg_pct_change_fga,avg_slump_avg_fga,avg_recovery_fga
0,Aaron Wiggins,No Change,2,0.000000,8.500000,8.500000
1,Aaron Wiggins,Took Less Shots,5,-0.444596,9.250000,5.200000
2,Aaron Wiggins,Took More Shots,6,1.204071,8.000000,17.166667
3,Alex Sarr,No Change,1,0.000000,12.000000,12.000000
4,Alex Sarr,Took Less Shots,5,-0.456517,15.193333,8.200000
...,...,...,...,...,...,...
290,Victor Wembanyama,Took More Shots,3,0.476754,16.055556,21.666667
291,Zaccharie Risacher,Took Less Shots,6,-0.243364,10.979167,8.166667
292,Zaccharie Risacher,Took More Shots,6,1.034517,7.965278,14.833333
293,Zach LaVine,Took Less Shots,5,-0.331292,18.600000,12.200000


#### b) Player-Level Shot Volume Summary

In [47]:
# Player-level summary
player_volume_summary = (
    shot_volume_df.groupby("player_name")
    .agg(
        total_recovered_slumps=("pct_change_fga", "count"),
        avg_pct_change_fga=("pct_change_fga", "mean"),
        median_pct_change_fga=("pct_change_fga", "median"),
        std_pct_change_fga=("pct_change_fga", "std"),
        max_pct_increase=("pct_change_fga", "max"),
        max_pct_decrease=("pct_change_fga", "min"),
        pct_slumps_increase=("pct_change_fga", lambda x: (x > 0).mean()),
        pct_slumps_decrease=("pct_change_fga", lambda x: (x < 0).mean())
    ).reset_index()
)

In [48]:
# Binary classification: "Shoots More" vs "Shoots Less" during recovery games
player_volume_summary["recovery_shot_profile"] = np.where(
    player_volume_summary["avg_pct_change_fga"] > 0,
    "Shoots More",
    np.where(
        player_volume_summary["avg_pct_change_fga"] < 0,
        "Shoots Less",
        "No Change"
    )
)
player_volume_summary.head(10)

Unnamed: 0,player_name,total_recovered_slumps,avg_pct_change_fga,median_pct_change_fga,std_pct_change_fga,max_pct_increase,max_pct_decrease,pct_slumps_increase,pct_slumps_decrease,recovery_shot_profile
0,Aaron Wiggins,13,0.384726,0.0,1.090968,3.090909,-0.6,0.461538,0.384615,Shoots More
1,Alex Sarr,11,-0.147509,0.0,0.320356,0.203704,-0.620253,0.454545,0.454545,Shoots Less
2,Alperen ≈ûeng√ºn,11,0.108717,0.153846,0.482465,1.272727,-0.521739,0.545455,0.454545,Shoots More
3,Amen Thompson,17,0.208433,0.130435,0.532648,1.333333,-0.636364,0.588235,0.294118,Shoots More
4,Andrew Wiggins,11,-0.03881,-0.111111,0.379244,0.625,-0.575758,0.363636,0.545455,Shoots Less
5,Anfernee Simons,10,0.430141,0.297203,0.404232,1.352941,-0.014493,0.9,0.1,Shoots More
6,Anthony Davis,9,0.233582,0.257143,0.335026,0.928571,-0.263158,0.777778,0.111111,Shoots More
7,Anthony Edwards,15,0.156306,0.0,0.589172,1.818182,-0.495798,0.4,0.4,Shoots More
8,Austin Reaves,16,0.066247,-0.062852,0.380389,0.818182,-0.357143,0.3125,0.625,Shoots More
9,Bam Adebayo,11,0.124458,0.076923,0.563983,1.526316,-0.454545,0.545455,0.454545,Shoots More


In [49]:
# Expand to include zero-count categories for each player
categories = ["Took More Shots", "Took Less Shots", "No Change"]

expanded_records = []

for player, group in category_stats.groupby("player_name"):
    total_recovered = group["recovered_slump_count"].sum()

    for cat in categories:
        if cat in group["shot_category"].values:
            row = group[group["shot_category"] == cat].iloc[0]

            expanded_records.append({
                "player_name": player,
                "shot_category": cat,
                "recovered_slump_count": row["recovered_slump_count"],
                "total_recovered_slumps": total_recovered,
                "pct": row["recovered_slump_count"] / total_recovered if total_recovered > 0 else 0,
                "avg_pct_change_fga": row["avg_pct_change_fga"],
                "avg_slump_avg_fga": row["avg_slump_avg_fga"],
                "avg_recovery_fga": row["avg_recovery_fga"]
            })
        else:
            # missing categories -> zeros
            expanded_records.append({
                "player_name": player,
                "shot_category": cat,
                "recovered_slump_count": 0,
                "total_recovered_slumps": total_recovered,
                "pct": 0,
                "avg_pct_change_fga": 0,
                "avg_slump_avg_fga": 0,
                "avg_recovery_fga": 0
            })

player_shot_profile = pd.DataFrame(expanded_records)
player_shot_profile

Unnamed: 0,player_name,shot_category,recovered_slump_count,total_recovered_slumps,pct,avg_pct_change_fga,avg_slump_avg_fga,avg_recovery_fga
0,Aaron Wiggins,Took More Shots,6,13,0.461538,1.204071,8.000000,17.166667
1,Aaron Wiggins,Took Less Shots,5,13,0.384615,-0.444596,9.250000,5.200000
2,Aaron Wiggins,No Change,2,13,0.153846,0.000000,8.500000,8.500000
3,Alex Sarr,Took More Shots,5,11,0.454545,0.131998,10.926667,12.400000
4,Alex Sarr,Took Less Shots,5,11,0.454545,-0.456517,15.193333,8.200000
...,...,...,...,...,...,...,...,...
346,Zaccharie Risacher,Took Less Shots,6,12,0.500000,-0.243364,10.979167,8.166667
347,Zaccharie Risacher,No Change,0,12,0.000000,0.000000,0.000000,0.000000
348,Zach LaVine,Took More Shots,12,17,0.705882,0.697475,12.187500,19.916667
349,Zach LaVine,Took Less Shots,5,17,0.294118,-0.331292,18.600000,12.200000


#### c) League-Level Shot Volume Summary

In [51]:
# League-level summary
league_volume_summary = pd.DataFrame({
    "total_recovered_slumps": [len(shot_volume_df)],
    "avg_pct_change_fga_league": [shot_volume_df["pct_change_fga"].mean()],
    "median_pct_change_fga_league": [shot_volume_df["pct_change_fga"].median()],
    "pct_increase_league": [(shot_volume_df["pct_change_fga"] > 0).mean()],
    "pct_decrease_league": [(shot_volume_df["pct_change_fga"] < 0).mean()],
    "max_pct_increase_league": [shot_volume_df["pct_change_fga"].max()],
    "max_pct_decrease_league": [shot_volume_df["pct_change_fga"].min()]
})

In [52]:
# Binary classification: "Shoots More" vs "Shoots Less" during recovery games
league_volume_summary["league_recovery_shot_profile"] = np.where(
    league_volume_summary["avg_pct_change_fga_league"] > 0,
    "Shoots More",
    "Shoots Less"
)
league_volume_summary

Unnamed: 0,total_recovered_slumps,avg_pct_change_fga_league,median_pct_change_fga_league,pct_increase_league,pct_decrease_league,max_pct_increase_league,max_pct_decrease_league,league_recovery_shot_profile
0,1502,0.164256,0.034483,0.506658,0.432756,5.0,-0.928571,Shoots More


In [53]:
# --- League-level recovery shooting profile ---

# Recovered slumps only
slump_recovered = shot_volume_df[shot_volume_df["games_to_recover"].notna()].copy()

# Remove any slumps where slump_avg_fga = 0
slump_recovered = slump_recovered[slump_recovered["slump_avg_fga"] > 0]

# Masks
shoot_more = slump_recovered["pct_change_fga"] > 0
shoot_less = slump_recovered["pct_change_fga"] < 0

# Main metrics
total_recovered = len(slump_recovered)
pct_shoot_more = shoot_more.mean()
pct_shoot_less = shoot_less.mean()
pct_same       = (slump_recovered["pct_change_fga"] == 0).mean()

# Average % FGA change
avg_increase = slump_recovered.loc[shoot_more, "pct_change_fga"].mean()
avg_decrease = slump_recovered.loc[shoot_less, "pct_change_fga"].mean()

# Shoots More
avg_fga_slump_more    = slump_recovered.loc[shoot_more, "slump_avg_fga"].mean()
avg_fga_recovery_more = slump_recovered.loc[shoot_more, "recovery_fga"].mean()

# Shoots Less
avg_fga_slump_less    = slump_recovered.loc[shoot_less, "slump_avg_fga"].mean()
avg_fga_recovery_less = slump_recovered.loc[shoot_less, "recovery_fga"].mean()


# Combine into DataFrame
league_shot_profile = pd.DataFrame({
    "total_recovered_slumps": [total_recovered],
    "pct_shoot_more": [pct_shoot_more],
    "pct_shoot_less": [pct_shoot_less],
    "pct_same_volume": [pct_same],
    "avg_pct_increase_fga": [avg_increase],
    "avg_pct_decrease_fga": [avg_decrease],
    "avg_fga_slump_more": [avg_fga_slump_more],
    "avg_fga_recovery_more": [avg_fga_recovery_more],
    "avg_fga_slump_less": [avg_fga_slump_less],
    "avg_fga_recovery_less": [avg_fga_recovery_less]
})
league_shot_profile

Unnamed: 0,total_recovered_slumps,pct_shoot_more,pct_shoot_less,pct_same_volume,avg_pct_increase_fga,avg_pct_decrease_fga,avg_fga_slump_more,avg_fga_recovery_more,avg_fga_slump_less,avg_fga_recovery_less
0,1501,0.506995,0.433045,0.05996,0.551794,-0.266719,11.332644,16.045992,15.377579,11.286154


In [54]:
# 1) Total slumps
total_slumps = len(slump_df)
print(f"Total slumps: {total_slumps:,}")

Total slumps: 1,567


In [55]:
# 2) Average games_to_recover (only recovered slumps)
recovered_slumps = slump_df[slump_df["recovered"] == True].copy()
avg_games_to_recover = recovered_slumps["games_to_recover"].mean()
print(f"Average Games to Recover: {avg_games_to_recover:.2f} games")

Average Games to Recover: 2.02 games


In [56]:
# 3) --- Average TS% during slumps ---

slump_ts_values = []

for _, row in recovered_slumps.iterrows():
    player = row["player_name"]
    start  = row["start_date"]
    end    = row["end_date"]

    # Games during slump window
    mask = (
        (active_games["player_name"] == player) &
        (active_games["game_date"] >= start) &
        (active_games["game_date"] <= end)
    )

    # Drop NaNs
    ts_slice = active_games.loc[mask, "ts_pct"].dropna()
    slump_ts_values.extend(ts_slice.tolist())

In [57]:
# Calculate average TS% during slumps
avg_ts_slump = np.mean(slump_ts_values)
print(f"Average TS% during slumps: {avg_ts_slump*100:.1f}%")

Average TS% during slumps: 43.0%


In [58]:
# 4) --- Average TS% during recovery games ---

recovery_ts_values = []

for _, row in recovered_slumps.iterrows():
    player = row["player_name"]
    slump_end = row["end_date"]

    # Find recovery game - the next game after slump_end
    df_p = active_games[active_games["player_name"] == player].sort_values("game_date")

    # Find index of the slump_end row
    idx = df_p.index[df_p["game_date"] == slump_end]

    if len(idx) == 0:
        continue

    idx = idx[0]
    recovery_idx = idx + 1

    # End of season defensive guard
    if recovery_idx in df_p.index:
        recovery_ts = df_p.loc[recovery_idx, "ts_pct"]
        if pd.notna(recovery_ts):
            recovery_ts_values.append(recovery_ts)

In [59]:
# Calculate average TS% during recovery games
avg_ts_recovery = np.mean(recovery_ts_values)
print(f"Average TS% during recovery games: {avg_ts_recovery*100:.1f}%")

Average TS% during recovery games: 71.3%


In [60]:
# Combine into DataFrame
league_kpi_summary = pd.DataFrame({
    "total_slumps": [total_slumps],
    "avg_games_to_recover": [avg_games_to_recover],
    "avg_ts_slump": [avg_ts_slump],
    "avg_ts_recovery": [avg_ts_recovery]
})
league_kpi_summary

Unnamed: 0,total_slumps,avg_games_to_recover,avg_ts_slump,avg_ts_recovery
0,1567,2.023968,0.430454,0.712818


___
## 7) Player-Level TS%: Slumps vs Recoveries 

In [62]:
# Input data
slumps_all = slump_df.copy()
recovered_slumps = slump_df[slump_df["recovered"] == True].copy()

In [63]:
# Player-level TS% (during slumps)
player_slump_records = []

for player, p_slumps in recovered_slumps.groupby("player_name"):

    total_pts = 0
    total_fga = 0
    total_fta = 0

    for _, row in p_slumps.iterrows():
        start = row["start_date"]
        end = row["end_date"]

        mask = (
            (active_games["player_name"] == player) &
            (active_games["game_date"] >= start) &
            (active_games["game_date"] <= end)
        )

        total_pts += active_games.loc[mask, "points"].sum()
        total_fga += active_games.loc[mask, "field_goals_attempted"].sum()
        total_fta += active_games.loc[mask, "free_throws_attempted"].sum()

    # TS% calculation
    denom = (total_fga + 0.44 * total_fta)
    slump_ts = total_pts / (2 * denom) if denom > 0 else None

    player_slump_records.append({
        "player_name": player,
        "slump_pts": total_pts,
        "slump_fga": total_fga,
        "slump_fta": total_fta,
        "slump_ts": slump_ts
    })

player_slump_ts = pd.DataFrame(player_slump_records)

In [64]:
# Player-level TS% (during recovery games)
player_recovery_records = []

for player, p_slumps in recovered_slumps.groupby("player_name"):

    total_pts = 0
    total_fga = 0
    total_fta = 0
    
    for _, row in p_slumps.iterrows():
        end = row["end_date"]

        next_game = (
            active_games[
                (active_games["player_name"] == player) &
                (active_games["game_date"] > end)
            ]
            .sort_values("game_date")
            .head(1)
        )

        if not next_game.empty:
            total_pts += next_game["points"].iloc[0]
            total_fga += next_game["field_goals_attempted"].iloc[0]
            total_fta += next_game["free_throws_attempted"].iloc[0]

    denom = (total_fga + 0.44 * total_fta)
    recovery_ts = total_pts / (2 * denom) if denom > 0 else None
    
    player_recovery_records.append({
        "player_name": player,
        "recovery_pts": total_pts,
        "recovery_fga": total_fga,
        "recovery_fta": total_fta,
        "recovery_ts": recovery_ts
    })

player_recovery_ts = pd.DataFrame(player_recovery_records)

In [65]:
# Merge into DataFrame
player_ts_summary = (
    player_slump_ts
    .merge(player_recovery_ts, on="player_name", how="outer")
)
player_ts_summary.head()

Unnamed: 0,player_name,slump_pts,slump_fga,slump_fta,slump_ts,recovery_pts,recovery_fga,recovery_fta,recovery_ts
0,Aaron Wiggins,176.0,214.0,12.0,0.401313,214.0,146.0,21.0,0.689255
1,Alex Sarr,327.0,400.0,77.0,0.376832,181.0,115.0,20.0,0.731018
2,Alperen ≈ûeng√ºn,565.0,524.0,163.0,0.474216,295.0,177.0,103.0,0.663458
3,Amen Thompson,230.0,225.0,71.0,0.448798,292.0,175.0,67.0,0.714006
4,Andrew Wiggins,350.0,357.0,84.0,0.444208,231.0,135.0,68.0,0.70034


___
## Tableau Edits

In [67]:
# Update teams for traded players (on "average games to recover" leaderboard only)
team_updates = {
    "Luka Donƒçiƒá": "DAL ‚Üí LAL",
    "Zach LaVine": "CHI ‚Üí SAC",
    "Quentin Grimes": "DAL ‚Üí PHI",
    "De'Andre Hunter": "ATL ‚Üí CLE",
    "Jimmy Butler": "MIA ‚Üí GSW",
    "Jonas Valanƒçi≈´nas": "WAS ‚Üí SAC"
}

# Apply updates
active_games["team"] = active_games.apply(
    lambda row: team_updates[row["player_name"]]
                if row["player_name"] in team_updates
                else row["team"],
    axis=1
)

___
## Save

In [69]:
# --- Save 11 dfs to CSV ---

# Game logs
active_games.to_csv("active_game_logs.csv", index=False)

# Next game recovery (step 4)
player_next_game_recovery.to_csv("player_next_game_recovery.csv", index=False)
league_next_game_recovery.to_csv("league_next_game_recovery.csv", index=False)

# Slumps (step 5)
slump_df.to_csv("all_slumps.csv", index=False)
recovered_slumps.to_csv("recovered_slumps.csv", index=False)
player_slump_summary.to_csv("player_slump_summary.csv", index=False)
league_slump_summary.to_csv("league_slump_summary.csv", index=False)

# Shot volume adjustment (step 6)
shot_volume_df.to_csv("shot_volume_adjustment.csv", index=False)
player_shot_profile.to_csv("player_shot_profile.csv", index=False)
league_shot_profile.to_csv("league_shot_profile.csv", index=False)

# TS%: slumps vs recoveries (step 7)
player_ts_summary.to_csv("player_slump_vs_recovery_ts.csv", index=False)