# NBA 2024-25: Slump Shots & Recovery Shots
## Notebook 04: Shot-Level Analysis
This notebook analyzes how shot selection changes between slump and recovery phases.

Key analysis questions include:
- How does average shot distance shift from slump to recovery?
- Which shot zones are most common during each phase?
- Which players show the largest changes in shot distance when recovering?

In [2]:
# Import libraries
import pandas as pd
import numpy as np

In [3]:
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Display options: show all columns, cap output at 100 rows, use a 160-character
# console width, never truncate cell text, and render floats with two decimals
_DISPLAY_OPTIONS = {
    "display.max_columns": None,
    "display.max_rows": 100,
    "display.width": 160,
    "display.max_colwidth": None,
    "display.float_format": lambda x: f"{x:.2f}",
}
for _option_name, _option_value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)

---
## Load Data

In [6]:
# Read the final shot-level dataset for the 2024-25 regular season
shots_path = r"...\03_python_outputs\nba_2024_25_shot_level_data_final.parquet"
shots = pd.read_parquet(shots_path)

In [7]:
# Inspect columns: list every column label available in the shot-level table
shots.columns

Index(['GAME_ID', 'GAME_EVENT_ID', 'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_NAME', 'PERIOD', 'MINUTES_REMAINING', 'SECONDS_REMAINING', 'ACTION_TYPE',
       'SHOT_TYPE', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_DISTANCE', 'LOC_X', 'LOC_Y', 'SHOT_MADE_FLAG', 'GAME_DATE', 'HTM', 'VTM',
       'calc_dist', 'shot_idx', 'phase'],
      dtype='object')

In [8]:
# Confirm: number of unique players
# (counts distinct PLAYER_NAME values; `nunique` skips missing values by default)
shots["PLAYER_NAME"].nunique()

566

---
## 1) Average Shot Distance
#### Neutral

In [10]:
# Keep only the rows whose phase is "neutral" ("neutral shots") and report how many there are
neutral_shots = shots.loc[shots["phase"].eq("neutral")]
print(f"{neutral_shots.shape[0]:,} total neutral shots")

169,949 total neutral shots


In [11]:
# Mean SHOT_DISTANCE over all "neutral shots", printed rounded to two decimals
avg_shot_distance_neutral = neutral_shots.loc[:, "SHOT_DISTANCE"].mean()
print(f"Average Neutral Shot Distance: {np.round(avg_shot_distance_neutral, 2)} ft")

Average Neutral Shot Distance: 13.86 ft


#### Slumps

In [13]:
# Keep only the rows whose phase is "slump" ("slump shots") and report how many there are
slump_shots = shots.loc[shots["phase"].eq("slump")]
print(f"{slump_shots.shape[0]:,} total slump shots")

43,180 total slump shots


In [14]:
# Mean SHOT_DISTANCE over all "slump shots", printed rounded to two decimals
avg_shot_distance_slump = slump_shots.loc[:, "SHOT_DISTANCE"].mean()
print(f"Average Slump Shot Distance: {np.round(avg_shot_distance_slump, 2)} ft")

Average Slump Shot Distance: 14.93 ft


#### Recoveries

In [16]:
# Keep only the rows whose phase is "recovered" ("recovery shots") and report how many there are
recovery_shots = shots.loc[shots["phase"].eq("recovered")]
print(f"{recovery_shots.shape[0]:,} total recovery shots")

6,398 total recovery shots


In [17]:
# Mean SHOT_DISTANCE over all "recovery shots", printed rounded to two decimals
avg_shot_distance_recovery = recovery_shots.loc[:, "SHOT_DISTANCE"].mean()
print(f"Average Recovery Shot Distance: {np.round(avg_shot_distance_recovery, 2)} ft")

Average Recovery Shot Distance: 11.51 ft


### Player-Level Summary

In [19]:
# Eligibility: players whose SHOT_MADE_FLAG total for the season is at least 300
made_shots_by_player = shots.groupby("PLAYER_NAME")["SHOT_MADE_FLAG"].sum()
eligible_players = made_shots_by_player[made_shots_by_player >= 300].index

# Keep eligible players' slump and recovery shots only (neutral rows are excluded)
in_slump_or_recovery = shots["phase"].isin(["slump", "recovered"])
from_eligible_player = shots["PLAYER_NAME"].isin(eligible_players)
player_phase_shots = shots.loc[in_slump_or_recovery & from_eligible_player].copy()

In [20]:
# Check: how many players are left after these filters?
# (distinct PLAYER_NAME values among eligible players' slump/recovery shots)
player_phase_shots["PLAYER_NAME"].nunique()

117

In [21]:
# Count shot attempts per player and phase, then pivot so each phase becomes a column
# (a player with no shots in a phase gets 0 rather than a missing value)
shot_counts_long = player_phase_shots.groupby(["PLAYER_NAME", "phase"]).size()
phase_count_labels = {"slump": "slump_shots", "recovered": "recovered_shots"}
player_phase_counts = (
    shot_counts_long
    .unstack(fill_value=0)
    .rename(columns=phase_count_labels)
)

In [22]:
# Per player and phase: mean shot distance and mean court location (LOC_X, LOC_Y),
# pivoted so the columns form a two-level (stat, phase) index
average_labels = {
    "SHOT_DISTANCE": "avg_shot_distance",
    "LOC_X": "avg_loc_x",
    "LOC_Y": "avg_loc_y",
}
player_phase_averages = (
    player_phase_shots
    .groupby(["PLAYER_NAME", "phase"])[["SHOT_DISTANCE", "LOC_X", "LOC_Y"]]
    .mean()
    .rename(columns=average_labels)
    .unstack()
)

In [23]:
# Flatten the two-level (stat, phase) columns of `player_phase_averages` into single
# labels such as "avg_shot_distance_slump".
# The MultiIndex guard makes this cell safe to re-run: once the columns are flat, each
# label is a plain string and unpacking it into (stat, phase) would raise a ValueError.
if isinstance(player_phase_averages.columns, pd.MultiIndex):
    player_phase_averages.columns = [
        f"{stat}_{phase}"
        for stat, phase in player_phase_averages.columns
    ]

In [24]:
# Attach the per-phase averages to the per-phase counts (both are indexed by PLAYER_NAME),
# keeping every row of the counts table, then turn PLAYER_NAME back into a regular column
player_shot_summary = (
    player_phase_counts
    .join(player_phase_averages, how="left")
    .reset_index()
)

In [25]:
# Per-player change in average shot distance: recovery minus slump
# (negative values mean the recovery average is shorter than the slump average)
player_shot_summary["avg_shot_distance_change"] = (
    player_shot_summary["avg_shot_distance_recovered"]
    .sub(player_shot_summary["avg_shot_distance_slump"])
)

In [26]:
# Shot distance change stats: count, mean, std, min, quartiles, and max across players
player_shot_summary["avg_shot_distance_change"].describe()

count   117.00
mean     -3.27
std       2.23
min     -11.49
25%      -4.48
50%      -3.20
75%      -1.90
max       2.03
Name: avg_shot_distance_change, dtype: float64

In [27]:
# Ten players with the most negative shot distance change (moved closest to the basket)
(
    player_shot_summary
    .loc[:, [
        "PLAYER_NAME",
        "avg_shot_distance_slump",
        "avg_shot_distance_recovered",
        "avg_shot_distance_change",
    ]]
    .sort_values(by="avg_shot_distance_change", ascending=True)
    .head(10)
)

Unnamed: 0,PLAYER_NAME,avg_shot_distance_slump,avg_shot_distance_recovered,avg_shot_distance_change
88,Obi Toppin,15.07,3.58,-11.49
64,Keldon Johnson,13.0,3.21,-9.79
77,Michael Porter Jr.,15.19,6.56,-8.63
78,Mikal Bridges,16.5,8.89,-7.62
59,Josh Hart,11.68,4.26,-7.42
66,Keon Johnson,15.53,8.28,-7.26
91,Paolo Banchero,13.97,6.84,-7.13
18,Christian Braun,10.19,3.47,-6.73
81,Naji Marshall,12.82,6.5,-6.32
108,Trey Murphy III,15.71,9.61,-6.1


In [28]:
# Five players with the largest positive shot distance change (moved farthest from the basket)
(
    player_shot_summary
    .loc[:, [
        "PLAYER_NAME",
        "avg_shot_distance_slump",
        "avg_shot_distance_recovered",
        "avg_shot_distance_change",
    ]]
    .sort_values(by="avg_shot_distance_change", ascending=False)
    .head(5)
)

Unnamed: 0,PLAYER_NAME,avg_shot_distance_slump,avg_shot_distance_recovered,avg_shot_distance_change
12,Brook Lopez,14.75,16.77,2.03
106,Toumani Camara,14.52,16.13,1.62
48,Jalen Williams,11.25,12.36,1.11
46,Jalen Duren,2.19,3.0,0.81
74,Luka Dončić,15.14,15.56,0.41


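> At least three-quarters of the 117 eligible players took shorter shots on average during recoveries than during slumps (median change: −3.20 ft; 75th percentile: −1.90 ft). This is consistent with getting closer to the basket being an active recovery strategy. Getting farther from the basket is not the norm.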

---
## 2) Most Common Shot Zones
#### Slumps

In [31]:
# Shot-zone distribution of slump shots, most frequent zone first:
# one row per SHOT_ZONE_BASIC value with its shot count and its share of all slump shots (%)
top_slump_zones = (
    slump_shots["SHOT_ZONE_BASIC"]
    .value_counts()
    .rename_axis("shot_zone")
    .reset_index(name="shot_count")
    .assign(pct=lambda zones: zones["shot_count"] / zones["shot_count"].sum() * 100)
)

# View results
top_slump_zones

Unnamed: 0,shot_zone,shot_count,pct
0,Above the Break 3,14425,33.41
1,Restricted Area,10168,23.55
2,In The Paint (Non-RA),9116,21.11
3,Mid-Range,4324,10.01
4,Left Corner 3,2624,6.08
5,Right Corner 3,2364,5.47
6,Backcourt,159,0.37


#### Recoveries

In [33]:
# Shot-zone distribution of recovery shots, most frequent zone first:
# one row per SHOT_ZONE_BASIC value with its shot count and its share of all recovery shots (%)
top_recovery_zones = (
    recovery_shots["SHOT_ZONE_BASIC"]
    .value_counts()
    .rename_axis("shot_zone")
    .reset_index(name="shot_count")
    .assign(pct=lambda zones: zones["shot_count"] / zones["shot_count"].sum() * 100)
)

# View results
top_recovery_zones

Unnamed: 0,shot_zone,shot_count,pct
0,Restricted Area,2440,38.14
1,Above the Break 3,1582,24.73
2,In The Paint (Non-RA),1284,20.07
3,Mid-Range,613,9.58
4,Left Corner 3,250,3.91
5,Right Corner 3,229,3.58


### Player-Level Summary
#### Slumps

In [35]:
# Eligibility: players whose SHOT_MADE_FLAG total for the season is at least 300
season_makes = shots.groupby("PLAYER_NAME")["SHOT_MADE_FLAG"].sum()
eligible_players = season_makes[season_makes.ge(300)].index

# Keep eligible players' slump and recovery shots only (neutral rows are excluded)
is_phase_of_interest = shots["phase"].isin(["slump", "recovered"])
is_eligible_player = shots["PLAYER_NAME"].isin(eligible_players)
player_phase_shots = shots.loc[is_phase_of_interest & is_eligible_player].copy()

In [36]:
# Separate eligible players' shots by phase: one frame for slumps, one for recoveries
shot_zone_slump_shots = player_phase_shots.loc[player_phase_shots["phase"].eq("slump")]
shot_zone_recovery_shots = player_phase_shots.loc[player_phase_shots["phase"].eq("recovered")]

In [37]:
# Count slump shots for every (player, shot zone) pair
slump_zone_counts = (
    shot_zone_slump_shots
    .groupby(["PLAYER_NAME", "SHOT_ZONE_BASIC"])
    .size()
    .reset_index(name="slump_shot_count")
)

# Order each player's zones from most to least used, then keep the first row per player
slump_zones_ranked = slump_zone_counts.sort_values(
    by=["PLAYER_NAME", "slump_shot_count"],
    ascending=[True, False],
)
player_slump_zone = (
    slump_zones_ranked
    .drop_duplicates(subset="PLAYER_NAME", keep="first")
    .rename(columns={"SHOT_ZONE_BASIC": "most_common_slump_shot_zone"})
)

# Preview results
player_slump_zone.head(5)

Unnamed: 0,PLAYER_NAME,most_common_slump_shot_zone,slump_shot_count
4,Aaron Wiggins,Restricted Area,37
7,Alex Sarr,In The Paint (Non-RA),82
13,Alperen Sengun,In The Paint (Non-RA),93
20,Amen Thompson,Restricted Area,32
22,Andrew Wiggins,Above the Break 3,72


In [38]:
# Count recovery shots for every (player, shot zone) pair
recovery_zone_counts = (
    shot_zone_recovery_shots
    .groupby(["PLAYER_NAME", "SHOT_ZONE_BASIC"])
    .size()
    .reset_index(name="recovery_shot_count")
)

# Order each player's zones from most to least used, then keep the first row per player
recovery_zones_ranked = recovery_zone_counts.sort_values(
    by=["PLAYER_NAME", "recovery_shot_count"],
    ascending=[True, False],
)
player_recovery_zone = (
    recovery_zones_ranked
    .drop_duplicates(subset="PLAYER_NAME", keep="first")
    .rename(columns={"SHOT_ZONE_BASIC": "most_common_recovery_shot_zone"})
)

# Preview results
player_recovery_zone.head(5)

Unnamed: 0,PLAYER_NAME,most_common_recovery_shot_zone,recovery_shot_count
4,Aaron Wiggins,Restricted Area,9
10,Alex Sarr,Restricted Area,14
13,Alperen Sengun,Restricted Area,24
15,Amen Thompson,Restricted Area,8
19,Andrew Wiggins,Restricted Area,10


In [39]:
# Merge each player's most common slump / recovery shot zone into `player_shot_summary`.
#
# This cell reassigns `player_shot_summary` from itself, so running it a second time would
# find the zone columns already present and pandas would add `_x` / `_y` suffixed duplicates.
# Dropping any previously merged zone columns first keeps the cell idempotent; on the first
# run there is nothing to drop (`errors="ignore"`).
zone_columns = [
    column
    for column in [*player_slump_zone.columns, *player_recovery_zone.columns]
    if column != "PLAYER_NAME"
]

# `validate="one_to_one"` makes pandas raise if PLAYER_NAME is duplicated on either side,
# instead of silently multiplying rows in the summary.
player_shot_summary = (
    player_shot_summary
        .drop(columns=zone_columns, errors="ignore")
        .merge(player_slump_zone, on="PLAYER_NAME", how="left", validate="one_to_one")
        .merge(player_recovery_zone, on="PLAYER_NAME", how="left", validate="one_to_one")
)

---
## Save

In [41]:
# Confirm: `player_shot_summary` columns
# (final check of the column set before the table is written to CSV)
player_shot_summary.columns

Index(['PLAYER_NAME', 'recovered_shots', 'slump_shots', 'avg_shot_distance_recovered', 'avg_shot_distance_slump', 'avg_loc_x_recovered', 'avg_loc_x_slump',
       'avg_loc_y_recovered', 'avg_loc_y_slump', 'avg_shot_distance_change', 'most_common_slump_shot_zone', 'slump_shot_count',
       'most_common_recovery_shot_zone', 'recovery_shot_count'],
      dtype='object')

In [42]:
# Write the marts for Tableau as CSV files in the working directory (row index omitted)
csv_exports = {
    "player_shot_summary_final.csv": player_shot_summary,
    "all_player_shots_slump_or_recovery.csv": player_phase_shots,
    "population_slump_shot_zones.csv": top_slump_zones,
    "population_recovery_shot_zones.csv": top_recovery_zones,
}
for csv_name, frame in csv_exports.items():
    frame.to_csv(csv_name, index=False)