# Q5: Predicting Shooting Percentages (FG%, 3P%, FT%)

This notebook evaluates which factors best predict a player’s shooting efficiency across the season.

We model:

- Field goal percentage (FG%)
- Three-point percentage (3P%)
- Free throw percentage (FT%)

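Import the data, then build a player-season table (one row per player and team) with attempt-weighted shooting percentages. Each percentage is computed as total makes divided by total attempts — for example, FG% = (sum of FG) / (sum of FGA) — rather than as the average of per-game FG%, so that games with more attempts count proportionally more.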

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the cleaned game-level stats (presumably one row per player per game —
# confirm against the file) and work on an explicit copy.
df = pd.read_csv("data/player_game_stats_clean.csv").copy()

# Attempt columns with zeros masked to NaN, so that any ratio taken over them
# yields NaN for zero-attempt rows.
for _attempt_col in ("FGA", "3PA", "FTA"):
    df[f"{_attempt_col}_safe"] = df[_attempt_col].replace(0, np.nan)

# Shot-profile ratios, both relative to (non-zero) field goal attempts:
#   3PA_share = three-point attempts / field goal attempts
#   FT_rate   = free throw attempts  / field goal attempts
df["3PA_share"] = df["3PA"].div(df["FGA_safe"])
df["FT_rate"] = df["FTA"].div(df["FGA_safe"])

# Points per minute; MP is floored at 1 so rows with less than one minute
# played do not produce inflated (or infinite) rates.
df["PTS_per_min"] = df["PTS"].div(df["MP"].clip(lower=1))

# Collapse the game-level rows to one row per (Player, Tm) pair.
# A player who appears under more than one team gets one row per team.
# dropna=False keeps rows with a missing Player or Tm as their own group.
g = df.groupby(["Player", "Tm"], dropna=False)

# Named-aggregation spec: output column -> (source column, reducer).
# Insertion order determines the column order of the result.
_season_spec = {
    # "count" tallies non-null PTS entries in each group
    "GamesPlayed": ("PTS", "count"),
}

# Per-game averages of the box-score stats
_season_spec.update(
    {
        f"{_stat}_mean": (_stat, "mean")
        for _stat in ("MP", "PTS", "TRB", "AST", "TOV", "STL", "BLK", "PF", "GmSc")
    }
)

# Totals of makes and attempts, used for attempt-weighted percentages.
# The leading underscore in `_3PA_sum` / `_3P_sum` is part of the column name.
_season_spec.update(
    {
        "FGA_sum": ("FGA", "sum"),
        "FG_sum": ("FG", "sum"),
        "_3PA_sum": ("3PA", "sum"),
        "_3P_sum": ("3P", "sum"),
        "FTA_sum": ("FTA", "sum"),
        "FT_sum": ("FT", "sum"),
    }
)

# Averages of the per-game shot-profile ratios (unweighted means across games)
_season_spec.update(
    {
        "threePA_share_mean": ("3PA_share", "mean"),
        "FT_rate_mean": ("FT_rate", "mean"),
        "PTS_per_min_mean": ("PTS_per_min", "mean"),
    }
)

player_season = g.agg(**_season_spec)
# Move Player / Tm out of the index and back into ordinary columns
player_season = player_season.reset_index()

# Attempt-weighted shooting percentages: total makes / total attempts per row.
# Rows with zero total attempts get NaN instead of a division by zero.
for _pct_col, _made_col, _att_col in (
    ("FG%_w", "FG_sum", "FGA_sum"),
    ("3P%_w", "_3P_sum", "_3PA_sum"),
    ("FT%_w", "FT_sum", "FTA_sum"),
):
    _attempts = player_season[_att_col].replace(0, np.nan)
    player_season[_pct_col] = player_season[_made_col] / _attempts

# Preview the first rows of the aggregated table
player_season.head()

Unnamed: 0,Player,Tm,GamesPlayed,MP_mean,PTS_mean,TRB_mean,AST_mean,TOV_mean,STL_mean,BLK_mean,...,_3PA_sum,_3P_sum,FTA_sum,FT_sum,threePA_share_mean,FT_rate_mean,PTS_per_min_mean,FG%_w,3P%_w,FT%_w
0,A.J. Green,MIL,44,21.9975,7.659091,2.25,1.272727,0.613636,0.545455,0.113636,...,222,96,15,13,0.830751,0.061071,0.343601,0.44186,0.432432,0.866667
1,A.J. Lawson,TOR,4,3.7575,2.75,0.75,0.0,0.0,0.0,0.0,...,5,2,3,1,0.583333,0.416667,0.805603,0.5,0.4,0.333333
2,AJ Johnson,MIL,8,5.67125,2.5,1.0,0.875,0.5,0.125,0.0,...,5,3,2,1,0.095238,0.02381,0.386993,0.380952,0.6,0.5
3,AJ Johnson,WAS,1,8.83,2.0,1.0,4.0,0.0,0.0,0.0,...,2,0,0,0,0.5,0.0,0.226501,0.25,0.0,
4,Aaron Gordon,DEN,30,26.666,12.333333,4.733333,3.066667,1.333333,0.466667,0.266667,...,84,34,97,76,0.355436,0.405371,0.469476,0.509804,0.404762,0.783505
