In [None]:
import jupyter_black

jupyter_black.load()
import pandas as pd
import nfl_data_py as nfl
from src.visualisation.visualisation import plot_bar, plot_scatter
from src.utils.methods import flatten_grouped_cols

# Never truncate the column list when a DataFrame is rendered.
pd.options.display.max_columns = None

## Load Data

In [None]:
# Seasons whose play-by-play data is downloaded for this analysis.
seasons = [2023]
pbp: pd.DataFrame = nfl.import_pbp_data(seasons)

In [None]:
# Player ID lookup table from nfl_data_py; peek at a few random rows.
id_df = nfl.import_ids()
id_df.sample(n=4)

In [None]:
# Minimum win probability each side must have for a play to be kept.
wp_alpha = 0.05

# Build the pass-play subset in a single chain so `pbp_pass` is a fresh frame
# rather than a filtered slice that gets mutated afterwards. This avoids
# SettingWithCopyWarning and keeps the cell idempotent on re-run.
pbp_pass = (
    pbp.query("week<=18")
    .query('play_type=="pass"')
    .query(f"wp >= {wp_alpha} and def_wp >={wp_alpha}")
    .assign(
        # Certain plays have a missing pressure flag: treat missing as
        # "no pressure", then force every sack to count as a pressure.
        was_pressure=lambda plays: plays["was_pressure"]
        .fillna(0)
        .mask(plays["sack"] == 1, 1)
    )
)
pbp_pass

## Pressures and Sacks

Who was able to get to the QB?

In [None]:
method = "sum"  # not referenced in this cell

# Per-defence pass-rush summary: for each flag the "sum" is the number of plays
# on which it was set and the "mean" is its rate over the team's pass plays.
rush_stats = ["was_pressure", "qb_hit", "sack"]
pbp_pass_rush = (
    pbp_pass.groupby(["defteam"])
    .agg({stat: ["sum", "mean"] for stat in rush_stats})
    .round(3)
)
pbp_pass_rush.columns = flatten_grouped_cols(pbp_pass_rush.columns)

# Friendlier column names for display and plotting.
rush_column_names = {
    "defteam": "team",
    "was_pressure_sum": "pressures",
    "was_pressure_mean": "pressure_rate",
    "qb_hit_sum": "qb_hits",
    "qb_hit_mean": "qb_hit_rate",
    "sack_sum": "sacks",
    "sack_mean": "sack_rate",
}
pbp_pass_rush = (
    pbp_pass_rush.reset_index()
    .rename(columns=rush_column_names)
    .sort_values("pressure_rate", ascending=False)
)
pbp_pass_rush

In [None]:
# Volume view: per-team pressure totals against sack totals.
plot_scatter(
    pbp_pass_rush,
    x="pressures",
    y="sacks",
)

In [None]:
# Rate view: per-team pressure rate against sack rate.
plot_scatter(
    pbp_pass_rush,
    x="pressure_rate",
    y="sack_rate",
)

## How do QBs do when pressured vs. not?

In [None]:
# Per-quarterback summary of the filtered pass plays: attempt count, plus the
# count ("sum") and per-play rate ("mean") of pressures, sacks and EPA.
pbp_pass_qb = (
    pbp_pass.groupby(["passer_id", "passer_player_name", "posteam"])
    .agg(
        {
            "pass_attempt": ["sum"],
            "was_pressure": ["sum", "mean"],
            "sack": ["sum", "mean"],
            "epa": ["sum", "mean"],
        }
    )
    .round(3)
)
# Turn the flattened "<col>_<agg>" names into readable ones, e.g.
# "was_pressure_sum" -> "pressures" and "sack_mean" -> "sack_rate".
pbp_pass_qb.columns = [
    col.replace("was_", "").replace("_mean", "_rate").replace("_sum", "s")
    for col in flatten_grouped_cols(pbp_pass_qb.columns)
]
pbp_pass_qb = (
    pbp_pass_qb.reset_index()
    .rename(
        columns={
            "passer_player_name": "player",
            "posteam": "team",
            "epas": "total_epa",
            "epa_rate": "epa_per_play",
        }
    )
    # NOTE(review): the chart title downstream says "min 75 attempts" but this
    # filter is strict (more than 75) - confirm which is intended.
    .query("pass_attempts > 75")
    .assign(
        # Share of pressures that ended in a sack. Computed from the counts
        # rather than the two rates, which were already rounded to 3 decimals
        # and would make the ratio imprecise. Using assign (instead of setting
        # a column on the query result) also avoids SettingWithCopyWarning.
        pressure_to_sack_rate=lambda qbs: qbs["sacks"] / qbs["pressures"]
    )
)
pbp_pass_qb.sort_values("pressure_to_sack_rate")

In [None]:
# Style overrides passed to `plot_scatter` through its `custom_style` argument.
# Other options tried and currently left out: "axes.titlesize": "x-large",
# "axes.labelsize": "large", "lines.linewidth": 2,
# "figure.facecolor": "lightgray".
tick_label_size = "medium"  # shared by both axes
custom_style = {
    "axes.grid": False,  # no gridlines
    "lines.markersize": 6,
    "xtick.labelsize": tick_label_size,
    "ytick.labelsize": tick_label_size,
    "axes.titlepad": 20,
}

In [None]:
# Keep the 45 quarterbacks with the lowest pressure-to-sack rate, then plot
# that rate against EPA per play with each point labelled by player.
qb_scatter_data = pbp_pass_qb.sort_values("pressure_to_sack_rate").head(45)
plot_scatter(
    qb_scatter_data,
    x="pressure_to_sack_rate",
    y="epa_per_play",
    marker="player",
    add_marker_label=True,
    alpha=0.8,
    title="2023 QB EPA per Play vs Pressure to Sack Rate (min 75 attempts)",
    flip_x=True,  # presumably reverses the x-axis - confirm in plot_scatter
    custom_style=custom_style,
)

In [None]:
# Summary statistics for the `time_to_throw` column of the pass plays.
pbp_pass["time_to_throw"].describe()

In [None]:
# Reference helper: print the names of the matplotlib style sheets available.
from matplotlib import pyplot as plt

print(plt.style.available)

In [None]:
# Names of every column in `pbp_pass` that contains the substring "time".
list(filter(lambda name: "time" in name, pbp_pass.columns))

In [None]:
# Diagnostic: how do missing pressure flags relate to sacks?
# `pbp_pass` has already had its missing `was_pressure` values filled (and
# sacks forced to 1), so on a fresh run the NA mask against it is always empty
# and `.sample(4)` raises. Re-derive the same play subset from the untouched
# `pbp` frame instead.
pbp_pass_raw = (
    pbp.query("week<=18")
    .query('play_type=="pass"')
    .query(f"wp >= {wp_alpha} and def_wp >={wp_alpha}")
)

# Non-sack plays whose pressure flag is missing.
pressure_missing_no_sack = pbp_pass_raw[
    pbp_pass_raw["was_pressure"].isna() & (pbp_pass_raw["sack"] < 1)
]
# Cap the sample size so the cell still runs when fewer than 4 plays match;
# display() is needed because only a cell's last expression is shown.
display(pressure_missing_no_sack.sample(min(4, len(pressure_missing_no_sack))))

# Pressure flag as recorded on sack plays, counting missing values too.
pbp_pass_raw.loc[pbp_pass_raw["sack"] == 1, "was_pressure"].value_counts(
    dropna=False
)