In [None]:
# Load the jupyter_black extension first, before the rest of the setup runs.
import jupyter_black

jupyter_black.load()

import pandas as pd
import nfl_data_py as nfl
from typing import Literal, Tuple
from src.modelling.metrics import success_rate_lambda
from src.visualisation.visualisation import plot_team_scatter

# None lifts pandas' column-count limit so wide frames display every column.
pd.set_option("display.max_columns", None)

In [None]:
# range() excludes its end value, so this covers the 2022 and 2023 seasons.
seasons = range(2022, 2024)
pbp = nfl.import_pbp_data(seasons)

# Rushing EPA

In [None]:
# Keep only the plays whose play_type is "run".
pbp_r = pbp[pbp["play_type"].eq("run")]
len(pbp_r)

In [None]:
# Neutral game states: keep plays where both wp and def_wp reach the threshold.
neutral_wp_threshold = 0.25

is_neutral = (pbp_r["wp"] >= neutral_wp_threshold) & (
    pbp_r["def_wp"] >= neutral_wp_threshold
)
pbp_r_neu = pbp_r[is_neutral]
len(pbp_r_neu)

In [None]:
# Mean EPA per (season, posteam) pair, highest first, with a rank and a
# 0-10 scaled percentile.
# NOTE(review): rank and percentile are computed over every season/team row in
# the frame, not within a season, so the 2023 slice shown below will not
# necessarily contain consecutive ranks — confirm that is intended.
off_epa_df = (
    pbp_r_neu.groupby(["season", "posteam"])
    .agg({"epa": "mean"})
    .sort_values(by="epa", ascending=False)
    .assign(
        # method="min" keeps tied rows on the same whole-number rank; the default
        # average rank can be fractional and astype(int) would truncate it.
        epa_rank=lambda d: d["epa"].rank(method="min", ascending=False).astype(int),
        # Scale before rounding: rounding first and multiplying afterwards
        # leaves float noise such as 2.8000000000000003.
        epa_percentile=lambda d: (d["epa"].rank(pct=True) * 10).round(1),
    )
)
off_epa_df.xs(2023, level="season")

In [None]:
# Mean EPA per (season, posteam) pair, labelled as EPA per rush, highest first.
(
    pbp_r_neu.groupby(["season", "posteam"])
    .agg(epa_per_rush=("epa", "mean"))
    .sort_values(by="epa_per_rush", ascending=False)
)

In [None]:
def calculate_epa_metrics(
    data: pd.DataFrame, team: Literal["posteam", "defteam"] = "posteam"
) -> pd.DataFrame:
    """Summarise EPA and success rate per team, with ranks and percentiles.

    Plays are grouped by the ``team`` column. Each group gets its play count,
    its mean ``epa`` and the mean of ``success_rate_lambda`` applied to each
    play's ``epa``. Both metrics then receive a whole-number rank and a
    percentile scaled to 0-10.

    Args:
        data: Play-level frame containing an ``epa`` column and the column
            named by ``team``. It is not modified.
        team: Column to group by. With ``"defteam"`` the lowest value gets
            rank 1 and the highest percentile; with any other value the
            highest value does.

    Returns:
        Frame indexed by ``team`` and ordered by ``epa_rank``. ``n`` is the
        first column and the remaining columns follow alphabetically:
        ``epa``, ``epa_percentile``, ``epa_rank``, ``success_rate``,
        ``success_rate_percentile``, ``success_rate_rank``.
    """
    lower_is_better = team == "defteam"

    # Copy only the two columns used below rather than the whole input frame.
    df = data[[team, "epa"]].copy()
    df["success"] = df["epa"].apply(success_rate_lambda)

    epa_df = (
        df.groupby(team)
        .agg({team: "count", "epa": "mean", "success": "mean"})
        .rename(columns={team: "n", "success": "success_rate"})
    )

    for col in ["epa", "success_rate"]:
        # method="min" keeps tied teams on the same whole-number rank; the
        # default average rank can be fractional and astype(int) would
        # truncate it.
        epa_df[f"{col}_rank"] = (
            epa_df[col].rank(method="min", ascending=lower_is_better).astype(int)
        )
        # Scale to 0-10 before rounding: rounding first and multiplying
        # afterwards leaves float noise such as 2.8000000000000003.
        epa_df[f"{col}_percentile"] = (
            epa_df[col].rank(ascending=(not lower_is_better), pct=True) * 10
        ).round(1)

    # Round for display only after the ranks were taken from the full values.
    epa_df["epa"] = epa_df["epa"].round(3)
    epa_df["success_rate"] = epa_df["success_rate"].round(2)

    ordered_cols = ["n", *sorted(c for c in epa_df.columns if c != "n")]
    # Stable sort: teams sharing a rank keep their grouped order.
    return epa_df[ordered_cols].sort_values("epa_rank", kind="stable")


def dual_epa_metrics(data: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Build the two EPA summary tables from the same set of plays.

    Returns:
        A pair of frames from ``calculate_epa_metrics``: first the one grouped
        by its default ``"posteam"``, then the one grouped by ``"defteam"``.
    """
    by_posteam = calculate_epa_metrics(data)
    by_defteam = calculate_epa_metrics(data, "defteam")
    return by_posteam, by_defteam

In [None]:
# Restrict the neutral-state rushes to 2023, then build both summary tables.
pbp_r_neu_2023 = pbp_r_neu.query("season==2023")
off_rush_epa_df, def_rush_epa_df = dual_epa_metrics(pbp_r_neu_2023)

In [None]:
# 2023 table grouped by posteam, as returned by dual_epa_metrics (ordered by epa_rank).
off_rush_epa_df

In [None]:
# Team scatter of the success_rate and epa columns from the posteam table.
# NOTE(review): presumably the first column name is the x-axis — confirm in plot_team_scatter.
plot_team_scatter(off_rush_epa_df, "success_rate", "epa", alpha=0.9)

In [None]:
# 2023 table grouped by defteam, as returned by dual_epa_metrics (ordered by epa_rank).
def_rush_epa_df

In [None]:
# Same scatter for the defteam table.
# NOTE(review): flip_def=True presumably reverses the axes so that lower (better)
# defensive values plot in the "good" direction — confirm in plot_team_scatter.
plot_team_scatter(def_rush_epa_df, "success_rate", "epa", flip_def=True, alpha=0.9)