In [None]:
import jupyter_black

jupyter_black.load()
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox
import matplotlib.ticker as mtick
import matplotlib.image as mpimg


import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LogisticRegression


from src.visualisation.visualisation import plot_bar, plot_scatter
from src.utils.data import flatten_grouped_cols
from src.visualisation.colors import team_unique_colors, team_unique_alt_colors
from src.utils.logos import get_team_logo
from src.utils.data import coach_lambda

# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option("display.max_columns", None)

In [None]:
# Seasons of play-by-play data requested from nfl_data_py.
seasons = [2021, 2022, 2023]
pbp: pd.DataFrame = nfl.import_pbp_data(seasons)

In [None]:
# Play types kept for the analysis; rows with any other play_type are dropped.
play_types = ["pass", "run", "punt", "field_goal"]

# Keep rows that have both a down and a play type, restrict them to the play
# types above, and attach the value coach_lambda returns for each row.
# The column is added with .assign() so that pbp_play is a standalone frame:
# setting a column directly on the result of .query() can raise
# SettingWithCopyWarning, both here and in later cells that add columns.
pbp_play = (
    pbp.dropna(subset=["down", "play_type"])
    .query("play_type in @play_types")
    .assign(coach=lambda plays: plays.apply(coach_lambda, axis=1))
)

In [None]:
# Number of rows in pbp_play for each play_type value.
play_type_counts = pbp_play["play_type"].value_counts()
play_type_counts

In [None]:
# Predictors of the pass-attempt model. quarter_seconds_remaining and
# posteam_score appeared in an earlier draft of the formula and are not used.
pass_attempt_predictors = [
    "yardline_100",
    "half_seconds_remaining",
    "game_seconds_remaining",
    "qtr",
    "down",
    "goal_to_go",
    "ydstogo",
    "posteam_timeouts_remaining",
    "score_differential",
]
pass_attempt_formula = "pass_attempt ~ " + " + ".join(pass_attempt_predictors)

# Fit a logistic regression of pass_attempt on the predictors above and
# print the fitted model's summary table.
pass_attempt_logit = smf.logit(pass_attempt_formula, pbp_play)
pass_attempt_result = pass_attempt_logit.fit()
print(pass_attempt_result.summary())

In [None]:
# Predicted pass probability for every play, from the fitted logit model.
predicted_pass_prob = pass_attempt_result.predict(pbp_play)
pbp_play["pass_attempt_prob"] = predicted_pass_prob

# Pass rate over expectation (PROE): the observed pass_attempt value minus
# the model's predicted probability for that play.
pbp_play["proe"] = pbp_play["pass_attempt"] - predicted_pass_prob

In [None]:
# A coach-season must have more than this many pass attempts to be reported.
MIN_PASS_ATTEMPTS = 40

# Average PROE per season and coach, using the coach column already present
# on pbp_play.
# pass_attempt is the binary response of the logit model, so "sum" yields the
# number of pass attempts. The previous "count" aggregation counted every
# non-null row (runs, punts and field goals included), which made both the
# pass_attempts column and the threshold below refer to total plays instead.
coach_proe = (
    pbp_play.groupby(["season", "coach"])
    .agg(
        pass_attempts=("pass_attempt", "sum"),
        avg_proe=("proe", "mean"),
    )
    .reset_index()
    .query("pass_attempts > @MIN_PASS_ATTEMPTS")
    # Highest average PROE first for readability.
    .sort_values(by="avg_proe", ascending=False)
)

coach_proe