In [None]:
import jupyter_black

jupyter_black.load()
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox
import matplotlib.ticker as mtick
import matplotlib.image as mpimg


import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LogisticRegression


from src.visualisation.visualisation import plot_bar, plot_scatter
from src.utils.data import flatten_grouped_cols
from src.visualisation.colors import team_unique_colors, team_unique_alt_colors
from src.utils.logos import get_team_logo
from src.utils.data import coach_lambda

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [None]:
# Play-by-play rows for the 2023 season, fetched through nfl_data_py.
pbp: pd.DataFrame = nfl.import_pbp_data(years=[2023])

In [None]:
# How many plays of each play_type are in the data set.
pbp["play_type"].value_counts()

In [None]:
# Lookup table mapping a GSIS player id to the player's name.
#
# Rows with no gsis_id and repeated gsis_id values are removed so the table
# has at most one row per id. pandas' merge matches null keys to each other,
# so leaving null ids in would make any play with a missing kicker id fan
# out into one row per id-less player when this table is left-joined, and
# duplicate ids would likewise multiply kick rows and inflate attempt counts.
id_df = (
    nfl.import_ids()[["name", "gsis_id"]]
    .rename(columns={"name": "player"})
    .dropna(subset=["gsis_id"])
    .drop_duplicates(subset="gsis_id")
)
id_df.head()

In [None]:
# Keep only kicking plays (field goals and extra points) and attach the
# kicker's name by joining the id lookup on the kicker's player id.
play_types = ["field_goal", "extra_point"]
pbp_kick = pbp.query("play_type in @play_types").merge(
    id_df, how="left", left_on="kicker_player_id", right_on="gsis_id"
)

# Binary outcome: the extra point result is used when present, otherwise the
# field goal result. "good" / "made" become 1; every other value, including
# a missing result, becomes 0.
pbp_kick["kick_result"] = (
    pbp_kick["extra_point_result"]
    .fillna(pbp_kick["field_goal_result"])
    .map({"good": 1, "made": 1})
    .fillna(0)
)

# Distance of successful kicks only. Misses are left as NaN rather than 0 so
# that a later mean of this column averages over made kicks alone instead of
# being dragged toward zero by every miss.
pbp_kick["made_distance"] = pbp_kick["kick_distance"].where(
    pbp_kick["kick_result"] == 1
)

# Plays without a recorded down are treated as 4th down.
# NOTE(review): presumably these are extra points — confirm against the data.
pbp_kick["down"] = pbp_kick["down"].fillna(4)

In [None]:
# Columns to show when inspecting kicking plays, grouped by theme.
kick_cols = (
    # who kicked, and from how far
    ["kicker_player_id", "player", "kick_distance"]
    # raw attempt flags and results for both kick kinds
    + [
        "field_goal_attempt",
        "field_goal_result",
        "extra_point_attempt",
        "extra_point_result",
    ]
    # clock
    + [
        "quarter_seconds_remaining",
        "half_seconds_remaining",
        "game_seconds_remaining",
        "qtr",
    ]
    # game situation and the derived binary outcome
    + ["down", "goal_to_go", "score_differential", "kick_result"]
)
# Eight randomly chosen kicking plays, restricted to the inspection columns.
pbp_kick.loc[:, kick_cols].sample(n=8)

In [None]:
# Made (1) versus not made (0) kicks across all kicking plays.
pbp_kick.loc[:, "kick_result"].value_counts()

In [None]:
# Logistic regression of the binary kick outcome on distance, kick kind,
# clock, quarter and score differential.
# Note that `kick_result` below is the fitted model, which shares its name
# with the outcome column used inside the formula.
kick_logit = smf.logit(
    formula=(
        "kick_result ~ 1 +  kick_distance + extra_point_attempt"
        " + game_seconds_remaining + qtr + score_differential"
    ),
    data=pbp_kick,
)
kick_result = kick_logit.fit()
print(kick_result.summary())

In [None]:
# Model-implied probability that each kick is good, from the fitted logit.
pbp_kick["kick_prob"] = kick_result.predict(exog=pbp_kick)

# Kick result over expected (KROE): the actual outcome (1 or 0) minus the
# predicted make probability for that kick.
pbp_kick["kroe"] = pbp_kick["kick_result"].sub(pbp_kick["kick_prob"])

In [None]:
# Summary per (kicker, team) pair over every row of pbp_kick:
#   fga           - number of kicks
#   fgm           - number of kicks with kick_result == 1
#   avg_dist      - mean kick_distance
#   avg_made_dist - mean of the made_distance column
#   avg_kroe      - mean kick result over expected
#   fgp           - fgm / fga
# Only pairs with more than 15 kicks are kept, ordered from highest to
# lowest avg_kroe.
kroe_df = (
    pbp_kick.groupby(["player", "posteam"])
    .agg(
        fga=("kick_result", "count"),
        fgm=("kick_result", "sum"),
        avg_dist=("kick_distance", "mean"),
        avg_made_dist=("made_distance", "mean"),
        avg_kroe=("kroe", "mean"),
    )
    .reset_index()
    .loc[lambda grouped: grouped["fga"] > 15]
    .assign(
        fgm=lambda kept: kept["fgm"].astype(int),
        fgp=lambda kept: kept["fgm"] / kept["fga"],
    )
    .sort_values(by="avg_kroe", ascending=False)
)
kroe_df

In [None]:
# Scatter of make percentage (x) against mean kick result over expected (y)
# for the first 30 rows of kroe_df, with the player column passed as the
# marker and a best-fit line requested.
plot_scatter(
    kroe_df.iloc[:30],
    x="fgp",
    y="avg_kroe",
    title="Field goal Percentage and Average Field Goal Percentage over Expected 2023",
    marker="player",
    add_marker_label=True,
    best_fit=True,
)