In [None]:
import jupyter_black
import matplotlib.pyplot as plt
import nfl_data_py as nfl
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings

import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LogisticRegression

from src.modelling.metrics import (
    success_rate_lambda,
    calculate_epa_metrics,
    dual_epa_metrics,
    calculate_havoc,
)
from src.utils.data import flatten_grouped_cols, coach_lambda
from src.utils.logos import get_team_logo
from src.utils.config import (
    CUR_SEASON,
    HFA,
    DEFAULT_WIN_PROB,
    EXPLOSIVE_PASS_THRESHOLD,
    EXPLOSIVE_RUN_THRESHOLD,
)
from src.visualisation.colors import team_unique_colors, team_unique_alt_colors
from src.visualisation.plots import plot_bar, plot_scatter
from src.utils.queries import pass_play_query, run_play_query


# Notebook-wide setup: cell formatting, pandas progress helpers, display and
# warning options.
jupyter_black.load()
tqdm.pandas()
pd.options.display.max_columns = None  # never truncate columns when displaying frames
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

# Analysis window used by every cell below.
season = CUR_SEASON
week = 3  # include data up to this week

# Load Data

In [None]:
# Load play-by-play data for the configured season.
pbp_start: pd.DataFrame = nfl.import_pbp_data(
    years=[season],
)

# Keep weeks up to `week`, remove garbage time (either side's win probability
# below DEFAULT_WIN_PROB) and keep only pass/run plays.
# The single .copy() at the END of the filter chain makes `pbp` an independent
# frame, so the column assignments below write to `pbp` itself and do not raise
# pandas' SettingWithCopyWarning (previously the copy was taken before the
# last filter, leaving `pbp` as a filtered slice).
pbp = (
    pbp_start.query(f"week<={week}")
    .query(f"wp>={DEFAULT_WIN_PROB} and def_wp>={DEFAULT_WIN_PROB}")
    .query('play_type=="pass" or play_type=="run"')
    .copy()
)

# Create the 'explosive_run' and 'explosive_pass' columns in the pbp DataFrame.
# Comparisons against missing yardage evaluate to False, so plays without
# rushing/receiving yards get 0.
pbp["explosive_run"] = (pbp["rushing_yards"] >= EXPLOSIVE_RUN_THRESHOLD).astype(int)
pbp["explosive_pass"] = (pbp["receiving_yards"] >= EXPLOSIVE_PASS_THRESHOLD).astype(int)
pbp["explosive_play"] = pbp["explosive_run"] + pbp["explosive_pass"]

# add havoc metric (computed row by row by the project helper)
pbp["havoc"] = pbp.apply(calculate_havoc, axis=1)

# isolate run and pass plays
pbp_pass = pbp.query(pass_play_query)
# optional (currently disabled): limit rushing win probability to 25%-90%
# pbp_run = pbp.query(run_play_query).query("wp>=0.25 and wp<=0.90")
pbp_run = pbp.query(run_play_query)
pbp_play = pd.concat([pbp_run, pbp_pass])

## calculate epa and success rate dataframes

# # Overall
# off_epa_df, def_epa_df = dual_epa_metrics(pbp_play)
# ovr_epa_df = off_epa_df.merge(
#     def_epa_df, left_index=True, right_index=True, suffixes=("_off", "_def")
# )
# ovr_epa_df.index.name = "team"

# Passing
off_epa_pass_df, def_epa_pass_df = dual_epa_metrics(pbp_pass)

# Rushing
off_epa_run_df, def_epa_run_df = dual_epa_metrics(pbp_run)

# Pipeline V2

In [None]:
# Planning note: this list is only displayed, it is not assigned to a name.
# Presumably the target column layout for the Pipeline V2 team table - confirm.
[
    "team",
    "side",
    "epa_play",
    "epa_pass",
    "epa_rush",
    "success_rate",
    "proe",
    "adot",
    "yac",
    "sack_rate",
]

In [None]:
# Grouping key for the per-team tables; later cells reuse `team` as well.
team = "posteam"

# Per-team means over all run and pass plays. Named aggregation spells out the
# output column for each input column ("success" is exposed as "success_rate").
offense_df = (
    pbp_play.groupby(team)
    .agg(
        epa=("epa", "mean"),
        success_rate=("success", "mean"),
        explosive_play=("explosive_play", "mean"),
        pass_oe=("pass_oe", "mean"),
        sack=("sack", "mean"),
        yards_after_catch=("yards_after_catch", "mean"),
        havoc=("havoc", "mean"),
    )
    .sort_index()
    .rename_axis("team")
)
offense_df

In [None]:
# Whether dual_epa_metrics should report percentiles instead of raw values.
percentile = False

# pbp_pass / pbp_run are already built in the load cell from the same `pbp`
# frame, so they are not derived a second time here.

# Passing: offense and defense frames with "epa" exposed as "epa_per_pass".
# rename() returns new frames, so the objects handed back by dual_epa_metrics
# are not mutated in place.
pass_dfs = tuple(
    epa_df.rename(columns={"epa": "epa_per_pass"})
    for epa_df in dual_epa_metrics(pbp_pass, percentile=percentile)
)
off_epa_pass_df, def_epa_pass_df = pass_dfs

# Rushing: same treatment, with "epa" exposed as "epa_per_rush".
rush_dfs = tuple(
    epa_df.rename(columns={"epa": "epa_per_rush"})
    for epa_df in dual_epa_metrics(pbp_run, percentile=percentile)
)
off_epa_rush_df, def_epa_rush_df = rush_dfs

In [None]:
# Mean air yards per group of `team` on pass plays, stored as a one-column
# frame ("adot") indexed by "team".
adot_df = (
    pbp_pass.groupby(team)
    .agg(adot=("air_yards", "mean"))
    .rename_axis("team")
)
adot_df

In [None]:
# Attach per-pass EPA, per-rush EPA and aDOT to the offensive summary table.
# The cell rebinds `offense_df` to a merge of itself, so any columns it added
# on an earlier run are dropped first; without that, re-running the cell would
# produce suffixed duplicates (e.g. epa_per_pass_x / epa_per_pass_y).
merged_cols = ["epa_per_pass", "epa_per_rush", "adot"]
offense_df = (
    offense_df.drop(columns=merged_cols, errors="ignore")
    .merge(off_epa_pass_df["epa_per_pass"], left_index=True, right_index=True)
    .merge(off_epa_rush_df["epa_per_rush"], left_index=True, right_index=True)
    .merge(adot_df, left_index=True, right_index=True)
)

In [None]:
# Spot-check up to 10 plays: description next to the raw event flags.
# A fixed random_state keeps the displayed rows identical across
# Restart & Run All, and min() avoids a ValueError when fewer than 10 plays
# are available.
pbp_play[["desc", "tackled_for_loss", "fumble_forced", "sack", "interception"]].sample(
    n=min(10, len(pbp_play)), random_state=0
)

In [None]:
# Display the offensive summary table after the EPA / aDOT columns were merged in.
offense_df

In [None]:
# Rank every offensive metric across teams (1 = highest value).
# - Built from `offense_df`; the previously referenced `offense_grouped` and
#   `offense` names are not defined anywhere in the notebook.
# - method="min" gives tied teams the same whole-number rank instead of an
#   averaged x.5 rank that an integer cast would silently truncate.
# - The nullable Int64 dtype keeps a missing metric as <NA> rather than failing
#   the cast.
# NOTE(review): "sack" and "havoc" are ranked highest-first like the other
# columns - confirm that is the intended direction for an offense.
offense_ranked = offense_df.rank(ascending=False, method="min").astype("Int64")
offense_ranked

# Basic EPA

In [None]:
# Overall (run + pass) EPA / success-rate frames for offense and defense.
# They are built here because no earlier cell defines them (the load cell only
# contains this computation commented out), so the plots below would otherwise
# fail with NameError on a fresh kernel.
off_epa_df, def_epa_df = dual_epa_metrics(pbp_play)
ovr_epa_df = off_epa_df.merge(
    def_epa_df, left_index=True, right_index=True, suffixes=("_off", "_def")
)
ovr_epa_df.index.name = "team"

# epa
plot_scatter(
    ovr_epa_df,
    x="epa_off",
    y="epa_def",
    title=f"Offensive and Defensive EPA, Week {week} {season}",
    flip_y=True,
)
# success rate
plot_scatter(
    ovr_epa_df,
    x="success_rate_off",
    y="success_rate_def",
    title=f"Offensive and Defensive Success Rate, Week {week} {season}",
    flip_y=True,
)
# offense
plot_scatter(
    off_epa_df,
    x="success_rate",
    y="epa",
    title=f"Offensive EPA and Success Rate, Week {week} {season}",
)
# defense
plot_scatter(
    def_epa_df,
    x="success_rate",
    y="epa",
    title=f"Defensive EPA and Success Rate, Week {week} {season}",
    flip_x=True,
    flip_y=True,
)
ovr_epa_df

# Offense

## Quarterbacks
- EPA CPOE Composite
- Pressure to Sack (no more pressure data)
- Time to throw
- Run vs Pass
- EPA in expected pass situations (ep > 0.7)

## Skill Positions

### Receivers

### Running Backs

In [None]:
# Rushing offense: success rate (x) against EPA (y) per team, from the
# rushing frame built in the load cell (its "epa" column is not renamed).
plot_scatter(
    off_epa_run_df,
    x="success_rate",
    y="epa",
    title=f"Run Offense EPA and Success Rate, Week {week} {season}",
    # axis flips are left disabled for the offensive view
    # flip_x=True,
    # flip_y=True,
)

In [None]:
# Passing offense: success rate (x) against EPA per pass (y) per team.
# The Pipeline V2 section rebinds `off_epa_pass_df` with its "epa" column
# renamed to "epa_per_pass", so that is the column name available here.
plot_scatter(
    off_epa_pass_df,
    x="success_rate",
    y="epa_per_pass",
    title=f"Pass Offense EPA and Success Rate, Week {week} {season}",
    # axis flips are left disabled for the offensive view
    # flip_x=True,
    # flip_y=True,
)

## O-Line

# Defense

## Run Stop

1. Success Rate and EPA vs the Run
2. Explosive Run Rate & Yards per Attempt

In [None]:
# Rushing defense: success rate allowed (x) against EPA allowed (y) per team,
# from the rushing frame built in the load cell. Both axes are flipped via
# flip_x / flip_y.
plot_scatter(
    def_epa_run_df,
    x="success_rate",
    y="epa",
    title=f"Run Defense EPA and Success Rate, Week {week} {season}",
    flip_x=True,
    flip_y=True,
)

## Pass Rush

In [None]:
# Passing defense: success rate allowed (x) against EPA per pass allowed (y).
# The Pipeline V2 section rebinds `def_epa_pass_df` with its "epa" column
# renamed to "epa_per_pass", so that is the column name available here.
# Both axes are flipped via flip_x / flip_y.
plot_scatter(
    def_epa_pass_df,
    x="success_rate",
    y="epa_per_pass",
    title=f"Pass Defense EPA and Success Rate, Week {week} {season}",
    flip_x=True,
    flip_y=True,
)