In [None]:
import jupyter_black

# Auto-format every executed cell with Black.
jupyter_black.load()
import pandas as pd
import nfl_data_py as nfl
from src.visualisation.visualisation import plot_bar, plot_scatter
from src.visualisation import colors
import plotly.express as px

# Show every column when a wide DataFrame is displayed.
pd.set_option("display.max_columns", None)

In [None]:
# Play-by-play data for the 2023 season, loaded through nfl_data_py.
pbp: pd.DataFrame = nfl.import_pbp_data([2023])

In [None]:
# Player id lookup table; used below to map the ids listed in
# pbp["offense_players"] (matched on gsis_id) to a position and a name.
id_df = nfl.import_ids()
# Peek at a few random rows (unseeded, so the rows differ between runs).
id_df.sample(4)

In [None]:
# Keep weeks 1-18 (presumably the regular season — confirm) and only pass/run plays.
pbp = pbp.loc[(pbp["week"] <= 18) & pbp["play_type"].isin(["pass", "run"])]

In [None]:
# Split the ";"-delimited offense_players string into a list of player ids.
# Plays without a participation string (any non-str value, e.g. NaN/None) get
# None so they can be dropped once the lists are exploded.
pbp["offense_players_list"] = pbp["offense_players"].apply(
    lambda players: players.split(";") if isinstance(players, str) else None
)

In [None]:
# Sanity check: inspect the parsed per-play lists of player ids.
pbp["offense_players_list"]

In [None]:
# Long format: one row per (play, on-field offensive player).
# Plays whose id list is missing are dropped; empty-string ids are not filtered.
# Later cells reference "name_y", so pbp evidently already carries a "name"
# column and the name joined from id_df arrives with pandas' default "_y" suffix.
pbp_player = (
    pbp.rename(columns={"offense_players_list": "player_id"})
    .explode("player_id")
    .dropna(subset=["player_id"])
    # Left join: players without a match in id_df are kept (position/name NaN).
    .merge(
        id_df.loc[:, ["gsis_id", "position", "name"]],
        left_on="player_id",
        right_on="gsis_id",
        how="left",
    )
    .drop("gsis_id", axis=1)
)

In [None]:
# Spot-check a few random rows of the exploded, id-enriched frame.
pbp_player.sample(8)

In [None]:
# Wide receivers only; .copy() so the derived columns below are written to an
# independent frame rather than to a slice of pbp_player.
pbp_wr = pbp_player.query('position=="WR"').copy()

# 1 when this on-field receiver is the play's receiver_id, else 0.
# Vectorised equality replaces the row-wise apply: a missing receiver_id never
# equals a player id, so those rows are 0 exactly as before, without the
# per-row Python call.
pbp_wr["is_self"] = pbp_wr["player_id"].eq(pbp_wr["receiver_id"]).astype(int)

# Credit play-level outcomes to the player only on plays where they were the
# receiver (multiplying by is_self zeroes every other play).
pbp_wr["self_rec"] = pbp_wr["complete_pass"] * pbp_wr["is_self"]
pbp_wr["self_rec_yds"] = pbp_wr["receiving_yards"] * pbp_wr["is_self"]
pbp_wr["self_first_down"] = pbp_wr["first_down"] * pbp_wr["is_self"]

In [None]:
# List the column names that contain "recep".
pbp_wr.filter(like="recep").columns.tolist()

In [None]:
# Per-receiver totals and per-route rates.
# "routes" is the number of pass attempts the player was on the field for, used
# here as a stand-in for routes run; "tgts" counts plays where the player was
# the receiver.
yprr_df = (
    pbp_wr.groupby(["player_id", "name_y", "posteam"])
    .agg(
        snaps=("play_id", "count"),
        routes=("pass_attempt", "sum"),
        tgts=("is_self", "sum"),
        recs=("self_rec", "sum"),
        yds=("self_rec_yds", "sum"),
        first_downs=("self_first_down", "sum"),
    )
    .astype(int)
    .assign(
        yprr=lambda totals: (totals["yds"] / totals["routes"]).round(3),
        fdprr=lambda totals: (totals["first_downs"] / totals["routes"]).round(3),
    )
    # Only keep players with more than 50 routes, ranked by yards per route run.
    .query("routes>50")
    .sort_values("yprr", ascending=False)
    .reset_index()
    .rename(columns={"name_y": "player", "posteam": "team"})
)
yprr_df.head()

In [None]:
import matplotlib.pyplot as plt
from typing import Tuple, Union
from matplotlib.offsetbox import AnnotationBbox
from src.utils.logos import get_team_logo
from adjustText import adjust_text


def plot_team_scatter(
    data: pd.DataFrame,
    x: str,
    y: str,
    title: Union[str, None] = None,
    ax_labels: Tuple[str, str] = ("", ""),
    mean_reference: bool = True,
    zero_reference: bool = True,
    flip_def: bool = False,
    alpha: float = 1.0,
    label: Union[str, None] = "player",
) -> None:
    """Draw a scatter plot of ``x`` vs ``y`` with team logos as markers.

    Each row of ``data`` becomes one logo placed at ``(row[x], row[y])`` and,
    when ``label`` is given, a text label taken from that column. The figure
    is shown with ``plt.show()``; nothing is returned.

    Args:
        data: Frame with one row per point. Needs the ``x`` and ``y`` columns
            and a ``team`` column; if ``team`` is not a column, the first index
            level is reset and used as ``team``.
        x: Column plotted on the horizontal axis.
        y: Column plotted on the vertical axis.
        title: Optional figure title.
        ax_labels: ``(x label, y label)``; an empty string falls back to the
            column name.
        mean_reference: Draw dashed red lines at the mean of ``x`` and ``y``.
        zero_reference: Draw light grey lines at 0 on each axis whose minimum
            value is negative.
        flip_def: Invert both axes.
        alpha: Passed to ``get_team_logo`` as the logo's ``alpha``.
        label: Column holding the text shown next to each logo. Defaults to
            ``"player"``; pass ``None`` to draw logos only.

    Raises:
        AssertionError: If ``team``, ``x`` or ``y`` is missing from ``data``.
        KeyError: If ``label`` is set but is not a column of ``data``.
        ValueError: If ``data`` has no rows.
    """
    # if team is the index of the df, turn it into a regular column
    if "team" not in data.columns:
        data = data.reset_index(level=0)
        data = data.rename(columns={data.columns[0]: "team"})

    # Validate everything up front so a bad call fails before a figure exists.
    missing = [col for col in ("team", x, y) if col not in data.columns]
    assert not missing, f"columns missing from data: {missing}"
    if label is not None and label not in data.columns:
        raise KeyError(label)
    if data.empty:
        raise ValueError("data has no rows to plot")

    # Apply size/layout settings to this figure only; assigning to the global
    # plt.rcParams would silently change every figure created afterwards.
    with plt.rc_context({"figure.figsize": [12, 8], "figure.autolayout": True}):
        fig, ax = plt.subplots()

    # Text artists collected for the overlap adjustment at the end.
    texts = []
    for _, row in data.iterrows():
        # Team logo centred on the data point.
        logo = AnnotationBbox(
            get_team_logo(row["team"], size=(30, 30), alpha=alpha),
            (row[x], row[y]),
            frameon=False,
        )
        ax.add_artist(logo)

        if label is not None:
            # Start the label slightly right of the point (0.01 in data units);
            # adjust_text repositions it afterwards.
            texts.append(
                ax.text(
                    row[x] + 0.01,
                    row[y],
                    row[label],
                    fontsize=12,
                    ha="left",
                    va="center",
                )
            )

    # Pad the axis limits by 10% of the data range on each side.
    padding_percentage = 0.1
    x_min, x_max = data[x].min(), data[x].max()
    y_min, y_max = data[y].min(), data[y].max()
    x_padding = (x_max - x_min) * padding_percentage
    y_padding = (y_max - y_min) * padding_percentage
    ax.set_xlim(x_min - x_padding, x_max + x_padding)
    ax.set_ylim(y_min - y_padding, y_max + y_padding)

    if flip_def:
        ax.invert_yaxis()
        ax.invert_xaxis()

    # add reference lines for 0's if the min is negative and we show the flag
    if zero_reference:
        if y_min < 0:
            ax.axhline(0, color="lightgrey", linestyle="-", linewidth=0.8)
        if x_min < 0:
            ax.axvline(0, color="lightgrey", linestyle="-", linewidth=0.8)

    # add reference lines at the mean of the plotted data
    if mean_reference:
        ax.axhline(data[y].mean(), color="red", linestyle="--", linewidth=0.8)
        ax.axvline(data[x].mean(), color="red", linestyle="--", linewidth=0.8)

    if title:
        ax.set_title(title)

    # label the axes, falling back to the column names
    ax.set_xlabel(ax_labels[0] or x)
    ax.set_ylabel(ax_labels[1] or y)
    # NOTE(review): the figure is created with autolayout (tight layout) on,
    # which may recompute these margins at draw time — confirm this is needed.
    fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

    # Nudge the labels apart; skipped when no labels were drawn.
    if texts:
        # NOTE(review): adjustText appears to document the only_move key as
        # "text" (singular); confirm that "texts" is honoured.
        adjust_text(
            texts,
            arrowprops=dict(
                arrowstyle="-", color="gray", lw=0.5, shrinkA=5, shrinkB=5
            ),
            only_move={
                "points": "y",
                "texts": "xy",
            },  # Limit movement to reduce displacement
            force_text=0.1,  # Reduce the force to keep text closer
            expand_text=(1.05, 1.2),  # Expansion of text boxes
            expand_points=(1.05, 1.2),  # Expansion around points
            lim=100,  # Limit the number of iterations
        )
    plt.show()

In [None]:
# Yards vs first downs per route run for the receivers ranked 2-40 by yprr.
# NOTE(review): iloc[1:40] skips the top-ranked row — confirm this is a
# deliberate outlier exclusion and not an off-by-one.
plot_team_scatter(
    yprr_df.iloc[1:40],
    x="yprr",
    y="fdprr",
    title="2023 Yards vs First Downs per Route Run",
    ax_labels=("Yards per Route Run", "First Downs per Route Run"),
    alpha=0.8,
)

In [None]:
# yprr_df is ordered by yprr, so re-rank by fdprr before taking the top 20;
# a plain head(20) would chart the yprr leaders under an fdprr title.
plot_bar(
    yprr_df.sort_values("fdprr", ascending=False).head(20).reset_index(drop=True),
    x="fdprr",
    y="player",
    title="2023 First Downs per Route Run",
    ax_labels=("First Downs per Route Run", ""),
    dark_mode=True,
)

In [None]:
# yprr_df is already sorted by yprr (descending), so head(20) is the top 20.
plot_bar(
    yprr_df.head(20),
    x="yprr",
    y="player",
    title="2023 Yards per Route Run",
    ax_labels=("Yards per Route Run", ""),
    dark_mode=True,
)

Future TODOs
* explosive play rate
* target share