In [None]:
import pandas as pd
import numpy as np
import jupyter_black
import matplotlib.pyplot as plt
import nfl_data_py as nfl
from typing import Literal, Tuple

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from scipy.stats import norm

# Auto-format every executed cell with black.
jupyter_black.load()
# Never elide columns when a DataFrame is rendered (None disables the limit).
pd.options.display.max_columns = None

In [None]:
# Seasons to load. The range end is exclusive, so this covers 2019 through 2023;
# use range(2018, 2024) instead to include the 2018 season as well.
SEASONS = range(2019, 2024)
pbp: pd.DataFrame = nfl.import_pbp_data(SEASONS)

In [None]:
# Keep only the rows whose description is exactly "GAME" (presumably the single
# pre-game row per game -- TODO confirm against the play-by-play schema) and
# just the columns needed to relate the spread line to the home win probability.
# To inspect every win-probability column, extend the list with
# [c for c in pbp.columns if "wp" in c].
wp_df = (
    pbp.query('desc=="GAME"')[["game_id", "spread_line", "vegas_home_wp"]]
    # A missing value in either modelling column would make the regression fit
    # raise, so such rows are removed here. dropna already returns a new frame,
    # which makes an explicit .copy() unnecessary.
    .dropna(subset=["spread_line", "vegas_home_wp"])
    # Ascending win probability; the row index is left untouched.
    .sort_values("vegas_home_wp")
)

wp_df

In [None]:
# Feature matrix: the home win probability as a single (n, 1) column.
X = wp_df["vegas_home_wp"].to_numpy().reshape(-1, 1)
# Target vector: the spread line of the same rows.
y = wp_df["spread_line"].to_numpy()

# Expand the single feature into polynomial terms up to degree 3
# (the degree is a knob worth experimenting with).
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit(X).transform(X)

# Least-squares fit of the spread line on the polynomial features.
model = LinearRegression().fit(X_poly, y)

# Number of decimal places kept by both spread predictors below.
ROUND = 3


def predict_spread_reg(percentile):
    """Predict the spread line for one win probability with the fitted regression.

    Args:
        percentile: A single win-probability value (scalar).

    Returns:
        The prediction of the module-level ``model`` for ``percentile``,
        rounded to ``ROUND`` decimal places.
    """
    # sklearn expects a 2-D input: one sample, one feature.
    features = poly.transform([[percentile]])
    spread = model.predict(features)[0]
    return round(spread, ROUND)


def predict_spread_norm(percentile, stdev=11.5):
    """Predict the spread line for one win probability with a normal model.

    Args:
        percentile: A single win-probability value (scalar).
        stdev: Multiplier applied to the standard-normal quantile.

    Returns:
        ``norm.ppf(percentile) * stdev`` rounded to ``ROUND`` decimal places.
    """
    # Standard-normal quantile of the probability, as a plain Python float.
    z_score = float(norm.ppf(percentile))
    return round(z_score * stdev, ROUND)

In [None]:
# Scatter of the observed (home win probability, spread line) pairs.
ax = wp_df.plot(
    x="vegas_home_wp",
    y="spread_line",
    kind="scatter",
    color="steelblue",
    title="Spread Line vs. Vegas Home WP",
)

# 100 evenly spaced win probabilities spanning the observed range.
# Series.min()/.max() are vectorised and skip NaN regardless of row order,
# unlike the builtin min()/max(), which iterate the Series in Python.
percentiles = np.linspace(
    wp_df["vegas_home_wp"].min(), wp_df["vegas_home_wp"].max(), 100
)

# Evaluate both predictors on the grid. The regression predictions are computed
# even though their curve is currently not drawn (see the disabled call below).
reg_predictions = [predict_spread_reg(p) for p in percentiles]
norm_predictions = [predict_spread_norm(p, stdev=11.5) for p in percentiles]

# Disabled: overlay of the polynomial regression curve.
# ax.plot(percentiles, reg_predictions, color="red", label="Polynomial Regression")

# Overlay the normal-quantile curve on the scatter axes.
ax.plot(percentiles, norm_predictions, color="green", label="Normal Distribution")

# Axis labels; the legend is currently disabled, so the line label is not shown.
ax.set_xlabel("Vegas Home WP")
ax.set_ylabel("Spread Line")
# ax.legend()

# Render the figure.
plt.show()