In [60]:
''' IMPORTS '''

from pprint import pprint
import math
from datetime import datetime

import pandas as pd
import numpy as np

import nfl_data_py as nfl

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.colors as cl
from plotly.subplots import make_subplots

from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

from resources.plotly_theme import nfl_template
from resources.get_nfl_data import get_team_info, get_pbp_data


pio.templates['nfl_template'] = nfl_template

In [61]:
''' Constants / Parameters  '''

## Parameters
INPUT_YEARS = [i for i in range(2018, 2026)]

# NOTE - make sure not to include any un-completed matchups in model
SEASON = 2025
PREDICTION_WEEK = 10

FEATURE_TYPE = 'EPA / Play'

## Constants ## 

EPA_COLS = []
EPA_PLAY_COLS = []
for n in [4,8,12,16]:
    for unit in ['O', 'D', 'ST']:
        EPA_COLS.append(f'Last_{n}_EPA_{unit}')
        EPA_PLAY_COLS.append(f'Last_{n}_EPA_{unit}_Play')

FEATURE_COLS = EPA_PLAY_COLS if FEATURE_TYPE == 'EPA / Play' else EPA_COLS

# Process

In [66]:
''' Import / Process Data '''

## Team info ##
team_data = get_team_info()

## PBP ##
pbp_data = get_pbp_data(years=INPUT_YEARS)

## Matchups ##
schedule_data = nfl.import_schedules(years=INPUT_YEARS).copy()
print(schedule_data.head().to_string())

# Some cleaning
schedule_data['winner'] = np.where(schedule_data['result'] > 0, 1, 0)
schedule_data = schedule_data.replace('OAK', 'LV')

# Get desired columns / weeks
COLS = ['game_id', 'season', 'week', 'home_team', 'away_team', 'winner', 'result', 'total', 'home_moneyline', 'away_moneyline', 'spread_line', 'away_spread_odds', 'home_spread_odds', 'total_line', 'under_odds', 'over_odds']
FILTERS = (schedule_data['game_type'] == 'REG') & (schedule_data['result'] != 0)

master_matchups_df = schedule_data.loc[FILTERS, COLS].sort_values(by=['season', 'week']).reset_index(drop=True)

## Create week master ##
master_weeks = master_matchups_df[['season', 'week']].drop_duplicates().reset_index(drop=True)
master_weeks.index = master_weeks.index + 1
master_weeks = master_weeks.reset_index(names=['master_week'])

# Add week back to matchup
master_matchups_df = master_matchups_df.merge(master_weeks, left_on=['season', 'week'], right_on=['season', 'week'])

print(master_weeks.shape)
print(master_weeks.tail().to_string())
print(master_matchups_df.shape)
print(master_matchups_df.loc[(master_matchups_df['season'] == SEASON) & (master_matchups_df['week'] == PREDICTION_WEEK),:].to_string())

2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
2025 done.
Downcasting floats.
              game_id  season game_type  week     gameday   weekday gametime away_team  away_score home_team  home_score location  result  total  overtime  old_game_id     gsis nfl_detail_id           pfr     pff       espn  ftn  away_rest  home_rest  away_moneyline  home_moneyline  spread_line  away_spread_odds  home_spread_odds  total_line  under_odds  over_odds  div_game      roof    surface  temp  wind  away_qb_id  home_qb_id        away_qb_name    home_qb_name      away_coach     home_coach        referee stadium_id                  stadium
5049  2018_01_ATL_PHI    2018       REG     1  2018-09-06  Thursday    20:20       ATL        12.0       PHI        18.0     Home     6.0   30.0       0.0   2018090600  57567.0           NaN  201809060phi  9338.0  401030710  NaN          7          7          -104.0          -106.0          1.0            -111.0             101.0        4

In [67]:
''' Calculate Weekly EPA '''

## Offense ##
offense_epa = pbp_data.loc[pbp_data['Is Special Teams Play'] == False, :].groupby(['season', 'week', 'posteam']).aggregate(
    Plays_O=('posteam', 'size'),
    EPA_O=('epa', 'sum')
)#.reset_index().rename(columns={'posteam': 'team'})
offense_epa['EPA_O_Play'] = offense_epa['EPA_O'] / offense_epa['Plays_O']
offense_epa.index = offense_epa.index.set_names('team', level=2)

## Defense ##
defense_epa = pbp_data.loc[pbp_data['Is Special Teams Play'] == False, :].groupby(['season', 'week', 'defteam']).aggregate(
    Plays_D=('posteam', 'size'),
    EPA_D=('epa', 'sum')
)#.reset_index().rename(columns={'defteam': 'team'})
defense_epa['EPA_D'] = -1 * defense_epa['EPA_D']
defense_epa['EPA_D_Play'] = defense_epa['EPA_D'] / defense_epa['Plays_D']
defense_epa.index = defense_epa.index.set_names('team', level=2)

## ST ##
special_teams_epa = pbp_data.loc[pbp_data['Is Special Teams Play'] == True, :].groupby(['season', 'week', 'posteam']).aggregate(
    Opp=('defteam', 'first'),
    POS_Plays_ST=('posteam', 'size'),
    POS_EPA_ST=('epa', 'sum')
)

def get_def_plays(row):
    seas = row.name[0]
    w = row.name[1]
    opp = row['Opp']
    return special_teams_epa.loc[(seas, w, opp), 'POS_Plays_ST']

def get_def_epa(row):
    seas = row.name[0]
    w = row.name[1]
    opp = row['Opp']
    return -1*special_teams_epa.loc[(seas, w, opp), 'POS_EPA_ST']

special_teams_epa['DEF_Plays_ST'] = special_teams_epa.apply(lambda x: get_def_plays(x), axis=1)
special_teams_epa['DEF_EPA_ST'] = special_teams_epa.apply(lambda x: get_def_epa(x), axis=1)

special_teams_epa['Plays_ST'] = special_teams_epa['POS_Plays_ST'] + special_teams_epa['DEF_Plays_ST']
special_teams_epa['EPA_ST'] = special_teams_epa['POS_EPA_ST'] + special_teams_epa['DEF_EPA_ST']
special_teams_epa['EPA_ST_Play'] = special_teams_epa['EPA_ST'] / special_teams_epa['Plays_ST']

special_teams_epa.index = special_teams_epa.index.set_names('team', level=2)

## Combine ##
master_epa_df = offense_epa.merge(defense_epa, left_index=True, right_index=True)
master_epa_df = master_epa_df.merge(special_teams_epa, left_index=True, right_index=True).reset_index()

master_epa_df = master_epa_df.merge(master_weeks, left_on=['season', 'week'], right_on=['season', 'week'], how='left')
print(master_epa_df.tail().to_string())


      season  week team  Plays_O      EPA_O  EPA_O_Play  Plays_D      EPA_D  EPA_D_Play  Opp  POS_Plays_ST  POS_EPA_ST  DEF_Plays_ST  DEF_EPA_ST  Plays_ST    EPA_ST  EPA_ST_Play  master_week
3975    2025     9  PIT       66  -6.835907   -0.103574       80   7.569427    0.094618  IND            15   10.598222            12   -3.744703        27  6.853519     0.253834          132
3976    2025     9  SEA       53  21.997595    0.415049       70   5.022761    0.071754  WAS            12   -0.321035            12    2.974045        24  2.653010     0.110542          132
3977    2025     9   SF       70  16.861158    0.240874       60  -9.062247   -0.151037  NYG            13    6.328593            17    0.663121        30  6.991714     0.233057          132
3978    2025     9  TEN       49 -11.994202   -0.244780       69  -2.884827   -0.041809  LAC            15    2.358635            14    3.028233        29  5.386868     0.185754          132
3979    2025     9  WAS       70  -5.022761  

In [68]:
''' Reshape dfs '''

master_weeks = master_weeks.set_index(['season', 'week'])
master_epa_df = master_epa_df.set_index(['master_week', 'team'])

print(master_weeks.tail().to_string())
print(master_matchups_df.loc[master_matchups_df['master_week'] == 129,:].to_string())
print(master_epa_df.loc[master_epa_df.index.get_level_values(0) == 129,:].to_string())

             master_week
season week             
2025   14            137
       15            138
       16            139
       17            140
       18            141
              game_id  season  week home_team away_team  winner  result  total  home_moneyline  away_moneyline  spread_line  away_spread_odds  home_spread_odds  total_line  under_odds  over_odds  master_week
1925  2025_06_PHI_NYG    2025     6       NYG       PHI       1    17.0   51.0           310.0          -395.0         -7.0            -120.0             100.0        40.5      -105.0     -115.0          129
1926  2025_06_DEN_NYJ    2025     6       NYJ       DEN       0    -2.0   24.0           310.0          -395.0         -7.5             100.0            -120.0        43.5      -112.0     -108.0          129
1927   2025_06_LA_BAL    2025     6       BAL        LA       0   -14.0   20.0           280.0          -355.0         -7.0            -105.0            -115.0        43.5      -108.0     -112.0       

In [70]:

def get_epa_inputs(teams: list, master_week: int):
    
    # Start return df
    teams_df = pd.DataFrame(data={'team': teams}).set_index('team')

    # Sum up EPA and Plays for each team and last n games
    for team in teams:
        team_sl = master_epa_df.loc[master_epa_df.index.get_level_values(1) == team, :]

        for n in [4,8,12,16]:
            sl = team_sl.loc[(team_sl.index.get_level_values(0) < master_week),:].tail(n)
            if team == 'IND' and n == 4 and master_week == 133:
                print(sl.head().to_string())
                
            for unit in ['O', 'D', 'ST']:
                epa = sl[f'EPA_{unit}'].sum()
                plays = sl[f'Plays_{unit}'].sum()

                teams_df.loc[team, f'Last_{n}_EPA_{unit}'] = epa
                teams_df.loc[team, f'Last_{n}_EPA_{unit}_Play'] = epa / plays

    teams_df = teams_df.reset_index()

    return teams_df

# # Params
# season = 2025
# week = 9
# team = 'IND'
# n = 4

# # Go
# c_master_week = master_weeks.loc[(season, week), 'master_week']
# print(c_master_week)

# results = get_epa_inputs(['IND', 'MIA'], c_master_week)
# print(results.to_string())

In [71]:
''' Forge Historical Weekly EPA Inputs for Historical Matchups '''


c_master_week = master_weeks.loc[(SEASON, PREDICTION_WEEK), 'master_week']
print(c_master_week)

## Matchups
input_weeks = master_weeks.loc[(master_weeks.index.get_level_values(0) >= 2019) & (master_weeks['master_week'] < c_master_week), 'master_week'].unique().tolist()
input_weeks: list[int] = master_weeks.loc[master_weeks.index.get_level_values(0) >= 2019, 'master_week'].unique().tolist()
print(input_weeks)

input_matchups = master_matchups_df.loc[master_matchups_df['master_week'].isin(input_weeks),:]
# input_matchups.loc[input_matchups['game_id'] == '2025_06_ARI_IND', ['home_team', 'away_team']] = ['ARI', 'IND']

## EPA Inputs

epa_inputs_df = pd.DataFrame(columns=['master_week', 'team'] + EPA_COLS + EPA_PLAY_COLS)

for week in input_weeks:

    home_teams = input_matchups.loc[input_matchups['master_week'] == week, 'home_team'].unique().tolist()
    away_teams = input_matchups.loc[input_matchups['master_week'] == week, 'away_team'].unique().tolist()
    
    df = get_epa_inputs(home_teams+away_teams, master_week=week)
    df['master_week'] = week

    epa_inputs_df = pd.concat([epa_inputs_df, df])

epa_inputs_df = epa_inputs_df.reset_index(drop=True)

## Home team EPA
rename_dict = {col: f'Home_Team_{col}' for col in EPA_COLS + EPA_PLAY_COLS}
input_matchups = input_matchups.merge(epa_inputs_df, left_on=['master_week', 'home_team'], right_on=['master_week', 'team'], how='left').rename(columns=rename_dict).drop(columns='team')

## Away team EPA
rename_dict = {col: f'Away_Team_{col}' for col in EPA_COLS + EPA_PLAY_COLS}
input_matchups = input_matchups.merge(epa_inputs_df, left_on=['master_week', 'away_team'], right_on=['master_week', 'team'], how='left').rename(columns=rename_dict).drop(columns='team')


print(input_matchups.loc[input_matchups['master_week'] == c_master_week, :].to_string())

133
[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141]
                  season  week  Plays_O      EPA_O  EPA_O_Play  Plays_D     EPA_D  EPA_D_Play  Opp  POS_Plays_ST  POS_EPA_ST  DEF_Plays_ST  DEF_EPA_ST  Plays_ST    EPA_ST  EPA_ST_Play
master_week team                                                                                                                                                                       
129         IND     2025     6       62   9.207899    0.148515       77 -5.599131   -0.07

# Modeling

In [72]:
''' Model Parameters and Final Prep '''

# Last Completed Week
last_completed_week_master = master_weeks.loc[(SEASON, PREDICTION_WEEK - 1), 'master_week']
print(f'Last completed weeK:', last_completed_week_master)

# Features
FEATURES = [f'Home_Team_{col}' for col in FEATURE_COLS] + [f'Away_Team_{col}' for col in FEATURE_COLS]
print(FEATURES)

# Matchups w EPA
input_matchups_sl = input_matchups.loc[input_matchups['master_week'] <= last_completed_week_master, :]

sl = input_matchups_sl.loc[input_matchups_sl['master_week'] == input_matchups_sl['master_week'].min(), ['season', 'week', 'master_week']].drop_duplicates()
print(f'First input week: {sl["season"].values[0]}, Week {sl["week"].values[0]}')
sl = input_matchups_sl.loc[input_matchups_sl['master_week'] == input_matchups_sl['master_week'].max(), ['season', 'week', 'master_week']].drop_duplicates()
print(f'Last input week: {sl["season"].values[0]}, Week {sl["week"].values[0]}')

print(input_matchups_sl.loc[input_matchups_sl['master_week'] == sl['master_week'].values[0], :].to_string())

Last completed weeK: 132
['Home_Team_Last_4_EPA_O_Play', 'Home_Team_Last_4_EPA_D_Play', 'Home_Team_Last_4_EPA_ST_Play', 'Home_Team_Last_8_EPA_O_Play', 'Home_Team_Last_8_EPA_D_Play', 'Home_Team_Last_8_EPA_ST_Play', 'Home_Team_Last_12_EPA_O_Play', 'Home_Team_Last_12_EPA_D_Play', 'Home_Team_Last_12_EPA_ST_Play', 'Home_Team_Last_16_EPA_O_Play', 'Home_Team_Last_16_EPA_D_Play', 'Home_Team_Last_16_EPA_ST_Play', 'Away_Team_Last_4_EPA_O_Play', 'Away_Team_Last_4_EPA_D_Play', 'Away_Team_Last_4_EPA_ST_Play', 'Away_Team_Last_8_EPA_O_Play', 'Away_Team_Last_8_EPA_D_Play', 'Away_Team_Last_8_EPA_ST_Play', 'Away_Team_Last_12_EPA_O_Play', 'Away_Team_Last_12_EPA_D_Play', 'Away_Team_Last_12_EPA_ST_Play', 'Away_Team_Last_16_EPA_O_Play', 'Away_Team_Last_16_EPA_D_Play', 'Away_Team_Last_16_EPA_ST_Play']
First input week: 2019, Week 1
Last input week: 2025, Week 9
              game_id  season  week home_team away_team  winner  result  total  home_moneyline  away_moneyline  spread_line  away_spread_odds  home_s

In [13]:
''' Vizualize Inputs '''

## Feature Correlation Matrix
corr = input_matchups[['result'] + FEATURES].corr()

fig = px.imshow(
    corr,
    template='nfl_template',
    title='Features Correlation Matrix',
    zmin=-1,
    zmax=1,
    text_auto=True,
    # aspect='auto'
)
fig.show()

## Feature Distributions
for unit in ['O', 'D', 'ST']:
    col = f'Home_Team_Last_16_EPA_{unit}'
    fig = px.histogram(
        template='nfl_template',
        x=input_matchups[col],
        title=f'{col}<br><sup>Distribution</sup>'
    )
    fig.show()

## Win Probability - Logistic Regression

In [73]:
''' Logistic regression - Win Probability '''


# Get X and y
X = input_matchups_sl[FEATURES].to_numpy()
y = input_matchups_sl['winner'].to_numpy()
print(X.shape)
print(y.shape)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the data (Standardization).
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# Create a Logistic Regression model
log_reg_model = LogisticRegression(max_iter=100, solver='liblinear') # Increased max_iter for convergence

# Train the model
log_reg_model.fit(X_train, y_train)

# Make predictions
y_pred = log_reg_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy*100:,.2f}%")

# coefs_dict = {FEATURES[i]: log_reg_model.coef_[0][i] for i in range(len(FEATURES))}
# pprint(coefs_dict, sort_dicts=False)

(1728, 24)
(1728,)
Accuracy: 62.81%


In [53]:
for c in range(3):
    fig = px.histogram(
        x=[X[i][c] for i in range(len(X))],
    )
    fig.show()

In [None]:
''' Graph for Overfitting / Underfitting '''

## Spreads - Linear Regression

In [74]:
''' Linear Regression - Spread '''

# Get X and y
X = input_matchups_sl[FEATURES].to_numpy()
y = input_matchups_sl['result'].to_numpy()
print(X.shape)
print(y.shape)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the data (Standardization).
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# Create a Logistic Regression model
lin_reg_model = LinearRegression() # Increased max_iter for convergence

# Train the model
lin_reg_model.fit(X_train, y_train)

# Make predictions
y_pred = lin_reg_model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
# Print evaluation metrics
print(f"R-squared: {r2:.4f}")
print(f"Mean squared error: {mse:.4f}")
print(f"Root mean squared error: {rmse:.4f}")

(1728, 24)
(1728,)
R-squared: 0.1395
Mean squared error: 190.8116
Root mean squared error: 13.8135


## Predictions

In [75]:
''' Predict a week '''

# Find matchups
c_master_week = master_weeks.loc[(SEASON, PREDICTION_WEEK), 'master_week']
matchups = input_matchups.loc[input_matchups['master_week'] == c_master_week, :]

print(c_master_week)
print(matchups.to_string())

# Get inputs
X = matchups[FEATURES].to_numpy()

## WIN PROBABILITY
y_pred = log_reg_model.predict(X)
probs = log_reg_model.predict_proba(X)

## SPREAD
results = lin_reg_model.predict(X)

## Collect results
predictions_df = matchups[['game_id', 'season', 'week', 'home_team', 'away_team', 'home_moneyline', 'away_moneyline', 'spread_line', 'away_spread_odds', 'home_spread_odds', 'total_line', 'under_odds', 'over_odds']].copy().reset_index(drop=True)

# Win Probs
predictions_df['prob_home'] = [probs[i][1] for i in range(len(probs))]
predictions_df['prob_away'] = [probs[i][0] for i in range(len(probs))]
predictions_df['pred'] = np.where(y_pred == 1, predictions_df['home_team'], predictions_df['away_team'])
predictions_df['pred_prob'] = predictions_df[['prob_home','prob_away']].max(axis=1)

predictions_df['pred_home_ml'] = np.where(predictions_df['prob_home'] > predictions_df['prob_away'],
                                  (-100*predictions_df['prob_home'])/(1 - predictions_df['prob_home']),
                                  ((1 - predictions_df['prob_home'])/predictions_df['prob_home'])*100).astype(int)
# viz_df.loc[viz_df['home_team_ml'] > 0, 'home_team_ml'] = '+' + viz_df['home_team_ml'].astype(str)

predictions_df['pred_away_ml'] = np.where(predictions_df['prob_away'] > predictions_df['prob_home'],
                                  (-100*predictions_df['prob_away'])/(1 - predictions_df['prob_away']),
                                  ((1 - predictions_df['prob_away'])/predictions_df['prob_away'])*100).astype(int)
# viz_df.loc[viz_df['away_team_ml'] > 0, 'away_team_ml'] = '+' + viz_df['away_team_ml'].astype(str)


# Spread
predictions_df['pred_spread'] = results

print(predictions_df.to_string())


133
              game_id  season  week home_team away_team  winner  result  total  home_moneyline  away_moneyline  spread_line  away_spread_odds  home_spread_odds  total_line  under_odds  over_odds master_week  Home_Team_Last_4_EPA_O  Home_Team_Last_4_EPA_D  Home_Team_Last_4_EPA_ST  Home_Team_Last_8_EPA_O  Home_Team_Last_8_EPA_D  Home_Team_Last_8_EPA_ST  Home_Team_Last_12_EPA_O  Home_Team_Last_12_EPA_D  Home_Team_Last_12_EPA_ST  Home_Team_Last_16_EPA_O  Home_Team_Last_16_EPA_D  Home_Team_Last_16_EPA_ST  Home_Team_Last_4_EPA_O_Play  Home_Team_Last_4_EPA_D_Play  Home_Team_Last_4_EPA_ST_Play  Home_Team_Last_8_EPA_O_Play  Home_Team_Last_8_EPA_D_Play  Home_Team_Last_8_EPA_ST_Play  Home_Team_Last_12_EPA_O_Play  Home_Team_Last_12_EPA_D_Play  Home_Team_Last_12_EPA_ST_Play  Home_Team_Last_16_EPA_O_Play  Home_Team_Last_16_EPA_D_Play  Home_Team_Last_16_EPA_ST_Play  Away_Team_Last_4_EPA_O  Away_Team_Last_4_EPA_D  Away_Team_Last_4_EPA_ST  Away_Team_Last_8_EPA_O  Away_Team_Last_8_EPA_D  Away_Team_L

# Visualize Predictions

In [76]:
def hex_to_rgb(hex_color):
    hex_color = hex_color.lstrip('#')  # Remove '#' if present
    
    # Extract the red, green, and blue components
    r_hex = hex_color[0:2]
    g_hex = hex_color[2:4]
    b_hex = hex_color[4:6]
    
    # Convert hexadecimal to decimal integers
    r = int(r_hex, 16)
    g = int(g_hex, 16)
    b = int(b_hex, 16)
    
    return (r, g, b)


print(hex_to_rgb('#003b75'))

(0, 59, 117)


In [77]:
''' Format Results for Visualization '''

viz_df = predictions_df.copy()

# Add betting odds
# viz_df['home_team_ml'] = np.where(viz_df['prob_home'] > viz_df['prob_away'],
#                                   (-100*viz_df['prob_home'])/(1 - viz_df['prob_home']),
#                                   ((1 - viz_df['prob_home'])/viz_df['prob_home'])*100).astype(int)
viz_df['pred_home_ml_viz'] = np.where(viz_df['pred_home_ml'] > 0, '+' + viz_df['pred_home_ml'].astype(str), viz_df['pred_home_ml'].astype(str))

# viz_df['away_team_ml'] = np.where(viz_df['prob_away'] > viz_df['prob_home'],
#                                   (-100*viz_df['prob_away'])/(1 - viz_df['prob_away']),
#                                   ((1 - viz_df['prob_away'])/viz_df['prob_away'])*100).astype(int)
# viz_df.loc[viz_df['pred_away_ml_viz'] > 0, 'pred_away_ml'] = '+' + viz_df['pred_away_ml'].astype(str)
viz_df['pred_away_ml_viz'] = np.where(viz_df['pred_away_ml'] > 0, '+' + viz_df['pred_away_ml'].astype(str), viz_df['pred_away_ml'].astype(str))


# Get logos and colors
logos_df = pd.read_csv('../data/NFL_teams_info.csv', usecols=['abbreviation', 'color', 'logoURL'])
logos_df.loc[logos_df['abbreviation'] == 'LAR', 'abbreviation'] = 'LA'
logos_df.loc[logos_df['abbreviation'] == 'WSH', 'abbreviation'] = 'WAS'
logos_df['color'] = '#' + logos_df['color']

viz_df = viz_df.merge(logos_df, left_on='home_team', right_on='abbreviation', how='left').drop(columns=['abbreviation']).rename(columns={'color': 'home_color', 'logoURL': 'home_logo'})
viz_df = viz_df.merge(logos_df, left_on='away_team', right_on='abbreviation', how='left').drop(columns=['abbreviation']).rename(columns={'color': 'away_color', 'logoURL': 'away_logo'})
print(viz_df.to_string())


            game_id  season  week home_team away_team  home_moneyline  away_moneyline  spread_line  away_spread_odds  home_spread_odds  total_line  under_odds  over_odds  prob_home  prob_away pred  pred_prob  pred_home_ml  pred_away_ml  pred_spread pred_home_ml_viz pred_away_ml_viz home_color                                          home_logo away_color                                          away_logo
0    2025_10_LV_DEN    2025    10       DEN        LV          -500.0           380.0          9.5            -110.0            -110.0        42.5      -102.0     -118.0   0.772036   0.227964  DEN   0.772036          -338           338     8.635177             -338             +338    #0a2343  https://a.espncdn.com/i/teamlogos/nfl/500/den.png    #000000   https://a.espncdn.com/i/teamlogos/nfl/500/lv.png
1   2025_10_ATL_IND    2025    10       IND       ATL          -340.0           270.0          6.5            -108.0            -112.0        48.5      -112.0     -108.0   0.729583   0.2

In [78]:
''' Win Probability - Pie Charts '''

# Create pie chart for each game
titles = []
pie_charts = []
for i in viz_df.index:

    away_prob, home_prob = viz_df.loc[i, ['prob_away', 'prob_home']]
    away_team, home_team = viz_df.loc[i, ['away_team', 'home_team']]
    
    away_color, home_color = viz_df.loc[i, ['away_color', 'home_color']]
    away_logo, home_logo = viz_df.loc[i, ['away_logo', 'home_logo']]

    # away_rbg = hex_to_rgb(away_color)
    # away_rbg = f'rgba({away_rbg[0]},{away_rbg[1]},{away_rbg[2]},{1 if away_prob > home_prob else 0.5})'

    # home_rbg = hex_to_rgb(home_color)
    # home_rbg = f'rgba({home_rbg[0]},{home_rbg[1]},{home_rbg[2]},{1 if home_prob > away_prob else 0.5})'
    # # away_rbg = (*away_rbg, 1)
    # print(away_team, away_rbg)

    off_black = 'rgba(0,0,0,0.7)'
    pie_chart = go.Pie(
        values=[home_prob, away_prob],
        labels=[home_team, away_team],
        marker=dict(
            colors=[home_color if home_prob > away_prob else off_black, away_color if away_prob > home_prob else off_black], 
            pattern=dict(
                shape=["" if home_prob > away_prob else "x", "" if away_prob > home_prob else "x"],
                size=4,
            ),
            line=dict(color='#f3f3f3', width=2),
        ),
        textposition='outside',
        textinfo='percent+label',
        hole=0.4,
        textfont=dict(weight='bold'),
        sort=False
    )

    pie_charts.append(pie_chart)
    titles.append(f'{away_team} vs. {home_team}')

# Create Figure
N_COLS = 4
N_ROWS = math.ceil(len(viz_df) / N_COLS)
domain = [[{"type": "domain"} for i in range(N_COLS)] for i in range(N_ROWS)]

fig = make_subplots(rows=N_ROWS, cols=N_COLS, specs=domain, subplot_titles=titles, horizontal_spacing=0.05)

# Add charts to plot
i = 0
for r in range(1, N_ROWS+1):
    for c in range(1, N_COLS+1):
        fig.add_trace(
            pie_charts[i],
            row=r,
            col=c
        )

        i += 1
        if i > len(viz_df.index) - 1:
            break

fig.for_each_annotation(lambda a: a.update(font=dict(size=14, weight='bold')))

# fig.add_annotation(
#     # source='https://a.espncdn.com/i/teamlogos/nfl/500/jax.png',  # The loaded image
#     text='Heyyy',
#     xref="x2 domain", yref="y2 domain",
#     x=0.5, y=0.5, # Center of the plot area
#     # sizex=0.2, sizey=0.2, # Adjust size as needed
#     xanchor="center", yanchor="middle",
#     # layer="above", # Place image above other plot elements
#     # opacity=0.85,
#     row=2, col=2
# )


fig.update_layout(
    template='nfl_template',
    paper_bgcolor='#f0f0f0',
    title=dict(
        text=f'NFL Week {PREDICTION_WEEK} <span style="color: #D5A15D">Win Probability</span><br><sup>EPA / Play Model</sup>',
    ),
    margin=dict(t=100, b=50, l=50, r=50),
    showlegend=False,
    height=900,
    width=900,
)

# Credits
fig.add_annotation(
    text=f'EPA / Play from teams\' last 4, 8, 12, and 16 games, in all 3 phases<br>Figure & Model: @clankeranalytic | Data: nfl_data_py | {datetime.today():%Y-%m-%d}',
    # font=dict(weight='bold'),
    showarrow=False,
    xref='paper',
    yref='paper',
    y=-0.05, 
    x=1,
    align='right'
)
fig.show()


# Export
# pio.write_image(fig, f'Week {PREDICTION_WEEK} Win Probability.png', scale=6, width=900, height=900)


In [53]:
''' Predictions '''

from PIL import Image
import requests
from io import BytesIO


BORDER_COLOR = '#989898'

tables = []
winner_logos = []
for i in viz_df.index:
    
    away_team, home_team = viz_df.loc[i, ['away_team', 'home_team']]
    away_prob, home_prob = viz_df.loc[i, ['prob_away', 'prob_home']]
    away_team_ml, home_team_ml = viz_df.loc[i, ['pred_away_ml_viz', 'pred_home_ml_viz']]
    
    spread = viz_df.loc[i, 'pred_spread']
    spread = round(spread, 1)
    away_spread = f'+{abs(spread)}' if spread > 0 else f'-{abs(spread)}'
    home_spread = f'-{abs(spread)}' if spread > 0 else f'+{abs(spread)}'

    away_color, home_color = viz_df.loc[i, ['away_color', 'home_color']]
    away_logo, home_logo = viz_df.loc[i, ['away_logo', 'home_logo']]

    # TODO eventualy, Appr. Line
    table = go.Table(
        # columnwidth=[2,3,3],
        header=dict(
            values=['', 'Spread', 'Moneyline'],
            line_color=['rgba(0,0,0,0)']+[BORDER_COLOR]*3,
            fill_color=['rgba(0,0,0,0)']+['white']*3,
            align=['center', 'center'],
            font=dict(size=10)
        ),
        cells=dict(
            values=[[away_team, home_team], 
                    # [f'{away_prob*100:,.1f}%', f'{home_prob*100:,.1f}%'], 
                    [away_spread, home_spread], 
                    [away_team_ml, home_team_ml]],
            line_color=[BORDER_COLOR]*4,
            line_width=1,
            fill_color=[[away_color, home_color]]+['white']*3,
            font=dict(
                color=['white', 'black', 'black', 'black']
            )
        )
    )
    tables.append(table)
    winner_logos.append(away_logo if away_prob > home_prob else home_logo)


## Make Figure
N_ROWS = math.ceil(len(tables) / 2)
N_COLS = 2
fig = make_subplots(rows=N_ROWS, cols=N_COLS*2, 
                    specs=[[{"type": "table"}, {"type": "xy"}]*N_COLS for i in range(N_ROWS)],
                    subplot_titles=['', 'Projected<br>Winner']*len(tables),
                    horizontal_spacing=0.075,
                    column_widths=[5,1,5,1])

# Add tables / winner logos
i = 0
for r in range(1,N_ROWS+1):
    for c in range(1,N_COLS+1):

        ## Logo
        response = requests.get(winner_logos[i])
        img = Image.open(BytesIO(response.content))
        # img = img.resize((img.width*2, img.height*2), Image.LANCZOS)

        logo_trace = px.imshow(img=img)
        fig.add_trace(logo_trace.data[0], row=r, col=c*2)

        ## Table
        fig.add_trace(
            tables[i],
            row=r,col=(c*2)-1
        )

        i += 1
        if i > len(tables) - 1:
            break

    
## Formatting
fig.update_annotations(font=dict(size=10, weight='bold'))

fig.update_layout(
    template='nfl_template',
    paper_bgcolor='#f0f0f0',
    title=dict(
        text=f'NFL Week {PREDICTION_WEEK} <span style="color: #D5A15D">Odds</span><br><sup>EPA / Play Model</sup>',
        y=0.965
    ),
    margin=dict(b=50, l=50, r=50),
    height=1000,
    width=700
)
fig.update_xaxes(
    visible=False,
)
fig.update_yaxes(
    visible=False,
)

# Credits
fig.add_annotation(
    text=f'EPA / Play from teams\' last 4, 8, 12, and 16 games, in all 3 phases<br>Figure & Model: @clankeranalytic | Data: nfl_data_py | {datetime.today():%Y-%m-%d}',
    font=dict(size=8),
    showarrow=False,
    xref='paper',
    yref='paper',
    y=-0.05, 
    x=1,
    align='right'
)

fig.show()

# Export
# pio.write_image(fig, f'Week {PREDICTION_WEEK} Predictions.png', scale=6, width=700, height=1000)

## (After Week) Results

In [54]:
print(master_matchups_df.head().to_string())

           game_id  season  week home_team away_team  winner  result  total  home_moneyline  away_moneyline  spread_line  total_line  master_week
0  2018_01_ATL_PHI    2018     1       PHI       ATL       1     6.0   30.0          -106.0          -104.0          1.0        44.5            1
1  2018_01_BUF_BAL    2018     1       BAL       BUF       1    44.0   50.0          -355.0           309.0          7.5        39.0            1
2  2018_01_CIN_IND    2018     1       IND       CIN       0   -11.0   57.0          -105.0          -105.0         -1.0        47.5            1
3  2018_01_TEN_MIA    2018     1       MIA       TEN       1     7.0   47.0           104.0          -115.0         -1.0        43.5            1
4   2018_01_SF_MIN    2018     1       MIN        SF       1     8.0   40.0          -268.0           238.0          6.0        46.5            1


In [59]:
## Init 

week_results_df = viz_df.merge(master_matchups_df[['game_id', 'winner']], on='game_id', how='left')
week_results_df['winner'] = np.where(week_results_df['winner'] == 0, week_results_df['away_team'], week_results_df['home_team'])
week_results_df['correct?'] = np.where(week_results_df['winner'] == week_results_df['pred'], 1, 0)

## Add Actual EPA
cols = ['EPA_O_Play', 'EPA_D_Play', 'EPA_ST_Play']

week_epa = master_epa_df.loc[master_epa_df.index.get_level_values(0) == c_master_week, :].reset_index()
# print(week_epa.head().to_string())

# Home Team EPA
week_results_df = week_results_df.merge(week_epa[['team'] + cols], left_on='home_team', right_on='team', how='inner').rename(columns={
    col: f'home_team_{col}' for col in cols
}).drop(columns=['team'])

# Away Team EPA
week_results_df = week_results_df.merge(week_epa[['team'] + cols], left_on='away_team', right_on='team', how='inner').rename(columns={
    col: f'away_team_{col}' for col in cols
}).drop(columns=['team'])

week_results_df = week_results_df.reindex(columns=[
        'season', 'week', 'home_team', 'away_team', 
        'prob_home', 'pred_home_ml_viz', 'prob_away', 'pred_away_ml_viz',
        'home_moneyline', 'away_moneyline',
        'pred', 'winner', 'correct?', 'spread_line', 'pred_spread', 'total_line',  
        'home_team_EPA_O_Play', 'home_team_EPA_D_Play', 'home_team_EPA_ST_Play', 
        'away_team_EPA_O_Play', 'away_team_EPA_D_Play', 'away_team_EPA_ST_Play'])

print(week_results_df.columns)
print(week_results_df.head().to_string())
week_results_df.to_excel('Week Results.xlsx', index=False, sheet_name='Results')

Index(['season', 'week', 'home_team', 'away_team', 'prob_home',
       'pred_home_ml_viz', 'prob_away', 'pred_away_ml_viz', 'home_moneyline',
       'away_moneyline', 'pred', 'winner', 'correct?', 'spread_line',
       'pred_spread', 'total_line', 'home_team_EPA_O_Play',
       'home_team_EPA_D_Play', 'home_team_EPA_ST_Play', 'away_team_EPA_O_Play',
       'away_team_EPA_D_Play', 'away_team_EPA_ST_Play'],
      dtype='object')
   season  week home_team away_team  prob_home pred_home_ml_viz  prob_away pred_away_ml_viz  home_moneyline  away_moneyline pred winner  correct?  spread_line  pred_spread  total_line  home_team_EPA_O_Play  home_team_EPA_D_Play  home_team_EPA_ST_Play  away_team_EPA_O_Play  away_team_EPA_D_Play  away_team_EPA_ST_Play
0    2025     9       MIA       BAL   0.446689             +123   0.553311             -123           350.0          -455.0  BAL    BAL         1         -7.5    -1.815155        51.5             -0.225263             -0.163054              -0.017216 