In [None]:
''' IMPORTS '''

import math
from datetime import datetime
from io import BytesIO
from pprint import pprint

import numpy as np
import pandas as pd
import requests
from PIL import Image

import nfl_data_py as nfl

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.colors as cl
from plotly.subplots import make_subplots

from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, confusion_matrix, f1_score, classification_report, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

from resources.plotly_theme import nfl_template
from resources.get_nfl_data import get_team_info, get_pbp_data, get_matchups
from resources.team_stats import get_team_stats

# Register the project theme under a name so later cells can request it with template='nfl_template'.
pio.templates['nfl_template'] = nfl_template

In [None]:
''' Constants / Parameters  '''

## Parameters
# Seasons of schedule / play-by-play data to load (inclusive of 2025).
INPUT_YEARS = list(range(2018, 2026))

# NOTE - make sure not to include any un-completed matchups in model
SEASON = 2025
PREDICTION_WEEK = 20

# 'EPA / Play' selects the per-play feature columns; any other value selects the raw EPA totals.
FEATURE_TYPE = 'EPA / Play'
# Look-back windows used to build the rolling EPA features.
LAST_N_WEEKS = [4,8,12,16]
# LAST_N_WEEKS = [4]

## Constants ##

# Output folder for this week's figures and spreadsheets.
FOLDER = f'../visuals/week {PREDICTION_WEEK}/predictions/'

# One column per (window, unit): offense, defense, special teams.
_UNITS = ('O', 'D', 'ST')
EPA_COLS = [f'Last_{n}_EPA_{unit}' for n in LAST_N_WEEKS for unit in _UNITS]
EPA_PLAY_COLS = [f'Last_{n}_EPA_{unit}_Play' for n in LAST_N_WEEKS for unit in _UNITS]

if FEATURE_TYPE == 'EPA / Play':
    FEATURE_COLS = EPA_PLAY_COLS
else:
    FEATURE_COLS = EPA_COLS

# Process

In [None]:
''' Import / Process Data '''

## Team info ##
team_data = get_team_info()

## PBP ##
pbp_data = get_pbp_data(years=INPUT_YEARS)

## Matchups ##
schedule_data = nfl.import_schedules(years=INPUT_YEARS).copy()
print(schedule_data.head().to_string())

# Some cleaning
# winner = 1 when `result` is positive, else 0. Later cells treat 1 as "home team won".
# A NaN result (game without a final score) compares False and therefore also yields 0.
schedule_data['winner'] = np.where(schedule_data['result'] > 0, 1, 0)
# Normalise the old Oakland abbreviation to the current Las Vegas one (exact cell matches only).
schedule_data = schedule_data.replace('OAK', 'LV')

# Get desired columns / weeks
COLS = ['game_id', 'season', 'week', 'home_team', 'away_team', 'home_score', 'away_score', 'result', 'winner', 'total', 'home_moneyline', 'away_moneyline', 'spread_line', 'away_spread_odds', 'home_spread_odds', 'total_line', 'under_odds', 'over_odds']

# Keep every regular-season game, plus the postseason games of the prediction season only.
# Driven by SEASON (instead of a literal year / week number) so the notebook follows the
# parameters cell when it is pointed at another season.
is_regular_season = schedule_data['game_type'] == 'REG'
is_prediction_postseason = (schedule_data['season'] == SEASON) & (schedule_data['game_type'] != 'REG')
# `result != 0` drops ties. NaN != 0 evaluates True, so rows with a missing result
# (e.g. the matchups to be predicted) are kept.
FILTERS = (is_regular_season | is_prediction_postseason) & (schedule_data['result'] != 0)

master_matchups_df = schedule_data.loc[FILTERS, COLS].sort_values(by=['season', 'week']).reset_index(drop=True)

## Create week master ##
# master_week is a 1-based running counter over the sorted, distinct (season, week) pairs.
master_weeks = master_matchups_df[['season', 'week']].drop_duplicates().reset_index(drop=True)
master_weeks.index = master_weeks.index + 1
master_weeks = master_weeks.reset_index(names=['master_week'])

# Add week back to matchup
master_matchups_df = master_matchups_df.merge(master_weeks, left_on=['season', 'week'], right_on=['season', 'week'])

print(master_weeks.shape)
print(master_weeks.head().to_string())
print(master_weeks.tail().to_string())
print(master_matchups_df.shape)
print(master_matchups_df.loc[(master_matchups_df['season'] == SEASON) & (master_matchups_df['week'] == PREDICTION_WEEK),:].to_string())

In [None]:
''' Calculate Weekly EPA '''

# Offensive snaps that are not special-teams plays feed both the offense and defense splits.
scrimmage_plays = pbp_data.loc[pbp_data['Offensive Snap'] & ~pbp_data['Is Special Teams Play'], :]

## Offense ##
# Grouped by the team in possession: play count and summed EPA per (season, week, team).
offense_epa = scrimmage_plays.groupby(['season', 'week', 'posteam']).aggregate(
    Plays_O=('posteam', 'size'),
    EPA_O=('epa', 'sum'),
)
offense_epa['EPA_O_Play'] = offense_epa['EPA_O'] / offense_epa['Plays_O']
offense_epa.index = offense_epa.index.rename(['season', 'week', 'team'])

## Defense ##
# Same plays grouped by the defending team; the sign is flipped so positive EPA favours the defense.
defense_epa = scrimmage_plays.groupby(['season', 'week', 'defteam']).aggregate(
    Plays_D=('posteam', 'size'),
    EPA_D=('epa', 'sum'),
)
defense_epa['EPA_D'] = defense_epa['EPA_D'] * -1
defense_epa['EPA_D_Play'] = defense_epa['EPA_D'] / defense_epa['Plays_D']
defense_epa.index = defense_epa.index.rename(['season', 'week', 'team'])

## ST ##
# Special-teams plays grouped by the possessing team; `Opp` keeps the first defteam seen in the group.
special_teams_plays = pbp_data.loc[pbp_data['Is Special Teams Play'], :]
special_teams_epa = special_teams_plays.groupby(['season', 'week', 'posteam']).aggregate(
    Opp=('defteam', 'first'),
    POS_Plays_ST=('posteam', 'size'),
    POS_EPA_ST=('epa', 'sum'),
)

def get_def_plays(row):
    """Return the opponent's `POS_Plays_ST` for the same season and week as `row`.

    `row` is a row of `special_teams_epa`: its `name` starts with (season, week)
    and its `Opp` field names the opponent whose entry is looked up.
    """
    season, week = row.name[0], row.name[1]
    opponent_key = (season, week, row['Opp'])
    return special_teams_epa.loc[opponent_key, 'POS_Plays_ST']

def get_def_epa(row):
    """Return the negated `POS_EPA_ST` of the opponent for the same season and week as `row`.

    `row` is a row of `special_teams_epa`: its `name` starts with (season, week)
    and its `Opp` field names the opponent whose entry is looked up.
    """
    season, week = row.name[0], row.name[1]
    opponent_key = (season, week, row['Opp'])
    return -1*special_teams_epa.loc[opponent_key, 'POS_EPA_ST']

# Opponent-possession special-teams plays / EPA (sign already flipped by get_def_epa),
# looked up row by row from the opponent's own entry for the same season and week.
special_teams_epa['DEF_Plays_ST'] = special_teams_epa.apply(get_def_plays, axis=1)
special_teams_epa['DEF_EPA_ST'] = special_teams_epa.apply(get_def_epa, axis=1)

# A team's special-teams total combines the plays it possessed with the plays its opponent possessed.
special_teams_epa['Plays_ST'] = special_teams_epa['POS_Plays_ST'] + special_teams_epa['DEF_Plays_ST']
special_teams_epa['EPA_ST'] = special_teams_epa['POS_EPA_ST'] + special_teams_epa['DEF_EPA_ST']
special_teams_epa['EPA_ST_Play'] = special_teams_epa['EPA_ST'] / special_teams_epa['Plays_ST']

special_teams_epa.index = special_teams_epa.index.set_names('team', level=2)

## Combine ##
# Inner joins on the shared (season, week, team) index.
master_epa_df = offense_epa.merge(defense_epa, left_index=True, right_index=True)
master_epa_df = master_epa_df.merge(special_teams_epa, left_index=True, right_index=True).reset_index()

# Left join: (season, week) pairs absent from master_weeks get a NaN master_week.
master_epa_df = master_epa_df.merge(master_weeks, left_on=['season', 'week'], right_on=['season', 'week'], how='left')

# Spot-check the week before the prediction week; uses SEASON rather than a literal year
# so it follows the parameters cell.
print(master_epa_df.loc[(master_epa_df['season'] == SEASON) & (master_epa_df['week'] == PREDICTION_WEEK - 1),:].head().to_string())


In [None]:
''' Reshape dfs '''

# Index the lookup frames for the cells below. Guarded so that re-running this cell
# does not raise KeyError on frames that were already re-indexed.
if list(master_weeks.index.names) != ['season', 'week']:
    master_weeks = master_weeks.set_index(['season', 'week'])
if list(master_epa_df.index.names) != ['master_week', 'team']:
    master_epa_df = master_epa_df.set_index(['master_week', 'team'])

# Spot checks; 138 is a hard-coded master_week used for manual inspection.
print(master_weeks.tail().to_string())
print(master_matchups_df.loc[master_matchups_df['master_week'] == 138,:].to_string())
print(master_epa_df.loc[master_epa_df.index.get_level_values(0) == 138,:].to_string())

In [None]:

def get_epa_inputs(teams: list, master_week: int, last_n_weeks: list = None):
    """Build rolling EPA features for `teams` from rows strictly before `master_week`.

    For every team and every window size n, the team's last n rows of
    `master_epa_df` with master_week < `master_week` are summed per unit
    (O, D, ST), giving `Last_{n}_EPA_{unit}` and `Last_{n}_EPA_{unit}_Play`
    (total EPA divided by total plays).

    Args:
        teams: team abbreviations to compute features for.
        master_week: running week number; only earlier weeks are used.
        last_n_weeks: window sizes. Defaults to the notebook-level LAST_N_WEEKS,
            so the output columns match EPA_COLS / EPA_PLAY_COLS.

    Returns:
        DataFrame with a `team` column plus one column per (window, unit, metric).
        A team without prior rows gets 0 totals and NaN per-play values.
    """
    if last_n_weeks is None:
        last_n_weeks = LAST_N_WEEKS

    # Start return df
    teams_df = pd.DataFrame(data={'team': teams}).set_index('team')

    team_level = master_epa_df.index.get_level_values(1)

    # Sum up EPA and Plays for each team and last n games
    for team in teams:
        team_sl = master_epa_df.loc[team_level == team, :]
        # The cut-off does not depend on n, so filter once per team.
        # `.tail(n)` below relies on rows being ordered by master_week.
        prior_games = team_sl.loc[team_sl.index.get_level_values(0) < master_week, :]

        for n in last_n_weeks:
            sl = prior_games.tail(n)

            for unit in ['O', 'D', 'ST']:
                epa = sl[f'EPA_{unit}'].sum()
                plays = sl[f'Plays_{unit}'].sum()

                teams_df.loc[team, f'Last_{n}_EPA_{unit}'] = epa
                teams_df.loc[team, f'Last_{n}_EPA_{unit}_Play'] = epa / plays

    return teams_df.reset_index()

# # Params
# season = 2025
# week = 9
# team = 'IND'
# n = 4

# # Go
# c_master_week = master_weeks.loc[(season, week), 'master_week']
# print(c_master_week)

# results = get_epa_inputs(['IND', 'MIA'], c_master_week)
# print(results.to_string())

In [None]:
''' Forge Historical Weekly EPA Inputs for Historical Matchups '''


# Running week number of the week being predicted.
c_master_week = master_weeks.loc[(SEASON, PREDICTION_WEEK), 'master_week']
print(c_master_week)

## Matchups
# Weeks from 2019 onward (the prediction week included) are used as model rows;
# earlier weeks only serve as look-back history for the rolling features.
input_weeks: list[int] = master_weeks.loc[master_weeks.index.get_level_values(0) >= 2019, 'master_week'].unique().tolist()
print(input_weeks)

input_matchups = master_matchups_df.loc[master_matchups_df['master_week'].isin(input_weeks),:]
# input_matchups.loc[input_matchups['game_id'] == '2025_06_ARI_IND', ['home_team', 'away_team']] = ['ARI', 'IND']

## EPA Inputs
epa_input_cols = ['master_week', 'team'] + EPA_COLS + EPA_PLAY_COLS

# Collect one frame per week and concatenate once at the end. Growing a DataFrame with
# pd.concat inside the loop is quadratic, and seeding it with an empty frame triggers
# pandas' deprecated empty-entry dtype handling.
weekly_frames = []
for week in input_weeks:

    week_games = input_matchups.loc[input_matchups['master_week'] == week, :]
    home_teams = week_games['home_team'].unique().tolist()
    away_teams = week_games['away_team'].unique().tolist()

    df = get_epa_inputs(home_teams+away_teams, master_week=week)
    df['master_week'] = week

    weekly_frames.append(df)

if weekly_frames:
    # reindex restores the expected column order (master_week, team, features).
    epa_inputs_df = pd.concat(weekly_frames, ignore_index=True).reindex(columns=epa_input_cols)
else:
    epa_inputs_df = pd.DataFrame(columns=epa_input_cols)

## Home team EPA
rename_dict = {col: f'Home_Team_{col}' for col in EPA_COLS + EPA_PLAY_COLS}
input_matchups = input_matchups.merge(epa_inputs_df, left_on=['master_week', 'home_team'], right_on=['master_week', 'team'], how='left').rename(columns=rename_dict).drop(columns='team')

## Away team EPA
rename_dict = {col: f'Away_Team_{col}' for col in EPA_COLS + EPA_PLAY_COLS}
input_matchups = input_matchups.merge(epa_inputs_df, left_on=['master_week', 'away_team'], right_on=['master_week', 'team'], how='left').rename(columns=rename_dict).drop(columns='team')


# Spot check; 137 is a hard-coded master_week used for manual inspection.
print(input_matchups.loc[input_matchups['master_week'] == 137, :].to_string())

# Modeling

In [None]:
''' Model Parameters and Final Prep '''

# Features: every selected EPA column for the home team, then the same columns for the away team
FEATURES = [f'{side}_Team_{col}' for side in ('Home', 'Away') for col in FEATURE_COLS]
print(FEATURES)

# Last Completed Week
# master_week is a consecutive counter over (season, week), so the week before the prediction
# week is simply c_master_week - 1. Unlike a lookup of (SEASON, PREDICTION_WEEK - 1), this
# also works when PREDICTION_WEEK is 1 (it resolves to the last week of the previous season).
last_completed_week_master = c_master_week - 1
print('Last completed week:', last_completed_week_master)

# Input Matchups: everything up to and including the last completed week (training rows)
input_matchups_sl = input_matchups.loc[input_matchups['master_week'] <= last_completed_week_master, :]

# Prediction matchups: the games of the week being predicted
matchups = input_matchups.loc[input_matchups['master_week'] == c_master_week, :]

# Report the first / last (season, week) present in the training rows
week_cols = ['season', 'week', 'master_week']

first_input_week = input_matchups_sl['master_week'].min()
sl = input_matchups_sl.loc[input_matchups_sl['master_week'] == first_input_week, week_cols].drop_duplicates()
print(f'First input week: {sl["season"].values[0]}, Week {sl["week"].values[0]}')
print(input_matchups_sl.tail(2).to_string())

last_input_week = input_matchups_sl['master_week'].max()
sl = input_matchups_sl.loc[input_matchups_sl['master_week'] == last_input_week, week_cols].drop_duplicates()
print(f'Last input week: {sl["season"].values[0]}, Week {sl["week"].values[0]}')

print(matchups.to_string())

In [None]:
''' Visualize Inputs '''

## Feature Correlation Matrix
# Pairwise correlations between the game result and every model feature.
corr_columns = ['result'] + FEATURES
corr = input_matchups[corr_columns].corr()

fig = px.imshow(
    corr,
    zmin=-1,
    zmax=1,
    text_auto=True,
    title='Features Correlation Matrix',
    template='nfl_template',
    # aspect='auto'
)
fig.show()

## Feature Distributions
# One histogram per unit for the home team's last-4 EPA totals.
for col in [f'Home_Team_Last_4_EPA_{unit}' for unit in ['O', 'D', 'ST']]:
    fig = px.histogram(
        x=input_matchups[col],
        title=f'{col}<br><sup>Distribution</sup>',
        template='nfl_template',
    )
    fig.show()

## Win Probability - Logistic Regression

In [None]:
''' Logistic regression - Win Probability '''


# Feature matrix and binary target (the prints below treat 1 as a home win)
X = input_matchups_sl[FEATURES].to_numpy()
y = input_matchups_sl['winner'].to_numpy()
print(X.shape)
print(y.shape)

# 70 / 30 random hold-out split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the data (Standardization) - currently disabled.
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# Fit on the raw (unscaled) features; max_iter is passed explicitly as 100
log_reg_model = LogisticRegression(max_iter=100, solver='liblinear')
log_reg_model.fit(X_train, y_train)

# Hold-out predictions
y_pred = log_reg_model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy*100:,.2f}%")

# Confusion matrix: rows = actual, columns = predicted, label order (0, 1)
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel().tolist()
print(cm)
for outcome_label, outcome_count in (
    ("Pred Home, Home Win", tp),
    ("Pred Home, Away Win", fp),
    ("Pred Away, Home Win", fn),
    ("Pred Away, Away Win", tn),
):
    print(f"{outcome_label}: {outcome_count}")

# Marginal totals: how often each side was picked vs. how often it actually won
pred_home = tp+fp
pred_away = fn+tn
home_winners = tp+fn
away_winners = fp+tn

n_test = len(y_test)
print(f'Pred Home: {pred_home} ({pred_home / n_test:.2%})')
print(f'Pred Away: {pred_away}')
print(f'Home Winners: {home_winners} ({home_winners / n_test:,.2%})')
print(f'Away Winners: {away_winners}')

# recall: % of values picked out (i.e., % of winners picked); precision: % correct (i.e., accuracy of picks)
f1 = f1_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print(f"f1: {f1:,.5f}")
print(class_report)

# coefs_dict = {FEATURES[i]: log_reg_model.coef_[0][i] for i in range(len(FEATURES))}
# pprint(coefs_dict, sort_dicts=False)

In [None]:
# Distribution of the first three feature columns of X.
for feature_idx in range(3):
    feature_values = X[:, feature_idx]
    fig = px.histogram(x=feature_values)
    fig.show()

In [None]:
''' Graph for Overfitting / Underfitting '''

## Spreads - Linear Regression

In [None]:
''' Viz '''


# Home defense EPA / play over the last 4 games vs. the points the away team scored.
x = input_matchups_sl['Home_Team_Last_4_EPA_D_Play'].to_numpy()
y = input_matchups_sl['away_score'].to_numpy()

# The theme is applied after the figure is built, exactly as before.
scatter = px.scatter(x=x, y=y, trendline='ols').update_layout(template='nfl_template')
scatter.show()

In [None]:
''' Linear Regression - Spread '''

# Feature matrix and target: `result` of each training game
X = input_matchups_sl[FEATURES].to_numpy()
y = input_matchups_sl['result'].to_numpy()
print(X.shape)
print(y.shape)

# 70 / 30 random hold-out split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the data (Standardization) - currently disabled.
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# Ordinary least squares on the raw features
lin_reg_model = LinearRegression()
lin_reg_model.fit(X_train, y_train)

# Hold-out predictions
y_pred = lin_reg_model.predict(X_test)

# Hold-out metrics
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
for metric_label, metric_value in (
    ("R-squared", r2),
    ("Mean squared error", mse),
    ("Root mean squared error", rmse),
):
    print(f"{metric_label}: {metric_value:.4f}")

In [None]:
''' Score '''


def _fit_score_model(X, y):
    """Fit a LinearRegression on a 70/30 split of (X, y) and report hold-out metrics.

    Prints the shapes of X and y, then R-squared, MSE and RMSE on the 30% hold-out
    (split seeded with random_state=42).

    Returns:
        The fitted LinearRegression model.
    """
    print(X.shape)
    print(y.shape)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Ordinary least squares on the training rows
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate on the hold-out rows
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print(f"R-squared: {r2:.4f}")
    print(f"Mean squared error: {mse:.4f}")
    print(f"Root mean squared error: {rmse:.4f}")

    return model


# Both score models share the same feature matrix
X = input_matchups_sl[FEATURES].to_numpy()

## Home Score ##
home_score_model = _fit_score_model(X, input_matchups_sl['home_score'].to_numpy())

## Away Score ##
away_score_model = _fit_score_model(X, input_matchups_sl['away_score'].to_numpy())



## Predictions

In [None]:
''' Predict a week '''


def _american_odds(prob, opp_prob):
    """Convert a win probability into integer American odds.

    The side with the larger probability gets the negative (favourite) price,
    otherwise the positive (underdog) price is used. Values are truncated to int.
    """
    favourite_price = (-100*prob)/(1 - prob)
    underdog_price = ((1 - prob)/prob)*100
    return np.where(prob > opp_prob, favourite_price, underdog_price).astype(int)


# Get inputs
X = matchups[FEATURES].to_numpy()

## WIN PROBABILITY
y_pred = log_reg_model.predict(X)
probs = log_reg_model.predict_proba(X)

## SPREAD
# results = lin_reg_model.predict(X)

## SCORES
home_scores = home_score_model.predict(X)
away_scores = away_score_model.predict(X)

## Collect results
base_cols = ['game_id', 'season', 'week', 'home_team', 'away_team', 'home_score', 'away_score', 'home_moneyline', 'away_moneyline', 'spread_line', 'away_spread_odds', 'home_spread_odds', 'total_line', 'under_odds', 'over_odds']
predictions_df = matchups[base_cols].copy().reset_index(drop=True)

# Win Probs - Moneyline
# predict_proba columns follow the class order (0, 1); class 1 is used as the home side.
predictions_df['prob_home'] = probs[:, 1]
predictions_df['prob_away'] = probs[:, 0]
predictions_df['pred'] = np.where(y_pred == 1, predictions_df['home_team'], predictions_df['away_team'])
predictions_df['pred_prob'] = predictions_df[['prob_home','prob_away']].max(axis=1)

predictions_df['pred_home_ml'] = _american_odds(predictions_df['prob_home'], predictions_df['prob_away'])
predictions_df['pred_away_ml'] = _american_odds(predictions_df['prob_away'], predictions_df['prob_home'])

# NOTE(review): "value" is a plain difference of American odds, which jumps across the
# +100 / -100 boundary - confirm this is the intended measure.
predictions_df['home_ml_value'] = predictions_df['home_moneyline'] - predictions_df['pred_home_ml']
predictions_df['away_ml_value'] = predictions_df['away_moneyline'] - predictions_df['pred_away_ml']
predictions_df['moneyline_value'] = np.where(predictions_df['home_ml_value'] >= predictions_df['away_ml_value'], 'home', 'away')

# Scores / Spread / Total
predictions_df['pred_home_score'] = home_scores
predictions_df['pred_away_score'] = away_scores
predictions_df['pred_winner_score'] = np.where(predictions_df['pred_home_score'] >= predictions_df['pred_away_score'], 'home', 'away')

# Predicted home margin compared with the market line; a non-negative difference picks the home side.
predictions_df['pred_spread'] = predictions_df['pred_home_score'] - predictions_df['pred_away_score']
predictions_df['spread_difference'] = predictions_df['pred_spread'] - predictions_df['spread_line']
predictions_df['spread_pick'] = np.where(predictions_df['spread_difference'] >= 0, 'home', 'away')

predictions_df['pred_total'] = home_scores + away_scores
predictions_df['total_pick'] = np.where(predictions_df['pred_total'] >= predictions_df['total_line'], 'over', 'under')


print(predictions_df.to_string())


# Visualize Predictions

In [None]:
def hex_to_rgb(hex_color):
    """Convert a hex colour such as '#003b75' (leading '#' optional) to an (r, g, b) tuple.

    Each pair of hex digits is parsed as a base-16 integer, so every channel is in 0-255.
    """
    digits = hex_color.lstrip('#')
    # Channels sit at character offsets 0, 2 and 4: red, green, blue.
    return tuple(int(digits[start:start + 2], 16) for start in (0, 2, 4))


# Quick manual check of the converter on a sample colour.
print(hex_to_rgb('#003b75'))

In [None]:
''' Format Results for Visualization '''

viz_df = predictions_df.copy()

# Display strings for the model moneylines: positive prices get an explicit '+'.
for side in ('home', 'away'):
    model_ml = viz_df[f'pred_{side}_ml']
    viz_df[f'pred_{side}_ml_viz'] = np.where(model_ml > 0, '+' + model_ml.astype(str), model_ml.astype(str))


# Get logos and colors
logos_df = pd.read_csv('../data/NFL_teams_info.csv', usecols=['abbreviation', 'color', 'logoURL'])
# Rename two abbreviations to the spellings expected by the merge on home_team / away_team below.
logos_df['abbreviation'] = logos_df['abbreviation'].replace({'LAR': 'LA', 'WSH': 'WAS'})
logos_df['color'] = '#' + logos_df['color']

# Attach colour and logo for the home side, then for the away side.
for side in ('home', 'away'):
    viz_df = (
        viz_df
        .merge(logos_df, left_on=f'{side}_team', right_on='abbreviation', how='left')
        .drop(columns=['abbreviation'])
        .rename(columns={'color': f'{side}_color', 'logoURL': f'{side}_logo'})
    )
print(viz_df.to_string())


In [None]:
''' Win Probability / Picks - Pie Charts '''

# Create pie chart for each game
off_black = 'rgba(0,0,0,0.7)'

titles = []
pie_charts = []
winner_logos = []
for i in viz_df.index:
    game = viz_df.loc[i]

    away_prob, home_prob = game['prob_away'], game['prob_home']
    away_team, home_team = game['away_team'], game['home_team']
    away_color, home_color = game['away_color'], game['home_color']
    away_logo, home_logo = game['away_logo'], game['home_logo']

    home_favored = home_prob > away_prob
    away_favored = away_prob > home_prob

    # The favoured side keeps its team colour; the other slice is drawn off-black with an 'x' pattern.
    slice_colors = [home_color if home_favored else off_black, away_color if away_favored else off_black]
    slice_patterns = ["" if home_favored else "x", "" if away_favored else "x"]

    pie_chart = go.Pie(
        values=[home_prob, away_prob],
        labels=[home_team, away_team],
        marker=dict(
            colors=slice_colors,
            pattern=dict(shape=slice_patterns, size=4),
            line=dict(color='#f3f3f3', width=2),
        ),
        textposition='outside',
        textinfo='percent+label',
        hole=0.6,
        textfont=dict(weight='bold'),
        sort=False
    )

    pie_charts.append(pie_chart)
    titles.append(f'{away_team} vs. {home_team}')
    winner_logos.append(away_logo if away_favored else home_logo)


## Create Figure ##
N_COLS = 2
N_ROWS = math.ceil(len(viz_df) / N_COLS)
domain = [[{"type": "domain"} for _ in range(N_COLS)] for _ in range(N_ROWS)]

H_SPACING = 0.075
V_SPACING = 0.075
fig = make_subplots(rows=N_ROWS, cols=N_COLS, specs=domain, subplot_titles=titles,
                    vertical_spacing=V_SPACING, horizontal_spacing=H_SPACING)

# Size of one subplot cell in paper coordinates, after removing the gaps between cells
total_v_spacing = ((N_ROWS - 1)*V_SPACING)
total_v_available = 1 - total_v_spacing
row_size = total_v_available / N_ROWS

total_h_spacing = ((N_COLS - 1)*H_SPACING)
total_h_available = 1 - total_h_spacing
col_size = total_h_available / N_COLS

# Add charts to plot, filling the grid row by row
for i, (pie_chart, winner_logo) in enumerate(zip(pie_charts, winner_logos)):
    r, c = divmod(i, N_COLS)

    # Pie
    fig.add_trace(pie_chart, row=r+1, col=c+1)

    # Winner logo, centred in the hole of its pie
    fig.add_layout_image(
        source=winner_logo,
        xref="paper",    # x / y are fractions of the whole figure
        yref="paper",
        x=(col_size / 2) + (col_size*c + H_SPACING*c),          # horizontal centre of the cell
        y=(1 - (row_size / 2)) - (row_size*r + V_SPACING*r),    # vertical centre of the cell
        sizex=(1/N_ROWS)*.2,   # width, in paper units
        sizey=(1/N_ROWS)*.2,   # height, in paper units
        xanchor="center",
        yanchor="middle",
        layer="above",
        opacity=0.9,
    )

# Format
fig.for_each_annotation(lambda a: a.update(font=dict(size=14, weight='bold')))

fig.update_layout(
    template='nfl_template',
    paper_bgcolor='#f0f0f0',
    title=dict(
        text=f'<B>NFL Week {PREDICTION_WEEK} <span style="color: #D5A15D">Picks</span></b><br><sup>Win Probability Model</sup>',
    ),
    margin=dict(t=100, b=50, l=50, r=50),
    showlegend=False,
    height=600,
    width=700,
)

# Credits
fig.add_annotation(
    text=f'EPA / Play from teams\' last 4, 8, 12, and 16 games, in all 3 phases<br>Figure & Model: @clankeranalytic | Data: nfl_data_py | {datetime.today():%Y-%m-%d}',
    showarrow=False,
    xref='paper',
    yref='paper',
    y=-0.075,
    x=1,
    align='right'
)
fig.show()


# Export
# pio.write_image(fig, f'{FOLDER}/Week {PREDICTION_WEEK} Win Probability.png', scale=6, width=700, height=600)


In [None]:
''' Predictions '''

# BytesIO, requests and PIL.Image are imported in the notebook's top import cell.

BORDER_COLOR = '#989898'
PICK_FILL = 'rgba(75,181,67,0.7)'   # green highlight for the picked side
LOGO_TIMEOUT_S = 10                  # seconds to wait for a logo download before failing


def _format_american(odds):
    """Format American odds as a signed integer string, e.g. '+120' or '-110'."""
    odds = int(odds)
    return f'+{abs(odds)}' if odds > 0 else f'-{abs(odds)}'


def _pick_pair(pick, top_label, bottom_label, picked, unpicked):
    """Return [top_row_value, bottom_row_value] for a two-row table column.

    A row gets `unpicked` when `pick` names the other row, otherwise `picked`.
    """
    return [unpicked if pick == bottom_label else picked,
            unpicked if pick == top_label else picked]


tables = []
winner_logos = []
for i in viz_df.index:

    away_team, home_team = viz_df.loc[i, ['away_team', 'home_team']]
    away_prob, home_prob = viz_df.loc[i, ['prob_away', 'prob_home']]
    away_team_ml, home_team_ml = viz_df.loc[i, ['pred_away_ml_viz', 'pred_home_ml_viz']]
    away_color, home_color = viz_df.loc[i, ['away_color', 'home_color']]
    away_logo, home_logo = viz_df.loc[i, ['away_logo', 'home_logo']]

    # Spread
    spread_line = viz_df.loc[i, 'spread_line']
    home_spread_odds = int(viz_df.loc[i, 'home_spread_odds'])
    away_spread_odds = int(viz_df.loc[i, 'away_spread_odds'])
    spread_pick = viz_df.loc[i, 'spread_pick']

    # A positive spread_line is shown as points given to the away team and laid by the home team.
    away_spread = f'+{abs(spread_line)}' if spread_line > 0 else f'-{abs(spread_line)}'
    away_spread += f' ({away_spread_odds})'
    home_spread = f'-{abs(spread_line)}' if spread_line > 0 else f'+{abs(spread_line)}'
    home_spread += f' ({home_spread_odds})'

    # Total
    total_line = viz_df.loc[i, 'total_line']
    total_pick = viz_df.loc[i, 'total_pick']

    # ML
    home_moneyline = _format_american(viz_df.loc[i, 'home_moneyline'])
    away_moneyline = _format_american(viz_df.loc[i, 'away_moneyline'])
    moneyline_value = viz_df.loc[i, 'moneyline_value']

    # Table: rows are [away, home] (or [over, under] in the Total column)
    table = go.Table(
        columnwidth=[2,4,3,2],
        header=dict(
            values=['', 'Spread', 'Total', 'ML'],
            line_color=['rgba(0,0,0,0)']+[BORDER_COLOR]*3,
            fill_color=['rgba(0,0,0,0)']+['white']*3,
            align=['center', 'center'],
            font=dict(size=10)
        ),
        cells=dict(
            values=[[away_team, home_team],
                    [away_spread, home_spread],
                    [f'O {total_line}', f'U {total_line}'],
                    [away_moneyline, home_moneyline]],
            line_color=[BORDER_COLOR]*4,
            line_width=1,
            fill_color=[[away_color, home_color],
                        _pick_pair(spread_pick, 'away', 'home', PICK_FILL, 'white'),
                        _pick_pair(total_pick, 'over', 'under', PICK_FILL, 'white'),
                        _pick_pair(moneyline_value, 'away', 'home', PICK_FILL, 'white')],
            font=dict(
                color=[['white', 'white'],
                       _pick_pair(spread_pick, 'away', 'home', 'white', 'black'),
                       _pick_pair(total_pick, 'over', 'under', 'white', 'black'),
                       _pick_pair(moneyline_value, 'away', 'home', 'white', 'black')]
            )
        )
    )
    tables.append(table)
    winner_logos.append(away_logo if away_prob > home_prob else home_logo)


## Make Figure
# Each game occupies two subplot columns: a wide table and a narrow logo panel.
N_COLS = 2
N_ROWS = math.ceil(len(tables) / N_COLS)
fig = make_subplots(rows=N_ROWS, cols=N_COLS*2,
                    specs=[[{"type": "table"}, {"type": "xy"}]*N_COLS for _ in range(N_ROWS)],
                    subplot_titles=['', 'Projected<br>Winner']*len(tables),
                    horizontal_spacing=0.05,
                    column_widths=[6, 1]*N_COLS)

# Add tables / winner logos, filling the grid row by row
for i, (table, logo_url) in enumerate(zip(tables, winner_logos)):
    r, c = divmod(i, N_COLS)

    ## Logo
    # Fail fast on a slow or broken download instead of hanging or decoding an error page.
    response = requests.get(logo_url, timeout=LOGO_TIMEOUT_S)
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))

    logo_trace = px.imshow(img=img)
    fig.add_trace(
        logo_trace.data[0],
        row=r+1,
        col=(c+1)*2
    )

    ## Table
    fig.add_trace(
        table,
        row=r+1,
        col=(c+1)*2 - 1
    )


## Formatting
fig.update_annotations(font=dict(size=10, weight='bold'))

fig.update_layout(
    template='nfl_template',
    paper_bgcolor='#f0f0f0',
    plot_bgcolor='rgba(0,0,0,0)',
    title=dict(
        text=f'<b>NFL Week {PREDICTION_WEEK} <span style="color: #D5A15D">Betting Sheet</span></b><br><sup>Scoring Model</sup>',
        # y=0.965
    ),
    margin=dict(t=80, b=50, l=25, r=25),
    height=400,#1200,
    width=700
)
# Hide the axes of the logo panels
fig.update_xaxes(
    visible=False,
)
fig.update_yaxes(
    visible=False,
)

# Credits
fig.add_annotation(
    text=f'EPA / Play from teams\' last 4, 8, 12, and 16 games, in all 3 phases<br>Figure & Model: @clankeranalytic | Data: nfl_data_py | {datetime.today():%Y-%m-%d}',
    font=dict(size=8),
    showarrow=False,
    xref='paper',
    yref='paper',
    y=-.05,#-0.025,
    x=1,
    align='right'
)

fig.show()

# Export
# pio.write_image(fig, f'{FOLDER}/Week {PREDICTION_WEEK} Predictions.png', scale=6, width=700, height=400)

In [None]:
# Persist this week's predictions so the results cell can reload them after the games are played.
predictions_xlsx_path = f'{FOLDER}/Week {PREDICTION_WEEK} Predictions.xlsx'
viz_df.to_excel(predictions_xlsx_path, index=False)

## (After Week) Results

In [None]:

def _outcome_vs_line(actual, line, above_label, below_label):
    """Label each game by where `actual` landed relative to the betting `line`.

    Returns `above_label` / `below_label` for strictly above / below, 'push' when the
    value lands exactly on the line, and 'n/a' when no comparison holds (missing values).
    """
    return np.select(
        [actual > line, actual < line, actual == line],
        [above_label, below_label, 'push'],
        default='n/a',
    )


def _grade_pick(pick, outcome):
    """Return 1.0 for a correct pick, 0.0 for a wrong one and NaN when the outcome is 'push' or 'n/a'."""
    graded = np.where(pick == outcome, 1.0, 0.0)
    return np.where(np.isin(outcome, ['push', 'n/a']), np.nan, graded)


# Reload the exported predictions and drop the score columns, which are re-read from the schedule below.
pred_week_viz_df = pd.read_excel(f'{FOLDER}/Week {PREDICTION_WEEK} Predictions.xlsx', sheet_name='Sheet1')
pred_week_viz_df = pred_week_viz_df.drop(columns=['home_score', 'away_score'])

## Get Results
week_results_df = pred_week_viz_df.merge(master_matchups_df[['game_id', 'home_score', 'away_score', 'winner', 'result', 'total']], on='game_id', how='left')

## Evaluate

# Game pick: translate the 0 / 1 winner flag into the winning team's abbreviation
week_results_df['winner'] = np.where(week_results_df['winner'] == 0, week_results_df['away_team'], week_results_df['home_team'])
week_results_df['winner_correct?'] = np.where(week_results_df['winner'] == week_results_df['pred'], 1, 0)

# Spread picks: a result exactly on the line is a push and is left ungraded (NaN),
# instead of being counted as an 'away' cover.
week_results_df['spread_result'] = _outcome_vs_line(week_results_df['result'], week_results_df['spread_line'], 'home', 'away')
week_results_df['spread_correct?'] = _grade_pick(week_results_df['spread_pick'], week_results_df['spread_result'])

# Total picks: same push handling as the spread
week_results_df['total_result'] = _outcome_vs_line(week_results_df['total'], week_results_df['total_line'], 'over', 'under')
week_results_df['total_correct?'] = _grade_pick(week_results_df['total_pick'], week_results_df['total_result'])

## Add Actual EPA
cols = ['Plays_O', 'EPA_O_Play', 'Plays_D','EPA_D_Play', 'Plays_ST', 'EPA_ST_Play']

# Actual per-team EPA for the predicted week
week_epa = master_epa_df.loc[master_epa_df.index.get_level_values(0) == c_master_week, :].reset_index()
# print(week_epa.head().to_string())

# Home team EPA, then away team EPA
for side in ('home', 'away'):
    week_results_df = week_results_df.merge(week_epa[['team'] + cols], left_on=f'{side}_team', right_on='team', how='left').rename(columns={
        col: f'{side}_team_{col}' for col in cols
    }).drop(columns=['team'])

# Keep and order only the columns wanted in the exported sheet
week_results_df = week_results_df.reindex(columns=[
        'season', 'week', 'home_team', 'away_team', 'home_score', 'away_score',
        'prob_home', 'pred_home_ml_viz', 'prob_away', 'pred_away_ml_viz',
        'pred', 'winner', 'winner_correct?',
        'pred_home_score', 'pred_away_score',
        'spread_line', 'pred_spread', 'spread_pick', 'result', 'spread_result', 'spread_correct?',
        'total_line', 'pred_total', 'total_pick', 'total', 'total_result', 'total_correct?',
        'home_team_EPA_O_Play', 'home_team_EPA_D_Play', 'home_team_EPA_ST_Play',
        'away_team_EPA_O_Play', 'away_team_EPA_D_Play', 'away_team_EPA_ST_Play'])

print(week_results_df.columns)
print(week_results_df.to_string())

## Export
week_results_df.to_excel(f'{FOLDER}/Week {PREDICTION_WEEK} Results.xlsx', index=False, sheet_name='Results')