In [1]:
# Import Pandas and numpy libraries
# Import 'model_df' dataset for training/testing prediction model
# Import 'validation_df' dataset for validating model/making predictions on current season
# Import games with lines data for comparing model predictions against the spread/betting lines

import pandas as pd
import numpy as np
# Show every column and every row when a DataFrame is displayed (no truncation).
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Training/testing data for the prediction model.
model_df = pd.read_csv('model_df_final.csv')

# Current-season data the fitted model is validated / makes predictions on.
validation_df = pd.read_csv('validation_df.csv')

# Games with betting lines, used to compare predictions against the spread
# (historical file and 2024 file).
games_with_lines_df = pd.read_csv('games_with_lines.csv')
games_with_lines_2024_df = pd.read_csv('games_with_lines_2024.csv')

# 2024 rankings and team records that are merged onto the predictions before export.
rankings_2024_df = pd.read_csv('rankings_2024.csv')
records_2024_df = pd.read_csv('records_2024.csv')

In [2]:
# Add the same two engineered features to the training and the validation frame:
#   elo_index - the team's pregame Elo divided by its opponent's pregame Elo
#   hfa       - home-field indicator: 1 for the home team, 0 at a neutral site, -1 for the away team
for frame in (model_df, validation_df):
    frame['elo_index'] = frame['pregame_elo'] / frame['opp_pregame_elo']

    # Explicit comparison with True is kept on purpose: any value that is not True
    # (presumably including missing values - confirm against the CSV) is treated as "not neutral".
    at_neutral_site = frame['neutral_site'] == True
    is_home_team = frame['home_away'] == 1

    # The neutral-site check wins over the home/away flag.
    frame['hfa'] = np.where(at_neutral_site, 0, np.where(is_home_team, 1, -1))

In [3]:
# Import necessary sklearn libraries and create linear regression model to predict scoring output

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Predictor columns fed to the scoring model: the Elo ratio, the home-field
# indicator, the games / bye-week columns for both sides, the 'swing_*'
# columns and 'ant_yield'.
feature_columns = [
    'elo_index', 'hfa', 'games', 'off_bye_week', 'opp_games', 'opp_off_bye_week',
    'swing_explosiveness', 'swing_field_position', 'swing_dbs_havoc', 'swing_f7_havoc',
    'swing_overall_havoc', 'swing_line_yards', 'swing_open_field_yards',
    'swing_passing_downs_explosiveness', 'swing_passing_downs_ppa', 'swing_passing_downs_rate',
    'swing_passing_downs_success_rate', 'swing_passing_explosiveness',
    'swing_passing_ppa', 'swing_passing_success_rate', 'swing_points_per_opp',
    'swing_power_success', 'swing_rushing_explosiveness',
    'swing_rushing_ppa', 'swing_second_level_yards', 'swing_standard_downs_explosiveness',
    'swing_standard_downs_ppa', 'swing_standard_downs_rate',
    'swing_standard_downs_success_rate', 'swing_stuff_rate', 'swing_overall_success_rate',
    'ant_yield',
]

# Target: points scored by the team in the row.
X = model_df[feature_columns]
y = model_df['points']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model.predict(X_test)
mse_model = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Pair every feature name with its fitted coefficient. These are raw regression
# coefficients on unscaled inputs, printed under the label "Feature Importance".
feature_importance = model.coef_
feat_dict = dict(zip(X.columns, feature_importance))

for label, value in (('Mean Squared Error', mse_model),
                     ('R-squared', r2),
                     ('Feature Importance', feat_dict)):
    print(f'{label}: {value}')

Mean Squared Error: 140.991346493007
R-squared: 0.27104427545405263
Feature Importance: {'elo_index': 22.619171889855902, 'hfa': 1.4701348045728682, 'games': -0.3788924862778259, 'off_bye_week': -0.09224086472407268, 'opp_games': 0.3398826131651959, 'opp_off_bye_week': -0.06598910496416377, 'swing_explosiveness': -1.6858950831913406, 'swing_field_position': -0.20512124782342275, 'swing_dbs_havoc': -9.162215268933927, 'swing_f7_havoc': 8.455741219271987, 'swing_overall_havoc': -0.7064740496619678, 'swing_line_yards': -3.954894275267809, 'swing_open_field_yards': -0.37791118742513313, 'swing_passing_downs_explosiveness': 1.0019533067006314, 'swing_passing_downs_ppa': -5.302149913143255, 'swing_passing_downs_rate': 2.7251330024395077, 'swing_passing_downs_success_rate': -14.644417105890547, 'swing_passing_explosiveness': 3.312192135341503, 'swing_passing_ppa': -11.199145544779936, 'swing_passing_success_rate': 27.060675243699258, 'swing_points_per_opp': -0.07976678067284057, 'swing_power_

In [4]:
# Score every row of the historical frame with the fitted scoring model so the
# predictions can be compared with actual winners and with the spread.
# X covers all of model_df, including the rows the model was trained on, so the
# records derived from this column are largely in-sample.
in_sample_points = model.predict(X)
model_df['jay'] = in_sample_points

In [5]:
# Build one row per historical game holding both teams' model rows.
# Step 1: attach the home team's row from model_df to each game.
home_merged = games_with_lines_df.merge(
    model_df,
    left_on=['id', 'home_team'],
    right_on=['game_id', 'team'],
)

# Step 2: attach the away team's row. Columns present on both sides of this
# second merge (for example 'jay') receive the '_home' / '_away' suffixes.
model_comp_df = home_merged.merge(
    model_df,
    left_on=['id', 'away_team'],
    right_on=['game_id', 'team'],
    suffixes=('_home', '_away'),
)

# Actual margin as away minus home points (negative when the home team won).
model_comp_df['margin'] = model_comp_df['away_points'] - model_comp_df['home_points']

In [6]:
# Compare the actual result, the model's predicted scores and the betting line
# for every historical game, then print the model's straight-up (M/L),
# against-the-spread (ATS) and over/under (O/U) records.

wp_check = ['id','season','week','start_date','home_team','home_conference','home_points','away_team','away_conference','away_points','margin',
            'spread','over_under','jay_home','jay_away']

jay_model_check = model_comp_df.loc[:, wp_check]

# jay_model_check = jay_model_check.loc[jay_model_check['season']==2023]

# Aliases for the input columns that the comparisons below read repeatedly.
home_pts = jay_model_check['home_points']
away_pts = jay_model_check['away_points']
home_pred = jay_model_check['jay_home']
away_pred = jay_model_check['jay_away']
spread = jay_model_check['spread']
over_under = jay_model_check['over_under']

# Predicted margin, away minus home (same orientation as 'margin').
jay_model_check['jay_game_margin'] = away_pred - home_pred

# Rounded scores for presentation. When both predictions round to the same
# number, the side with the higher raw prediction gets one extra point so the
# displayed score never shows a tie.
home_rounded = home_pred.round()
away_rounded = away_pred.round()
rounds_to_tie = home_rounded == away_rounded

jay_model_check['rounded_jay_home'] = np.where(rounds_to_tie & (home_pred > away_pred),
                                               home_rounded + 1,
                                               home_rounded)
jay_model_check['rounded_jay_away'] = np.where(rounds_to_tie & (away_pred > home_pred),
                                               away_rounded + 1,
                                               away_rounded)

jay_model_check['jay_total'] = home_pred + away_pred
jay_model_check['point_total'] = home_pts + away_pts

# Each predicted score is rounded UP to the next half point before differencing.
jay_model_check['jay_spread'] = np.ceil(away_pred * 2) / 2 - np.ceil(home_pred * 2) / 2
# Predicted total rounded to the nearest half point.
jay_model_check['rounded_jay_total'] = (jay_model_check['jay_total'] * 2).round() / 2

# --- Straight-up result: actual and predicted -------------------------------
# Rows matching neither mask (for example equal scores) stay NaN.
for flag, mask in ((1, home_pts > away_pts), (0, away_pts > home_pts)):
    jay_model_check.loc[mask, 'home_win'] = flag

for flag, mask in ((1, home_pred > away_pred), (0, away_pred > home_pred)):
    jay_model_check.loc[mask, 'home_win_jay'] = flag

actual_home_win = jay_model_check['home_win']
model_home_win = jay_model_check['home_win_jay']
for flag, mask in ((1, actual_home_win == model_home_win),
                   (0, actual_home_win != model_home_win),
                   (None, home_pts.isnull())):  # no home score recorded -> no verdict
    jay_model_check.loc[mask, 'correct_pick_jay'] = flag

# --- Against the spread ------------------------------------------------------
# Adding the spread to the home score and comparing with the away score leaves
# exact pushes (equality) as NaN.
home_with_spread = home_pts + spread
for flag, mask in ((1, home_with_spread > away_pts), (0, home_with_spread < away_pts)):
    jay_model_check.loc[mask, 'home_cover_vegas'] = flag

jay_home_with_spread = jay_model_check['rounded_jay_home'] + spread
jay_away_rounded = jay_model_check['rounded_jay_away']
for flag, mask in ((1, jay_home_with_spread > jay_away_rounded),
                   (0, jay_home_with_spread < jay_away_rounded)):
    jay_model_check.loc[mask, 'jay_cover'] = flag

actual_cover = jay_model_check['home_cover_vegas']
model_cover = jay_model_check['jay_cover']
for flag, mask in ((1, actual_cover == model_cover),
                   (0, actual_cover != model_cover),
                   (None, actual_cover.isnull()),   # no actual ATS result
                   (None, model_cover.isnull())):   # model lands exactly on the line / no line
    jay_model_check.loc[mask, 'correct_pick_jay_ats'] = flag

# --- Over / under --------------------------------------------------------------
point_total = jay_model_check['point_total']
for flag, mask in ((1, point_total > over_under), (0, point_total < over_under)):
    jay_model_check.loc[mask, 'over_total'] = flag

model_total = jay_model_check['rounded_jay_total']
for flag, mask in ((1, model_total > over_under), (0, model_total < over_under)):
    jay_model_check.loc[mask, 'jay_over'] = flag

actual_over = jay_model_check['over_total']
model_over = jay_model_check['jay_over']
for flag, mask in ((1, actual_over == model_over),
                   (0, actual_over != model_over),
                   (None, actual_over.isnull()),
                   (None, model_over.isnull())):
    jay_model_check.loc[mask, 'correct_jay_over'] = flag

# Disabled MSE comparison of the model against the betting lines:
# mse_jay_spread = mean_squared_error(jay_model_check['margin'], jay_model_check['jay_spread'])
# mse_spread = mean_squared_error(jay_model_check['margin'], jay_model_check['spread'])

# mse_jay_points = mean_squared_error(jay_model_check['point_total'], jay_model_check['rounded_jay_total'])
# mse_points = mean_squared_error(jay_model_check['point_total'], jay_model_check['over_under'])

# --- Records --------------------------------------------------------------------
# Games with a recorded home score are the denominator for M/L; ATS and O/U use
# only decided picks and report the undecided remainder as a third number.
games_played = home_pts.count()

ml_wins = jay_model_check['correct_pick_jay'].sum(skipna=True)

ats_wins = jay_model_check['correct_pick_jay_ats'].sum(skipna=True)
ats_decided = jay_model_check['correct_pick_jay_ats'].count()

ou_wins = jay_model_check['correct_jay_over'].sum(skipna=True)
ou_decided = jay_model_check['correct_jay_over'].count()

print('Historical Model Prediction Records')

print('M/L Record: ', round(ml_wins), '-',
      round(games_played - ml_wins),
      ' (', round(ml_wins / games_played, 3), ')', sep="")

print('ATS Record: ', round(ats_wins), '-',
      round(ats_decided - ats_wins), '-',
      round(games_played - ats_decided),
      ' (', round(ats_wins / ats_decided, 3), ')', sep="")

print('O/U Record: ', round(ou_wins), '-',
      round(ou_decided - ou_wins), '-',
      round(games_played - ou_decided),
      ' (', round(ou_wins / ou_decided, 3), ')', sep="")

# Disabled prints for the MSE comparison (labels paired with the matching variable):
# print(f'Mean Squared Error (jayMo spread): {mse_jay_spread}')
# print(f'Mean Squared Error (Vegas spread): {mse_spread}')
# print(f'Mean Squared Error (jayMo points): {mse_jay_points}')
# print(f'Mean Squared Error (Vegas points): {mse_points}')

Historical Model Prediction Records
M/L Record: 2461-921 (0.728)
ATS Record: 1558-1619-205 (0.49)
O/U Record: 1671-1514-197 (0.525)


In [7]:
# Import sklearn's logistic regression to calculate a team's win probability from its predicted scoring output
# Gives us the accuracy of the model for predicting wins/losses (72.7%)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Fit a logistic regression that maps the model's predicted margin
# (jay_away - jay_home) to the probability that the home team wins.

# Only rows with a known result and a predicted margin can be used:
# LogisticRegression.fit rejects NaN in either the feature or the target, and
# 'home_win' is left NaN whenever neither side outscored the other.
win_prob_games = jay_model_check.dropna(subset=['jay_game_margin', 'home_win'])

# Dedicated names are used on purpose. Rebinding X / y / X_train / ... here would
# overwrite the scoring model's inputs, so re-running the earlier
# `model.predict(X)` cell after this one would pass it this one-column frame.
win_X = win_prob_games[['jay_game_margin']]
win_y = win_prob_games['home_win']

# Same 80/20 split and seed as the scoring model for reproducibility.
win_X_train, win_X_test, win_y_train, win_y_test = train_test_split(
    win_X, win_y, test_size=0.2, random_state=42)

win_prob = LogisticRegression()
win_prob.fit(win_X_train, win_y_train)

win_y_pred = win_prob.predict(win_X_test)

# Share of held-out games whose winner is classified correctly.
accuracy = accuracy_score(win_y_test, win_y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7267355982274741


In [8]:
# Score the current-season (validation) rows with the fitted scoring model.
# The column list must match, in order, the features the model was trained on.
validation_features = [
    'elo_index', 'hfa', 'games', 'off_bye_week', 'opp_games', 'opp_off_bye_week',
    'swing_explosiveness', 'swing_field_position', 'swing_dbs_havoc', 'swing_f7_havoc',
    'swing_overall_havoc', 'swing_line_yards', 'swing_open_field_yards',
    'swing_passing_downs_explosiveness', 'swing_passing_downs_ppa', 'swing_passing_downs_rate',
    'swing_passing_downs_success_rate', 'swing_passing_explosiveness',
    'swing_passing_ppa', 'swing_passing_success_rate', 'swing_points_per_opp',
    'swing_power_success', 'swing_rushing_explosiveness',
    'swing_rushing_ppa', 'swing_second_level_yards', 'swing_standard_downs_explosiveness',
    'swing_standard_downs_ppa', 'swing_standard_downs_rate',
    'swing_standard_downs_success_rate', 'swing_stuff_rate', 'swing_overall_success_rate',
    'ant_yield',
]

valid_X = validation_df[validation_features]

# Predicted points for every team-game row of the current season.
validation_df['jay'] = model.predict(valid_X)

In [9]:
# Build one row per 2024 game holding both teams' validation rows.
# Step 1: attach the home team's row. Non-key columns present in both frames get
# pandas' default '_x' / '_y' suffixes here (e.g. 'start_date_x' used downstream).
home_merged_2024 = games_with_lines_2024_df.merge(
    validation_df,
    left_on=['game_id', 'home_team'],
    right_on=['game_id', 'team'],
)

# Step 2: attach the away team's row; columns shared by both sides of this merge
# (for example 'jay') receive the '_home' / '_away' suffixes.
validation_comp_df = home_merged_2024.merge(
    validation_df,
    left_on=['game_id', 'away_team'],
    right_on=['game_id', 'team'],
    suffixes=('_home', '_away'),
)

In [10]:
# Compare the actual result, the model's predicted scores and the betting line
# for every current-season game, then print the model's straight-up (M/L),
# against-the-spread (ATS) and over/under (O/U) records.
# Columns are created in a fixed order because this frame is later exported.

wp_check = ['game_id','season','week','start_date_x','start_time_tbd',
            'home_team','abbreviation_home','home_conference','home_points','logo_home','color_home',
            'away_team','abbreviation_away','away_conference','away_points','logo_away','color_away',
            'spread','over_under','jay_home','jay_away']

# 'start_date_x' is the suffixed name left by the merge; restore the plain name.
jay_predictions = validation_comp_df.loc[:, wp_check].rename(columns={"start_date_x": "start_date"})

# Aliases for the input columns that the comparisons below read repeatedly.
home_pts = jay_predictions['home_points']
away_pts = jay_predictions['away_points']
home_pred = jay_predictions['jay_home']
away_pred = jay_predictions['jay_away']
spread = jay_predictions['spread']
over_under = jay_predictions['over_under']

# Predicted margin, away minus home.
jay_predictions['jay_game_margin'] = away_pred - home_pred

# Rounded scores for the dashboard. When both predictions round to the same
# number, the side with the higher raw prediction gets one extra point so the
# displayed score never shows a tie.
home_rounded = home_pred.round()
away_rounded = away_pred.round()
rounds_to_tie = home_rounded == away_rounded

jay_predictions['rounded_jay_home'] = np.where(rounds_to_tie & (home_pred > away_pred),
                                               home_rounded + 1,
                                               home_rounded)
jay_predictions['rounded_jay_away'] = np.where(rounds_to_tie & (away_pred > home_pred),
                                               away_rounded + 1,
                                               away_rounded)

jay_predictions['jay_total'] = home_pred + away_pred
jay_predictions['point_total'] = home_pts + away_pts

# Each predicted score is rounded UP to the next half point before differencing.
jay_predictions['jay_spread'] = np.ceil(away_pred * 2) / 2 - np.ceil(home_pred * 2) / 2
# Predicted total rounded to the nearest half point.
jay_predictions['rounded_jay_total'] = (jay_predictions['jay_total'] * 2).round() / 2

# --- Straight-up result: actual and predicted -------------------------------
# Rows matching neither mask (unplayed games, equal scores) stay NaN.
for flag, mask in ((1, home_pts > away_pts), (0, away_pts > home_pts)):
    jay_predictions.loc[mask, 'home_win'] = flag

for flag, mask in ((1, home_pts < away_pts), (0, away_pts < home_pts)):
    jay_predictions.loc[mask, 'away_win'] = flag

for flag, mask in ((1, home_pred > away_pred), (0, away_pred > home_pred)):
    jay_predictions.loc[mask, 'home_win_jay'] = flag

actual_home_win = jay_predictions['home_win']
model_home_win = jay_predictions['home_win_jay']
for flag, mask in ((1, actual_home_win == model_home_win),
                   (0, actual_home_win != model_home_win),
                   (None, home_pts.isnull())):  # game not played yet -> no verdict
    jay_predictions.loc[mask, 'correct_pick_jay'] = flag

# --- Against the spread ------------------------------------------------------
# Adding the spread to the home score and comparing with the away score leaves
# exact pushes (equality) as NaN.
home_with_spread = home_pts + spread
for flag, mask in ((1, home_with_spread > away_pts), (0, home_with_spread < away_pts)):
    jay_predictions.loc[mask, 'home_cover_vegas'] = flag

jay_home_with_spread = jay_predictions['rounded_jay_home'] + spread
jay_away_rounded = jay_predictions['rounded_jay_away']
for flag, mask in ((1, jay_home_with_spread > jay_away_rounded),
                   (0, jay_home_with_spread < jay_away_rounded)):
    jay_predictions.loc[mask, 'jay_cover'] = flag

actual_cover = jay_predictions['home_cover_vegas']
model_cover = jay_predictions['jay_cover']
for flag, mask in ((1, actual_cover == model_cover),
                   (0, actual_cover != model_cover),
                   (None, actual_cover.isnull()),   # no actual ATS result
                   (None, model_cover.isnull())):   # model lands exactly on the line / no line
    jay_predictions.loc[mask, 'correct_pick_jay_ats'] = flag

# --- Over / under --------------------------------------------------------------
point_total = jay_predictions['point_total']
for flag, mask in ((1, point_total > over_under), (0, point_total < over_under)):
    jay_predictions.loc[mask, 'over_total'] = flag

model_total = jay_predictions['rounded_jay_total']
for flag, mask in ((1, model_total > over_under), (0, model_total < over_under)):
    jay_predictions.loc[mask, 'jay_over'] = flag

actual_over = jay_predictions['over_total']
model_over = jay_predictions['jay_over']
for flag, mask in ((1, actual_over == model_over),
                   (0, actual_over != model_over),
                   (None, actual_over.isnull()),
                   (None, model_over.isnull())):
    jay_predictions.loc[mask, 'correct_jay_over'] = flag

# --- Records --------------------------------------------------------------------
# Games with a recorded home score are the denominator for M/L; ATS and O/U use
# only decided picks and report the undecided remainder as a third number.
games_played = home_pts.count()

ml_wins = jay_predictions['correct_pick_jay'].sum(skipna=True)

ats_wins = jay_predictions['correct_pick_jay_ats'].sum(skipna=True)
ats_decided = jay_predictions['correct_pick_jay_ats'].count()

ou_wins = jay_predictions['correct_jay_over'].sum(skipna=True)
ou_decided = jay_predictions['correct_jay_over'].count()

print('Current Season Prediction Records')

print('M/L Record: ', round(ml_wins), '-',
      round(games_played - ml_wins),
      ' (', round(ml_wins / games_played, 3), ')', sep="")

print('ATS Record: ', round(ats_wins), '-',
      round(ats_decided - ats_wins), '-',
      round(games_played - ats_decided),
      ' (', round(ats_wins / ats_decided, 3), ')', sep="")

print('O/U Record: ', round(ou_wins), '-',
      round(ou_decided - ou_wins), '-',
      round(games_played - ou_decided),
      ' (', round(ou_wins / ou_decided, 3), ')', sep="")

Current Season Prediction Records
M/L Record: 496-217 (0.696)
ATS Record: 349-324-40 (0.519)
O/U Record: 359-302-52 (0.543)


In [11]:
# Score proximity input: how far the predicted score line was from the actual one.
# The commented-out block that follows is the original multi-bucket metric; the
# next cell uses a simplified version.

# Absolute miss on each team's score (rounded prediction vs. actual points).
home_score_miss = (jay_predictions['rounded_jay_home'] - jay_predictions['home_points']).abs()
away_score_miss = (jay_predictions['rounded_jay_away'] - jay_predictions['away_points']).abs()

# Combined miss across both teams; 0 means the exact final score was predicted.
jay_predictions['home_away_margin'] = home_score_miss + away_score_miss

#jay_predictions['score_proximity'] = np.where(jay_predictions['home_points'].isnull(),
                                               #0,None)

#jay_predictions['score_proximity'] = np.where(jay_predictions['home_away_margin'] >= 31,
                                               #1,jay_predictions['score_proximity'])

#jay_predictions['score_proximity'] = np.where((jay_predictions['home_away_margin'] > 0) & (jay_predictions['home_away_margin'] >= 21) &
                                               #(jay_predictions['home_away_margin'] <=30),
                                               #2,jay_predictions['score_proximity'])

#jay_predictions['score_proximity'] = np.where((jay_predictions['home_away_margin'] > 0) & (jay_predictions['home_away_margin'] <= 20),
                                               #3,jay_predictions['score_proximity'])

#jay_predictions['score_proximity'] = np.where(jay_predictions['home_away_margin'] == 0,
                                               #4,jay_predictions['score_proximity'])

#jay_predictions['score_proximity'] = np.where(jay_predictions['correct_pick_jay'] == 0,
                                               #1,jay_predictions['score_proximity'])

#prox_check = ['week','home_team','home_points','rounded_jay_home',
            #'away_team','away_points','rounded_jay_away',
            #'home_away_margin','score_proximity']

#jay_predictions_prox_check = jay_predictions.loc[:, prox_check]

# jay_predictions_prox_check.loc[(jay_predictions_prox_check['week']==10)]

In [12]:
# Simplified version of 'score_proximity'
# Maybe revisit complex version ?
#
# Encoding (first matching rule wins):
#   2    - exact final score predicted (home_away_margin == 0)
#   1    - winner picked correctly
#   0    - winner picked incorrectly
#   None - no verdict yet (correct_pick_jay is missing, e.g. game not played)
#
# A single np.select replaces the former `np.where(cond, None, None)` seed (both
# branches were None, so the condition had no effect) and the three sequential
# overwrites. `default=None` keeps the column as object dtype, as before.
jay_predictions['score_proximity'] = np.select(
    [
        jay_predictions['home_away_margin'] == 0,
        jay_predictions['correct_pick_jay'] == 1,
        jay_predictions['correct_pick_jay'] == 0,
    ],
    [2, 1, 0],
    default=None,
)

In [13]:
# Hit proximity metric: grade each game by how many of the three betting picks
# (winner, against the spread, over/under) the model got right.

pick_columns = ['correct_pick_jay','correct_pick_jay_ats','correct_jay_over']

# Correct picks, and picks that have a verdict at all (non-null), per game.
jay_predictions['hit_total'] = jay_predictions[pick_columns].sum(axis=1)
jay_predictions['finished_hits'] = jay_predictions[pick_columns].count(axis=1)

# Share of decided picks that were correct; NaN when no pick has a verdict (0/0).
hit_rate = jay_predictions['hit_total'] / jay_predictions['finished_hits']

# Encoding (first matching rule wins):
#   0 - game has no home score yet, or no pick has a verdict
#   3 - every decided pick correct
#   2 - some, but not all, decided picks correct
#   1 - no decided pick correct
jay_predictions['hit_proximity'] = np.select(
    [
        jay_predictions['home_points'].isnull(),
        hit_rate >= 1,
        (hit_rate < 1) & (hit_rate > 0),
        hit_rate == 0,
    ],
    [0, 3, 2, 1],
    default=0,
)

# Narrow view for eyeballing the metric against scores.
hits_check = ['week','home_team','home_points','rounded_jay_home',
              'away_team','away_points','rounded_jay_away',
              'hit_total','finished_hits','hit_proximity']

jay_predictions_hits_check = jay_predictions.loc[:, hits_check]

# jay_predictions_hits_check.loc[(jay_predictions_hits_check['week']==10)]

In [14]:
# Flag games where the model picks the team that is NOT the betting favorite,
# and attach that team's win probability.

line = jay_predictions['spread']
model_margin = jay_predictions['jay_game_margin']   # away minus home

# Negative spread with a positive model margin: the model has the away side ahead.
model_likes_away_dog = (line < 0) & (model_margin > 0)
# Positive spread with a negative model margin: the model has the home side ahead.
model_likes_home_dog = (line > 0) & (model_margin < 0)

jay_predictions['jay_underdog'] = np.where(model_likes_away_dog | model_likes_home_dog, 1, 0)

# Column 1 of predict_proba belongs to the second entry of win_prob.classes_
# (presumably home_win == 1 - confirm against the fitted classifier).
jay_predictions['home_win_probability'] = win_prob.predict_proba(jay_predictions[['jay_game_margin']])[:, 1]
jay_predictions['away_win_probability'] = 1 - jay_predictions['home_win_probability']

# Win probability of the model-backed underdog; None where the model agrees
# with the favorite (the column is therefore object dtype).
underdog_probability = np.where(model_likes_away_dog,
                                jay_predictions['away_win_probability'],
                                jay_predictions['home_win_probability'])

jay_predictions['underdog_win_probability'] = np.where(jay_predictions['jay_underdog'] == 1,
                                                       underdog_probability,
                                                       None)



In [15]:
# Check the accuracy of the betting lines provider in predicting wins/losses,
# for comparison with the model's own straight-up record.

line = jay_predictions['spread']
home_won = jay_predictions['home_win'] == 1
away_won = jay_predictions['home_win'] == 0

# The code treats a negative spread as "home team favored" and a positive one as
# "away team favored". Games with no line, a zero line or no decided winner match
# none of these masks and keep favorite_win = NaN.
for flag, mask in ((1, (line < 0) & home_won),
                   (1, (line > 0) & away_won),
                   (0, (line > 0) & home_won),
                   (0, (line < 0) & away_won)):
    jay_predictions.loc[mask, 'favorite_win'] = flag

# Wins, losses and percentage all use games that actually had a favorite and a
# result. The loss count was previously taken from home_points.count(), which
# counted games without a favorite as losses while the percentage excluded them.
favorite_wins = jay_predictions['favorite_win'].sum(skipna=True)
favorite_games = jay_predictions['favorite_win'].count()

print('Picking Favorites Record: ', round(favorite_wins), '-',
      round(favorite_games - favorite_wins),
      ' (', round(favorite_wins / favorite_games, 3), ')', sep="")

# Model record over every game with a recorded home score.
games_played = jay_predictions['home_points'].count()
model_wins = jay_predictions['correct_pick_jay'].sum(skipna=True)

print('jayMo Predictions Record: ', round(model_wins), '-',
      round(games_played - model_wins),
      ' (', round(model_wins / games_played, 3), ')', sep="")

Picking Favorites Record: 511-202 (0.717)
jayMo Predictions Record: 496-217 (0.696)


In [16]:
# Add in AP rankings data: one left merge for the home team, one for the away
# team, so unranked teams keep their game row with empty ranking columns.

# Step 1: home team's ranking row for the same season and week.
with_home_ranking = jay_predictions.merge(
    rankings_2024_df,
    left_on=['season', 'week', 'home_team'],
    right_on=['year', 'week', 'school'],
    how='left',
)

# Step 2: away team's ranking row. The ranking columns now exist on both sides,
# so they are disambiguated with the '_home' / '_away' suffixes.
jay_predictions_rankings = with_home_ranking.merge(
    rankings_2024_df,
    left_on=['season', 'week', 'away_team'],
    right_on=['year', 'week', 'school'],
    how='left',
    suffixes=('_home', '_away'),
)

# Remove the merge-key / bookkeeping columns. Column names are passed directly to
# drop(); the previous code passed a DataFrame slice, which only worked because
# iterating a frame yields its column names and built a throwaway copy to do it.
jay_predictions_rankings = jay_predictions_rankings.drop(
    columns=['year_home', 'poll_home', 'school_home',
             'year_away', 'poll_away', 'school_away'])


In [17]:
# Add in (cumulative) records data for both teams, then confirm that the
# rankings/records merges did not change the number of game rows.

# Step 1: home team's record row for the same season and week.
with_home_record = jay_predictions_rankings.merge(
    records_2024_df,
    left_on=['season', 'week', 'home_team'],
    right_on=['season', 'week', 'team'],
    how='left',
)

# Step 2: away team's record row; shared record columns get '_home' / '_away'.
jay_predictions_records = with_home_record.merge(
    records_2024_df,
    left_on=['season', 'week', 'away_team'],
    right_on=['season', 'week', 'team'],
    how='left',
    suffixes=('_home', '_away'),
)

# Remove helper columns that are not exported. Names are passed directly to
# drop() instead of a DataFrame slice (which built a throwaway copy just to
# supply the labels).
jay_predictions_records = jay_predictions_records.drop(
    columns=['team_home', 'home_win2_home', 'home_loss2_home', 'away_win2_home',
             'away_loss2_home', 'running_wins_home', 'running_losses_home',
             'team_away', 'home_win2_away', 'home_loss2_away', 'away_win2_away',
             'away_loss2_away', 'running_wins_away', 'running_losses_away',
             'week_lag_home', 'week_lag_away'])

# jay_predictions_records = jay_predictions_records.drop([1,6,40])

# Left merges never remove rows, but a non-unique key in the rankings or records
# data would duplicate games. Equal lengths (True) means neither happened.
print(len(jay_predictions) == len(jay_predictions_records))

True


In [18]:
# Export the enriched predictions as the CSV that powers the Shiny app during
# the offseason. The row index is omitted from the file.
export_path = 'jay_predictions.csv'
jay_predictions_records.to_csv(export_path, index=False)