In [1]:
import time
import multiprocessing as mp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Switch matplotlib to its 'fivethirtyeight' style sheet for the figures drawn below.
plt.style.use('fivethirtyeight')

from nfl_pickem import Pickem

In [None]:
# Create the Pickem helper, print the data path it is configured with, pull the
# data, and build the schedule frame `df` that the plotting cell below reads.
# NOTE(review): Pickem is a project class; the plotting cell expects `df` to
# expose at least team, week, win_prob, elo_prob1 and result1 — confirm.
pk = Pickem()
print(pk.file_path)
pk.pull_data()
df = pk.build_schedule()

In [None]:
# One team's weekly win probability relative to its elo_prob1 value, with the
# game outcomes overlaid as markers.
team = 'NE'
cond = df.team == team

# Filter once and reuse the subset instead of re-indexing df for every series.
team_games = df[cond]
weeks = team_games.week.values
won = team_games[team_games.result1 > 0.9]
not_won = team_games[team_games.result1 < 0.9]

plt.plot(weeks,
         team_games.win_prob.values - team_games.elo_prob1.values,
         label=team+' Win Prob')

# 0.3*result1 - 0.15 places result1 == 1 at +0.15 and result1 == 0 at -0.15,
# so the markers sit on the same axis as the probability difference.
for outcome, outcome_label, colour in ((won, 'Win', 'C3'),
                                       (not_won, 'Lose/Tie', 'C2')):
    plt.scatter(outcome.week.values,
                0.3*outcome.result1.values - 0.15,
                label=outcome_label, c=colour)

plt.ylabel('Win Prob Difference (from Week 1 ELO)')
plt.xlabel('Week')
plt.xticks(weeks, weeks)
plt.legend(loc='lower right')
plt.show()

## Historical Validation

In [None]:
# Load the saved strategy-analysis results and report (rows, columns).
# Note: this rebinds `df`; from here on it no longer refers to the schedule
# frame built at the top of the notebook.
df = pd.read_csv('results/strategy_analysis.csv')
print(df.shape)

In [None]:
# Elimination week versus forecast horizon, one line per season.
# Sorting first makes every season's line run left-to-right in max_week.
df = df.sort_values(by=['season', 'max_week'])

# sort=False keeps the groups in order of first appearance, which after the
# sort above is ascending season — the same order the legend had before.
for season, season_rows in df.groupby('season', sort=False):
    plt.plot(season_rows.max_week.values,
             season_rows.elim_week.values,
             label=season)

# Label the axes with the same wording as the aggregate figures later in this
# section so the plot can be read on its own.
plt.xlabel('Max Forecast Distance')
plt.ylabel('Elimination Week')
plt.title('Pool Elimination by Season')
plt.legend(title='Season')
plt.show()

In [None]:
# Mean and standard deviation of elim_week for each max_week value.
ew = (
    df.groupby('max_week')['elim_week']
      .agg(['mean', 'std'])
      .reset_index()
)

horizon = ew.max_week.values
avg = ew['mean'].values
spread = ew['std'].values

# Line through the means, then the same points again with +/- one std bars.
plt.plot(horizon, avg)
plt.errorbar(horizon, avg, yerr=spread, elinewidth=2)

plt.xticks(horizon, horizon)
plt.xlabel('Max Forecast Distance')
plt.ylabel('Expected Elimination Week')
plt.title('Pool Elimination (1997-2017)')
plt.show()

In [None]:
# Mean and standard deviation of the `correct` column for each max_week value.
ew = (
    df.groupby('max_week')['correct']
      .agg(['mean', 'std'])
      .reset_index()
)

horizon = ew.max_week.values
avg = ew['mean'].values
spread = ew['std'].values

# Line through the means, then the same points again with +/- one std bars.
plt.plot(horizon, avg)
plt.errorbar(horizon, avg, yerr=spread, elinewidth=2)

plt.xticks(horizon, horizon)
plt.xlabel('Max Forecast Distance')
plt.ylabel('Expected Correct Picks')
plt.title('Pick Success (1997-2017)')
plt.show()

## Checking New Model

In [26]:
# Fresh Pickem helper, redirected to a local nfl_elo.csv by overriding
# file_path before pulling; the last line displays the first rows of the
# loaded data held in pk.data_.
# NOTE(review): the path is relative, so this cell depends on the directory
# the notebook is launched from — confirm or move it to a config constant.
pk = Pickem()
pk.file_path = '../nfl-pickem/data/nfl_elo.csv'
pk.pull_data()
pk.data_.head()

Data successfully pulled!
Seasons 1920-2019: 16541 Games


Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,...,qb1_game_value,qb2_game_value,qb1_value_post,qb2_value_post,qbelo1_post,qbelo2_post,score1,score2,weekday,week
0,1920-09-26,1920,0,,RII,STP,1503.947,1300.0,0.824651,0.175349,...,,,,,,,48.0,0.0,6,1
1,1920-10-03,1920,0,,DAY,COL,1493.002,1504.908,0.575819,0.424181,...,,,,,,,14.0,0.0,6,2
2,1920-10-03,1920,0,,RII,MUN,1516.108,1478.004,0.644171,0.355829,...,,,,,,,45.0,0.0,6,2
3,1920-10-03,1920,0,,CHI,MUT,1368.333,1300.0,0.682986,0.317014,...,,,,,,,20.0,0.0,6,2
4,1920-10-03,1920,0,,CBD,PTQ,1504.688,1300.0,0.825267,0.174733,...,,,,,,,48.0,0.0,6,2


In [15]:
# Rebuild `df` from the helper for season 2019, passing qb_elo_model=True,
# then show the first rows of two QB-related columns.
df = pk.build_schedule(season=2019, qb_elo_model=True)
qb_cols = ['qbelo1_pre', 'qb1_adj']
df[qb_cols].head()

Unnamed: 0,qbelo1_pre,qb1_adj
0,,5.972962
1,,-9.249038
2,,-0.985091
3,,3.412303
4,,10.920207


In [17]:
# Same two columns as the cell above, but without .head(), so the whole frame
# is displayed.
# NOTE(review): the recorded output here shows qb1_adj as NaN while the cell
# above shows numeric values, and the execution counts in this section are out
# of order — presumably `df` was rebuilt between runs; confirm with a
# Restart & Run All.
df[['qbelo1_pre', 'qb1_adj']]

Unnamed: 0,qbelo1_pre,qb1_adj
0,,
1,,
2,,
3,,
4,,
5,,
6,,
7,,
8,,
9,,


In [6]:
# Hand check of the logistic rating-to-probability formula
#     p = 1 / (1 + 10**(-d / 400))
# for a single rating gap d.
# NOTE(review): the two ratings and the 65-point offset are typed in by hand;
# their source rows are not shown in this notebook — confirm.
rating_gap = 1598.852911 - 1335.76766 - 65
1/(1 + 10**(-rating_gap/400))

0.7577292749282232

In [33]:
# Rating, QB-value and probability columns to compare side by side.
cols = [
    'date', 'neutral', 'team1', 'team2',
    'elo1_pre', 'elo2_pre',
    'qbelo1_pre', 'qbelo2_pre',
    'qb1_value_pre', 'qb2_value_pre',
    'qb1_adj', 'qb2_adj',
    'qbelo_prob1', 'qbelo_prob2',
    'qbelo1_post', 'qbelo2_post',
]

# First ten 2019 rows in which GB appears as either team1 or team2.
in_2019 = df.season == 2019
gb_plays = (df.team1 == 'GB') | (df.team2 == 'GB')
df.loc[in_2019 & gb_plays, cols].head(10)

Unnamed: 0,date,neutral,team1,team2,elo1_pre,elo2_pre,qbelo1_pre,qbelo2_pre,qb1_value_pre,qb2_value_pre,qb1_adj,qb2_adj,qbelo_prob1,qbelo_prob2,qbelo1_post,qbelo2_post
16274,2019-09-08,0,CHI,GB,1588.897931,1455.13137,,,174.958269,191.502166,5.972962,2.87846,0.640182,0.359818,,
16291,2019-09-15,0,GB,MIN,1455.13137,1538.423954,,,191.502166,175.325153,2.87846,0.36782,0.554531,0.445469,,
16307,2019-09-22,0,GB,DEN,1455.13137,1449.164195,,,191.502166,146.884985,2.87846,2.159314,0.664569,0.335431,,
16322,2019-09-29,0,GB,PHI,1455.13137,1581.513559,,,191.502166,183.509193,2.87846,2.600803,0.49161,0.50839,,
16349,2019-10-06,0,DAL,GB,1546.637605,1455.13137,,,181.381832,191.502166,4.648537,2.87846,0.618724,0.381276,,
16365,2019-10-13,0,GB,DET,1455.13137,1490.519409,,,191.502166,141.238569,2.87846,-4.964783,0.665872,0.334128,,
16371,2019-10-20,0,GB,OAK,1455.13137,1405.440618,,,191.502166,137.090067,2.87846,-3.553793,0.728441,0.271559,,
16393,2019-10-27,0,KC,GB,1602.077427,1455.13137,,,221.811811,191.502166,6.970182,2.87846,0.702843,0.297157,,
16406,2019-11-03,0,LAC,GB,1585.751918,1455.13137,,,167.622048,191.502166,-2.392168,2.87846,0.666445,0.333555,,
16416,2019-11-10,0,GB,CAR,1455.13137,1519.378772,,,191.502166,177.316184,2.87846,1.455572,0.592488,0.407512,,


In [31]:
# Hand check of p = 1 / (1 + 10**(-d / 400)) with extra terms added to the
# rating gap. 1588.897931 and 1455.13137 equal elo1_pre / elo2_pre of the
# CHI-GB row in the table above.
# NOTE(review): 55, 174, 191 and 4*185.5/1000 are typed in by hand —
# presumably a fixed offset, rounded QB values and a scaled extra term;
# confirm. The result does not reproduce that row's qbelo_prob1.
rating_gap = 1588.897931+55-1455.13137+174-191+4*185.5/1000
1/(1 + 10**(-1*rating_gap/400))

0.7296877838848084

In [24]:
# List the column labels currently present in `df`.
df.columns

Index(['date', 'season', 'neutral', 'playoff', 'team1', 'team2', 'elo1_pre',
       'elo2_pre', 'elo_prob1', 'elo_prob2', 'elo1_post', 'elo2_post',
       'qbelo1_pre', 'qbelo2_pre', 'qb1', 'qb2', 'qb1_value_pre',
       'qb2_value_pre', 'qb1_adj', 'qb2_adj', 'qbelo_prob1', 'qbelo_prob2',
       'qb1_game_value', 'qb2_game_value', 'qb1_value_post', 'qb2_value_post',
       'qbelo1_post', 'qbelo2_post', 'score1', 'score2', 'weekday', 'week'],
      dtype='object')

In [None]:
# Invert p = 1 / (1 + 10**(-d / 400)) for the stored qbelo_prob1 at position
# 15751 to get the rating gap d it implies, subtract the hand-entered terms,
# and rescale the remainder by 1000/4.
# NOTE(review): the literals and the row position are typed in by hand;
# which game they belong to is not shown here — confirm.
prob = df.qbelo_prob1.iloc[15751]
implied_gap = -400*np.log10(1/prob - 1)
known_terms = 1592.676645+55-1588.11685+18.210896+2.857484
(implied_gap - known_terms)*1000/4

In [None]:
# Applies the 4/1000 scaling (the inverse of the *1000/4 in the cell above)
# to 2581.
# NOTE(review): 2581 is presumably a value read off an earlier result — confirm.
2581*4/1000

In [None]:
# Show the stored qbelo_prob1 at position 15751, the same row position used in
# the inverse-formula cell above.
df.qbelo_prob1.iloc[15751]

In [None]:
# For every 2017 game, invert p = 1 / (1 + 10**(-d / 400)) on elo_prob1 to get
# the rating gap it implies, then subtract the raw elo1_pre - elo2_pre gap.
# What is left (elo_adj) is shown next to the `neutral` flag.
#
# .copy() makes `tmp` an independent frame, so adding a column does not write
# onto a slice of `df` (which raises SettingWithCopyWarning). Every term now
# comes from `tmp`, so the calculation no longer relies on index alignment
# between the full frame and the 2017 subset.
tmp = df[df.season == 2017].copy()
tmp['elo_adj'] = (-400*np.log10(1/tmp['elo_prob1'] - 1)
                  - (tmp['elo1_pre'] - tmp['elo2_pre']))
tmp[['neutral', 'elo_adj']]