In [1]:
import pandas as pd
import datetime as dt
import random as rnd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

In [2]:
# Load the game log; column 0 of the CSV is parsed as dates.
# Forward slashes keep this relative path usable on Windows, macOS and Linux
# (the previous backslash form only resolved on Windows).
elo_df = pd.read_csv('data/elo_data_set.csv', parse_dates=[0])
# Distinct team codes, in order of first appearance in the file.
teams = elo_df['team'].unique().tolist()
elo_df.head()

Unnamed: 0,datetime,team,season,opp,day_week,week,W/L,ot,record,home/away,...,opp_pass_yards,opp_rush_yards,opp_turnover,offense_expected_points,defense_expected_points,sp_expected_points,team_elo_before,team_elo_after,opp_elo_before,opp_elo_after
0,2002-09-05 20:38:00,NYG,2002,SFO,Thu,1,0.0,0.0,0-1,HOME,...,166.0,113.0,1.0,-4.72,-0.79,3.46,1500.0,1482.5,1500.0,1517.5
1,2002-09-08 13:04:00,CAR,2002,BAL,Sun,1,1.0,0.0,1-0,HOME,...,212.0,77.0,1.0,-4.38,6.68,4.2,1500.0,1517.5,1500.0,1482.5
2,2002-09-08 13:04:00,CHI,2002,MIN,Sun,1,1.0,0.0,1-0,HOME,...,228.0,140.0,3.0,6.37,-1.2,1.01,1500.0,1517.5,1500.0,1482.5
3,2002-09-08 13:04:00,CLE,2002,KAN,Sun,1,0.0,0.0,0-1,HOME,...,276.0,194.0,1.0,15.18,-17.94,1.15,1500.0,1482.5,1500.0,1517.5
4,2002-09-08 13:04:00,BUF,2002,NYJ,Sun,1,0.0,1.0,0-1,HOME,...,193.0,73.0,1.0,3.16,0.59,-7.81,1500.0,1482.5,1500.0,1517.5


In [3]:
# Separate the games already played from the upcoming week to predict.
# One timestamp is taken up front so every comparison uses the same "now".
now = dt.datetime.today()

# NOTE(review): the season label is compared with the calendar year. If a
# season's games run into the following calendar year they will not be
# selected here -- confirm how 'season' is labelled for such games.
season_df = elo_df[elo_df['season'] == now.year]
history_df = elo_df[elo_df['datetime'] < now]
future_df = season_df[season_df['datetime'] > now]

# The week to predict is the one after the last row of history
# (assumes elo_df is stored in chronological order).
upcoming_week = history_df['week'].iloc[-1] + 1
prediction_input_columns = [
    'datetime', 'team', 'opp', 'team_score', 'opp_score',
    'team_elo_before', 'opp_elo_before', 'W/L',
]
# .copy() makes future_df an independent frame, so the columns added and
# filled in by later cells are written to it and not to a slice of elo_df.
future_df = future_df.loc[
    future_df['week'] == upcoming_week, prediction_input_columns
].copy()
print(f'Week to predict : {upcoming_week}')

Week to predict : 8


In [4]:
# Generates a dictionary of dataframes for points gained/allowed per team.
# This allows convenient generation of the distributions fed into the game
# simulations. Each frame has one row per played game with:
#   'team_off' - points the team scored
#   'team_def' - points the team allowed
#   'elo'      - the team's Elo rating before that game
# NOTE(review): because 'elo' is the pre-game rating, its last entry does not
# include the result of the most recent game; presumably the *_elo_after
# columns carry the updated value -- confirm which one predictions should use.
team_data_dict = {}
for team in teams:
    # A team can be on either side of a game row.
    team_games = history_df[(history_df['team'] == team) | (history_df['opp'] == team)]
    listed_first = (team_games['team'] == team).to_numpy()
    # Take each value from the 'team_*' or the 'opp_*' column depending on the
    # side this team is on. Building a fresh frame, instead of mutating the
    # filtered slice in place, avoids SettingWithCopyWarning.
    team_data_dict[team] = pd.DataFrame({
        'team_off': np.where(listed_first, team_games['team_score'], team_games['opp_score']),
        'team_def': np.where(listed_first, team_games['opp_score'], team_games['team_score']),
        'elo': np.where(listed_first, team_games['team_elo_before'], team_games['opp_elo_before']),
    })

len(team_data_dict)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


32

In [5]:
# Games to be predicted: renumber the rows from 0 and add empty (NaN) result
# columns that the simulation loop fills in.
prediction_columns = [
    'team_elo_prediction%',
    'opp_elo_prediction%',
    'team_gauss_prediction%',
    'opp_gauss_prediction%',
    'tie_gauss_prediction%',
]
# reset_index(drop=True) discards the old index directly, and assign() returns
# a new frame, so nothing is mutated in place on a slice of another frame.
future_df = (
    future_df
    .reset_index(drop=True)
    .assign(**{column: np.nan for column in prediction_columns})
)
future_df

Unnamed: 0,datetime,team,opp,team_score,opp_score,team_elo_before,opp_elo_before,W/L,team_elo_prediction%,opp_elo_prediction%,team_gauss_prediction%,opp_gauss_prediction%,tie_gauss_prediction%
0,2020-10-29 20:20:00,CAR,ATL,,,1420.351117,1451.007231,,,,,,
1,2020-11-01 13:00:00,MIA,LAR,,,1434.532342,1603.925264,,,,,,
2,2020-11-01 13:00:00,BAL,PIT,,,1685.26657,1629.930316,,,,,,
3,2020-11-01 13:00:00,CIN,TEN,,,1282.024705,1616.33714,,,,,,
4,2020-11-01 13:00:00,CLE,LVR,,,1424.263489,1447.441514,,,,,,
5,2020-11-01 13:00:00,BUF,NWE,,,1537.921187,1610.302984,,,,,,
6,2020-11-01 13:00:00,KAN,NYJ,,,1743.292439,1318.121744,,,,,,
7,2020-11-01 13:00:00,GNB,MIN,,,1644.620252,1515.378678,,,,,,
8,2020-11-01 13:00:00,DET,IND,,,1400.851537,1487.687626,,,,,,
9,2020-11-01 16:05:00,DEN,LAC,,,1456.027121,1427.976243,,,,,,


In [6]:
# functions to simulate games

def elo_expected(teamA, teamB):
    """Return the Elo win expectancies ``[expected_A, expected_B]``.

    Each team's rating is the last entry of the 'elo' column in its
    ``team_data_dict`` frame. The expectancy uses the logistic Elo formula
    with a 400-point scale.
    """
    latest_a, latest_b = (
        team_data_dict[name]['elo'].iloc[-1] for name in (teamA, teamB)
    )
    # Rating gap from teamA's point of view, in units of the 400-point scale.
    gap = (latest_b - latest_a) / 400
    return [1 / (1 + 10 ** gap), 1 / (1 + 10 ** -gap)]

def elo_game_sim(teamA, teamB):
    """Simulate a single game from the two teams' Elo win expectancies.

    A uniform number in [0, 1) is drawn; teamA wins when the draw is below
    its expectancy, otherwise teamB wins.

    Returns:
        ``[1, 0]`` when teamA wins, ``[0, 1]`` when teamB wins.
    """
    win_values = elo_expected(teamA, teamB)
    # The two expectancies sum to 1, so one comparison against teamA's value
    # gives each side the right win probability. It also covers equal ratings
    # (0.5 / 0.5), for which the previous two-branch version returned None.
    if rnd.random() < win_values[0]:
        return [1, 0]
    return [0, 1]

def elo_series_sim(teamA, teamB, runs):
    """Simulate ``runs`` Elo games between the two teams and tally the wins.

    Returns:
        ``[teamA_wins, teamB_wins]``.
    """
    teamA_wins = 0
    teamB_wins = 0
    # Only the tallies are needed, so individual game results are not stored.
    for _ in range(runs):
        if elo_game_sim(teamA, teamB)[0] == 1:
            teamA_wins += 1
        else:
            teamB_wins += 1
    return [teamA_wins, teamB_wins]

def gauss_game_sim(teamA, teamB):
    """Simulate one game by sampling scores from normal distributions.

    Each team's score is the average of two Gaussian draws: one from its own
    'team_off' column and one from the opponent's 'team_def' column, each
    using that column's mean and standard deviation in ``team_data_dict``.
    Scores are rounded to whole points before being compared.

    Returns:
        1 if teamA scores more, -1 if teamB scores more, 0 on equal scores.
    """
    def draw(team, column):
        # One sample from the normal distribution fitted to this column.
        values = team_data_dict[team][column]
        return rnd.gauss(values.mean(), values.std())

    score_a = (draw(teamA, 'team_off') + draw(teamB, 'team_def')) / 2
    score_b = (draw(teamB, 'team_off') + draw(teamA, 'team_def')) / 2
    points_a = int(round(score_a))
    points_b = int(round(score_b))
    if points_a > points_b:
        return 1
    if points_a < points_b:
        return -1
    return 0

def gauss_series_sim(teamA, teamB, runs):
    """Simulate ``runs`` Gaussian-score games and tally the outcomes.

    Returns:
        ``[teamA_wins, teamB_wins, ties]``.
    """
    teamA_wins = 0
    teamB_wins = 0
    ties = 0
    # Only the tallies are needed, so individual game results are not stored.
    for _ in range(runs):
        outcome = gauss_game_sim(teamA, teamB)
        if outcome == 1:
            teamA_wins += 1
        elif outcome == -1:
            teamB_wins += 1
        else:
            ties += 1
    return [teamA_wins, teamB_wins, ties]

In [7]:
# Run both simulations for every upcoming game and store each outcome's share
# of the simulated games in future_df.
num_sims = 50000
for i in future_df.index:
    teamA = future_df.at[i, 'team']
    teamB = future_df.at[i, 'opp']

    # Writes use single-step .at[row, column] indexing so the values land in
    # future_df itself. Chained future_df[column][i] = value assignment
    # triggers SettingWithCopyWarning and is not guaranteed to update the frame.
    elo_result = elo_series_sim(teamA, teamB, num_sims)
    elo_total = sum(elo_result)
    future_df.at[i, 'team_elo_prediction%'] = elo_result[0] / elo_total
    future_df.at[i, 'opp_elo_prediction%'] = elo_result[1] / elo_total

    gauss_result = gauss_series_sim(teamA, teamB, num_sims)
    gauss_total = sum(gauss_result)
    future_df.at[i, 'team_gauss_prediction%'] = gauss_result[0] / gauss_total
    future_df.at[i, 'opp_gauss_prediction%'] = gauss_result[1] / gauss_total
    future_df.at[i, 'tie_gauss_prediction%'] = gauss_result[2] / gauss_total

    print(f'{teamA} | {teamB}')
    print(f'elo_result: {elo_result}')
    print(f'gaussian_result: {gauss_result}')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFram

CAR | ATL
elo_result: [21542, 28458]
gaussian_result: [23604, 24507, 1889]
MIA | LAR
elo_result: [13752, 36248]
gaussian_result: [24805, 23212, 1983]
BAL | PIT
elo_result: [29390, 20610]
gaussian_result: [23518, 24402, 2080]
CIN | TEN
elo_result: [6126, 43874]
gaussian_result: [24176, 23889, 1935]
CLE | LVR
elo_result: [21162, 28838]
gaussian_result: [24404, 23609, 1987]
BUF | NWE
elo_result: [17719, 32281]
gaussian_result: [14445, 33943, 1612]
KAN | NYJ
elo_result: [45648, 4352]
gaussian_result: [27369, 20792, 1839]
GNB | MIN
elo_result: [31909, 18091]
gaussian_result: [27086, 20950, 1964]
DET | IND
elo_result: [18058, 31942]
gaussian_result: [17908, 30296, 1796]
DEN | LAC
elo_result: [28272, 21728]
gaussian_result: [22204, 25783, 2013]
SEA | SFO
elo_result: [32667, 17333]
gaussian_result: [28192, 19876, 1932]
CHI | NOR
elo_result: [18880, 31120]
gaussian_result: [20406, 27619, 1975]
PHI | DAL
elo_result: [26880, 23120]
gaussian_result: [26014, 22002, 1984]
NYG | TAM
elo_result: [1395

In [8]:
future_df

Unnamed: 0,datetime,team,opp,team_score,opp_score,team_elo_before,opp_elo_before,W/L,team_elo_prediction%,opp_elo_prediction%,team_gauss_prediction%,opp_gauss_prediction%,tie_gauss_prediction%
0,2020-10-29 20:20:00,CAR,ATL,,,1420.351117,1451.007231,,0.43084,0.56916,0.47208,0.49014,0.03778
1,2020-11-01 13:00:00,MIA,LAR,,,1434.532342,1603.925264,,0.27504,0.72496,0.4961,0.46424,0.03966
2,2020-11-01 13:00:00,BAL,PIT,,,1685.26657,1629.930316,,0.5878,0.4122,0.47036,0.48804,0.0416
3,2020-11-01 13:00:00,CIN,TEN,,,1282.024705,1616.33714,,0.12252,0.87748,0.48352,0.47778,0.0387
4,2020-11-01 13:00:00,CLE,LVR,,,1424.263489,1447.441514,,0.42324,0.57676,0.48808,0.47218,0.03974
5,2020-11-01 13:00:00,BUF,NWE,,,1537.921187,1610.302984,,0.35438,0.64562,0.2889,0.67886,0.03224
6,2020-11-01 13:00:00,KAN,NYJ,,,1743.292439,1318.121744,,0.91296,0.08704,0.54738,0.41584,0.03678
7,2020-11-01 13:00:00,GNB,MIN,,,1644.620252,1515.378678,,0.63818,0.36182,0.54172,0.419,0.03928
8,2020-11-01 13:00:00,DET,IND,,,1400.851537,1487.687626,,0.36116,0.63884,0.35816,0.60592,0.03592
9,2020-11-01 16:05:00,DEN,LAC,,,1456.027121,1427.976243,,0.56544,0.43456,0.44408,0.51566,0.04026
