In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
gamedata_df = pd.read_csv('/home/welced12/git/football_analytics/data/espn_gamedata2009-2017.csv')

In [3]:
gamedata_df.tail(5)

Unnamed: 0,gameId,result,season,week,home,away,winner,home_score,away_score,OT
2301,400951748,"BUF 22, MIA 16",2017,17,,,unknown,unknown,unknown,unknown
2302,400951791,"LAC 30, OAK 10",2017,17,OAK,LAC,LAC,10,30,0
2303,400951793,"ARI 26, SEA 24",2017,17,ARI,SEA,ARI,26,24,0
2304,400951764,"TB 31, NO 24",2017,17,NO,TB,TB,24,31,0
2305,400951739,"CIN 31, BAL 27",2017,17,CIN,BAL,CIN,31,27,0


In [4]:
teams = gamedata_df['home'].unique()
teams

array(['TEN', 'MIA', 'DEN', 'MIN', 'JAX', 'DET', 'DAL', 'PHI', 'KC',
       'NYJ', 'WSH', 'SF', 'STL', 'CHI', 'BUF', 'SD', 'CAR', 'CIN', 'HOU',
       'OAK', 'NE', 'NO', 'ARI', 'TB', 'SEA', 'PIT', 'CLE', 'BAL', 'NYG',
       'IND', 'GB', 'ATL', nan, 'LAR', 'LAC'], dtype=object)

In [5]:
# Define some functions that will be part of ELO ratings
def win_prob(home_elo,away_elo):
    exponent = (home_elo + 65 - away_elo)/400
    home_prob = 1 / ( 10**( -1 * exponent ) + 1 )
    away_prob = 1 / ( 10**(  1 * exponent ) + 1 )
    return (home_prob,away_prob)

In [6]:
win_prob(1629,1631)

(0.5896835031399501, 0.41031649686004995)

In [7]:
win_prob(1423,1413)

(0.6062878237854281, 0.3937121762145718)

In [8]:
def update_elo(home_elo,away_elo,home_score,away_score):
    # Make sure everything is floats
    if home_score == 'unknown':
        return (0,0)
    
    home_elo = float(home_elo)
    away_elo = float(away_elo)
    home_score = float(home_score)
    away_score = float(away_score)
    
    # Get probabilities of home/away wins
    p_hw, p_aw = win_prob(home_elo,away_elo)
    
    # Given actual score, figure out how to update ratings
    
    # Big K is a scaling factor for adjustments based on margin of victory
    big_k = 20 * math.log1p( math.fabs(home_score - away_score) ) \
            * 2.2 / (2.2 + 0.001*math.fabs( p_hw - p_aw ))
    
    home_win = 0
    away_win = 1
    if home_score > away_score:
        home_win = 1
        away_win = 0
    elif home_score == away_score:
        home_win = 0.5
        away_win = 0.5
    
    home_adjustment = big_k * (home_win - p_hw)
    away_adjustment = big_k * (away_win - p_aw)
    
    return (home_adjustment,away_adjustment)

In [9]:
update_elo(1629,1631,20,17)

(11.375461470854237, -11.375461470854239)

In [10]:
update_elo(1423,1413,7,33)

(-39.96065401087477, 39.96065401087478)

In [11]:
# Set up dataframe to keep track of ELO history for each team
teams = [x for x in gamedata_df['home'].unique() if str(x) != 'nan']
startval = 1500
start_dict = {(2009,1):{tm:startval for tm in teams}}
elo_hist = pd.DataFrame( start_dict )
elo_hist.head(5)

Unnamed: 0_level_0,2009
Unnamed: 0_level_1,1
ARI,1500
ATL,1500
BAL,1500
BUF,1500
CAR,1500


In [12]:
def get_elo(team,year,week):
    if isinstance(team, str):
        return elo_hist.loc[team,(year,week)]
    if math.isnan(team):
        return 0

# Try and update each team based on games in a week.
def update_for_week(year,week):
    relevant_games = gamedata_df.loc[ (gamedata_df['season'] == year) &
                                      (gamedata_df['week'] == week) ]
#    print(relevant_games)

    # make dict for weekly elo change
    elo_delta = {}
    
    for index in relevant_games.index.values:
        game = relevant_games.loc[index]
#        print(game)
        home = game['home']
        away = game['away']
        home_score = game['home_score']
        away_score = game['away_score']
        
        # pull elo ratings from elo history
        home_elo = get_elo(home,year,week)
        away_elo = get_elo(away,year,week)
        
        # Run function to calculate change in elo ratings after the week
        elo_change = update_elo(home_elo,
                                away_elo,
                                home_score,
                                away_score )
        
        if not home_elo == 0:
            elo_delta[home] = elo_change[0]
            elo_delta[away] = elo_change[1]
        
    return elo_delta

In [13]:
# Try running the function for a particular week
for wk in range(1,18):
    
    # Add column for this week's rating adjustment
    wkly_update = update_for_week(2009,wk)
    deltaname = str(wk)+'d'
    elo_hist[(2009,deltaname)] = pd.Series(wkly_update)
    # Make adjustment for teams that didn't play zero
    elo_hist.fillna(0, inplace=True)
    
    # Make new column for next week's rating
    elo_hist[(2009,wk+1)] = elo_hist[(2009,wk)] + elo_hist[(2009,deltaname)]
    # Drop column for rating adjustment
    elo_hist.drop( (2009,deltaname), axis=1, inplace=True )

elo_hist.head(5)

Unnamed: 0_level_0,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009
Unnamed: 0_level_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
ARI,1500,1486.883097,1508.961224,1485.071398,1485.071398,1511.015703,1537.210078,1555.419196,1524.871258,1548.220232,1572.778873,1580.644402,1560.921191,1598.54641,1557.908318,1563.394003,1578.177895,1554.522625
ATL,1500,1530.390363,1552.815534,1514.120058,1514.120058,1548.157929,1572.657843,1535.643867,1517.397883,1542.509004,1509.786971,1492.922243,1504.982829,1483.152018,1479.317422,1492.460136,1526.767944,1538.08692
BAL,1500,1532.085869,1545.754017,1576.807128,1550.953701,1538.770775,1527.424577,1527.424577,1572.663498,1543.351485,1553.835703,1547.319745,1564.460391,1532.792451,1555.558232,1583.370256,1564.140164,1571.404229
BUF,1500,1491.787364,1522.074534,1500.482246,1454.544394,1441.335955,1455.543885,1475.788861,1454.584257,1454.584257,1414.758541,1400.237789,1442.472147,1429.612659,1442.981592,1434.014287,1399.706479,1454.950739
CAR,1500,1472.556572,1450.131401,1422.853829,1422.853829,1441.436417,1455.442728,1435.197752,1465.74569,1451.385668,1484.107701,1467.62657,1438.411555,1462.186138,1441.963216,1491.632151,1522.8215,1564.555977


In [14]:
for yr in range(2010,2018):
    # Try running the function for a particular week
    for wk in range(1,18):
        
        # if week 1, pull rating from last week of previous season
        # and regress to mean by a third once everything else works
        if wk == 1:
            elo_hist[(yr,1)] = elo_hist[(yr-1,18)] \
                               - 0.3 * (elo_hist[(yr-1,18)] - 1400)
            
#        print(elo_hist.head(5))
    
        # Add column for this week's rating adjustment
        wkly_update = update_for_week(yr,wk)
        deltaname = str(wk)+'d'
        elo_hist[(yr,deltaname)] = pd.Series(wkly_update)
        # Make adjustment for teams that didn't play zero
        elo_hist.fillna(0, inplace=True)
    
        # Make new column for next week's rating
        elo_hist[(yr,wk+1)] = elo_hist[(yr,wk)] + elo_hist[(yr,deltaname)]
        # Drop column for rating adjustment
        elo_hist.drop( (yr,deltaname), axis=1, inplace=True )

elo_hist.head(5)

Unnamed: 0_level_0,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,...,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017
Unnamed: 0_level_1,1,2,3,4,5,6,7,8,9,10,...,9,10,11,12,13,14,15,16,17,18
ARI,1500,1486.883097,1508.961224,1485.071398,1485.071398,1511.015703,1537.210078,1555.419196,1524.871258,1548.220232,...,1352.651744,1361.824563,1352.752232,1323.503373,1343.384249,1329.366767,1353.887967,1332.443219,1364.244791,1377.486433
ATL,1500,1530.390363,1552.815534,1514.120058,1514.120058,1548.157929,1572.657843,1535.643867,1517.397883,1542.509004,...,1483.707635,1465.070743,1506.053408,1517.609096,1537.347832,1523.651146,1540.809915,1545.060974,1516.451941,1546.926422
BAL,1500,1532.085869,1545.754017,1576.807128,1550.953701,1538.770775,1527.424577,1527.424577,1572.663498,1543.351485,...,1402.336828,1383.880746,1383.880746,1416.025681,1437.914314,1476.116404,1469.925582,1474.263724,1484.902386,1465.153492
BUF,1500,1491.787364,1522.074534,1500.482246,1454.544394,1441.335955,1455.543885,1475.788861,1454.584257,1454.584257,...,1475.353055,1434.940388,1412.618396,1370.540076,1395.597263,1387.062021,1402.743764,1426.826311,1407.354069,1407.354069
CAR,1500,1472.556572,1450.131401,1422.853829,1422.853829,1441.436417,1455.442728,1435.197752,1465.74569,1451.385668,...,1423.881593,1442.518484,1472.038839,1472.038839,1483.240118,1457.096065,1487.662677,1509.04896,1517.633195,1487.158714


In [15]:
elo_hist.loc[:,(2017)]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
ARI,1441.08726,1407.999169,1417.216299,1401.152602,1410.295485,1374.546081,1396.589241,1352.651744,1352.651744,1361.824563,1352.752232,1323.503373,1343.384249,1329.366767,1353.887967,1332.443219,1364.244791,1377.486433
ATL,1495.540438,1501.614953,1532.177249,1542.046453,1519.854176,1519.854176,1502.943916,1474.977327,1483.707635,1465.070743,1506.053408,1517.609096,1537.347832,1523.651146,1540.809915,1545.060974,1516.451941,1546.926422
BAL,1388.879001,1417.887614,1434.657618,1377.42077,1363.010291,1389.164594,1372.317067,1352.73972,1402.336828,1383.880746,1383.880746,1416.025681,1437.914314,1476.116404,1469.925582,1474.263724,1484.902386,1465.153492
BUF,1387.492055,1412.77489,1391.653209,1427.895951,1450.088228,1428.505646,1428.505646,1442.588036,1475.353055,1434.940388,1412.618396,1370.540076,1395.597263,1387.062021,1402.743764,1426.826311,1407.354069,1407.354069
CAR,1437.151502,1448.002793,1469.124474,1434.13667,1450.940377,1462.697276,1449.223916,1405.493407,1423.881593,1442.518484,1472.038839,1472.038839,1483.240118,1457.096065,1487.662677,1509.04896,1517.633195,1487.158714
CHI,1267.428625,1261.354111,1236.181264,1270.897692,1253.600991,1248.132136,1264.979664,1308.710174,1292.831524,1292.831524,1283.512573,1277.583684,1264.072994,1256.989415,1293.694455,1273.145205,1292.903105,1280.968807
CIN,1437.479569,1408.470956,1392.403285,1379.73616,1391.588639,1413.17122,1413.17122,1387.123425,1393.154598,1357.09674,1338.431405,1351.122807,1366.464819,1361.677238,1324.972199,1305.119146,1339.565096,1359.313991
CLE,1216.820255,1213.637593,1196.867588,1186.174413,1174.321934,1169.035828,1152.740181,1147.542177,1142.618903,1142.618903,1130.999031,1124.905767,1109.563756,1102.375353,1098.67787,1094.339727,1074.581827,1072.237808
DAL,1493.819343,1522.869519,1480.550523,1496.61422,1474.190794,1462.954503,1462.954503,1471.377756,1489.208417,1525.585305,1484.60264,1464.017985,1436.345008,1472.236111,1483.677224,1491.642477,1472.273874,1494.681418
DEN,1466.66221,1480.688767,1523.007763,1486.765021,1507.561781,1507.561781,1474.070815,1430.436208,1412.225048,1383.995607,1374.012129,1361.320727,1337.348697,1294.140696,1338.348064,1352.638606,1320.527509,1316.381635


In [16]:
import json

In [17]:
with open("elo_history.json","w") as f:
    json.dump( elo_hist.to_json(orient='index'), f )