In [10]:
import pandas as pd
import numpy as np
import math

In [2]:
gamedata_df = pd.read_csv('/home/welced12/git/football_analytics/data/espn_gamedata2009-2016.csv')

In [3]:
gamedata_df.head(5)

Unnamed: 0,gameId,result,season,week,home,away,winner,home_score,away_score,OT
0,290910023,"PIT 13, TEN 10 (OT)",2009,1,TEN,PIT,PIT,10,13,1
1,290913001,"ATL 19, MIA 7",2009,1,MIA,ATL,ATL,7,19,0
2,290913004,"DEN 12, CIN 7",2009,1,DEN,CIN,DEN,12,7,0
3,290913005,"MIN 34, CLE 20",2009,1,MIN,CLE,MIN,34,20,0
4,290913011,"IND 14, JAX 12",2009,1,JAX,IND,IND,12,14,0


In [4]:
teams = gamedata_df['home'].unique()
teams

array(['TEN', 'MIA', 'DEN', 'MIN', 'JAX', 'DET', 'DAL', 'PHI', 'KC', 'NYJ',
       'WSH', 'SF', 'STL', 'CHI', 'BUF', 'SD', 'CAR', 'CIN', 'HOU', 'OAK',
       'NE', 'NO', 'ARI', 'TB', 'SEA', 'PIT', 'CLE', 'BAL', 'NYG', 'IND',
       'GB', 'ATL', nan, 'LAR'], dtype=object)

In [7]:
# Define some functions that will be part of ELO ratings
def win_prob(home_elo,away_elo):
    exponent = (home_elo + 65 - away_elo)/400
    home_prob = 1 / ( 10**( -1 * exponent ) + 1 )
    away_prob = 1 / ( 10**(  1 * exponent ) + 1 )
    return (home_prob,away_prob)

In [8]:
win_prob(1629,1631)

(0.5896835031399501, 0.41031649686004995)

In [9]:
win_prob(1423,1413)

(0.6062878237854281, 0.3937121762145718)

In [75]:
def update_elo(home_elo,away_elo,home_score,away_score):
    # Make sure everything is floats
    if home_score == 'unknown':
        return (0,0)
    
    home_elo = float(home_elo)
    away_elo = float(away_elo)
    home_score = float(home_score)
    away_score = float(away_score)
    
    # Get probabilities of home/away wins
    p_hw, p_aw = win_prob(home_elo,away_elo)
    
    # Given actual score, figure out how to update ratings
    
    # Big K is a scaling factor for adjustments based on margin of victory
    big_k = 20 * math.log1p( math.fabs(home_score - away_score) ) \
            * 2.2 / (2.2 + 0.001*math.fabs( p_hw - p_aw ))
    
    home_win = 0
    away_win = 1
    if home_score > away_score:
        home_win = 1
        away_win = 0
    elif home_score == away_score:
        home_win = 0.5
        away_win = 0.5
    
    home_adjustment = big_k * (home_win - p_hw)
    away_adjustment = big_k * (away_win - p_aw)
    
    return (home_adjustment,away_adjustment)

In [12]:
update_elo(1629,1631,20,17)

(11.375461470854237, -11.375461470854239)

In [13]:
update_elo(1423,1413,7,33)

(-39.96065401087477, 39.96065401087478)

In [120]:
# Set up dataframe to keep track of ELO history for each team
teams = [x for x in gamedata_df['home'].unique() if str(x) != 'nan']
startval = 1500
start_dict = {(2009,1):{tm:startval for tm in teams}}
elo_hist = pd.DataFrame( start_dict )
elo_hist.head(5)

Unnamed: 0_level_0,2009
Unnamed: 0_level_1,1
ARI,1500
ATL,1500
BAL,1500
BUF,1500
CAR,1500


In [121]:
def get_elo(team,year,week):
    if isinstance(team, str):
        return elo_hist.loc[team,(year,week)]
    if math.isnan(team):
        return 0

# Try and update each team based on games in a week.
def update_for_week(year,week):
    relevant_games = gamedata_df.loc[ (gamedata_df['season'] == year) &
                                      (gamedata_df['week'] == week) ]
#    print(relevant_games)

    # make dict for weekly elo change
    elo_delta = {}
    
    for index in relevant_games.index.values:
        game = relevant_games.loc[index]
#        print(game)
        home = game['home']
        away = game['away']
        home_score = game['home_score']
        away_score = game['away_score']
        
        # pull elo ratings from elo history
        home_elo = get_elo(home,year,week)
        away_elo = get_elo(away,year,week)
        
        # Run function to calculate change in elo ratings after the week
        elo_change = update_elo(home_elo,
                                away_elo,
                                home_score,
                                away_score )
        
        if not home_elo == 0:
            elo_delta[home] = elo_change[0]
            elo_delta[away] = elo_change[1]
        
    return elo_delta

In [123]:
# Try running the function for a particular week
for wk in range(1,18):
    
    # Add column for this week's rating adjustment
    wkly_update = update_for_week(2009,wk)
    deltaname = str(wk)+'d'
    elo_hist[(2009,deltaname)] = pd.Series(wkly_update)
    # Make adjustment for teams that didn't play zero
    elo_hist.fillna(0, inplace=True)
    
    # Make new column for next week's rating
    elo_hist[(2009,wk+1)] = elo_hist[(2009,wk)] + elo_hist[(2009,deltaname)]
    # Drop column for rating adjustment
    elo_hist.drop( (2009,deltaname), axis=1, inplace=True )

elo_hist.head(5)

Unnamed: 0_level_0,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009
Unnamed: 0_level_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
ARI,1500,1486.883097,1508.961224,1485.071398,1485.071398,1511.015703,1537.210078,1555.419196,1524.871258,1548.220232,1572.778873,1580.644402,1560.921191,1598.54641,1557.908318,1563.394003,1578.177895,1554.522625
ATL,1500,1530.390363,1552.815534,1514.120058,1514.120058,1548.157929,1572.657843,1535.643867,1517.397883,1542.509004,1509.786971,1492.922243,1504.982829,1483.152018,1479.317422,1492.460136,1526.767944,1538.08692
BAL,1500,1532.085869,1545.754017,1576.807128,1550.953701,1538.770775,1527.424577,1527.424577,1572.663498,1543.351485,1553.835703,1547.319745,1564.460391,1532.792451,1555.558232,1583.370256,1564.140164,1571.404229
BUF,1500,1491.787364,1522.074534,1500.482246,1454.544394,1441.335955,1455.543885,1475.788861,1454.584257,1454.584257,1414.758541,1400.237789,1442.472147,1429.612659,1442.981592,1434.014287,1399.706479,1454.950739
CAR,1500,1472.556572,1450.131401,1422.853829,1422.853829,1441.436417,1455.442728,1435.197752,1465.74569,1451.385668,1484.107701,1467.62657,1438.411555,1462.186138,1441.963216,1491.632151,1522.8215,1564.555977


In [124]:
for yr in range(2010,2017):
    # Try running the function for a particular week
    for wk in range(1,18):
        
        # if week 1, pull rating from last week of previous season
        # and regress to mean by a third once everything else works
        if wk == 1:
            elo_hist[(yr,1)] = elo_hist[(yr-1,18)] \
                               - 0.3 * (elo_hist[(yr-1,18)] - 1400)
            
#        print(elo_hist.head(5))
    
        # Add column for this week's rating adjustment
        wkly_update = update_for_week(yr,wk)
        deltaname = str(wk)+'d'
        elo_hist[(yr,deltaname)] = pd.Series(wkly_update)
        # Make adjustment for teams that didn't play zero
        elo_hist.fillna(0, inplace=True)
    
        # Make new column for next week's rating
        elo_hist[(yr,wk+1)] = elo_hist[(yr,wk)] + elo_hist[(yr,deltaname)]
        # Drop column for rating adjustment
        elo_hist.drop( (yr,deltaname), axis=1, inplace=True )

elo_hist.head(5)

Unnamed: 0_level_0,2009,2009,2009,2009,2009,2009,2009,2009,2009,2009,...,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016
Unnamed: 0_level_1,1,2,3,4,5,6,7,8,9,10,...,9,10,11,12,13,14,15,16,17,18
ARI,1500,1486.883097,1508.961224,1485.071398,1485.071398,1511.015703,1537.210078,1555.419196,1524.871258,1548.220232,...,1500.369704,1500.369704,1507.312556,1481.504875,1442.315179,1468.236997,1448.416526,1426.392103,1442.933837,1457.729357
ATL,1500,1530.390363,1552.815534,1514.120058,1514.120058,1548.157929,1572.657843,1535.643867,1517.397883,1542.509004,...,1449.688256,1462.787121,1435.76311,1435.76311,1474.952807,1470.629286,1484.884556,1498.014571,1520.470571,1537.448476
BAL,1500,1532.085869,1545.754017,1576.807128,1550.953701,1538.770775,1527.424577,1527.424577,1572.663498,1543.351485,...,1342.066652,1374.983236,1393.935002,1373.479405,1397.184031,1440.490633,1425.603583,1433.108483,1418.702426,1384.631545
BUF,1500,1491.787364,1522.074534,1500.482246,1454.544394,1441.335955,1455.543885,1475.788861,1454.584257,1454.584257,...,1471.473554,1452.285502,1452.285502,1464.741536,1474.005704,1441.313261,1428.245858,1440.934031,1428.537942,1382.17817
CAR,1500,1472.556572,1450.131401,1422.853829,1422.853829,1441.436417,1455.442728,1435.197752,1465.74569,1451.385668,...,1482.579864,1489.870898,1480.526669,1492.767913,1474.660542,1441.832775,1466.518256,1484.213494,1461.757495,1452.134479


In [125]:
elo_hist.loc[:,(2016)]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
ARI,1550.264805,1540.191447,1560.751961,1516.72942,1498.410209,1508.377487,1534.112145,1534.112145,1500.369704,1500.369704,1507.312556,1481.504875,1442.315179,1468.236997,1448.416526,1426.392103,1442.933837,1457.729357
ATL,1385.399866,1363.320938,1379.522215,1398.080638,1440.101601,1465.648853,1455.646348,1440.373877,1449.688256,1462.787121,1435.76311,1435.76311,1474.952807,1470.629286,1484.884556,1498.014571,1520.470571,1537.448476
BAL,1376.005975,1402.007645,1410.353011,1414.674843,1407.654588,1391.202501,1369.669053,1342.066652,1342.066652,1374.983236,1393.935002,1373.479405,1397.184031,1440.490633,1425.603583,1433.108483,1418.702426,1384.631545
BUF,1432.628143,1406.626473,1391.263326,1435.285868,1469.286105,1486.349991,1509.500801,1487.235149,1471.473554,1452.285502,1452.285502,1464.741536,1474.005704,1441.313261,1428.245858,1440.934031,1428.537942,1382.17817
CAR,1541.795875,1533.467375,1555.365749,1531.394379,1489.373416,1470.168138,1448.837423,1448.837423,1482.579864,1489.870898,1480.526669,1492.767913,1474.660542,1441.832775,1466.518256,1484.213494,1461.757495,1452.134479
CHI,1332.997708,1311.536827,1297.099312,1269.52029,1290.290492,1270.371018,1264.284289,1247.761776,1290.448386,1290.448386,1252.072426,1237.695592,1223.870303,1259.505238,1250.965548,1246.83721,1233.990958,1209.82511
CIN,1514.535391,1518.486217,1493.6647,1477.45311,1498.873191,1459.726716,1431.799947,1446.456547,1446.456547,1446.456547,1437.391689,1424.935656,1401.231029,1439.334808,1445.020391,1435.276544,1420.881758,1454.952638
CLE,1285.489264,1259.971999,1251.626633,1231.928656,1212.748159,1206.888869,1194.604678,1179.948078,1174.786943,1168.229472,1149.277706,1143.63391,1137.122216,1137.122216,1131.436633,1118.74846,1141.827394,1138.435717
DAL,1346.075996,1340.635721,1355.699747,1383.278769,1397.273789,1436.420264,1464.269923,1464.269923,1490.194562,1496.752032,1510.337574,1530.793171,1548.308964,1554.790444,1533.344826,1550.528571,1577.506256,1534.4482
DEN,1535.758888,1544.087388,1562.752447,1578.964037,1585.987265,1560.440012,1522.893156,1550.763828,1565.697945,1527.062548,1532.060329,1532.060329,1520.923028,1525.004798,1501.473061,1487.965771,1456.631007,1495.313977
