In [10]:
import pandas as pd
import numpy as np
import math

In [2]:
gamedata_df = pd.read_csv('/home/welced12/git/football_analytics/data/espn_gamedata2009-2016.csv')

In [3]:
gamedata_df.head(5)

Unnamed: 0,gameId,result,season,week,home,away,winner,home_score,away_score,OT
0,290910023,"PIT 13, TEN 10 (OT)",2009,1,TEN,PIT,PIT,10,13,1
1,290913001,"ATL 19, MIA 7",2009,1,MIA,ATL,ATL,7,19,0
2,290913004,"DEN 12, CIN 7",2009,1,DEN,CIN,DEN,12,7,0
3,290913005,"MIN 34, CLE 20",2009,1,MIN,CLE,MIN,34,20,0
4,290913011,"IND 14, JAX 12",2009,1,JAX,IND,IND,12,14,0


In [4]:
teams = gamedata_df['home'].unique()
teams

array(['TEN', 'MIA', 'DEN', 'MIN', 'JAX', 'DET', 'DAL', 'PHI', 'KC', 'NYJ',
       'WSH', 'SF', 'STL', 'CHI', 'BUF', 'SD', 'CAR', 'CIN', 'HOU', 'OAK',
       'NE', 'NO', 'ARI', 'TB', 'SEA', 'PIT', 'CLE', 'BAL', 'NYG', 'IND',
       'GB', 'ATL', nan, 'LAR'], dtype=object)

In [7]:
# Define some functions that will be part of ELO ratings
def win_prob(home_elo,away_elo):
    exponent = (home_elo + 65 - away_elo)/400
    home_prob = 1 / ( 10**( -1 * exponent ) + 1 )
    away_prob = 1 / ( 10**(  1 * exponent ) + 1 )
    return (home_prob,away_prob)

In [8]:
win_prob(1629,1631)

(0.5896835031399501, 0.41031649686004995)

In [9]:
win_prob(1423,1413)

(0.6062878237854281, 0.3937121762145718)

In [42]:
def update_elo(home_elo,away_elo,home_score,away_score):
    # Make sure everything is floats
    home_elo = float(home_elo)
    away_elo = float(away_elo)
    home_score = float(home_score)
    away_score = float(away_score)
    
    # Get probabilities of home/away wins
    p_hw, p_aw = win_prob(home_elo,away_elo)
    
    # Given actual score, figure out how to update ratings
    
    # Big K is a scaling factor for adjustments based on margin of victory
    big_k = 20 * math.log1p( math.fabs(home_score - away_score) ) \
            * 2.2 / (2.2 + 0.001*math.fabs( p_hw - p_aw ))
    
    home_win = 0
    away_win = 1
    if home_score > away_score:
        home_win = 1
        away_win = 0
    elif home_score == away_score:
        home_win = 0.5
        away_win = 0.5
    
    home_adjustment = big_k * (home_win - p_hw)
    away_adjustment = big_k * (away_win - p_aw)
    
    return (home_adjustment,away_adjustment)

In [12]:
update_elo(1629,1631,20,17)

(11.375461470854237, -11.375461470854239)

In [13]:
update_elo(1423,1413,7,33)

(-39.96065401087477, 39.96065401087478)

In [39]:
# Set up dataframe to keep track of ELO history for each team
teams = [x for x in gamedata_df['home'].unique() if str(x) != 'nan']
startval = 1400
start_dict = {(2009,1):{tm:startval for tm in teams}}
elo_hist = pd.DataFrame( start_dict )
elo_hist.head(5)

Unnamed: 0_level_0,2009
Unnamed: 0_level_1,1
ARI,1400
ATL,1400
BAL,1400
BUF,1400
CAR,1400


In [37]:
gamedata_df.set_index('gameId')
gamedata_df

Unnamed: 0,gameId,result,season,week,home,away,winner,home_score,away_score,OT
0,290910023,"PIT 13, TEN 10 (OT)",2009,1,TEN,PIT,PIT,10,13,1
1,290913001,"ATL 19, MIA 7",2009,1,MIA,ATL,ATL,7,19,0
2,290913004,"DEN 12, CIN 7",2009,1,DEN,CIN,DEN,12,7,0
3,290913005,"MIN 34, CLE 20",2009,1,MIN,CLE,MIN,34,20,0
4,290913011,"IND 14, JAX 12",2009,1,JAX,IND,IND,12,14,0
5,290913018,"NO 45, DET 27",2009,1,DET,NO,NO,27,45,0
6,290913027,"DAL 34, TB 21",2009,1,DAL,TB,DAL,34,21,0
7,290913029,"PHI 38, CAR 10",2009,1,PHI,CAR,PHI,38,10,0
8,290913033,"BAL 38, KC 24",2009,1,KC,BAL,BAL,24,38,0
9,290913034,"NYJ 24, HOU 7",2009,1,NYJ,HOU,NYJ,24,7,0


In [43]:
def get_elo(team,year,week):
    return elo_hist.loc[team,[year,week]]

# Try and update each team based on games in a week.
def update_for_week(year,week):
    relevant_games = gamedata_df.loc[ (gamedata_df['season'] == year) &
                                      (gamedata_df['week'] == week) ]
#    print(relevant_games)

    # make dict for weekly elo change
    elo_delta = {}
    
    for index in relevant_games.index.values:
        game = relevant_games.loc[index]
        print(game)
        home = game['home']
        away = game['away']
        home_score = game['home_score']
        away_score = game['away_score']
        
        # pull elo ratings from elo history
        home_elo = get_elo(home,year,week)
        away_elo = get_elo(away,year,week)
        
        # Run function to calculate change in elo ratings after the week
        elo_change = update_elo(home_elo,
                                away_elo,
                                home_score,
                                away_score )
        
        elo_delta[home] = elo_change[0]
        elo_delta[away] = elo_change[1]
        
    print(elo_delta)
        
# Try running the function for a particular week        
update_for_week(2009,1)

gameId                  290910023
result        PIT 13, TEN 10 (OT)
season                       2009
week                            1
home                          TEN
away                          PIT
winner                        PIT
home_score                     10
away_score                     13
OT                              1
Name: 0, dtype: object
gameId            290913001
result        ATL 19, MIA 7
season                 2009
week                      1
home                    MIA
away                    ATL
winner                  ATL
home_score                7
away_score               19
OT                        0
Name: 1, dtype: object
gameId            290913004
result        DEN 12, CIN 7
season                 2009
week                      1
home                    DEN
away                    CIN
winner                  DEN
home_score               12
away_score                7
OT                        0
Name: 2, dtype: object
gameId             290913005
re