In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3

import warnings
# Notebook-wide configuration.
# Silences every warning category for the rest of the session (this includes
# pandas deprecation and chained-assignment warnings).
warnings.simplefilter("ignore")

# Show all DataFrame columns instead of eliding the middle ones.
pd.options.display.max_columns = None

seed = 99

In [2]:
# Load the full Elo table, then keep only rows with year_id 2014 that are
# not flagged as playoff games.
nba_all_elo = pd.read_csv('nbaallelo.csv')
is_2014 = nba_all_elo['year_id'] == 2014
is_not_playoffs = nba_all_elo['is_playoffs'] == 0
test_elo_season = nba_all_elo[is_2014 & is_not_playoffs]
test_elo_season.head()

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,pts,elo_i,elo_n,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
121054,60528,201310290IND,NBA,1,2014,10/29/2013,1,0,ORL,Magic,87,1313.2317,1311.5338,22.26676,IND,Pacers,97,1583.5721,1585.27,A,L,0.106039,
121055,60528,201310290IND,NBA,0,2014,10/29/2013,1,0,IND,Pacers,97,1583.5721,1585.27,49.365368,ORL,Magic,87,1313.2317,1311.5338,H,W,0.893961,
121056,60529,201310290LAL,NBA,0,2014,10/29/2013,1,0,LAL,Lakers,116,1511.02,1522.1942,42.866776,LAC,Clippers,103,1586.5884,1575.4142,H,W,0.535102,
121057,60529,201310290LAL,NBA,1,2014,10/29/2013,1,0,LAC,Clippers,103,1586.5884,1575.4142,48.369659,LAL,Lakers,116,1511.02,1522.1942,A,L,0.464898,
121058,60530,201310290MIA,NBA,1,2014,10/29/2013,1,0,CHI,Bulls,95,1519.9124,1516.6088,42.281761,MIA,Heat,107,1691.8896,1695.1932,A,L,0.172841,


In [3]:
def calc_K(MOV, elo_diff_winner):
    """Return the K-factor used to scale a single game's Elo update.

    Parameters
    ----------
    MOV : number
        Margin of victory (winner's score minus loser's score).
    elo_diff_winner : number
        Winner's Elo minus loser's Elo; a larger value shrinks K.

    Returns
    -------
    float
        20 * (MOV + 3)**0.8 / (7.5 + 0.006 * elo_diff_winner).

    NOTE(review): the constants look like FiveThirtyEight's NBA
    margin-of-victory multiplier -- confirm against the intended source.
    """
    margin_term = (MOV + 3) ** 0.8
    rating_gap_term = 7.5 + 0.006 * (elo_diff_winner)
    return 20 * (margin_term / rating_gap_term)

def new_season_elo_adj(elo):
    """Return the rating a team carries into a new season.

    The result keeps 75% of `elo` and takes the remaining 25% from a fixed
    baseline of 1505.
    """
    carried_over = 0.75 * elo
    baseline_share = 0.25 * 1505
    return carried_over + baseline_share

def update_elo(away_team, away_elo, away_score, home_team, home_elo, home_score,
               home_adv=100):
    """Return the post-game Elo ratings for the away and home team.

    Parameters
    ----------
    away_team, home_team
        Team identifiers. Kept for call-site readability; they do not enter
        the computation.
    away_elo, home_elo : number
        Pre-game ratings, without any home-court adjustment.
    away_score, home_score : number
        Final scores. Equal scores are treated as a home win.
    home_adv : number, default 100
        Rating points added to the home team when computing the expected
        results and the K-factor. It is not added to the returned ratings.

    Returns
    -------
    tuple
        (away_elo_new, home_elo_new)
    """
    # The home-court bonus only influences expectations and K; the new
    # ratings below are built from the unadjusted inputs.
    home_elo_adj = home_elo + home_adv

    if away_score > home_score:
        S_away, S_home = 1, 0
        MOV = away_score - home_score
        elo_diff_winner = away_elo - home_elo_adj
    else:
        S_away, S_home = 0, 1
        MOV = home_score - away_score
        elo_diff_winner = home_elo_adj - away_elo

    # Expected results from the logistic Elo curve (400-point scale).
    E_away = 1 / (1 + 10**((home_elo_adj - away_elo) / 400))
    E_home = 1 / (1 + 10**((away_elo - home_elo_adj) / 400))

    K = calc_K(MOV=MOV, elo_diff_winner=elo_diff_winner)

    away_elo_new = K * (S_away - E_away) + away_elo
    home_elo_new = K * (S_home - E_home) + home_elo

    return away_elo_new, home_elo_new

In [4]:
# Rows with seasongame == 1; `elo_i` on these rows is used below as each
# team's starting rating.
first_game_mask = test_elo_season['seasongame'] == 1
# Relabel CHA as CHO on a fresh frame. Calling `replace(..., inplace=True)` on
# a column selected from a filtered frame is a chained in-place update: it is
# not guaranteed to reach `first_game_df` and does nothing under pandas
# Copy-on-Write.
first_game_df = test_elo_season.loc[first_game_mask].assign(
    team_id=lambda frame: frame['team_id'].replace('CHA', 'CHO')
)
elo_df = first_game_df[['team_id', 'elo_i']]
elo_df.head()

Unnamed: 0,team_id,elo_i
121054,ORL,1313.2317
121055,IND,1583.5721
121056,LAL,1511.02
121057,LAC,1586.5884
121058,CHI,1519.9124


In [5]:
# Open the SQLite box-score database; the next cell reads from it via `conn`.
db_path = 'NBA-Boxscore-Database.sqlite'
conn = sqlite3.connect(db_path)

In [6]:
# Read every row of the game_info table into a DataFrame.
query = 'SELECT * from game_info'
game_info = pd.read_sql(query, con=conn)
# Relabel CHA as CHO in both team columns. The result is assigned back
# explicitly: `game_info[col].replace(..., inplace=True)` mutates a temporary
# column object and does not update `game_info` under pandas Copy-on-Write.
team_cols = ['away_team', 'home_team']
game_info[team_cols] = game_info[team_cols].replace('CHA', 'CHO')
game_info

Unnamed: 0,game_id,season,date,away_team,away_score,home_team,home_score,result
0,131410290001,1314,2013-10-29,ORL,87,IND,97,1
1,131410290002,1314,2013-10-29,CHI,95,MIA,107,1
2,131410290003,1314,2013-10-29,LAC,103,LAL,116,1
3,131410300004,1314,2013-10-30,BRK,94,CLE,98,1
4,131410300005,1314,2013-10-30,BOS,87,TOR,93,1
...,...,...,...,...,...,...,...,...
11974,222304091226,2223,2023-04-09,UTA,117,LAL,128,1
11975,222304091227,2223,2023-04-09,NOP,108,MIN,113,1
11976,222304091228,2223,2023-04-09,MEM,100,OKC,115,1
11977,222304091229,2223,2023-04-09,LAC,119,PHO,114,0


In [7]:
# elo_dict[team][season] is the running list of that team's ratings in that
# season; season 1314 is seeded with the team's `elo_i` from elo_df.
season_keys = game_info['season'].unique()
elo_dict = {}
for tid in elo_df['team_id']:
    opening_elo = elo_df.loc[elo_df['team_id'] == tid, 'elo_i'].iloc[0]
    ratings_by_season = {season: [] for season in season_keys}
    ratings_by_season[1314].append(opening_elo)
    elo_dict[tid] = ratings_by_season

In [8]:
seasons = list(game_info['season'].unique())


def _latest_elo(team, season):
    """Return `team`'s most recent rating in `season`, seeding the season if empty.

    When the team has no rating yet for `season`, its last rating from the
    preceding entry of `seasons` is passed through `new_season_elo_adj` and
    stored as the first rating of `season`.
    """
    history = elo_dict[team][season]
    if not history:
        season_pos = seasons.index(season)
        if season_pos == 0:
            # Index -1 would otherwise silently wrap around to the last season.
            raise ValueError(
                'no starting Elo for {!r} in first season {!r}'.format(team, season)
            )
        prev_history = elo_dict[team][seasons[season_pos - 1]]
        history.append(new_season_elo_adj(prev_history[-1]))
    return history[-1]


# Walk the games in frame order, carrying each team's rating forward.
# Results are collected per row and written to the frame once at the end;
# element-wise `game_info[col][idx] = value` is chained assignment and does
# not write through under pandas Copy-on-Write.
game_cols = ['season', 'away_team', 'away_score', 'home_team', 'home_score']
elo_cols = ['away_elo_i', 'away_elo_n', 'home_elo_i', 'home_elo_n']
elo_rows = []

for curr_season, away_team, away_score, home_team, home_score in (
        game_info[game_cols].itertuples(index=False, name=None)):

    away_elo_initial = _latest_elo(away_team, curr_season)
    home_elo_initial = _latest_elo(home_team, curr_season)

    away_elo_new, home_elo_new = update_elo(away_team=away_team,
                                            away_elo=away_elo_initial,
                                            away_score=away_score,
                                            home_team=home_team,
                                            home_elo=home_elo_initial,
                                            home_score=home_score)

    elo_rows.append((away_elo_initial, away_elo_new, home_elo_initial, home_elo_new))

    elo_dict[away_team][curr_season].append(away_elo_new)
    elo_dict[home_team][curr_season].append(home_elo_new)

elo_frame = pd.DataFrame(elo_rows, columns=elo_cols, index=game_info.index)
for col in elo_cols:
    game_info[col] = elo_frame[col]

In [9]:
# Predict the winner from pre-game ratings: 0 = away team, 1 = home team.
# NOTE(review): this compares raw ratings, whereas update_elo adds a
# home-court bonus to the home rating before computing expectations --
# confirm that ignoring the bonus here is intended.
away_rating = game_info['away_elo_i'].astype(float)
home_rating = game_info['home_elo_i'].astype(float)

# Nullable integer column: equal ratings stay <NA> rather than None, so the
# later `result - elo_pred` subtraction does not raise. Built as a standalone
# Series and assigned once, avoiding chained `game_info[col][idx] = value`.
elo_pred = pd.Series(pd.NA, index=game_info.index, dtype='Int64')
elo_pred[away_rating > home_rating] = 0
elo_pred[away_rating < home_rating] = 1
game_info['elo_pred'] = elo_pred

game_info.to_csv('game_info_elo.csv')
game_info.head()

Unnamed: 0,game_id,season,date,away_team,away_score,home_team,home_score,result,away_elo_i,away_elo_n,home_elo_i,home_elo_n,elo_pred
0,131410290001,1314,2013-10-29,ORL,87,IND,97,1,1313.2317,1311.533869,1583.5721,1585.269931,1
1,131410290002,1314,2013-10-29,CHI,95,MIA,107,1,1519.9124,1516.608783,1691.8896,1695.193217,1
2,131410290003,1314,2013-10-29,LAC,103,LAL,116,1,1586.5884,1575.414216,1511.02,1522.194184,0
3,131410300004,1314,2013-10-30,BRK,94,CLE,98,1,1548.4365,1540.172399,1369.2823,1377.546401,0
4,131410300005,1314,2013-10-30,BOS,87,TOR,93,1,1484.9615,1479.784941,1483.9835,1489.160059,0


In [10]:
# Fraction of games whose prediction equals the recorded result
# (a difference of zero means they match).
diff = game_info['result'] - game_info['elo_pred']
n_correct = len(diff[diff == 0])
n_games = len(diff)
accuracy = n_correct / n_games
accuracy

0.6437098255280074