In [1]:
names_to_ids = {"St. Louis Cardinals":"ARI","Arizona Cardinals":"ARI","Atlanta Falcons":"ATL",
                "Baltimore Ravens":"BAL","Buffalo Bills":"BUF","Carolina Panthers":"CAR","Chicago Bears":"CHI",
               "Cincinnati Bengals":"CIN","Cleveland Browns":"CLE","Dallas Cowboys":"DAL","Denver Broncos":"DEN",
               "Detroit Lions":"DET","Green Bay Packers":"GB","Houston Texans":"HOU","Baltimore Colts":"IND",
               "Indianapolis Colts":"IND","Jacksonville Jaguars":"JAX","Kansas City Chiefs":"KC",
               "San Diego Chargers":"LAC","Los Angeles Chargers":"LAC","St. Louis Rams":"LAR",
               "Los Angeles Rams":"LAR","Miami Dolphins":"MIA","Minnesota Vikings":"MIN","Boston Patriots":"NE",
               "New England Patriots":"NE","New Orleans Saints":"NO","New York Giants":"NYG","New York Jets":"NYJ",
               "Oakland Raiders":"OAK","Los Angeles Raiders":"OAK","Philadelphia Eagles":"PHI",
                "Pittsburgh Steelers":"PIT","San Francisco 49ers":"SF","Seattle Seahawks":"SEA",
                "Tampa Bay Buccaneers":"TB","Houston Oilers":"TEN","Tennessee Titans":"TEN",
                "Washington Redskins":"WAS","Phoenix Cardinals":"ARI","Tennessee Oilers":"TEN"}

In [2]:
import pandas as pd
import datetime as dt

vegas = pd.read_csv("nfl-scores-and-betting-data/spreadspoke_scores.csv")

# only keep rows where there is a betting line
vegas = vegas[(vegas["spread_favorite"] == vegas["spread_favorite"]) & 
             (vegas["over_under_line"] == vegas["over_under_line"])]
vegas = vegas.drop(["schedule_season","schedule_week","schedule_playoff","stadium", "stadium_neutral",
            "weather_temperature","weather_wind_mph","weather_humidity","weather_detail"],axis=1)
# change team names to ids
vegas["team_home"] = vegas["team_home"].apply(lambda x : names_to_ids[x])
vegas["team_away"] = vegas["team_away"].apply(lambda x : names_to_ids[x])
vegas["home_favorite"] = vegas.apply(lambda x : 1 if x["team_favorite_id"] == x["team_home"] else -1, axis=1)
vegas["fixed_spread"] = vegas.apply(lambda x : x["spread_favorite"] * x["home_favorite"], axis=1)

# more cleaning
vegas["game_info"] = vegas.apply(lambda x : x["schedule_date"] + " " + x["team_home"] + "-" + x["team_away"],axis=1)

vegas.to_csv("nfl-scores-and-betting-data/cleaned_spreads.csv")

In [3]:
vegas.head(10)

Unnamed: 0,schedule_date,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,home_favorite,fixed_spread,game_info
350,01/14/1968,GB,33.0,14.0,OAK,GB,-13.5,43,1,-13.5,01/14/1968 GB-OAK
538,01/12/1969,IND,7.0,16.0,NYJ,IND,-18.0,40,1,-18.0,01/12/1969 IND-NYJ
727,01/11/1970,KC,23.0,7.0,MIN,MIN,-12.0,39,-1,12.0,01/11/1970 KC-MIN
916,01/17/1971,IND,16.0,13.0,DAL,IND,-2.5,36,1,-2.5,01/17/1971 IND-DAL
1105,01/16/1972,DAL,24.0,3.0,MIA,DAL,-6.0,34,1,-6.0,01/16/1972 DAL-MIA
1294,01/14/1973,MIA,14.0,7.0,WAS,MIA,-1.0,33,1,-1.0,01/14/1973 MIA-WAS
1483,01/13/1974,MIA,24.0,7.0,MIN,MIA,-6.5,33,1,-6.5,01/13/1974 MIA-MIN
1672,01/12/1975,MIN,6.0,16.0,PIT,PIT,-3.0,33,-1,3.0,01/12/1975 MIN-PIT
1861,01/18/1976,DAL,17.0,21.0,PIT,PIT,-7.0,36,-1,7.0,01/18/1976 DAL-PIT
2064,01/09/1977,MIN,14.0,32.0,OAK,OAK,-4.0,38,-1,4.0,01/09/1977 MIN-OAK


In [5]:
import math

elo = pd.read_csv("nfl_elo.csv")

# clean data
elo["date_asdate"] = [dt.datetime.strptime(x,"%Y-%m-%d") for x in elo["date"]]
elo = elo[elo["date_asdate"] >= dt.datetime(1979,1,1)]
cols_to_drop = [1,2,3,10,11,14,15,16,17,18,19,22,23,24,25,26,27]
elo.drop(elo.columns[cols_to_drop],axis=1,inplace=True)

# get spreads
#elo["elo_spread"] = elo.apply(lambda x : -(x["elo1_pre"] - x["elo2_pre"])/25,axis=1)
#elo["qb_spread"] = elo.apply(lambda x : -(x["qbelo1_pre"] - x["qbelo2_pre"])/25,axis=1)
elo["elo_spread"] = elo.apply(lambda x : 400*math.log10(1/x["elo_prob1"]-1)/25,axis=1)
elo["qb_spread"] = elo.apply(lambda x : 400*math.log10(1/x["qbelo_prob1"]-1)/25,axis=1)

elo["game_info"] = elo.apply(lambda x : x["date_asdate"].strftime("%m/%d/%Y") + " " + str(x["team1"]) + "-" + 
                             str(x["team2"]), axis=1)

elo.to_csv("cleaned_nfl_elo.csv")

In [6]:
elo.head(10)

Unnamed: 0,date,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,qbelo1_pre,qbelo2_pre,qbelo_prob1,qbelo_prob2,score1,score2,date_asdate,elo_spread,qb_spread,game_info
6319,1979-01-07,PIT,TEN,1728.898,1615.116,0.736751,0.263249,1728.632527,1624.599635,0.738451,0.261549,34.0,5.0,1979-01-07,-7.15128,-7.21229,01/07/1979 PIT-TEN
6320,1979-01-07,LAR,DAL,1635.352,1732.774,0.453476,0.546524,1629.763058,1739.859373,0.404952,0.595048,0.0,28.0,1979-01-07,1.29688,2.674362,01/07/1979 LAR-DAL
6321,1979-01-21,PIT,DAL,1745.459,1762.871,0.474963,0.525037,1745.07643,1766.326906,0.45965,0.54035,35.0,31.0,1979-01-21,0.69648,1.123978,01/21/1979 PIT-DAL
6322,1979-09-01,TB,DET,1385.204,1487.039,0.447188,0.552812,1382.928035,1501.886317,0.427168,0.572832,31.0,16.0,1979-09-01,1.4734,2.038847,09/01/1979 TB-DET
6323,1979-09-02,LAR,OAK,1571.837,1543.108,0.631707,0.368293,1570.53035,1554.746347,0.617516,0.382484,17.0,24.0,1979-09-02,-3.74916,-3.328562,09/02/1979 LAR-OAK
6324,1979-09-02,MIN,SF,1501.07,1376.895,0.748189,0.251811,1501.392697,1385.556901,0.70752,0.29248,28.0,22.0,1979-09-02,-7.567,-6.138269,09/02/1979 MIN-SF
6325,1979-09-02,NYJ,CLE,1486.703,1483.734,0.596586,0.403414,1491.273443,1479.160062,0.578964,0.421036,22.0,25.0,1979-09-02,-2.71876,-2.213327,09/02/1979 NYJ-CLE
6326,1979-09-02,KC,IND,1408.082,1420.16,0.575577,0.424423,1411.647742,1424.210902,0.528007,0.471993,14.0,0.0,1979-09-02,-2.11688,-0.779255,09/02/1979 KC-IND
6327,1979-09-02,DEN,CIN,1579.121,1491.38,0.706666,0.293334,1580.348626,1487.024327,0.675602,0.324398,10.0,0.0,1979-09-02,-6.10964,-5.097818,09/02/1979 DEN-CIN
6328,1979-09-02,WSH,TEN,1469.366,1567.37,0.452646,0.547354,1474.876923,1573.770488,0.438268,0.561732,27.0,29.0,1979-09-02,1.32016,1.724628,09/02/1979 WSH-TEN


In [7]:
# merge the two dataframes
new_elo = pd.merge(vegas, elo, on="game_info")
cols_to_drop = [0,1,2,3,4,5,6,8,10,24]
new_elo.drop(new_elo.columns[cols_to_drop],axis=1,inplace=True)

new_elo = new_elo.rename(columns={"fixed_spread":"vegas_spread"})
cols = new_elo.columns.tolist()
cols = cols[2:5] + cols[13:15] + cols[5:13] + cols[15:] + [cols[1]] + [cols[0]]
new_elo = new_elo[cols]
new_elo.drop(["over_under_line"],axis=1,inplace=True)

new_elo["result"] = new_elo.apply(lambda x : x["score2"] - x["score1"], axis=1)

new_elo.to_csv("elo_and_vegas_spreads.csv")

In [8]:
new_elo.head(10)

Unnamed: 0,date,team1,team2,score1,score2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,qbelo1_pre,qbelo2_pre,qbelo_prob1,qbelo_prob2,elo_spread,qb_spread,vegas_spread,result
0,1979-09-01,TB,DET,31.0,16.0,1385.204,1487.039,0.447188,0.552812,1382.928035,1501.886317,0.427168,0.572832,1.4734,2.038847,-3.0,-15.0
1,1979-09-02,BUF,MIA,7.0,9.0,1422.82,1573.513,0.379119,0.620881,1423.239353,1559.950522,0.375353,0.624647,3.42772,3.539119,5.0,2.0
2,1979-09-02,CHI,GB,6.0,3.0,1485.806,1462.502,0.624412,0.375588,1483.90949,1464.019768,0.608396,0.391604,-3.53216,-3.061441,-3.0,-3.0
3,1979-09-02,DEN,CIN,10.0,0.0,1579.121,1491.38,0.706666,0.293334,1580.348626,1487.024327,0.675602,0.324398,-6.10964,-5.097818,-3.0,-10.0
4,1979-09-02,KC,IND,14.0,0.0,1408.082,1420.16,0.575577,0.424423,1411.647742,1424.210902,0.528007,0.471993,-2.11688,-0.779255,-1.0,-14.0
5,1979-09-02,LAR,OAK,17.0,24.0,1571.837,1543.108,0.631707,0.368293,1570.53035,1554.746347,0.617516,0.382484,-3.74916,-3.328562,-4.0,7.0
6,1979-09-02,MIN,SF,28.0,22.0,1501.07,1376.895,0.748189,0.251811,1501.392697,1385.556901,0.70752,0.29248,-7.567,-6.138269,-7.0,-6.0
7,1979-09-02,NO,ATL,34.0,40.0,1454.541,1472.748,0.566936,0.433064,1447.159545,1464.242458,0.563144,0.436856,-1.87172,-1.764494,-5.0,6.0
8,1979-09-02,NYJ,CLE,22.0,25.0,1486.703,1483.734,0.596586,0.403414,1491.273443,1479.160062,0.578964,0.421036,-2.71876,-2.213327,-2.0,3.0
9,1979-09-02,PHI,NYG,23.0,17.0,1516.285,1435.862,0.697859,0.302141,1515.401198,1430.892805,0.686289,0.313711,-5.81692,-5.439652,-7.0,-6.0


In [17]:
import numpy as np
import math

elo_spreads = new_elo["elo_spread"].tolist()
qbelo_spreads = new_elo["qb_spread"].tolist()
vegas_spreads = new_elo["vegas_spread"].tolist()
results = new_elo["result"].tolist()

def listsquare(l):
    return [x**2 for x in l]

def listabs(l):
    return [abs(x) for x in l]

def listsqrt(l):
    return [math.sqrt(x) for x in l]

print("elo mse: " + str(np.mean(listsquare(np.array(elo_spreads)-np.array(results)))))
print("qb elo mse: " + str(np.mean(listsquare(np.array(qbelo_spreads)-np.array(results)))))
print("spread mse: " + str(np.mean(listsquare(np.array(vegas_spreads)-np.array(results)))))

print("\n")

print("elo mad: " + str(np.mean(listabs(np.array(elo_spreads)-np.array(results)))))
print("qb elo mad: " + str(np.mean(listabs(np.array(qbelo_spreads)-np.array(results)))))
print("spread mad: " + str(np.mean(listabs(np.array(vegas_spreads)-np.array(results)))))

print("\n")

print("elo mean square root error: " + str(np.mean(listsqrt(listabs(np.array(elo_spreads)-np.array(results))))))
print("qb elo mean square root error: " + str(np.mean(listsqrt(listabs(np.array(qbelo_spreads)-np.array(results))))))
print("spread mean square root error: " + str(np.mean(listsqrt(listabs(np.array(vegas_spreads)-np.array(results))))))

elo mse: 185.44129294527718
qb elo mse: 183.3264040950243
spread mse: 180.45236933420082


elo mad: 10.63839990231547
qb elo mad: 10.574302242856131
spread mad: 10.471318585990025


elo mean square root error: 2.981328813414562
qb elo mean square root error: 2.971617323550396
spread mean square root error: 2.9389492125736174


In [137]:
for i in range(20, 31):
    print(i)
    elo_spreads = new_elo.apply(lambda x : 400*math.log10(1/x["elo_prob1"]-1)/i,axis=1).tolist()
    qbelo_spreads = new_elo.apply(lambda x : 400*math.log10(1/x["qbelo_prob1"]-1)/i,axis=1).tolist()
    vegas_spreads = new_elo["vegas_spread"].tolist()
    results = new_elo["result"].tolist()
    
    print("elo mad: " + str(np.mean(listabs(np.array(elo_spreads)-np.array(results)))))
    print("qb elo mad: " + str(np.mean(listabs(np.array(qbelo_spreads)-np.array(results)))))
    print("spread mad: " + str(np.mean(listabs(np.array(vegas_spreads)-np.array(results)))))

    print("")

    print("elo mean square root error: " + str(np.mean(listsqrt(listabs(np.array(elo_spreads)-np.array(results))))))
    print("qb elo mean square root error: " + str(np.mean(listsqrt(listabs(np.array(qbelo_spreads)-np.array(results))))))
    print("spread mean square root error: " + str(np.mean(listsqrt(listabs(np.array(vegas_spreads)-np.array(results))))))
    
    print("")

20
elo mad: 10.733418375531782
qb elo mad: 10.662895201894578
spread mad: 10.471318585990025

elo mean square root error: 2.9978623271023155
qb elo mean square root error: 2.9897217129777363
spread mean square root error: 2.9389492125736174

21
elo mad: 10.698524214640116
qb elo mad: 10.629070894886397
spread mad: 10.471318585990025

elo mean square root error: 2.992141994643583
qb elo mean square root error: 2.9829280098013187
spread mean square root error: 2.9389492125736174

22
elo mad: 10.673433613803628
qb elo mad: 10.60513839476731
spread mad: 10.471318585990025

elo mean square root error: 2.9877238498050325
qb elo mean square root error: 2.978232234491164
spread mean square root error: 2.9389492125736174

23
elo mad: 10.655880769847736
qb elo mad: 10.58855690384148
spread mad: 10.471318585990025

elo mean square root error: 2.9845041494872917
qb elo mean square root error: 2.9740366773327556
spread mean square root error: 2.9389492125736174

24
elo mad: 10.64475510743611
qb elo

In [22]:
elo_spreads = new_elo["elo_spread"].tolist()
qbelo_spreads = new_elo["qb_spread"].tolist()
vegas_spreads = new_elo["vegas_spread"].tolist()
results = new_elo["result"].tolist()

def get_accuracy(l):
    over = 0
    under = 0
    exact = 0
    for x in l:
        if x > 0:
            over += 1
        elif x == 0:
            exact += 1
        else:
            under += 1
    print("over: " + str(over))
    print("under: " + str(under))
    print("exact: " + str(exact))
    print("over percentage: " + str(over/(over+under)))

print("elo accuracy: ")
get_accuracy(np.array(elo_spreads)-np.array(results))
print("")
print("qb elo accuracy: ")
get_accuracy(np.array(qbelo_spreads)-np.array(results))
print("")
print("vegas accuracy: ")
get_accuracy(np.array(vegas_spreads)-np.array(results))

elo accuracy: 
over: 4664
under: 4558
exact: 0
over percentage: 0.5057471264367817

qb elo accuracy: 
over: 4720
under: 4502
exact: 0
over percentage: 0.5118195619171546

vegas accuracy: 
over: 4501
under: 4471
exact: 250
over percentage: 0.5016718680338832
