# Get Closing Odds From Odds Time Series

Bestfightodds periodically scrapes the various betting sites and posts new odds only when they are updated. Because some betting sites offer live betting whereas others don't, the final quoted odds across sites might not be comparable. As such, I pull pre-fight closing odds as of 16 hours before the final time odds are posted for the UFC card/event by any site. This should closely approximate betting the day before the fight after weigh-ins, or the day of the fight before the first fight on the card.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
pd.set_option('display.max_columns', 500)
from zipfile import ZipFile
from datetime import timedelta

In [2]:
# Load every odds CSV bundled in the bestfightodds archive into one frame.
# The archive and each member are opened through context managers so the file
# handles are released as soon as they are read, instead of staying open for
# the life of the kernel.
with ZipFile('../../data/bestfightodds_data/straight_bets.zip') as zip_file:
    dfs = []
    for text_file in zip_file.infolist():
        if text_file.filename.endswith('.csv'):
            with zip_file.open(text_file.filename) as csv_file:
                dfs.append(pd.read_csv(csv_file))
df = pd.concat(dfs, sort = False)
del dfs

# get all fights that actually happened and their outcomes
actualbets = pd.read_csv('../../data/bestfightodds_data/outcome_mapping_bfodds_to_wiki.csv')

# Inner merge: only odds rows whose (fighter1, fighter2, event url) appear in
# the outcome mapping are kept.
df = pd.merge(df, actualbets[['fighter1', 'fighter2', 'fight_odds_url']],
             left_on = ['fighter1', 'fighter2', 'url'],
             right_on = ['fighter1', 'fighter2', 'fight_odds_url']
             )
# 'dates' is parsed as epoch milliseconds
df['Date'] = pd.to_datetime(df['dates'], unit = 'ms')
# 'url' was matched against 'fight_odds_url' in the merge, so it is redundant
del df['url']
display(df.head())

Unnamed: 0,Bet,betsite,dates,odds,fighter1,fighter2,class,fight_odds_url,Date
0,Fabio Maldonado,5Dimes,1400173028000,335,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-15 16:57:08
1,Fabio Maldonado,5Dimes,1400176811000,420,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-15 18:00:11
2,Fabio Maldonado,5Dimes,1400635445000,350,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-21 01:24:05
3,Fabio Maldonado,5Dimes,1401040266000,400,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-25 17:51:06
4,Fabio Maldonado,5Dimes,1401472446000,450,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-30 17:54:06


In [3]:
# Time the closing odds are taken from: 16 hours before the final odds quoted
# for a UFC card (one card per fight_odds_url) by any site.
df['odds_close_time'] = df.groupby('fight_odds_url')['Date'].transform("max") - timedelta(hours = 16)

# Earliest quote still accepted: 7 days before the close time. Older quotes are
# ignored to avoid weird anomalies where maybe bets stopped being offered
# prior to the close time.
df['recent_odds_window'] = df['odds_close_time'] - timedelta(days = 7)

# Row-level flag: was this quote posted inside [recent_odds_window, odds_close_time]?
quote_in_window = (df['Date'] <= df['odds_close_time']) & (df['Date'] >= df['recent_odds_window'])

# For every (card, site, fight, bet) record whether at least one quote falls in
# the window. The mask is computed once for the whole frame and reduced with
# groupby(...).any(), rather than running a Python lambda per group through
# groupby.apply; the resulting table is the same.
group_keys = ['fight_odds_url', 'betsite', 'fighter1', 'fighter2', 'Bet']
has_odds = (
    quote_in_window
    .groupby([df[key] for key in group_keys])
    .any()
    .rename('has_bet')
    .reset_index()
)

In [4]:
# Share of (card, site, fight, bet) combinations that have a quote in the window
has_odds['has_bet'].mean()

0.9677731499011317

In [5]:
# Pre-fight odds definition: for every (fight, bet, site), the latest quote
# posted at or before odds_close_time (16 hours before the card's last quote).
# To avoid garbage data the quote must also be no older than
# recent_odds_window (a week before the close time) -- maybe sites stopped
# offering bets at some point if their quote hasn't updated.
quote_in_window = ((df['Date'] <= df['odds_close_time']) &
                   (df['Date'] >= df['recent_odds_window']))
byval = ['fighter1', 'fighter2', 'fight_odds_url', 'Bet', 'betsite', 'Date']
group_keys = byval[0:-1]

# Sort chronologically within each group and keep the last row per group.
# tail(1) is used instead of nth(-1): since pandas 2.0 nth() is a filtration
# that keeps the original index, so the old nth(-1).reset_index() pattern
# would add a stray 'index' column there. tail(1) behaves the same on old and
# new pandas.
out = (
    df[quote_in_window]
    .sort_values(byval)
    .groupby(group_keys)
    .tail(1)
    .reset_index(drop = True)
)
# Put the grouping keys first, matching the layout the previous
# nth(-1).reset_index() produced on pandas < 2.0.
out = out[group_keys + [c for c in out.columns if c not in group_keys]]

In [6]:
# Number of bets quoted per (fight, site); a complete quote has 2 bets
fight_site_keys = ['fighter1', 'fighter2', 'fight_odds_url', 'betsite']
hasbothsides = (
    out.groupby(fight_site_keys)
    .size()
    .reset_index()
    .rename(columns = {0: 'size'})
)
# share of (fight, site) pairs with exactly two bets quoted
print((hasbothsides['size'] == 2).sum() / hasbothsides.shape[0])
print(out.shape)

# Keep only rows belonging to a (fight, site) pair where exactly two bets are quoted
out['bothsidessize'] = out.groupby(fight_site_keys)['betsite'].transform('size')
out = out[out['bothsidessize'] == 2]
print(out.shape)
out = out.drop(columns = 'bothsidessize')

0.9999565283543809
(92012, 11)
(92010, 12)


In [15]:
# Reshape to one row per (fight, bet) with one odds column per betting site
keepcol = ['fighter1', 'fighter2', 'fight_odds_url',  'Bet']

finaldf1 = pd.pivot_table(
    out[keepcol + ['odds', 'betsite']],
    values = ['odds'],
    index = keepcol,
    columns = ['betsite'],
    aggfunc = 'first',
).reset_index()

# The pivot yields two-level column labels: ('odds', <site>) for the odds and
# (<key>, '') for the index columns. Flatten to the site name / key name.
finaldf1.columns = [top if site == '' else site for (top, site) in finaldf1.columns]

In [16]:
def nansumwrapper(a, **kwargs):
    """Sum `a` ignoring NaNs, but return NaN when every entry is NaN.

    Plain ``np.nansum`` returns 0 for all-NaN input; this wrapper keeps
    "no data at all" distinguishable from a genuine zero sum.

    Parameters
    ----------
    a : array-like of numbers
    **kwargs : forwarded unchanged to ``np.nansum`` (e.g. ``axis``)

    Returns
    -------
    ``np.nan`` if all entries of `a` are NaN (this includes empty input),
    otherwise the result of ``np.nansum(a, **kwargs)``.
    """
    if not np.isnan(a).all():
        return np.nansum(a, **kwargs)
    return np.nan

# Normalise the two site labels that contain awkward characters
finaldf1 = finaldf1.rename(columns = {'William\xa0H.':'William_H', 'SportsInt.':'SportsInt'})

betting_sites= ['5Dimes','BetDSI','BookMaker','SportBet','Bet365','Bovada', 
                'Sportsbook','William_H','Pinnacle','SportsInt','BetOnline','Intertops']
fight_keys = ['fighter1','fighter2', 'fight_odds_url']

for site in betting_sites:
    quoted = pd.to_numeric(finaldf1[site])
    # Convert the quoted odds to decimal odds:
    #   positive quote q -> 1 + q/100,  otherwise -> 1 - 100/q
    finaldf1[site] = np.where(quoted > 0, 1.0 + quoted/100.0, 1.0 - 100.0 / quoted)
    # Vig per fight and site: sum of 1/decimal-odds over the fight's bets, minus 1.
    # NaN when the site has no odds for the fight (see nansumwrapper).
    finaldf1[site+"_vig"] = finaldf1.groupby(fight_keys)[site].transform(
        lambda decimal_odds: nansumwrapper(decimal_odds**-1) -1.0)

In [17]:
# Summary statistics of the vig charged by each betting site
vig_cols = [site + "_vig" for site in betting_sites]
finaldf1[vig_cols].describe()

Unnamed: 0,5Dimes_vig,BetDSI_vig,BookMaker_vig,SportBet_vig,Bet365_vig,Bovada_vig,Sportsbook_vig,William_H_vig,Pinnacle_vig,SportsInt_vig,BetOnline_vig,Intertops_vig
count,9334.0,6786.0,9188.0,9170.0,1358.0,9044.0,9016.0,3370.0,9238.0,8420.0,7152.0,2524.0
mean,0.024932,0.043776,0.046139,0.029454,0.047345,0.054283,0.05339,0.054713,0.026725,0.066603,0.037999,0.059803
std,0.010332,0.011181,0.009227,0.010463,0.006554,0.007267,0.008406,0.004632,0.004406,0.00995,0.006304,0.004605
min,0.013621,-0.261438,0.028107,0.013722,0.02852,0.04128,0.037447,0.02852,0.011247,0.032531,0.024155,0.025469
25%,0.017903,0.037793,0.038527,0.023343,0.043142,0.04949,0.047715,0.052939,0.024341,0.059524,0.033751,0.056953
50%,0.020249,0.041929,0.044527,0.02649,0.047619,0.05237,0.050505,0.054738,0.024917,0.064478,0.036364,0.059524
75%,0.032531,0.047619,0.051282,0.030319,0.052133,0.056953,0.056953,0.056532,0.029182,0.075758,0.040404,0.061937
max,0.086663,0.209767,0.079642,0.113843,0.05778,0.086663,0.087146,0.097598,0.103855,0.106975,0.069767,0.069767


In [18]:
# Rows where at least one site shows a negative vig
vig_cols = [site + "_vig" for site in betting_sites]
negative_vig_rows = (finaldf1[vig_cols] < 0).any(axis = 1)
print(negative_vig_rows.sum())
display(finaldf1[negative_vig_rows])

# These cases seem to be consequences of unusual line movement
# Blank out both the odds and the vig of the affected site; other sites'
# values on the same row are left alone.
for site in betting_sites:
    bad_quote = finaldf1[site + "_vig"] < 0
    finaldf1[site] = np.where(bad_quote, np.nan, finaldf1[site])
    finaldf1[site + "_vig"] = np.where(bad_quote, np.nan, finaldf1[site + "_vig"])

# Average decimal odds across the sites that still have a value
finaldf1['meanodds'] = finaldf1[betting_sites].mean(axis=1)
# Re-count negative-vig rows after the clean-up
print((finaldf1[vig_cols] < 0).any(axis = 1).sum())

4


Unnamed: 0,fighter1,fighter2,fight_odds_url,Bet,5Dimes,Bet365,BetDSI,BetOnline,BookMaker,Bovada,Intertops,Pinnacle,SportBet,SportsInt,Sportsbook,William_H,meanodds,5Dimes_vig,BetDSI_vig,BookMaker_vig,SportBet_vig,Bet365_vig,Bovada_vig,Sportsbook_vig,William_H_vig,Pinnacle_vig,SportsInt_vig,BetOnline_vig,Intertops_vig
1532,Austin Arnett,Cory Sandhagen,https://www.bestfightodds.com/events/ufc-on-fo...,Austin Arnett,2.85,,2.87,2.8,2.75,,2.7,2.8,2.8,2.8,2.9,2.75,,0.017544,-0.197022,0.055944,0.029274,,,0.04632,0.057826,0.029274,0.063025,0.034562,0.05787
1533,Austin Arnett,Cory Sandhagen,https://www.bestfightodds.com/events/ufc-on-fo...,Cory Sandhagen,1.5,,2.2,1.47619,1.444444,,1.454545,1.487805,1.487805,1.416667,1.425532,1.440529,,0.017544,-0.197022,0.055944,0.029274,,,0.04632,0.057826,0.029274,0.063025,0.034562,0.05787
4320,Devin Clark,Mike Rodriguez,https://www.bestfightodds.com/events/ufc-223-n...,Devin Clark,2.0,,2.25,,1.862069,1.909091,1.869565,1.952381,1.980392,1.869565,1.869565,1.909091,-106.0,0.02381,-0.261438,0.069747,0.033252,,0.069264,0.069767,0.047619,0.029103,0.069767,,0.069767
4321,Devin Clark,Mike Rodriguez,https://www.bestfightodds.com/events/ufc-223-n...,Mike Rodriguez,1.909091,,3.4,,1.877193,1.833333,1.869565,1.934579,1.892857,1.869565,1.869565,1.909091,104.0,0.02381,-0.261438,0.069747,0.033252,,0.069264,0.069767,0.047619,0.029103,0.069767,,0.069767


0


In [19]:

# Attach fight outcomes to every (fight, bet) row. validate="m:1" makes pandas
# raise if the merge keys are not unique in actualbets.
finaldf2 = finaldf1.merge(
    actualbets,
    on = ['fighter1','fighter2', 'fight_odds_url'],
    validate = "m:1",
)

# 1 when the bet is on the fighter recorded as the winner, else 0
finaldf2['bet_won'] = (finaldf2['Bet'] == finaldf2['bestfightodds_winner']).astype(int)

display(finaldf2.head())
finaldf2.to_csv('../../data/datasets_for_analysis/final_datasets/odds_w_outcomes.csv', index = False)

Unnamed: 0,fighter1,fighter2,fight_odds_url,Bet,5Dimes,Bet365,BetDSI,BetOnline,BookMaker,Bovada,Intertops,Pinnacle,SportBet,SportsInt,Sportsbook,William_H,meanodds,5Dimes_vig,BetDSI_vig,BookMaker_vig,SportBet_vig,Bet365_vig,Bovada_vig,Sportsbook_vig,William_H_vig,Pinnacle_vig,SportsInt_vig,BetOnline_vig,Intertops_vig,Winner_Cleaned,Loser_Cleaned,wiki_url,bestfightodds_winner,bestfightodds_loser,WeightClass,Winner,Outcome,Loser,Method,Round,Time,Notes,Card,Winner_url,Loser_url,event_order,Method_Cleaned,Card_Cleaned,Champion,Interim_Champion,Card_Date,#,Event,bet_won
0,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Aalon Cruz,1.540541,1.571429,1.510204,1.540541,1.5,1.5,1.526316,1.515464,1.537634,1.555556,1.540541,1.5,1.528185,0.019493,0.04239,0.054264,0.022097,0.036364,0.051282,0.049123,0.051282,0.037222,0.05102,0.033738,0.055172,Spike Carlyle,Aalon Cruz,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Spike Carlyle,Aalon Cruz,Featherweight,Spike Carlyle,def,Aalon Cruz,TKO (punches),1.0,1:25,,Preliminary card (ESPN+),,,11,KO,Prelim,,,2020-02-29,511,UFC Fight Night: Benavidez vs. Figueiredo,0
1,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Spike Carlyle,2.7,2.5,2.63,2.6,2.58,2.6,2.5,2.65,2.69,2.45,2.5,2.6,2.583333,0.019493,0.04239,0.054264,0.022097,0.036364,0.051282,0.049123,0.051282,0.037222,0.05102,0.033738,0.055172,Spike Carlyle,Aalon Cruz,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Spike Carlyle,Aalon Cruz,Featherweight,Spike Carlyle,def,Aalon Cruz,TKO (punches),1.0,1:25,,Preliminary card (ESPN+),,,11,KO,Prelim,,,2020-02-29,511,UFC Fight Night: Benavidez vs. Figueiredo,1
2,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Aaron Phillips,2.88,,2.8,2.8,2.8,2.75,,2.84,2.83,2.7,2.8,,2.8,0.019353,0.034562,0.034562,0.030776,,0.055944,0.049451,,0.024244,0.059811,0.034562,,Matt Hobar,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Matt Hobar,Aaron Phillips,Bantamweight,Matt Hobar,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 29–28)",3.0,5:00,,Preliminary card (Fox Sports 2),https://en.wikipedia.org/wiki/Matt_Hobar,,9,Unanimous Decision,Prelim,,,2014-08-23,287,UFC Fight Night: Henderson vs. dos Anjos,0
3,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Matt Hobar,1.487805,,1.47619,1.47619,1.47619,1.444444,,1.487805,1.47619,1.45045,1.444444,,1.468857,0.019353,0.034562,0.034562,0.030776,,0.055944,0.049451,,0.024244,0.059811,0.034562,,Matt Hobar,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Matt Hobar,Aaron Phillips,Bantamweight,Matt Hobar,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 29–28)",3.0,5:00,,Preliminary card (Fox Sports 2),https://en.wikipedia.org/wiki/Matt_Hobar,,9,Unanimous Decision,Prelim,,,2014-08-23,287,UFC Fight Night: Henderson vs. dos Anjos,1
4,Aaron Phillips,Sam Sicilia,https://www.bestfightodds.com/events/ufc-173-b...,Aaron Phillips,2.3,,2.25,2.3,2.25,2.25,,2.32,2.28,2.3,2.3,,2.283333,0.018116,0.036281,0.036281,0.025373,,0.052288,0.050167,,0.02453,0.055995,0.034783,,Sam Sicilia,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_173,Sam Sicilia,Aaron Phillips,Featherweight,Sam Sicilia,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 30–27)",3.0,5:00,,Preliminary card (UFC Fight Pass),https://en.wikipedia.org/wiki/Sam_Sicilia,,11,Unanimous Decision,Prelim,,,2014-05-24,273,UFC 173: Barão vs. Dillashaw,0


In [20]:
# create dataset with one row per fight
fight_keys = ['fighter1', 'fighter2', 'fight_odds_url']
cols= ['5Dimes','BetDSI','BookMaker','SportBet','Bet365','Bovada', 
                'Sportsbook','William_H','Pinnacle',
       'SportsInt','BetOnline','Intertops', 'meanodds', 'fighter1',
      'fighter2', 'fight_odds_url']

# Split the two rows of each fight: the bet on the winner keeps every column,
# the bet on the loser keeps only the odds columns plus the merge keys.
bet_on_winner = finaldf2['Bet'] == finaldf2['bestfightodds_winner']
bet_on_loser = finaldf2['Bet'] == finaldf2['bestfightodds_loser']
winner = finaldf2[bet_on_winner]
loser = finaldf2.loc[bet_on_loser, cols]

# Side-by-side join; overlapping odds columns get the _win / _lose suffixes
onerowperfight = (
    winner
    .merge(loser, on = fight_keys, suffixes = ('_win', '_lose'))
    .drop(columns = 'bet_won')
)
display(onerowperfight.head())
onerowperfight.to_csv('../../data/datasets_for_analysis/final_datasets/odds_w_outcomes_one_row_per_fight.csv', index = False)

Unnamed: 0,fighter1,fighter2,fight_odds_url,Bet,5Dimes_win,Bet365_win,BetDSI_win,BetOnline_win,BookMaker_win,Bovada_win,Intertops_win,Pinnacle_win,SportBet_win,SportsInt_win,Sportsbook_win,William_H_win,meanodds_win,5Dimes_vig,BetDSI_vig,BookMaker_vig,SportBet_vig,Bet365_vig,Bovada_vig,Sportsbook_vig,William_H_vig,Pinnacle_vig,SportsInt_vig,BetOnline_vig,Intertops_vig,Winner_Cleaned,Loser_Cleaned,wiki_url,bestfightodds_winner,bestfightodds_loser,WeightClass,Winner,Outcome,Loser,Method,Round,Time,Notes,Card,Winner_url,Loser_url,event_order,Method_Cleaned,Card_Cleaned,Champion,Interim_Champion,Card_Date,#,Event,5Dimes_lose,BetDSI_lose,BookMaker_lose,SportBet_lose,Bet365_lose,Bovada_lose,Sportsbook_lose,William_H_lose,Pinnacle_lose,SportsInt_lose,BetOnline_lose,Intertops_lose,meanodds_lose
0,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Spike Carlyle,2.7,2.5,2.63,2.6,2.58,2.6,2.5,2.65,2.69,2.45,2.5,2.6,2.583333,0.019493,0.04239,0.054264,0.022097,0.036364,0.051282,0.049123,0.051282,0.037222,0.05102,0.033738,0.055172,Spike Carlyle,Aalon Cruz,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Spike Carlyle,Aalon Cruz,Featherweight,Spike Carlyle,def,Aalon Cruz,TKO (punches),1.0,1:25,,Preliminary card (ESPN+),,,11,KO,Prelim,,,2020-02-29,511,UFC Fight Night: Benavidez vs. Figueiredo,1.540541,1.510204,1.5,1.537634,1.571429,1.5,1.540541,1.5,1.515464,1.555556,1.540541,1.526316,1.528185
1,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Matt Hobar,1.487805,,1.47619,1.47619,1.47619,1.444444,,1.487805,1.47619,1.45045,1.444444,,1.468857,0.019353,0.034562,0.034562,0.030776,,0.055944,0.049451,,0.024244,0.059811,0.034562,,Matt Hobar,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Matt Hobar,Aaron Phillips,Bantamweight,Matt Hobar,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 29–28)",3.0,5:00,,Preliminary card (Fox Sports 2),https://en.wikipedia.org/wiki/Matt_Hobar,,9,Unanimous Decision,Prelim,,,2014-08-23,287,UFC Fight Night: Henderson vs. dos Anjos,2.88,2.8,2.8,2.83,,2.75,2.8,,2.84,2.7,2.8,,2.8
2,Aaron Phillips,Sam Sicilia,https://www.bestfightodds.com/events/ufc-173-b...,Sam Sicilia,1.714286,,1.689655,1.666667,1.689655,1.645161,,1.684932,1.704225,1.609756,1.625,,1.669926,0.018116,0.036281,0.036281,0.025373,,0.052288,0.050167,,0.02453,0.055995,0.034783,,Sam Sicilia,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_173,Sam Sicilia,Aaron Phillips,Featherweight,Sam Sicilia,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 30–27)",3.0,5:00,,Preliminary card (UFC Fight Pass),https://en.wikipedia.org/wiki/Sam_Sicilia,,11,Unanimous Decision,Prelim,,,2014-05-24,273,UFC 173: Barão vs. Dillashaw,2.3,2.25,2.25,2.28,,2.25,2.3,,2.32,2.3,2.3,,2.283333
3,Aaron Riley,Joe Brammer,https://www.bestfightodds.com/events/ufc-114-j...,Aaron Riley,1.526316,,,,1.512821,1.540541,,1.507614,1.526316,,1.540541,,1.525691,0.032531,,0.053174,0.032531,,0.04128,0.04128,,0.02431,,,,Aaron Riley,Joe Brammer,https://en.wikipedia.org/wiki/UFC_114,Aaron Riley,Joe Brammer,Lightweight,Aaron Riley,def,Joe Brammer,"Decision (unanimous) (30–27, 30–27, 30–27)",3.0,5:00,,Preliminary card,https://en.wikipedia.org/wiki/Aaron_Riley,,10,Unanimous Decision,Prelim,,,2010-05-29,152,UFC 114: Rampage vs. Evans,2.65,,2.55,2.65,,2.55,2.55,,2.77,,,,2.62
4,Aaron Riley,Jorge Gurgel,https://www.bestfightodds.com/events/ufc-91-co...,Aaron Riley,1.952381,,,,1.909091,1.869565,,2.1,1.952381,2.0,1.952381,,1.962257,0.047079,,0.069264,0.067751,,0.069767,0.067751,,0.021645,0.08159,,,Aaron Riley,Jorge Gurgel,https://en.wikipedia.org/wiki/UFC_91,Aaron Riley,Jorge Gurgel,Lightweight,Aaron Riley,def,Jorge Gurgel,"Decision (unanimous) (29–28, 29–28, 29–28)",3.0,5:00,,Preliminary card,https://en.wikipedia.org/wiki/Aaron_Riley,https://en.wikipedia.org/wiki/Jorge_Gurgel,6,Unanimous Decision,Prelim,,,2008-11-15,119,UFC 91: Couture vs. Lesnar,1.869565,,1.833333,1.8,,1.869565,1.8,,1.833333,1.719424,,,1.817889
