# Get Closing Odds From Odds Time Series

Bestfightodds periodically scrapes the various betting sites and posts new odds only when they are updated. Because some betting sites offer live betting whereas others don't, the final quoted odds across sites might not be comparable. As such, I pull pre-fight closing odds as of 12 hours before the last time odds are posted for the UFC card/event by any site. This should closely approximate betting just before the fight (after weigh-ins).

In [125]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
pd.set_option('display.max_columns', 500)
from zipfile import ZipFile
from datetime import timedelta

In [126]:
# Load every CSV member of the straight-bets archive and stack them into a
# single frame of odds quotes. The context managers make sure the archive and
# each member handle are closed as soon as the data has been read.
with ZipFile('../../data/bestfightodds_data/straight_bets.zip') as zip_file:
    dfs = []
    for text_file in zip_file.infolist():
        if not text_file.filename.endswith('.csv'):
            continue
        with zip_file.open(text_file.filename) as csv_handle:
            dfs.append(pd.read_csv(csv_handle))
df = pd.concat(dfs, sort=False)
del dfs  # the per-file frames are no longer needed once concatenated

# get all fights that actually happened and their outcomes
actualbets = pd.read_csv('../../data/bestfightodds_data/outcome_mapping_bfodds_to_wiki.csv')

# Default (inner) merge: only quotes whose fighter pair and event url appear
# in the outcome mapping are kept.
df = pd.merge(df, actualbets[['fighter1', 'fighter2', 'fight_odds_url']],
              left_on=['fighter1', 'fighter2', 'url'],
              right_on=['fighter1', 'fighter2', 'fight_odds_url'])
# 'dates' is parsed as epoch milliseconds
df['Date'] = pd.to_datetime(df['dates'], unit='ms')
# after the merge 'url' carries the same value as 'fight_odds_url'
del df['url']
display(df.head())

Unnamed: 0,Bet,betsite,dates,odds,fighter1,fighter2,class,fight_odds_url,Date
0,Fabio Maldonado,5Dimes,1400173028000,335,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-15 16:57:08
1,Fabio Maldonado,5Dimes,1400176811000,420,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-15 18:00:11
2,Fabio Maldonado,5Dimes,1400635445000,350,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-21 01:24:05
3,Fabio Maldonado,5Dimes,1401040266000,400,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-25 17:51:06
4,Fabio Maldonado,5Dimes,1401472446000,450,Fabio Maldonado,Stipe Miocic,even,https://www.bestfightodds.com/events/ufc-the-u...,2014-05-30 17:54:06


In [127]:
# Time the odds quotes will be taken from: 12 hours before the final odds
# quoted for a UFC card (identified by fight_odds_url) by any site.
last_quote_per_card = df.groupby('fight_odds_url')['Date'].transform("max")
df['odds_close_time'] = last_quote_per_card - timedelta(hours = 12)

# Lower bound of the acceptance window: one week before the close time.
# Used to confirm every site had odds quoted before the close time but not too
# far behind it (to avoid anomalies where a bet stopped being offered earlier).
df['recent_odds_window'] = df['odds_close_time'] - timedelta(days = 7)


def _quoted_in_window(quotes):
    """Return True if any quote in the group lies between recent_odds_window and odds_close_time (inclusive)."""
    not_too_late = quotes['Date'] <= quotes['odds_close_time']
    not_too_old = quotes['Date'] >= quotes['recent_odds_window']
    return (not_too_late & not_too_old).any()


# one flag per (card, site, fight, bet) combination
bet_keys = ['fight_odds_url', 'betsite', 'fighter1', 'fighter2', 'Bet']
has_odds = df.groupby(bet_keys).apply(_quoted_in_window).reset_index()
has_odds = has_odds.rename(columns = {0:'has_bet'})

In [128]:
# fraction of (card, site, fight, bet) combinations with a quote inside the window
n_with_quote = sum(has_odds['has_bet'])
n_with_quote / len(has_odds)

0.9745024106811719

In [129]:
# Pre-fight ("closing") odds: for every bet at every site take the latest
# quote at or before odds_close_time, i.e. 12 hours BEFORE the last quote
# posted for the card.
# To avoid garbage data the quote must also be no more than a week older than
# that close time -- a site whose quote has not updated may have stopped
# offering the bet at some point.
in_window = ((df['Date'] <= df['odds_close_time']) &
             (df['Date'] >= df['recent_odds_window']))
out = df[in_window]

byval = ['fighter1', 'fighter2', 'fight_odds_url', 'Bet', 'betsite', 'Date']
group_keys = byval[:-1]
# Sort so the newest quote comes last within each group, then keep only that
# row. drop_duplicates(keep='last') selects the same rows as
# groupby(group_keys).nth(-1) but does not rely on nth()'s return shape, which
# changed in pandas 2.0 (nth now keeps the original index, so reset_index()
# would add a stray 'index' column). dropna mirrors groupby's default of
# discarding rows with a missing key. Columns stay in their original order.
out = (out.dropna(subset=group_keys)
          .sort_values(byval)
          .drop_duplicates(subset=group_keys, keep='last')
          .reset_index(drop=True))

In [130]:
# Count the rows each site has per fight; a count of 2 means both sides of
# the fight are quoted by that site.
fight_site_keys = ['fighter1', 'fighter2', 'fight_odds_url', 'betsite']

hasbothsides = out.groupby(fight_site_keys).size().reset_index(name = 'size')
n_two_sided = sum(hasbothsides['size'] == 2)
print(n_two_sided/hasbothsides.shape[0])
print(out.shape)

# Tag every row with the size of its fight/site group and keep only the
# rows belonging to groups of exactly two.
out['bothsidessize'] = out.groupby(fight_site_keys)['betsite'].transform(np.size)
is_two_sided = out['bothsidessize'] == 2
out = out[is_two_sided]
print(out.shape)
out = out.drop(columns = 'bothsidessize')

0.9997674516933744
(94593, 11)
(94582, 12)


In [131]:
keepcol = ['fighter1', 'fighter2', 'fight_odds_url',  'Bet']

# Reshape to one row per bet with one odds column per betting site.
closing_quotes = out[keepcol + ['odds', 'betsite']]
finaldf1 = closing_quotes.pivot_table(index = keepcol,
                                      columns = ['betsite'],
                                      values = ['odds'],
                                      aggfunc = 'first')
finaldf1 = finaldf1.reset_index()

# Flatten the two-level header: key columns have an empty second level and
# keep their own name, odds columns take the betting-site name.
flat_names = []
for top, site in finaldf1.columns:
    flat_names.append(top if site == '' else site)
finaldf1.columns = flat_names

In [132]:
def _american_to_decimal(american):
    """Map positive quotes to 1 + q/100 and all other quotes to 1 - 100/q (American -> decimal odds)."""
    as_positive_line = 1.0 + american/100.0
    as_negative_line = 1.0 - 100.0 / american
    return np.where(american > 0, as_positive_line, as_negative_line)


# give the two awkwardly named sites plain column names
finaldf1 = finaldf1.rename(columns = {'William\xa0H.':'William_H', 'SportsInt.':'SportsInt'})
betting_sites = ['5Dimes', 'BetDSI', 'BookMaker', 'SportBet', 'Bet365', 'Bovada',
                 'Sportsbook', 'William_H', 'Pinnacle', 'SportsInt', 'BetOnline', 'Intertops']
for col in betting_sites:
    # coerce to numeric first, then convert the quote in place
    finaldf1[col] = _american_to_decimal(pd.to_numeric(finaldf1[col]))

# row-wise average over the sites (pandas skips missing quotes by default)
finaldf1['meanodds'] = finaldf1[betting_sites].mean(axis=1)

In [133]:
# preview the first rows of the per-site odds table
finaldf1.head(n = 5)

Unnamed: 0,fighter1,fighter2,fight_odds_url,Bet,5Dimes,Bet365,BetDSI,BetOnline,BookMaker,Bovada,Intertops,Pinnacle,SportBet,SportsInt,Sportsbook,William_H,meanodds
0,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Aalon Cruz,1.540541,1.571429,1.510204,1.534759,1.5,1.5,1.526316,1.515464,1.537634,1.555556,1.540541,1.5,1.527704
1,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Spike Carlyle,2.7,2.5,2.63,2.62,2.58,2.6,2.5,2.65,2.69,2.4,2.5,2.6,2.580833
2,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Aaron Phillips,2.88,,2.8,2.8,2.8,2.75,,2.83,2.83,2.7,2.8,,2.798889
3,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Matt Hobar,1.487805,,1.47619,1.47619,1.47619,1.444444,,1.490196,1.47619,1.45045,1.444444,,1.469122
4,Aaron Phillips,Sam Sicilia,https://www.bestfightodds.com/events/ufc-173-b...,Aaron Phillips,2.3,,2.25,2.3,2.25,2.3,,2.32,2.28,2.3,2.3,,2.288889


In [134]:

# Attach the fight outcome to every bet row. validate="m:1" makes pandas
# raise if actualbets holds more than one row for a merge key.
outcome_keys = ['fighter1','fighter2', 'fight_odds_url']
finaldf2 = finaldf1.merge(actualbets, on = outcome_keys, validate = "m:1")

# A bet counts as won only when the backed fighter is the recorded winner
# and the recorded outcome is 'def'.
picked_winner = finaldf2['Bet'] == finaldf2['bestfightodds_winner']
decisive_result = finaldf2['Outcome'] == 'def'
finaldf2['bet_won'] = (picked_winner & decisive_result).astype(int)

display(finaldf2.head())
finaldf2.to_csv('../../data/datasets_for_analysis/final_datasets/odds_w_outcomes.csv', index = False)

Unnamed: 0,fighter1,fighter2,fight_odds_url,Bet,5Dimes,Bet365,BetDSI,BetOnline,BookMaker,Bovada,Intertops,Pinnacle,SportBet,SportsInt,Sportsbook,William_H,meanodds,Winner_Cleaned,Loser_Cleaned,wiki_url,bestfightodds_winner,bestfightodds_loser,WeightClass,Winner,Outcome,Loser,Method,Round,Time,Notes,Card,Winner_url,Loser_url,event_order,Method_Cleaned,Card_Cleaned,Champion,Interim_Champion,Card_Date,#,Event,bet_won
0,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Aalon Cruz,1.540541,1.571429,1.510204,1.534759,1.5,1.5,1.526316,1.515464,1.537634,1.555556,1.540541,1.5,1.527704,Spike Carlyle,Aalon Cruz,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Spike Carlyle,Aalon Cruz,Featherweight,Spike Carlyle,def,Aalon Cruz,TKO (punches),1.0,1:25,,Preliminary card (ESPN+),,,11,KO,Prelim,,,2020-02-29,511,UFC Fight Night: Benavidez vs. Figueiredo,0
1,Aalon Cruz,Spike Carlyle,https://www.bestfightodds.com/events/ufc-on-es...,Spike Carlyle,2.7,2.5,2.63,2.62,2.58,2.6,2.5,2.65,2.69,2.4,2.5,2.6,2.580833,Spike Carlyle,Aalon Cruz,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Spike Carlyle,Aalon Cruz,Featherweight,Spike Carlyle,def,Aalon Cruz,TKO (punches),1.0,1:25,,Preliminary card (ESPN+),,,11,KO,Prelim,,,2020-02-29,511,UFC Fight Night: Benavidez vs. Figueiredo,1
2,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Aaron Phillips,2.88,,2.8,2.8,2.8,2.75,,2.83,2.83,2.7,2.8,,2.798889,Matt Hobar,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Matt Hobar,Aaron Phillips,Bantamweight,Matt Hobar,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 29–28)",3.0,5:00,,Preliminary card (Fox Sports 2),https://en.wikipedia.org/wiki/Matt_Hobar,,9,Unanimous Decision,Prelim,,,2014-08-23,287,UFC Fight Night: Henderson vs. dos Anjos,0
3,Aaron Phillips,Matt Hobar,https://www.bestfightodds.com/events/ufc-fight...,Matt Hobar,1.487805,,1.47619,1.47619,1.47619,1.444444,,1.490196,1.47619,1.45045,1.444444,,1.469122,Matt Hobar,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_Fight_Night:...,Matt Hobar,Aaron Phillips,Bantamweight,Matt Hobar,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 29–28)",3.0,5:00,,Preliminary card (Fox Sports 2),https://en.wikipedia.org/wiki/Matt_Hobar,,9,Unanimous Decision,Prelim,,,2014-08-23,287,UFC Fight Night: Henderson vs. dos Anjos,1
4,Aaron Phillips,Sam Sicilia,https://www.bestfightodds.com/events/ufc-173-b...,Aaron Phillips,2.3,,2.25,2.3,2.25,2.3,,2.32,2.28,2.3,2.3,,2.288889,Sam Sicilia,Aaron Phillips,https://en.wikipedia.org/wiki/UFC_173,Sam Sicilia,Aaron Phillips,Featherweight,Sam Sicilia,def,Aaron Phillips,"Decision (unanimous) (29–28, 29–28, 30–27)",3.0,5:00,,Preliminary card (UFC Fight Pass),https://en.wikipedia.org/wiki/Sam_Sicilia,,11,Unanimous Decision,Prelim,,,2014-05-24,273,UFC 173: Barão vs. Dillashaw,0
