# Get Closing Odds From Odds Time Series

Bestfightodds periodically scrapes the various betting sites and posts new odds only when they are updated. Because some betting sites offer live betting whereas others don't, the final quoted odds across sites might not be comparable. As such, I pull pre-fight closing odds as of 12 hours before the last time odds are posted for the UFC card/event by any site. This should closely approximate betting just before the fight (after weigh-ins).

In [27]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
pd.set_option('display.max_columns', 500)
from zipfile import ZipFile
from datetime import timedelta

In [28]:
# Load every CSV of straight-bet odds stored in the archive and stack them into one frame.
# Both the archive and each member handle are opened in `with` blocks so they are
# closed as soon as they have been read, instead of being left open for the session.
dfs = []
with ZipFile('../../data/bestfightodds_data/straight_bets.zip') as zip_file:
    for text_file in zip_file.infolist():
        # skip anything in the archive that is not a CSV
        if not text_file.filename.endswith('.csv'):
            continue
        with zip_file.open(text_file.filename) as csv_handle:
            dfs.append(pd.read_csv(csv_handle))
df = pd.concat(dfs)

In [29]:
# Merge each card's date onto its bets, then keep only cards dated 2008 or later.
cards = pd.read_csv("../../data/bestfightodds_data/bestfightodds_urls.csv")
cards.rename(columns = {'Date':'Card_Date'}, inplace = True)
# validate="1:m" makes the merge raise if a card URL is duplicated in `cards`,
# which would otherwise silently duplicate bet rows.
df = pd.merge(cards[['Card_Date', 'fight_odds_url']], df, left_on = 'fight_odds_url', right_on = 'url', validate = "1:m")
df['Card_Date'] = pd.to_datetime(df['Card_Date'])
# Take an explicit copy of the filtered rows: new columns are assigned onto `df`
# afterwards, and assigning onto a filtered selection can raise SettingWithCopyWarning.
df = df.loc[df.Card_Date.dt.year >= 2008, :].copy()

In [32]:
# 'dates' is parsed as milliseconds since the Unix epoch; store the parsed
# timestamps in 'Date' and preview the first rows.
quote_timestamps = pd.to_datetime(df['dates'], unit = 'ms')
df['Date'] = quote_timestamps
preview = df.head()
display(preview)

Unnamed: 0,Card_Date,fight_odds_url,Bet,betsite,dates,odds,fighter1,fighter2,class,url,Date,odds_close_time
0,2020-03-14,https://www.bestfightodds.com/events/ufc-on-es...,Charles Oliveira,5Dimes,1581348609000,-140,Charles Oliveira,Kevin Lee,even,https://www.bestfightodds.com/events/ufc-on-es...,2020-02-10 15:30:09,2020-03-14 12:18:09
1,2020-03-14,https://www.bestfightodds.com/events/ufc-on-es...,Charles Oliveira,5Dimes,1581348790000,-120,Charles Oliveira,Kevin Lee,even,https://www.bestfightodds.com/events/ufc-on-es...,2020-02-10 15:33:10,2020-03-14 12:18:09
2,2020-03-14,https://www.bestfightodds.com/events/ufc-on-es...,Charles Oliveira,5Dimes,1581351854000,100,Charles Oliveira,Kevin Lee,even,https://www.bestfightodds.com/events/ufc-on-es...,2020-02-10 16:24:14,2020-03-14 12:18:09
3,2020-03-14,https://www.bestfightodds.com/events/ufc-on-es...,Charles Oliveira,5Dimes,1581361215000,115,Charles Oliveira,Kevin Lee,even,https://www.bestfightodds.com/events/ufc-on-es...,2020-02-10 19:00:15,2020-03-14 12:18:09
4,2020-03-14,https://www.bestfightodds.com/events/ufc-on-es...,Charles Oliveira,5Dimes,1581361392000,120,Charles Oliveira,Kevin Lee,even,https://www.bestfightodds.com/events/ufc-on-es...,2020-02-10 19:03:12,2020-03-14 12:18:09


In [None]:
# Time the odds quotes will be taken from: 12 hours before the final odds quoted
# for a UFC card (grouped by 'url') by any site.
df['odds_close_time'] = df.groupby('url')['Date'].transform("max") - timedelta(hours = 12)

# Confirm each (card, site, matchup, bet) had odds quoted at or before that time, and no
# more than 3 days before it (to avoid anomalies where a site may have stopped
# offering the bet well ahead of the close time).
df['recent_odds_window'] = df['odds_close_time'] - timedelta(days = 3)

# Flag every quote that falls inside [recent_odds_window, odds_close_time] in one
# vectorised pass, then reduce per group with any(). This yields the same boolean
# Series as a per-group apply(lambda ...) without running a Python callback per group.
quoted_in_window = (df['Date'] <= df['odds_close_time']) & (df['Date'] >= df['recent_odds_window'])
has_odds = (
    df.assign(quoted_in_window = quoted_in_window)
      .groupby(['url', 'betsite', 'fighter1', 'fighter2', 'Bet'])['quoted_in_window']
      .any()
)
# groupby.apply returned an unnamed Series; keep that so downstream code sees the same result
has_odds.name = None