# Import Packages, Set some Options

In [1]:
import csv
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py
from plotly.offline import iplot, init_notebook_mode
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC

# Let pandas display up to 1000 rows before truncating a DataFrame's output.
pd.options.display.max_rows = 1000

# Chrome options object that bovada_scrape hands to webdriver.Chrome.
# With headless set to True no browser window is shown; set it to False to watch the browser.
options = Options()
options.headless = True

# Define a couple of helper functions for data analysis

In [3]:
def max_minus_min(x):
    """Return the spread of ``x``: its largest value minus its smallest value."""
    highest = max(x)
    lowest = min(x)
    return highest - lowest
def last_minus_avg(x):
    """Return the last value of ``x`` minus the arithmetic mean of ``x``.

    Parameters
    ----------
    x : iterable of numbers
        Any iterable of numeric values (list, tuple, pandas Series, generator, ...).
        Values are taken in iteration order, so for a Series the "last" value is
        the one in the final position, regardless of its index label.

    Returns
    -------
    number
        ``last - mean``; positive when the most recent value sits above the average.

    Raises
    ------
    ValueError
        If ``x`` is empty (mirrors what ``max()``/``min()`` do in ``max_minus_min``).
    """
    # Materialise once so generators work and the data is only iterated a single time.
    values = list(x)
    if not values:
        raise ValueError("last_minus_avg() arg is an empty sequence")
    return values[-1] - sum(values) / len(values)

# Build the Bovada Scraping Function

In [4]:
def bovada_scrape(url, file, driver_path='/Users/aaronsmith/chromedriver', wait_seconds=5):
    """Scrape the odds shown on a Bovada page and append them to a CSV history.

    Parameters
    ----------
    url : str
        Page to load in Chrome.
    file : str
        CSV used as the running history. If it already exists its rows are kept
        and the new snapshot is added after them; the file is then rewritten.
    driver_path : str, optional
        Location of the local chromedriver executable. Defaults to the path this
        notebook was originally written with.
    wait_seconds : int or float, optional
        Implicit wait, in seconds, applied to the element lookups.

    Returns
    -------
    pandas.DataFrame
        The contents of ``file`` after the update, as re-read by ``pd.read_csv``.
        Rows added by this call carry the columns ``outcomes``, ``bet_price``,
        ``title`` and ``date``.

    Raises
    ------
    IndexError
        If outcomes were found on the page but there is no ``market-header``
        element, or there are fewer ``bet-price`` elements than ``outcomes``.
    """
    # Load the existing history first. Only "no usable file yet" is treated as a
    # fresh start; any other read error propagates so that a damaged history file
    # is never silently overwritten by the save below.
    try:
        history = pd.read_csv(file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        history = None

    # The browser is started with the module-level `options` object.
    browser = webdriver.Chrome(driver_path, options=options)
    try:
        browser.get(url)
        browser.implicitly_wait(wait_seconds)

        # Only the elements that are actually stored are looked up: every lookup
        # is a WebDriver round trip and may block for the implicit wait when
        # nothing matches.
        title_elems = browser.find_elements(By.CLASS_NAME, 'market-header')
        outcome_elems = browser.find_elements(By.CLASS_NAME, 'outcomes')
        price_elems = browser.find_elements(By.CLASS_NAME, 'bet-price')
        # timestamp of this snapshot, so prices can be followed over time
        now = datetime.datetime.now()

        if outcome_elems and not title_elems:
            raise IndexError(
                "found {} 'outcomes' elements but no 'market-header' element at {}".format(
                    len(outcome_elems), url))
        if len(price_elems) < len(outcome_elems):
            raise IndexError(
                "found {} 'outcomes' elements but only {} 'bet-price' elements at {}".format(
                    len(outcome_elems), len(price_elems), url))

        # Read every text while the session is still alive. Outcomes and prices
        # are paired by position; surplus price elements are ignored.
        outcome_texts = [elem.text for elem in outcome_elems]
        price_texts = [elem.text for elem in price_elems[:len(outcome_elems)]]
        # The title is the same for every row, so it is fetched once.
        title_text = title_elems[0].text if outcome_elems else None
    finally:
        # quit() ends the whole WebDriver session even when scraping failed;
        # close() would only close the current window.
        browser.quit()

    snapshot = pd.DataFrame({
        'outcomes': outcome_texts,
        'bet_price': price_texts,
        'title': [title_text] * len(outcome_texts),
    })
    snapshot['date'] = now
    # "EVEN" is stored as 0 so that the column can be held as floats.
    snapshot['bet_price'] = snapshot['bet_price'].replace('EVEN', '0').astype(float)

    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    if history is None:
        combined = snapshot
    else:
        combined = pd.concat([history, snapshot], ignore_index=True)

    # Overwrite the file with the full history, then return it exactly as it is
    # stored on disk so that old and new rows share the same dtypes.
    combined.to_csv(file, index=False)
    return pd.read_csv(file)

# Now provide a Bovada URL and a CSV filename, then run the scraper to return the data

In [6]:
# Bovada page with the 2019 World Series odds, and the CSV that holds their history
url = "https://www.bovada.lv/sports/baseball/mlb/2019-world-series-odds-to-win-201910240000"
file = "mlb_new.csv"
# run the scraper and keep the returned DataFrame for the analysis below
df_mlb = bovada_scrape(url=url, file=file)
# preview the first five rows
df_mlb.head(5)

Unnamed: 0,outcomes,bet_price,title,date
0,Houston Astros,225.0,2019 World Series - Odds To Win,2019-08-08 09:51:22.544719
1,Los Angeles Dodgers,275.0,2019 World Series - Odds To Win,2019-08-08 09:51:22.544719
2,New York Yankees,425.0,2019 World Series - Odds To Win,2019-08-08 09:51:22.544719
3,Atlanta Braves,850.0,2019 World Series - Odds To Win,2019-08-08 09:51:22.544719
4,Minnesota Twins,1500.0,2019 World Series - Odds To Win,2019-08-08 09:51:22.544719
