# Web Scraping UnderDog

In [260]:
from selenium import webdriver
import pandas as pd
from bs4 import BeautifulSoup
import re
import json
import numpy as np
import os, sys

In [261]:
main_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)

In [262]:
driver = webdriver.Chrome(main_dir + "/UnderDog/chromedriver")

  """Entry point for launching an IPython kernel.


In [263]:
driver.get('https://underdogfantasy.com/pick-em/higher-lower')

In [264]:
content = driver.page_source
soup = BeautifulSoup(content, "html.parser")

In [265]:
results = soup.find_all("div", class_="styles__overUnderCell__KgzNn")
dataframe = pd.DataFrame(columns = ["player_name","prop_type","UnderDog Line"])
for a in results:
    name_line = a.find_all("div", class_="styles__overUnderCell__qdEk_")
    name = a.find("h1", class_="styles__playerName__jW6mb").text.strip()
    line = a.find("p",class_="styles__statLine__K1NYh").text.strip()
    for x in name_line:
        line = x.find("p",class_="styles__statLine__K1NYh").text.strip()
        bet_type = re.split('\d*\.?\d+',line)[1].strip()
        line = re.findall('\d*\.?\d+',line)[0]
        newRow = [name,bet_type,line]
        dataframe.loc[len(dataframe)] = newRow

dataframe['player_name'] = dataframe['player_name'].str.lower()
dataframe['prop_type'] = dataframe['prop_type'].str.lower()

In [266]:

#NFL Props
dataframe['prop_type'] = np.where(dataframe['prop_type']=='rushing yards', 'rush yds', 
                           np.where(dataframe['prop_type']=='rush yards', 'rush yds', 
                            np.where(dataframe['prop_type']=='passing yards', 'pass yds',
                            np.where(dataframe['prop_type']=='passing attempts', 'pass attempts',
                            np.where(dataframe['prop_type'] =='receiving yards', 'receiving yds',                 
                                            dataframe['prop_type'])))))
#MLB Props
dataframe['prop_type'] = np.where(dataframe['prop_type']=='strikeouts', 'total strikeouts', 
                                  dataframe['prop_type'])

#NHL Props
dataframe['prop_type'] = np.where(dataframe['prop_type']=='shots', 'shots on goal', 
                                  dataframe['prop_type'])

#NBA Props
dataframe['prop_type'] = np.where(dataframe['prop_type']=='pts + rebs + asts', 'pts+rebs+asts',
                                  np.where(dataframe['prop_type']=='blocks + steals', 'steals+blocks', 
                                      dataframe['prop_type']))

# Functions That Will Be Called

In [267]:
def prop_type(string):
    type_ = re.findall('(.+?)\(', string)[1].lower().strip(')')
    return type_

def player_name(string):
    name = re.findall('(.+?)\(', string)[0].lower().strip()
    return name

In [268]:
def get_picks(pp, pin, league):
    df = pp[pp['league']==league]
    df = df.merge(pin, how='left', on=['player_name', 'prop_type'])
    df.dropna(inplace=True)
    df.drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'], inplace=True)
    df.rename(columns={'over_points':'pinnacle_line'}, inplace=True)
    
    df['favored'] = np.where(df['min_price']==df['over_price'], 'over', 'under')
    
    df['edge'] = np.where((df['pinnacle_line']<df['pp_line'])&(df['favored']=='under'), 'edge',
                         np.where((df['pinnacle_line']>df['pp_line'])&(df['favored']=='over'), 'edge',
                                 np.where(df['pinnacle_line']==df['pp_line'], 'equal',
                                         np.where((df['pinnacle_line']<df['pp_line'])&(df['favored']=='over'), 'buffer',
                                                 np.where((df['pinnacle_line']>df['pp_line'])&(df['favored']=='under'), 'buffer', 'else')))))
    df.sort_values(by='min_price', inplace=True)
    return df

def matchup_cleaning(data):
    player_props = []
    for i, d in enumerate(data['data']):
        try:
            if d['special']['category'].lower() == 'player props':
                data = {}
                data['prop_name'] = d['special']['description']
                data['prop_id'] = d['id']
                data['over_id'] = d['participants'][0]['id']
                data['under_id'] = d['participants'][1]['id']
                player_props.append(data)        
                
            
        except:
            continue
    return player_props


def straight_cleaning(data):
    prop_prices = []
    for d in data['data']:
        try:

            row = {}
            row['prop_id'] = d['matchupId']

            row['over_id'] = d['prices'][0]['participantId']
            row['over_points'] = d['prices'][0]['points']
            row['over_price'] = d['prices'][0]['price']

            row['under_id'] = d['prices'][1]['participantId']
            row['under_points'] = d['prices'][1]['points']
            row['under_price'] = d['prices'][1]['price']   

            prop_prices.append(row)

        except:
            continue
    return prop_prices

def df_manipulation(player_props,prop_prices):
    prop_df = pd.DataFrame(player_props)
    prices_df = pd.DataFrame(prop_prices)
    
    temp_df = pd.merge(prop_df, prices_df, how='left', on=['prop_id', 'over_id', 'under_id'])
    temp_df.dropna(inplace=True)
    temp_df['min_price'] = np.where(temp_df['over_price']<=temp_df['under_price'], 
                                   temp_df['over_price'], temp_df['under_price'])
    
    temp_df['player_name'] = temp_df['prop_name'].apply(player_name)
    temp_df['prop_type'] = temp_df['prop_name'].apply(prop_type)
    temp_df.drop(columns='prop_name', inplace=True)
    temp_df.drop_duplicates(inplace=True)
    
    return temp_df


# Pinnacle NFL Scraping

In [269]:
f = open(main_dir + "/Pinnacle/NFL/nfl_matchups.json")

data = json.load(f)
player_props = matchup_cleaning(data)

f.close()

In [270]:
f = open(main_dir + "/Pinnacle/NFL/nfl_straight.json")

data = json.load(f)
prop_prices = straight_cleaning(data)

f.close()

In [271]:
nfl_df = df_manipulation(player_props,prop_prices)

In [272]:
nfl_final = nfl_df.merge(dataframe, how='right', on=['player_name', 'prop_type'])
nfl_final.dropna(inplace = True)
nfl_final = nfl_final.sort_values(by = 'min_price', ascending = True)
nfl_final.drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'], inplace=True)
nfl_final.rename(columns={'over_points':'pinnacle_line'}, inplace=True)
nfl_final['favored'] = np.where(nfl_final['min_price']==nfl_final['over_price'], 'over', 'under')
nfl_final.sort_values(by='min_price', inplace=True)

In [273]:
nfl_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
19,2.5,129.0,-175.0,-175.0,tre'quan smith,receptions,2.0,under
35,2.5,129.0,-173.0,-173.0,a.j. green,receptions,2.0,under
53,34.5,120.0,-159.0,-159.0,daniel jones,rush yds,34.5,under
73,223.5,120.0,-158.0,-158.0,taylor heinicke,pass yds,225.5,under
31,4.5,-157.0,117.0,-157.0,rondale moore,receptions,5.0,over
15,25.5,-151.0,114.0,-151.0,juwan johnson,receiving yds,25.5,over
17,32.5,109.0,-144.0,-144.0,taysom hill,rush yds,33.5,under
33,4.5,-141.0,107.0,-141.0,zach ertz,receptions,5.0,over
16,2.5,105.0,-140.0,-140.0,juwan johnson,receptions,2.0,under
34,24.5,105.0,-139.0,-139.0,a.j. green,receiving yds,24.5,under


# Pinnacle MLB Scraping

In [274]:
f = open(main_dir + "/Pinnacle/MLB/mlb_matchups.json")
  
data = json.load(f)
player_props = matchup_cleaning(data)

f.close()

In [275]:
f = open(main_dir + "/Pinnacle/MLB/mlb_straight.json")
  
data = json.load(f)
prop_prices = straight_cleaning(data)

f.close()

In [276]:
mlb_df = df_manipulation(player_props, prop_prices)

In [277]:
mlb_final = mlb_df.merge(dataframe, how='right', on=['player_name', 'prop_type'])
mlb_final.dropna(inplace = True)
mlb_final = mlb_final.sort_values(by = 'min_price', ascending = True)
mlb_final.drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'], inplace=True)
mlb_final.rename(columns={'over_points':'pinnacle_line'}, inplace=True)
mlb_final['favored'] = np.where(mlb_final['min_price']==mlb_final['over_price'], 'over', 'under')
mlb_final.sort_values(by='min_price', inplace=True)

In [278]:
mlb_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored


# Pinnacle NHL Scraping

In [279]:
f = open(main_dir + "/Pinnacle/NHL/nhl_matchups.json")
  
data = json.load(f)
player_props = matchup_cleaning(data)

f.close()

In [280]:
f = open(main_dir + "/Pinnacle/NHL/nhl_straight.json")
  
data = json.load(f)
prop_prices = straight_cleaning(data)

f.close()

In [281]:
nhl_df = df_manipulation(player_props, prop_prices)

KeyError: 'prop_id'

In [282]:
nhl_final = nhl_df.merge(dataframe, how='right', on=['player_name', 'prop_type'])
nhl_final.dropna(inplace = True)
nhl_final = nhl_final.sort_values(by = 'min_price', ascending = True)
nhl_final.drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'], inplace=True)
nhl_final.rename(columns={'over_points':'pinnacle_line'}, inplace=True)
nhl_final['favored'] = np.where(nhl_final['min_price']==nhl_final['over_price'], 'over', 'under')
nhl_final.sort_values(by='min_price', inplace=True)
nhl_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored


# Pinnacle NBA Scraping

In [283]:
# Opening JSON file
f = open(main_dir + "/Pinnacle/NBA/nba_matchups.json")

data = json.load(f)
player_props = matchup_cleaning(data)

f.close()

In [284]:
f = open(main_dir + "/Pinnacle/NBA/nba_straight.json")
  
data = json.load(f)
prop_prices = straight_cleaning(data)

f.close()

In [285]:
nba_df = df_manipulation(player_props, prop_prices)
nba_df['prop_type'].unique()

array(['steals+blocks', 'double+double', 'turnovers', 'blocks', 'assists',
       'points', '3 point fg', 'pts+rebs+asts', 'rebounds'], dtype=object)

In [286]:
nba_final = nba_df.merge(dataframe, how = 'right', on = ['player_name','prop_type'])
nba_final.dropna(inplace = True)
nba_final.drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'], inplace=True)
nba_final.rename(columns={'over_points':'pinnacle_line'}, inplace=True)
nba_final['favored'] = np.where(nba_final['min_price']==nba_final['over_price'], 'over', 'under')
nba_final = nba_final.sort_values(by='min_price', ascending=True)
nba_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
146,6.5,118.0,-159.0,-159.0,james harden,rebounds,6.0,under
182,5.5,-157.0,117.0,-157.0,lebron james,assists,6.0,over
133,4.5,-145.0,109.0,-145.0,brook lopez,rebounds,4.5,over
165,7.5,107.0,-142.0,-142.0,paul george,rebounds,7.5,under
164,4.5,104.0,-138.0,-138.0,paul george,assists,4.5,under
142,11.5,103.0,-136.0,-136.0,joel embiid,rebounds,11.5,under
145,9.5,103.0,-136.0,-136.0,james harden,assists,9.5,under
191,9.5,102.0,-135.0,-135.0,anthony davis,rebounds,9.5,under
183,8.5,-132.0,-101.0,-132.0,lebron james,rebounds,8.5,over
188,1.5,-130.0,-102.0,-130.0,anthony davis,blocks,1.5,over


# All Sports Dataframe

In [287]:
full_df = nhl_final.append(nfl_final).append(mlb_final).append(nba_final)
pd.set_option('display.max_rows',None)
full_df.sort_values(by = 'min_price', ascending = True)

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
19,2.5,129.0,-175.0,-175.0,tre'quan smith,receptions,2.0,under
35,2.5,129.0,-173.0,-173.0,a.j. green,receptions,2.0,under
146,6.5,118.0,-159.0,-159.0,james harden,rebounds,6.0,under
53,34.5,120.0,-159.0,-159.0,daniel jones,rush yds,34.5,under
73,223.5,120.0,-158.0,-158.0,taylor heinicke,pass yds,225.5,under
31,4.5,-157.0,117.0,-157.0,rondale moore,receptions,5.0,over
182,5.5,-157.0,117.0,-157.0,lebron james,assists,6.0,over
15,25.5,-151.0,114.0,-151.0,juwan johnson,receiving yds,25.5,over
133,4.5,-145.0,109.0,-145.0,brook lopez,rebounds,4.5,over
17,32.5,109.0,-144.0,-144.0,taysom hill,rush yds,33.5,under


# Creating Buffers

In [288]:
full_df['UnderDog Line'] = full_df['UnderDog Line'].astype(float)
conditions = [
    full_df['UnderDog Line'] == full_df['pinnacle_line'],
    ((full_df['UnderDog Line'] > full_df['pinnacle_line'])
         & (full_df['favored'] == 'under')),
    ((full_df['UnderDog Line'] < full_df['pinnacle_line'])
         & (full_df['favored'] == 'over')),
    full_df['UnderDog Line'] != full_df['pinnacle_line']
]
values = [
    'equal','edge', 'edge', 'buffer'
]
full_df['edge'] = np.select(conditions, values)
full_df

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored,edge
19,2.5,129.0,-175.0,-175.0,tre'quan smith,receptions,2.0,under,buffer
35,2.5,129.0,-173.0,-173.0,a.j. green,receptions,2.0,under,buffer
53,34.5,120.0,-159.0,-159.0,daniel jones,rush yds,34.5,under,equal
73,223.5,120.0,-158.0,-158.0,taylor heinicke,pass yds,225.5,under,edge
31,4.5,-157.0,117.0,-157.0,rondale moore,receptions,5.0,over,buffer
15,25.5,-151.0,114.0,-151.0,juwan johnson,receiving yds,25.5,over,equal
17,32.5,109.0,-144.0,-144.0,taysom hill,rush yds,33.5,under,edge
33,4.5,-141.0,107.0,-141.0,zach ertz,receptions,5.0,over,buffer
16,2.5,105.0,-140.0,-140.0,juwan johnson,receptions,2.0,under,buffer
34,24.5,105.0,-139.0,-139.0,a.j. green,receiving yds,24.5,under,equal


# Dropping Buffers

In [289]:
no_buffer_df = full_df[full_df['edge']!= 'buffer']
no_buffer_df

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored,edge
53,34.5,120.0,-159.0,-159.0,daniel jones,rush yds,34.5,under,equal
73,223.5,120.0,-158.0,-158.0,taylor heinicke,pass yds,225.5,under,edge
15,25.5,-151.0,114.0,-151.0,juwan johnson,receiving yds,25.5,over,equal
17,32.5,109.0,-144.0,-144.0,taysom hill,rush yds,33.5,under,edge
34,24.5,105.0,-139.0,-139.0,a.j. green,receiving yds,24.5,under,equal
58,13.5,105.0,-139.0,-139.0,trevor lawrence,rush yds,14.5,under,edge
83,20.5,104.0,-138.0,-138.0,rachaad white,rush yds,20.5,under,equal
11,4.5,100.0,-133.0,-133.0,chris olave,receptions,4.5,under,equal
90,55.5,-100.0,-132.0,-132.0,christian mccaffrey,rush yds,55.5,under,equal
38,0.5,-100.0,-132.0,-132.0,marcus mariota,interceptions,0.5,under,equal
