# Web Scraping UnderDog

In [8]:
import json
import os, sys
import re

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

In [9]:
# Parent of the current working directory; the chromedriver and Pinnacle JSON
# paths used later in this notebook are built from it.
# os.path.join is used instead of string concatenation so that a cwd that already
# ends with a separator (the filesystem root) does not produce a doubled "//".
main_dir = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

In [10]:
# Start Chrome through the chromedriver binary stored under <main_dir>/UnderDog.
# The binary path is wrapped in a Service object: passing it positionally is the
# `executable_path` form, which Selenium 4 deprecated (and later releases dropped).
driver = webdriver.Chrome(service=Service(main_dir + "/UnderDog/chromedriver"))

  """Entry point for launching an IPython kernel.


In [11]:
# Point the automated browser at the UnderDog higher/lower pick'em board.
underdog_url = 'https://underdogfantasy.com/pick-em/higher-lower'
driver.get(underdog_url)

In [12]:
# Snapshot the HTML currently held by the browser and parse it with the
# standard-library HTML parser backend.
content = driver.page_source
soup = BeautifulSoup(markup=content, features="html.parser")

In [13]:
# Build one row per (player, stat line) from the over/under cards in the parsed page.
# Rows are gathered in a list and converted to a DataFrame once, rather than
# growing the frame with .loc inside the loop.
line_number_re = re.compile(r'\d*\.?\d+')  # first numeric token of a stat line, e.g. "64.5"

results = soup.find_all("div", class_="styles__overUnderCell__KgzNn")
rows = []
for card in results:
    name_tag = card.find("h1", class_="styles__playerName__jW6mb")
    if name_tag is None:
        # No player heading on this card: nothing to attribute its lines to.
        continue
    name = name_tag.text.strip().lower()

    for cell in card.find_all("div", class_="styles__overUnderCell__qdEk_"):
        stat_tag = cell.find("p", class_="styles__statLine__K1NYh")
        if stat_tag is None:
            continue
        stat_text = stat_tag.text.strip()
        number = line_number_re.search(stat_text)
        if number is None:
            # Stat line without a number cannot be compared to a Pinnacle line.
            continue
        # Everything after the first number is the prop label. Taking the whole
        # remainder keeps labels that themselves contain digits intact.
        bet_type = stat_text[number.end():].strip().lower()
        # Stored as float so the line can be compared numerically with pinnacle_line.
        rows.append([name, bet_type, float(number.group())])

dataframe = pd.DataFrame(rows, columns=["player_name", "prop_type", "UnderDog Line"])

In [14]:

# Rename UnderDog stat labels so they match the prop_type values derived from
# the Pinnacle descriptions (the two sources are joined on prop_type later).
# Labels that are not listed keep their scraped value.
prop_aliases = {
    # NFL props
    'rushing yards': 'rush yds',
    'rush yards': 'rush yds',
    'passing yards': 'pass yds',
    'passing attempts': 'pass attempts',
    'receiving yards': 'receiving yds',
    # MLB props
    'strikeouts': 'total strikeouts',
    # NHL props
    'shots': 'shots on goal',
}
for underdog_label, pinnacle_label in prop_aliases.items():
    dataframe['prop_type'] = np.where(dataframe['prop_type'] == underdog_label,
                                      pinnacle_label,
                                      dataframe['prop_type'])

# Functions That Will Be Called

In [15]:
def prop_type(string):
    """Return the lower-cased prop label from a prop description.

    The label is the text inside the first pair of parentheses, e.g.
    ``"Kyler Murray (Rush Yds)(must play)"`` -> ``"rush yds"``.

    The label is found by matching the parenthesised group itself, so it does
    not depend on a second ``(`` following it, and whitespace between groups
    does not leak a stray ``)`` into the result.

    Args:
        string: prop description of the form ``"<player> (<prop>)..."``.

    Returns:
        The prop label, lower-cased and stripped of surrounding whitespace.

    Raises:
        IndexError: if ``string`` contains no parenthesised group (same
            exception type the previous list-index implementation raised).
    """
    match = re.search(r'\(([^()]*)\)', string)
    if match is None:
        raise IndexError("no parenthesised prop type in %r" % (string,))
    return match.group(1).lower().strip()

def player_name(string):
    """Return the lower-cased player name from a prop description.

    The name is everything before the first ``(``, with surrounding whitespace
    removed, e.g. ``"Kyler Murray (Rush Yds)(must play)"`` -> ``"kyler murray"``.
    A description that contains no parenthesis is returned whole (lower-cased
    and stripped) instead of raising ``IndexError``.

    Args:
        string: prop description of the form ``"<player> (<prop>)..."``.

    Returns:
        The normalised player name.
    """
    name, _, _ = string.partition('(')
    return name.lower().strip()

In [16]:
def get_picks(pp, pin, league):
    """Join one league's lines from ``pp`` with ``pin`` and label each pick.

    Args:
        pp: DataFrame with at least ``league``, ``player_name``, ``prop_type``
            and ``pp_line`` columns.
        pin: DataFrame with ``player_name``, ``prop_type``, ``prop_id``,
            ``over_id``, ``under_id``, ``over_points``, ``under_points``,
            ``over_price`` and ``min_price`` columns.
        league: value of ``pp['league']`` to keep.

    Returns:
        A DataFrame sorted by ``min_price`` in which rows with any missing value
        are removed, ``over_points`` is renamed ``pinnacle_line`` and two
        columns are added: ``favored`` ('over' when ``min_price`` equals
        ``over_price``, otherwise 'under') and ``edge`` ('edge', 'equal',
        'buffer' or 'else').
    """
    picks = (
        pp[pp['league'] == league]
        .merge(pin, how='left', on=['player_name', 'prop_type'])
        .dropna()
        .drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'])
        .rename(columns={'over_points': 'pinnacle_line'})
    )

    picks['favored'] = np.where(picks['min_price'] == picks['over_price'], 'over', 'under')

    pin_below = picks['pinnacle_line'] < picks['pp_line']
    pin_above = picks['pinnacle_line'] > picks['pp_line']
    pin_equal = picks['pinnacle_line'] == picks['pp_line']
    fav_over = picks['favored'] == 'over'
    fav_under = picks['favored'] == 'under'

    # Checked in this order; the first condition that holds supplies the label.
    conditions = [
        pin_below & fav_under,
        pin_above & fav_over,
        pin_equal,
        pin_below & fav_over,
        pin_above & fav_under,
    ]
    labels = ['edge', 'edge', 'equal', 'buffer', 'buffer']
    picks['edge'] = np.select(conditions, labels, default='else')

    picks = picks.sort_values(by='min_price')
    return picks

# Pinnacle NFL Scraping

In [17]:
# Read the NFL matchups JSON and collect every entry whose special category is
# "player props" (description plus the ids of its two participants).
# The context manager closes the file even if json.load raises.
with open(main_dir + "/Pinnacle/NFL/nfl_matchups.json") as f:
    data = json.load(f)

player_props = []
for matchup in data['data']:
    try:
        if matchup['special']['category'].lower() == 'player props':
            player_props.append({
                'prop_name': matchup['special']['description'],
                'prop_id': matchup['id'],
                # NOTE(review): assumes participants[0] is the Over side and
                # participants[1] the Under side -- confirm against the feed.
                'over_id': matchup['participants'][0]['id'],
                'under_id': matchup['participants'][1]['id'],
            })
    except (KeyError, IndexError, TypeError, AttributeError):
        # Entry lacks the expected special/participants structure (missing or
        # None fields): skip it. Other errors are no longer silently swallowed.
        continue

In [18]:
# Read the NFL straight-market JSON and collect, per entry, the matchup id plus
# participant id / points / price of its first two prices.
# The context manager closes the file even if json.load raises.
with open(main_dir + "/Pinnacle/NFL/nfl_straight.json") as f:
    data = json.load(f)

prop_prices = []
for market in data['data']:
    try:
        # NOTE(review): assumes prices[0] is the Over side and prices[1] the
        # Under side -- confirm against the feed.
        over, under = market['prices'][0], market['prices'][1]
        prop_prices.append({
            'prop_id': market['matchupId'],
            'over_id': over['participantId'],
            'over_points': over['points'],
            'over_price': over['price'],
            'under_id': under['participantId'],
            'under_points': under['points'],
            'under_price': under['price'],
        })
    except (KeyError, IndexError, TypeError):
        # Entry without two prices carrying participantId/points/price: skip it.
        # Other errors are no longer silently swallowed.
        continue

In [19]:
# Tabulate the parsed NFL props and prices. Columns are listed explicitly (same
# names and order as the record keys) so that an empty record list still yields
# a frame with the id columns the later merge joins on, instead of a
# column-less frame that makes the merge raise KeyError.
nfl_propd = pd.DataFrame(player_props,
                         columns=['prop_name', 'prop_id', 'over_id', 'under_id'])
nfl_prices = pd.DataFrame(prop_prices,
                          columns=['prop_id', 'over_id', 'over_points', 'over_price',
                                   'under_id', 'under_points', 'under_price'])

In [20]:
# Attach prices to each NFL prop (left join on the three ids), discard rows with
# any missing value, then record the lower of the two prices as min_price.
nfl_df = (
    nfl_propd
    .merge(nfl_prices, how='left', on=['prop_id', 'over_id', 'under_id'])
    .dropna()
    .assign(min_price=lambda priced: np.where(priced['over_price'] <= priced['under_price'],
                                              priced['over_price'],
                                              priced['under_price']))
)
nfl_df

Unnamed: 0,prop_name,prop_id,over_id,under_id,over_points,over_price,under_points,under_price,min_price
0,Rondale Moore (Long Reception)(must play),1561603507,1561603508,1561603509,19.5,-108.0,19.5,-122.0,-122.0
4,Zach Ertz (TD Receptions)(must play),1561528424,1561528425,1561528426,0.5,226.0,0.5,-352.0,-352.0
5,Tre'Quan Smith (Receiving Yds)(must play),1561603385,1561603386,1561603387,27.5,-112.0,27.5,-118.0,-118.0
6,Chris Olave (Receiving Yds)(must play),1561603367,1561603368,1561603369,60.5,-113.0,60.5,-117.0,-117.0
7,Marquez Callaway (Long Reception)(must play),1561603400,1561603401,1561603402,16.5,-119.0,16.5,-111.0,-119.0
8,Kyler Murray (Rush TD)(must play),1561603448,1561603449,1561603450,0.5,239.0,0.5,-392.0,-392.0
9,Tre'Quan Smith (Long Reception)(must play),1561603388,1561603389,1561603390,16.5,-107.0,16.5,-124.0,-124.0
10,DeAndre Hopkins (TD Receptions)(must play),1561603442,1561603443,1561603444,0.5,165.0,0.5,-236.0,-236.0
11,Kyler Murray (Rush Yds)(must play),1561603445,1561603446,1561603447,29.5,-125.0,29.5,-106.0,-125.0
12,Keith Kirkwood (Receiving Yds)(must play),1561603408,1561603409,1561603410,5.5,-118.0,5.5,-112.0,-118.0


In [21]:
# Split each description into player_name / prop_type, then drop the raw
# description and any fully duplicated rows.
nfl_df = (
    nfl_df
    .assign(player_name=lambda props: props['prop_name'].apply(player_name),
            prop_type=lambda props: props['prop_name'].apply(prop_type))
    .drop(columns='prop_name')
    .drop_duplicates()
)

In [22]:
# Display the NFL props table with player_name / prop_type extracted.
nfl_df

Unnamed: 0,prop_id,over_id,under_id,over_points,over_price,under_points,under_price,min_price,player_name,prop_type
0,1561603507,1561603508,1561603509,19.5,-108.0,19.5,-122.0,-122.0,rondale moore,long reception
4,1561528424,1561528425,1561528426,0.5,226.0,0.5,-352.0,-352.0,zach ertz,td receptions
5,1561603385,1561603386,1561603387,27.5,-112.0,27.5,-118.0,-118.0,tre'quan smith,receiving yds
6,1561603367,1561603368,1561603369,60.5,-113.0,60.5,-117.0,-117.0,chris olave,receiving yds
7,1561603400,1561603401,1561603402,16.5,-119.0,16.5,-111.0,-119.0,marquez callaway,long reception
8,1561603448,1561603449,1561603450,0.5,239.0,0.5,-392.0,-392.0,kyler murray,rush td
9,1561603388,1561603389,1561603390,16.5,-107.0,16.5,-124.0,-124.0,tre'quan smith,long reception
10,1561603442,1561603443,1561603444,0.5,165.0,0.5,-236.0,-236.0,deandre hopkins,td receptions
11,1561603445,1561603446,1561603447,29.5,-125.0,29.5,-106.0,-125.0,kyler murray,rush yds
12,1561603408,1561603409,1561603410,5.5,-118.0,5.5,-112.0,-118.0,keith kirkwood,receiving yds


In [23]:
# Pair the NFL Pinnacle props with the UnderDog board on (player_name, prop_type)
# and keep only rows that are fully populated on both sides.
nfl_final = (
    nfl_df
    .merge(dataframe, how='right', on=['player_name', 'prop_type'])
    .dropna()
    .drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'])
    .rename(columns={'over_points': 'pinnacle_line'})
    # 'over' when the lower price is the over price, otherwise 'under'.
    .assign(favored=lambda m: np.where(m['min_price'] == m['over_price'], 'over', 'under'))
    # One stable sort replaces the two default (unstable) sorts on the same key,
    # so rows with equal min_price keep a deterministic order.
    .sort_values(by='min_price', kind='mergesort')
)

In [24]:
# Display the matched NFL props, ordered by min_price.
nfl_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
629,0.5,-126.0,-105.0,-126.0,kyler murray,interceptions,0.5,over
623,33.5,-123.0,-108.0,-123.0,alvin kamara,receiving yds,34.5,over
621,60.5,-112.0,-119.0,-119.0,alvin kamara,rush yds,64.5,under
633,47.5,-118.0,-112.0,-118.0,zach ertz,receiving yds,48.5,over
632,59.5,-117.0,-113.0,-117.0,deandre hopkins,receiving yds,69.5,over


# Pinnacle MLB Scraping

In [25]:
# Read the MLB matchups JSON and collect every entry whose special category is
# "player props" (description plus the ids of its two participants).
# The context manager closes the file even if json.load raises.
with open(main_dir + "/Pinnacle/MLB/mlb_matchups.json") as f:
    data = json.load(f)

player_props = []
for matchup in data['data']:
    try:
        if matchup['special']['category'].lower() == 'player props':
            player_props.append({
                'prop_name': matchup['special']['description'],
                'prop_id': matchup['id'],
                # NOTE(review): assumes participants[0] is the Over side and
                # participants[1] the Under side -- confirm against the feed.
                'over_id': matchup['participants'][0]['id'],
                'under_id': matchup['participants'][1]['id'],
            })
    except (KeyError, IndexError, TypeError, AttributeError):
        # Entry lacks the expected special/participants structure (missing or
        # None fields): skip it. Other errors are no longer silently swallowed.
        continue

In [27]:
# Read the MLB straight-market JSON and collect, per entry, the matchup id plus
# participant id / points / price of its first two prices.
# The context manager closes the file even if json.load raises.
with open(main_dir + "/Pinnacle/MLB/mlb_straight.json") as f:
    data = json.load(f)

prop_prices = []
for market in data['data']:
    try:
        # NOTE(review): assumes prices[0] is the Over side and prices[1] the
        # Under side -- confirm against the feed.
        over, under = market['prices'][0], market['prices'][1]
        prop_prices.append({
            'prop_id': market['matchupId'],
            'over_id': over['participantId'],
            'over_points': over['points'],
            'over_price': over['price'],
            'under_id': under['participantId'],
            'under_points': under['points'],
            'under_price': under['price'],
        })
    except (KeyError, IndexError, TypeError):
        # Entry without two prices carrying participantId/points/price: skip it.
        # Other errors are no longer silently swallowed.
        continue

In [28]:
# Tabulate the parsed MLB props and prices. Columns are listed explicitly (same
# names and order as the record keys) so that an empty record list still yields
# a frame with the id columns the later merge joins on, instead of a
# column-less frame that makes the merge raise KeyError.
mlb_propd = pd.DataFrame(player_props,
                         columns=['prop_name', 'prop_id', 'over_id', 'under_id'])
mlb_prices = pd.DataFrame(prop_prices,
                          columns=['prop_id', 'over_id', 'over_points', 'over_price',
                                   'under_id', 'under_points', 'under_price'])

In [29]:
# Attach prices to each MLB prop (left join on the three ids), discard rows with
# any missing value, then record the lower of the two prices as min_price.
mlb_df = (
    mlb_propd
    .merge(mlb_prices, how='left', on=['prop_id', 'over_id', 'under_id'])
    .dropna()
    .assign(min_price=lambda priced: np.where(priced['over_price'] <= priced['under_price'],
                                              priced['over_price'],
                                              priced['under_price']))
)

In [30]:
# Split each description into player_name / prop_type, then drop the raw
# description and any fully duplicated rows.
mlb_df = (
    mlb_df
    .assign(player_name=lambda props: props['prop_name'].apply(player_name),
            prop_type=lambda props: props['prop_name'].apply(prop_type))
    .drop(columns='prop_name')
    .drop_duplicates()
)

In [31]:
# Pair the MLB Pinnacle props with the UnderDog board on (player_name, prop_type)
# and keep only rows that are fully populated on both sides.
mlb_final = (
    mlb_df
    .merge(dataframe, how='right', on=['player_name', 'prop_type'])
    .dropna()
    .drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'])
    .rename(columns={'over_points': 'pinnacle_line'})
    # 'over' when the lower price is the over price, otherwise 'under'.
    .assign(favored=lambda m: np.where(m['min_price'] == m['over_price'], 'over', 'under'))
    # One stable sort replaces the two default (unstable) sorts on the same key,
    # so rows with equal min_price keep a deterministic order.
    .sort_values(by='min_price', kind='mergesort')
)

In [32]:
# Display the matched MLB props, ordered by min_price.
mlb_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
733,3.5,-164.0,122.0,-164.0,jameson taillon,total strikeouts,4.0,over
741,5.5,-160.0,119.0,-160.0,justin verlander,total strikeouts,6.0,over
727,6.5,104.0,-139.0,-139.0,blake snell,total strikeouts,6.0,under
720,5.5,-133.0,101.0,-133.0,aaron nola,total strikeouts,5.5,over


# Pinnacle NHL Scraping

In [33]:
# Read the NHL matchups JSON and collect every entry whose special category is
# "player props" (description plus the ids of its two participants).
# The context manager closes the file even if json.load raises.
with open(main_dir + "/Pinnacle/NHL/nhl_matchups.json") as f:
    data = json.load(f)

player_props = []
for matchup in data['data']:
    try:
        if matchup['special']['category'].lower() == 'player props':
            player_props.append({
                'prop_name': matchup['special']['description'],
                'prop_id': matchup['id'],
                # NOTE(review): assumes participants[0] is the Over side and
                # participants[1] the Under side -- confirm against the feed.
                'over_id': matchup['participants'][0]['id'],
                'under_id': matchup['participants'][1]['id'],
            })
    except (KeyError, IndexError, TypeError, AttributeError):
        # Entry lacks the expected special/participants structure (missing or
        # None fields): skip it. Other errors are no longer silently swallowed.
        continue

In [34]:
# Read the NHL straight-market JSON and collect, per entry, the matchup id plus
# participant id / points / price of its first two prices.
# The context manager closes the file even if json.load raises.
with open(main_dir + "/Pinnacle/NHL/nhl_straight.json") as f:
    data = json.load(f)

prop_prices = []
for market in data['data']:
    try:
        # NOTE(review): assumes prices[0] is the Over side and prices[1] the
        # Under side -- confirm against the feed.
        over, under = market['prices'][0], market['prices'][1]
        prop_prices.append({
            'prop_id': market['matchupId'],
            'over_id': over['participantId'],
            'over_points': over['points'],
            'over_price': over['price'],
            'under_id': under['participantId'],
            'under_points': under['points'],
            'under_price': under['price'],
        })
    except (KeyError, IndexError, TypeError):
        # Entry without two prices carrying participantId/points/price: skip it.
        # Other errors are no longer silently swallowed.
        continue

In [35]:
# Tabulate the parsed NHL props and prices. Columns are listed explicitly (same
# names and order as the record keys) so that an empty record list still yields
# a frame with the id columns the later merge joins on, instead of a
# column-less frame that makes the merge raise KeyError.
nhl_propd = pd.DataFrame(player_props,
                         columns=['prop_name', 'prop_id', 'over_id', 'under_id'])
nhl_prices = pd.DataFrame(prop_prices,
                          columns=['prop_id', 'over_id', 'over_points', 'over_price',
                                   'under_id', 'under_points', 'under_price'])

In [36]:
# Attach prices to each NHL prop (left join on the three ids), discard rows with
# any missing value, then record the lower of the two prices as min_price.
nhl_df = (
    nhl_propd
    .merge(nhl_prices, how='left', on=['prop_id', 'over_id', 'under_id'])
    .dropna()
    .assign(min_price=lambda priced: np.where(priced['over_price'] <= priced['under_price'],
                                              priced['over_price'],
                                              priced['under_price']))
)

In [37]:
# Split each description into player_name / prop_type, then drop the raw
# description and any fully duplicated rows.
nhl_df = (
    nhl_df
    .assign(player_name=lambda props: props['prop_name'].apply(player_name),
            prop_type=lambda props: props['prop_name'].apply(prop_type))
    .drop(columns='prop_name')
    .drop_duplicates()
)

In [38]:
# Pair the NHL Pinnacle props with the UnderDog board on (player_name, prop_type)
# and keep only rows that are fully populated on both sides.
nhl_final = (
    nhl_df
    .merge(dataframe, how='right', on=['player_name', 'prop_type'])
    .dropna()
    .drop(columns=['prop_id', 'over_id', 'under_id', 'under_points'])
    .rename(columns={'over_points': 'pinnacle_line'})
    # 'over' when the lower price is the over price, otherwise 'under'.
    .assign(favored=lambda m: np.where(m['min_price'] == m['over_price'], 'over', 'under'))
    # One stable sort replaces the two default (unstable) sorts on the same key,
    # so rows with equal min_price keep a deterministic order.
    .sort_values(by='min_price', kind='mergesort')
)
nhl_final

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
750,2.5,146.0,-202.0,-202.0,tony deangelo,shots on goal,2.0,under
768,2.5,-192.0,141.0,-192.0,mikko rantanen,shots on goal,3.0,over
749,2.5,140.0,-191.0,-191.0,ivan provorov,shots on goal,2.0,under
761,2.5,-190.0,139.0,-190.0,kyle connor,shots on goal,3.0,over
762,2.5,135.0,-184.0,-184.0,blake wheeler,shots on goal,2.0,under
767,2.5,-184.0,135.0,-184.0,valeri nichushkin,shots on goal,3.0,over
774,1.5,-184.0,135.0,-184.0,ryan o'reilly,shots on goal,2.0,over
754,2.5,-183.0,134.0,-183.0,matthew tkachuk,shots on goal,3.0,over
769,2.5,-182.0,135.0,-182.0,cale makar,shots on goal,3.0,over
755,2.5,-181.0,134.0,-181.0,sam bennett,shots on goal,3.0,over


# All Sports Dataframe

In [39]:
# Stack the per-sport tables in the same order as before (NHL, NFL, MLB).
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported way to concatenate frames row-wise.
full_df = pd.concat([nhl_final, nfl_final, mlb_final])
# Show every row of the combined table (notebook-wide display option).
pd.set_option('display.max_rows', None)
# Displayed sorted by min_price; full_df itself keeps the stacked order.
full_df.sort_values(by='min_price', ascending=True)

Unnamed: 0,pinnacle_line,over_price,under_price,min_price,player_name,prop_type,UnderDog Line,favored
750,2.5,146.0,-202.0,-202.0,tony deangelo,shots on goal,2.0,under
768,2.5,-192.0,141.0,-192.0,mikko rantanen,shots on goal,3.0,over
749,2.5,140.0,-191.0,-191.0,ivan provorov,shots on goal,2.0,under
761,2.5,-190.0,139.0,-190.0,kyle connor,shots on goal,3.0,over
762,2.5,135.0,-184.0,-184.0,blake wheeler,shots on goal,2.0,under
767,2.5,-184.0,135.0,-184.0,valeri nichushkin,shots on goal,3.0,over
774,1.5,-184.0,135.0,-184.0,ryan o'reilly,shots on goal,2.0,over
754,2.5,-183.0,134.0,-183.0,matthew tkachuk,shots on goal,3.0,over
769,2.5,-182.0,135.0,-182.0,cale makar,shots on goal,3.0,over
755,2.5,-181.0,134.0,-181.0,sam bennett,shots on goal,3.0,over


# Dropping Buffered Lines

In [None]:
#equal_lines_df = full_df.loc[full_df['pinnacle_line']!=full_df['UnderDog Line']]
#equal_lines_df.sort_values(by = 'min_price', ascending = True)