In [1]:
import urllib
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow,Flow
from google.auth.transport.requests import Request
import os
import pickle

In [2]:
# dates
# Anchor date the week counter is measured from.
start = datetime(2020,9,8)
# Use the anchor's year as the season year instead of today's calendar year:
# a season that starts in September runs into January, and a calendar-year
# lookup would then build URLs for the wrong season.
year = str(start.year)
# Days 1-7 after `start` round to week 1, days 8-14 to week 2, and so on
# (the +3 offset shifts the rounding point to the middle of each 7-day span).
week = str(round(((datetime.today() - start).days + 3) / 7))

# add option to pass week
# url parser
def crawler(url,week_num=week):
    """Download a page and return it parsed as a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    week_num : str, optional
        Currently unused by the body; kept so existing callers that pass it
        keep working. Defaults to the module-level ``week``.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the built-in ``'html.parser'``.

    Notes
    -----
    Nothing is caught here: any error raised by ``urlopen`` propagates to
    the caller.
    """
    # A plain `import urllib` does not guarantee the `request` submodule is
    # loaded; import it explicitly so this function does not depend on some
    # other library having imported it first.
    import urllib.request

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    return BeautifulSoup(html, 'html.parser')

# Build both source URLs first, then download and parse each page.
# FiveThirtyEight is keyed by season year; VegasInsider by week and season year.
fte_url = (
    'https://projects.fivethirtyeight.com/'
    + year
    + '-nfl-predictions/games/'
)
vegas_url = (
    'https://www.vegasinsider.com/nfl/matchups/matchups.cfm/week/'
    + week
    + '/season/'
    + year
)

fte_soup = crawler(fte_url)
vegas_soup = crawler(vegas_url)

In [3]:
# function to match team names w/city names
# Ordered (keyword, nickname) pairs used by team_match. The first keyword found
# in the lower-cased input wins, so the order of this table is significant.
_TEAM_KEYWORDS = (
    ('jets', 'Jets'),
    ('indiana', 'Colts'),
    ('denver', 'Broncos'),
    ('chica', 'Bears'),
    ('orlean', 'Saints'),
    ('arizon', 'Cardinals'),
    ('carolin', 'Panthers'),
    ('detroit', 'Lions'),
    ('cinci', 'Bengals'),
    ('jackson', 'Jaguars'),
    ('dallas', 'Cowboys'),
    ('cleve', 'Browns'),
    ('houst', 'Texans'),
    ('minne', 'Vikings'),
    ('seat', 'Seahawks'),   # was misspelled 'Seahwaks', which could never equal a correctly spelled name
    ('miami', 'Dolphins'),
    ('tampa', 'Buccaneers'),
    ('charg', 'Chargers'),
    ('pitt', 'Steelers'),
    ('tenn', 'Titans'),
    ('balti', 'Ravens'),
    ('washing', 'Washington'),
    ('rams', 'Rams'),
    ('giant', 'Giants'),
    ('kansas', 'Chiefs'),
    ('england', 'Patriots'),
    ('buff', 'Bills'),
    ('vegas', 'Raiders'),
    ('franc', '49ers'),
    ('phila', 'Eagles'),
    ('green', 'Packers'),
    ('atlan', 'Falcons'),
)


def team_match(x):
    """Translate a city/team label into the team nickname.

    Parameters
    ----------
    x : str
        Label to translate; matching is case-insensitive and substring-based.

    Returns
    -------
    str
        The nickname paired with the first keyword (in table order) that
        occurs in ``x``, or the sentinel ``'ERROR'`` when no keyword occurs.
    """
    lowered = x.lower()
    for keyword, nickname in _TEAM_KEYWORDS:
        if keyword in lowered:
            return nickname
    return 'ERROR'
    
def pickem(x):
    """Render a spread as text: ``'PK'`` when it equals zero, otherwise ``str(x)``."""
    return 'PK' if x == 0 else str(x)

In [5]:
# generate elo spreads
# Walk every table row of the FiveThirtyEight "days" container and collect
# three lists: favorites, underdogs and the favorite's spread.
elo_favorites = []
elo_underdogs = []
elo_spreads = []
current_week_tag_list = fte_soup.find("div", class_="days").find_all("tr")

for tag in current_week_tag_list:
    cells = tag.contents
    # Rows whose first cell carries the classes ['th', 'time'] hold no team
    # data (presumably kickoff-time header rows -- confirm against the page).
    if cells[0].get('class') == ['th', 'time']:
        continue

    spread_text = cells[2].text
    team = cells[1].text.strip()
    if len(spread_text) > 1:
        # A spread cell longer than one character marks the favorite's row.
        elo_favorites.append(team)
        try:
            # Skip the first two characters before parsing the number.
            elo_spreads.append(float(spread_text[2:]))
        except ValueError:
            # Non-numeric spread text is treated as a pick'em (0.0). Only
            # ValueError is caught so unrelated failures (and Ctrl-C) are no
            # longer silently swallowed as they were by the bare `except:`.
            elo_spreads.append(0.0)
    else:
        elo_underdogs.append(team)

# Combine the lists position-wise into (favorite, underdog, spread) tuples.
elo_tuple = list(zip(elo_favorites, elo_underdogs, elo_spreads))


# generate vegas spreads
# Collect favorites, underdogs and favorite spreads from the VegasInsider page.
vegas_favorites = []
vegas_underdogs = []
vegas_spreads = []

# The game tables are the 'SLTables1' divs nested inside the second top-level
# 'SLTables1' div.
current_week_game_tags = vegas_soup.find_all("div",class_='SLTables1')[1].find_all("div",class_='SLTables1')

for game_table in current_week_game_tags:
    # Only rows 4 and 5 of each game table are inspected (one per team).
    for team_row in game_table.find_all("tr")[4:6]:
        line_text = team_row.find_all('td')[4].text
        nickname = team_match(team_row.find('a').text)
        if '-' in line_text:
            # A minus sign marks the favorite; drop the first character and parse.
            vegas_favorites.append(nickname)
            vegas_spreads.append(float(line_text[1:]))
        elif 'PK' in line_text:
            # Pick'em: recorded as a favorite with a zero spread.
            # NOTE(review): if both rows of a pick'em game show 'PK', both teams
            # land in vegas_favorites and the zip below misaligns -- confirm.
            vegas_favorites.append(nickname)
            vegas_spreads.append(float(0))
        else:
            vegas_underdogs.append(nickname)

# Combine the lists position-wise into (favorite, underdog, spread) tuples.
vegas_tuple = list(zip(vegas_favorites,vegas_underdogs,vegas_spreads))


# compare spreads and select picks
# For every Elo game find the Vegas game with the same two teams and record a
# label for the side to bet plus the absolute gap between the two spreads.
teams_to_bet = []
elo_to_vegas_abs_diffs = []

for elo_fav, elo_dog, elo_spread in elo_tuple:
    for vegas_fav, vegas_dog, vegas_spread in vegas_tuple:
        if (elo_fav, elo_dog) == (vegas_fav, vegas_dog):
            # Both sources agree on the favorite: compare the spreads directly.
            elo_to_vegas_diff = elo_spread - vegas_spread
            if elo_to_vegas_diff > 0:
                # Elo likes the favorite by more than Vegas does -> bet favorite.
                teams_to_bet.append(elo_fav+' -'+pickem(vegas_spread)+' vs '+ vegas_dog)
            else:
                # Otherwise take the underdog with the points.
                teams_to_bet.append(elo_dog+' +'+pickem(vegas_spread)+' vs '+ vegas_fav)
            elo_to_vegas_abs_diffs.append(abs(elo_to_vegas_diff))
        elif (elo_fav, elo_dog) == (vegas_dog, vegas_fav):
            # The sources disagree on the favorite, so the gap is the sum of
            # both spreads (they straddle zero).
            # NOTE(review): this label has no '+'/'-' prefix, unlike the two
            # cases above -- confirm that is intended.
            elo_to_vegas_diff = elo_spread + vegas_spread
            teams_to_bet.append(elo_fav+' '+pickem(vegas_spread)+' vs '+ vegas_fav)
            elo_to_vegas_abs_diffs.append(abs(elo_to_vegas_diff))

# Pair each label with its gap and rank by gap, largest first.
plays = sorted(
    zip(teams_to_bet, elo_to_vegas_abs_diffs),
    key=lambda play: play[1],
    reverse=True,
)


# Report every play whose Elo/Vegas gap exceeds 1.5 points, in ranked order.
for i, (pick, diff) in enumerate(plays):
    if diff > 1.5:
        # The first entry is called the "top play" only when it is strictly
        # ahead of the runner-up. The length check comes first so a list with
        # a single play no longer raises IndexError on plays[1].
        if i == 0 and (len(plays) == 1 or diff > plays[1][1]):
            print('{} is the top play with a {}pt difference'.format(pick, diff))
        else:
            print('{} is the number {} play with a {}pt difference'.format(pick, i + 1, diff))

49ers +6.0 vs Packers is the top play with a 5.5pt difference
Bears +6.5 vs Titans is the number 2 play with a 3.5pt difference
Chiefs -10.5 vs Panthers is the number 3 play with a 3.0pt difference
Broncos +4.0 vs Falcons is the number 4 play with a 3.0pt difference
Cardinals -4.5 vs Dolphins is the number 5 play with a 2.5pt difference
Jets +8.5 vs Patriots is the number 6 play with a 2.5pt difference


In [11]:
# record data
# Each play label is '<team> <spread> vs <opponent>', so splitting on
# whitespace puts the team at index 0, the spread at index 1 and the opponent
# at index 3 (assumes single-word team names -- true for team_match's outputs).
records = []
for label, diff in plays:
    if diff > 1.5:
        parts = label.split()
        records.append({
            'Team_to_bet': parts[0],
            'Vegas_spread': parts[1],
            'Opponent': parts[3],
            'Elo_diff': diff,
        })

# Build the frame once from the collected records: DataFrame.append was removed
# in pandas 2.0. Passing the columns explicitly keeps the selection below valid
# even when no play clears the threshold.
df = pd.DataFrame(records, columns=['Team_to_bet', 'Opponent', 'Vegas_spread', 'Elo_diff'])

# add week column for tracking
df['Week'] = week

df[['Team_to_bet','Opponent','Vegas_spread','Elo_diff','Week']]

Unnamed: 0,Team_to_bet,Opponent,Vegas_spread,Elo_diff,Week
0,49ers,Packers,6.0,5.5,9
1,Bears,Titans,6.5,3.5,9
2,Chiefs,Panthers,-10.5,3.0,9
3,Broncos,Falcons,4.0,3.0,9
4,Cardinals,Dolphins,-4.5,2.5,9
5,Jets,Patriots,8.5,2.5,9


In [8]:
# append data to g-sheet
# sheets API
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
sheet_id='1d2G32M8mn2Va-JaVAwV1JfwXjhJMPJbb-1UYOXjKTC4'
data_range='Sheet1!A1:E'

# Location of the OAuth client-secrets file. Set the GOOGLE_CLIENT_SECRETS_FILE
# environment variable to run this on another machine; the original hard-coded
# path remains the default so existing setups are unaffected.
credentials_file = os.environ.get(
    'GOOGLE_CLIENT_SECRETS_FILE',
    '/Users/ccaspar/downloads/credentials.json',
)

# setup credentials
# Reuse cached credentials from token.pickle when present; otherwise refresh
# them or run the interactive browser flow, then cache the result.
creds = None
if os.path.exists('token.pickle'):
    # NOTE(security): pickle.load can execute arbitrary code from the file.
    # Only load a token.pickle that this notebook wrote itself.
    with open('token.pickle', 'rb') as token:
        creds = pickle.load(token)
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(credentials_file, SCOPES)
        creds = flow.run_local_server(port=0)
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)

service = build('sheets', 'v4', credentials=creds)

# need to create valuerange object
# One inner list per play, in the same column order as the displayed frame.
body = {'values': df[['Team_to_bet','Opponent','Vegas_spread','Elo_diff','Week']].values.tolist()}


# Append the rows to the sheet; 'RAW' stores the values as given and
# 'INSERT_ROWS' inserts new rows for the data.
response_date = service.spreadsheets().values().append(
    spreadsheetId=sheet_id,
    range=data_range,
    valueInputOption='RAW',
    insertDataOption='INSERT_ROWS',
    body=body).execute()

In [None]:
# TODO: add results data and over/under (O/U) data
# TODO: maybe add a change log
# TODO: maybe only add results if it's a new week

In [None]:
# Fetch the VegasInsider scoreboard page for the current week and season;
# the parsed soup is the cell's displayed value.
results_url = (
    'https://www.vegasinsider.com/nfl/scoreboard/scores.cfm/week/'
    + week
    + '/season/'
    + year
)
crawler(results_url)