In [84]:
import pickle
import re
import requests, bs4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os
pd.options.mode.chained_assignment = None  # default='warn'
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

In [85]:
names = ['2015stats','2016stats','2017stats','2018stats','2019stats']

In [86]:
def scrape_odds(db):
    
    team_abbrevs = {
    'Cleveland': 'CLE',
    'Boston':'BOS',
    'Washington':'WAS',
    'Charlotte':'CHO',
    'Minnesota':'MIN',
    'Dallas':'DAL',
    'Milwaukee':'MIL',
    'Philadelphia':'PHI',
    'Phoenix':'PHO',
    'L.A. Lakers':'LAL',
    'Utah':'UTA',
    'Sacramento':'SAC',
    'New York':'NYK',
    'New Orleans':'NOP',
    'Detroit':'DET',
    'Atlanta':'ATL',
    'Chicago':'CHI',
    'Miami':'MIA',
    'Memphis':'MEM',
    'Golden State':'GSW',
    'Denver':'DEN',
    'Brooklyn':'BRK',
    'L.A. Clippers':'LAC',
    'Portland':'POR',
    'Indiana':'IND',
    'San Antonio':'SAS',
    'Houston':'HOU',
    'Oklahoma City':'OKC',
    'Toronto':'TOR',
    'Orlando':'ORL'
    }
    
    scraped_dates = []
    moneydb = pd.DataFrame()

    for date in db['Date']:

        if date not in scraped_dates:
            url = f'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/?date={date}'
            res = requests.get(url, 'html.parser')
            res.raise_for_status()
            soup = bs4.BeautifulSoup(res.text, 'lxml')
            linesoup = soup.find('div', {'id':'bettingOddsGridContainer'})
            test_teams = linesoup.find_all('span', {'class':'_3O1Gx'})
            teams = [team_abbrevs[team.get_text()] for team in test_teams]
            test_lines = linesoup.find_all('span', {'class':'opener'})
            lines = [line.get_text() for line in test_lines if line.get_text().startswith('-') and 
                     len(line.get_text()) > 1 or line.get_text().startswith('+') and len(line.get_text()) > 1]

            if len(teams) != len(lines):
                pass
            else:
                moneylines = {'Team':teams, 'ML Odds': lines, 'Date':[date]*len(teams)}

                MLdb = pd.DataFrame.from_dict(moneylines)
                moneydb = pd.concat([moneydb, MLdb])

            scraped_dates.append(date)
    
    return moneydb.reset_index(drop = True)
    # moneydb.to_csv('MLodds.csv')

In [87]:
def scrape_spreads(db):
    
    team_abbrevs = {
    'Cleveland': 'CLE',
    'Boston':'BOS',
    'Washington':'WAS',
    'Charlotte':'CHO',
    'Minnesota':'MIN',
    'Dallas':'DAL',
    'Milwaukee':'MIL',
    'Philadelphia':'PHI',
    'Phoenix':'PHO',
    'L.A. Lakers':'LAL',
    'Utah':'UTA',
    'Sacramento':'SAC',
    'New York':'NYK',
    'New Orleans':'NOP',
    'Detroit':'DET',
    'Atlanta':'ATL',
    'Chicago':'CHI',
    'Miami':'MIA',
    'Memphis':'MEM',
    'Golden State':'GSW',
    'Denver':'DEN',
    'Brooklyn':'BRK',
    'L.A. Clippers':'LAC',
    'Portland':'POR',
    'Indiana':'IND',
    'San Antonio':'SAS',
    'Houston':'HOU',
    'Oklahoma City':'OKC',
    'Toronto':'TOR',
    'Orlando':'ORL'
    }
    
    scraped_dates = []
    spreaddb = pd.DataFrame()

    for date in db['Date']:
        if date not in scraped_dates:
            url = f'https://www.sportsbookreview.com/betting-odds/nba-basketball/pointspread/?date={date}'
            res = requests.get(url, 'html.parser')
            res.raise_for_status()
            soup = bs4.BeautifulSoup(res.text, 'lxml')
            linesoup = soup.find('div', {'id':'bettingOddsGridContainer'})
            test_teams = linesoup.find_all('span', {'class':'_3O1Gx'})
            teams = [team_abbrevs[team.get_text()] for team in test_teams]
            test_lines = linesoup.find_all('span', {'class':'_3Nv_7 opener'})
            lines = [line.get_text() for line in test_lines if line.get_text().startswith('-') or 
                     line.get_text().startswith('+') or line.get_text().startswith('P')]

            if len(teams) != len(lines):
                pass
            else:
                spreads = {'Team':teams, 'Spread': lines, 'Date':[date]*len(lines)}
                spreads = pd.DataFrame.from_dict(spreads)
                spreaddb = pd.concat([spreaddb, spreads])

            scraped_dates.append(date)
            
    return spreaddb.reset_index(drop = True)

In [88]:
for name in names:
    db = pd.read_csv(f'{name}.csv')
    moneydb = scrape_odds(db)
    spreaddb = scrape_spreads(db)
    
    for i, spread in enumerate(spreaddb['Spread']):
        spreaddb['Spread'][i] = re.sub("½", ".5", spread)
        
    spreaddb['Spread'] = spreaddb['Spread'].replace('PK',0)
    spreaddb['Spread'] = spreaddb['Spread'].apply(lambda x: float(x))
    moneydb['ML Odds'] = moneydb['ML Odds'].apply(lambda x: int(x))
    
    db['Odds'] = np.zeros(len(db['Team']))
    db['Line'] = np.zeros(len(db['Team']))
    db['Cover'] = np.zeros(len(db['Team']))
    
    for i, date in enumerate(db['Date']):

        mmask = (moneydb['Date'] == date) & (moneydb['Team'] == db['Team'][i])
        smask = (spreaddb['Date'] == date) & (spreaddb['Team'] == db['Team'][i])

        if mmask.any():
            db['Odds'][i] = moneydb[mmask]['ML Odds']
        else:
            db['Odds'][i] = np.nan

        if smask.any():
            db['Line'][i] = spreaddb[smask]['Spread']
        else:
            db['Line'][i] = np.nan
    
    for i in range(len(db['Cover'])):
        cover = db['Team Points'][i] + db['Line'][i] - db['Opp Points'][i]
        if cover > 0:
            db['Cover'][i] = 1
        elif cover < 0:
            db['Cover'][i] = -1
        else:
            db['Cover'][i] = 0

    db.to_csv(f'{name}.csv', index = False)

Date:  20141030
Date:  20141031
Date:  20141104
Date:  20141105
Date:  20141107
Date:  20141110
Date:  20141114
Date:  20141115
Date:  20141117
Date:  20141119
Date:  20141121
Date:  20141122
Date:  20141124
Date:  20141126
Date:  20141129
Date:  20141202
Date:  20141204
Date:  20141205
Date:  20141208
Date:  20141209
Date:  20141211
Date:  20141212
Date:  20141215
Date:  20141217
Date:  20141219
Date:  20141221
Date:  20141223
Date:  20141225
Date:  20141226
Date:  20141228
Date:  20141230
Date:  20141231
Date:  20150102
Date:  20150104
Date:  20150105
Date:  20150107
Date:  20150109
Date:  20150111
Date:  20150113
Date:  20150115
Date:  20150116
Date:  20150119
Date:  20150121
Date:  20150123
Date:  20150125
Date:  20150127
Date:  20150128
Date:  20150130
Date:  20150131
Date:  20150202
Date:  20150205
Date:  20150206
Date:  20150208
Date:  20150211
Date:  20150212
Date:  20150220
Date:  20150222
Date:  20150224
Date:  20150226
Date:  20150227
Date:  20150301
Date:  20150303
Date:  2

In [70]:
db['Cover'] = np.zeros(len(db['Team']))
for i in range(len(db['Cover'])):
    cover = db['Team Points'][i] + db['Line'][i] - db['Opp Points'][i]
    if cover > 0:
        db['Cover'][i] = 1
    elif cover < 0:
        db['Cover'][i] = -1
    else:
        db['Cover'][i] = 0
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
