In [1]:
import csv
import json
import os
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def get_box_score_stats(file):
    """Extract the basic box-score rows for every team found in a saved game page.

    Parameters
    ----------
    file : str
        File name inside ``<cwd>/NBARefFiles/``. The first eight characters are
        read as ``YYYYMMDD`` and the name minus its last five characters is
        used as the game id (presumably stripping ``.html`` -- TODO confirm).

    Returns
    -------
    pandas.DataFrame
        One row per player line that survives filtering, with ``GID``,
        ``Date``, ``Year``, ``Month``, ``Day`` and ``Team`` columns inserted in
        front of the scraped columns, the percentage columns removed and the
        ``Starters`` column renamed to ``Name``. An empty frame is returned
        when no team table is present in the page.

    Notes
    -----
    Relies on the module-level ``team_handles_dict`` for the team codes to look
    for. ``Date`` is built from the integer fields and is therefore not
    zero-padded (e.g. ``2018-1-5``).
    """
    # Use a context manager so the file handle is closed as soon as parsing is done.
    with open(os.getcwd()+("/NBARefFiles/"+file)) as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    gid = str(file[:-5])
    year = int(file[:4])
    month = int(file[4:6])
    day = int(file[6:8])
    soup_text = str(soup)

    # Collect per-team frames and concatenate once instead of growing a frame
    # from an empty DataFrame inside the loop.
    team_frames = []
    for team in team_handles_dict.values():
        label = "box-"+team+"-game-basic"
        if label not in soup_text:
            continue
        table = soup.find("table", {"id": label})
        # pandas deprecated passing literal HTML to read_html; wrap it in StringIO.
        df1 = pd.read_html(StringIO(str(table)), header=1)[0]
        df1.insert(0, "GID", gid)
        df1.insert(1, "Year", year)
        df1.insert(2, "Month", month)
        df1.insert(3, "Day", day)
        df1.insert(4, "Team", team)
        df1.insert(1, "Date", str(year)+'-'+str(month)+'-'+str(day))
        # Drop the row at position 5 (presumably the repeated header between
        # starters and reserves -- TODO confirm against a saved page).
        df1 = df1.drop(df1.index[[5]])
        df1 = df1.drop(['FG%', '3P%', 'FT%'], axis=1)
        # Remove non-stat rows: "Did Not Play"-style rows, the team totals row
        # and any remaining header/"Player ..." rows.
        df1 = df1[~df1.FG.str.contains("Did")]
        df1 = df1[~df1.Starters.str.contains("Team")]
        df1 = df1[~df1.FG.str.contains("Not")]
        df1 = df1[~df1.FG.str.contains("Player")]
        # Minutes longer than five characters lose their last three characters
        # (e.g. "38:25:00" -> "38:25"); shorter values are kept untouched.
        df1['MP'] = [
            str(minutes)[:-3] if len(str(minutes)) > 5 else minutes
            for minutes in df1['MP']
        ]
        team_frames.append(df1)

    if team_frames:
        df = pd.concat(team_frames, ignore_index=True)
    else:
        df = pd.DataFrame()
    df = df.rename(index=str, columns={"Starters": "Name"})
    return df

In [3]:
def get_game_id_for_team(team_handle, year):
    """Return the game ids of a team's home games for one season.

    The team's schedule page is downloaded from basketball-reference.com and
    the cells of the ``games`` table are walked in document order. Each
    ``date_game`` cell supplies the date (its ``csk`` attribute with dashes
    removed); the following ``game_location`` cell marks the game as away when
    it contains any text and as home otherwise.

    Parameters
    ----------
    team_handle : str
        Team code used in the URL and appended to each id.
    year : int or str
        Season identifier used in the URL.

    Returns
    -------
    list of str
        ``<YYYYMMDD>0<team_handle>`` for every home game, in schedule order.
    """
    schedule_url = f'https://www.basketball-reference.com/teams/{team_handle}/{year}_games.html'
    response = requests.get(schedule_url)
    schedule_soup = BeautifulSoup(response.text, 'html.parser')
    games_table = schedule_soup.find('table', {'id': 'games'})

    location_by_date = {}
    for cell in games_table.tbody.find_all('td'):
        stat = cell['data-stat']
        if stat == 'date_game':
            game_date = cell['csk'].replace('-', '')
        elif stat == 'game_location':
            # An empty location cell means the game was played at home.
            location_by_date[game_date] = 'away' if cell.text else 'home'

    return [
        played_on + '0' + team_handle
        for played_on, location in location_by_date.items()
        if location == 'home'
    ]

In [4]:
def get_dates_list(dates):
    """Normalise date strings to unique ``YYYYMMDD`` strings, in first-seen order.

    Parameters
    ----------
    dates : iterable of str
        Year-first dates whose fields are separated by ``/`` or ``-``
        (e.g. ``"2018/1/5"`` or ``"2018-01-05"``). When both separators occur
        in one entry, ``-`` is the one that is split on.

    Returns
    -------
    list of str
        One ``YYYYMMDD`` string per distinct date. Month and day fields of at
        most one character are left-padded with ``0``.

    Raises
    ------
    IndexError
        If an entry contains a separator but yields fewer than three fields.

    Notes
    -----
    Entries containing neither separator are skipped. Previously such an entry
    raised ``UnboundLocalError`` when it came first and otherwise silently
    re-used the fields of the preceding entry (which was then dropped as a
    duplicate), so skipping keeps the output of every input that used to work.
    """
    date_list = []
    seen = set()  # O(1) duplicate check; date_list keeps the order
    for date in dates:
        if "-" in date:
            date_split = date.split("-")
        elif "/" in date:
            date_split = date.split("/")
        else:
            # No recognised separator: nothing to parse for this entry.
            continue
        year = date_split[0]
        month = date_split[1] if len(date_split[1]) > 1 else '0' + date_split[1]
        day = date_split[2] if len(date_split[2]) > 1 else '0' + date_split[2]
        normalised = year + month + day
        if normalised not in seen:
            seen.add(normalised)
            date_list.append(normalised)
    return date_list

In [5]:
def saveDKFiles(date_list):
    """Download the rotoguru DraftKings page for each date and save it to disk.

    Parameters
    ----------
    date_list : iterable of str
        Dates as ``YYYYMMDD`` strings. Characters 0-3, 4-5 and 6-7 are used as
        year, month and day in the request URL.

    Each page is written to ``<cwd>/NBADKSaves/<date>.html``; an existing file
    with that name is overwritten. Nothing is returned.
    """
    for date in date_list:
        year = str(date[0:4])
        month = str(date[4:6])
        day = str(date[6:8])
        url = 'http://rotoguru1.com/cgi-bin/hyday.pl?mon='+month+'&day='+day+'&year='+year+'&game=dk'
        page = requests.get(url)
        # The context manager flushes and closes the file even if write() fails.
        with open(os.getcwd()+("/NBADKSaves/"+date+'.html'), "w") as out_file:
            out_file.write(page.text)

In [6]:
def savePlayerProjections(date_list):
    """Download the dailyfantasyfuel DraftKings projections page for each date.

    Parameters
    ----------
    date_list : iterable of str
        Dates as ``YYYYMMDD`` strings. Characters 0-3, 4-5 and 6-7 are used as
        year, month and day and joined with dashes in the request URL.

    Each page is written to ``<cwd>/ProjectionSaves/<date>.html``; an existing
    file with that name is overwritten. Nothing is returned.
    """
    for date in date_list:
        year = str(date[0:4])
        month = str(date[4:6])
        day = str(date[6:8])
        url = 'http://www.dailyfantasyfuel.com/nba/projections/draftkings/'+year+'-'+month+'-'+day+'/'
        page = requests.get(url)
        # The context manager flushes and closes the file even if write() fails.
        with open(os.getcwd()+("/ProjectionSaves/"+date+'.html'), "w") as out_file:
            out_file.write(page.text)

In [7]:
def saveOddsFiles(dates):
    """Download the sportsbookreview NBA odds page for each distinct date.

    Parameters
    ----------
    dates : iterable of str
        Year-first dates whose fields are separated by ``/`` or ``-``. When
        both separators occur in one entry, ``-`` is the one that is split on.
        Month and day fields of at most one character are left-padded with
        ``0``.

    Each page is requested with ``?date=YYYY-MM-DD`` and written to
    ``<cwd>/NBAOddsFiles/<YYYYMMDD>.html``; an existing file with that name is
    overwritten. Nothing is returned.

    Raises
    ------
    IndexError
        If an entry contains a separator but yields fewer than three fields.

    Notes
    -----
    Entries containing neither separator are skipped. Previously such an entry
    raised ``UnboundLocalError`` when it came first and otherwise re-downloaded
    the page of the preceding entry. Duplicates are detected on the normalised
    date, so ``2018/1/5`` and ``2018-01-05`` trigger a single request.
    """
    saved_dates = set()
    for date in dates:
        if "-" in date:
            date_split = date.split('-')
        elif "/" in date:
            date_split = date.split('/')
        else:
            # No recognised separator: nothing to request for this entry.
            continue
        year = date_split[0]
        month = date_split[1] if len(date_split[1]) > 1 else '0' + date_split[1]
        day = date_split[2] if len(date_split[2]) > 1 else '0' + date_split[2]
        date_formatted = year+'-'+month+'-'+day
        date_plain = year+month+day
        if date_plain in saved_dates:
            continue
        pagelink = "https://www.sportsbookreview.com/betting-odds/nba-basketball/merged/?date="+date_formatted
        page = requests.get(pagelink)
        # The context manager flushes and closes the file even if write() fails.
        with open(os.getcwd()+("/NBAOddsFiles/"+date_plain+'.html'), "w") as out_file:
            out_file.write(page.text)
        saved_dates.add(date_plain)

In [8]:
def abbreviate(word_list, dictionary):
    """Look up every entry of ``word_list`` in ``dictionary``.

    Returns a new list of the same length and order; entries that are not keys
    of ``dictionary`` become ``None``.
    """
    return [dictionary.get(word) for word in word_list]

In [9]:
def get_dk_info(date):
    """Parse one saved DraftKings results page into a per-player DataFrame.

    The page ``~/Desktop/NBAProject/NBADKSaves/<date>.html`` is read, everything
    before the first ``"Guards"`` marker is discarded, and the remaining markup
    is consumed player by player with plain string splitting.

    Parameters
    ----------
    date : str
        Date used to build the file name and as the prefix of every game id
        (``YYYYMMDD`` in the rest of this notebook).

    Returns
    -------
    pandas.DataFrame
        Columns ``GID``, ``Position``, ``Name``, ``Score``, ``Salary``,
        ``Team`` and ``Opponent``. ``Salary`` is the dollar figure with its
        comma removed, or ``'N/A'`` when it cannot be parsed. ``GID`` is
        ``<date>0<home team>``: the opponent when the opponent cell contains
        ``@``, otherwise the player's own team.

    Notes
    -----
    * When a field cannot be parsed, the value from the previous player is
      re-used (positions, scores, opponents); a team that cannot be parsed is
      not appended at all.
    * NOTE(review): the path is hard-coded under ``~/Desktop/NBAProject`` while
      the other readers and writers in this notebook use ``os.getcwd()`` --
      confirm which location is intended.
    * NOTE(review): team-code rewrites (CHA/NOR/BKN) are applied to the
      opponent only on the ``@`` branch; home rows keep the raw code.
    """
    # Player-name rewrites applied after flipping "Last, First" to "First Last".
    name_fixes = {
        'Maurice Williams': 'Mo Williams',
        'Louis Williams': 'Lou Williams',
        'Amare Stoudemire': "Amar'e Stoudemire",
        'Tim Hardaway Jr.': 'Tim Hardaway',
        'Perry Jones III': 'Perry Jones',
        'Wes Matthews': 'Wesley Matthews',
        'C.J. McCollum': 'CJ McCollum',
        'Glen Rice Jr.': "Glen Rice",
        'Jose Barea': 'J.J. Barea',
        'Dennis Smith Jr.': 'Dennis Smith',
        'Guillermo Hernangomez': 'Willy Hernangomez',
        # NOTE(review): the trailing period looks like a typo, so this entry
        # probably never matches -- confirm before changing the key.
        'Wes Iwundu.': "Wesley Iwundu",
    }
    # Team-code rewrites applied to the player's team and to away opponents.
    team_fixes = {'CHA': 'CHO', 'NOR': 'NOP', 'BKN': 'BRK'}

    names = []
    scores = []
    salaries = []
    teams = []
    gids = []
    opps = []
    positions = []
    df = pd.DataFrame()
    # Close the handle deterministically instead of leaking it.
    with open(os.path.expanduser("~/Desktop/NBAProject/NBADKSaves/"+date+".html")) as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    orig_text = str(soup)
    text = orig_text.split('"Guards"',1)[1]
    while 'target="_blank">' in text and 'BOTTOM BOILERPLATE' in text:
        #append position
        try:
            pos = text.split('--><tr><td>')[1].split('</td><td><a')[0]
        except (IndexError, ValueError):
            # Marker missing: keep the position carried over from the previous player.
            pass
        positions.append(pos)
        #append name
        name = text.split('target="_blank">')[1].split('</a>')[0]
        # Continue with the text that follows the first occurrence of the name.
        text = text.split(name)[1]
        # Link texts containing 'border="no"' are not player names and are not recorded.
        if 'border="no"' not in name:
            name = name.split(', ')[1]+' '+name.split(', ')[0]
            name = name_fixes.get(name, name)
            names.append(name)
        #append score
        try:
            score = float(text.split('align="center">')[1].split('</td><td',1)[0])
        except (IndexError, ValueError):
            # Unparseable score: keep the score carried over from the previous player.
            pass
        scores.append(score)
        # Fixed-width skip; the offset depends on the saved page layout.
        text = text[35:]
        #append salary
        try:
            salary = text.split('$',1)[1].split('</td><td',1)[0]
            salary = salary.split(',')[0]+salary.split(',')[1]
        except (IndexError, ValueError):
            salary = 'N/A'
        salaries.append(salary)
        #append team
        try:
            team = text.split('align="center">',1)[1].split('</td>')[0].upper()
            team = team_fixes.get(team, team)
            teams.append(team)
        except (IndexError, ValueError):
            pass
        # Fixed-width skip; the offset depends on the saved page layout.
        text = text[48:]
        #append GID
        try:
            opp = text.split('<td>',1)[1].split('</td>')[0]
        except (IndexError, ValueError):
            # Opponent cell missing: keep the opponent carried over from the previous player.
            pass
        if '@' in opp:
            # Away game: the opponent is the home team and names the game.
            opp = str(opp)[2:].upper()
            opp = team_fixes.get(opp, opp)
            opps.append(opp)
            gid = str(date)+'0'+opp
        else:
            gid = str(date)+'0'+str(team)
            opp = str(opp)[2:].upper()
            opps.append(opp)
        gids.append(gid)
    # The final pass through the loop adds one trailing entry to these lists
    # (names and teams are not trimmed), so drop it before building the frame.
    positions = positions[:-1]
    opps = opps[:-1]
    gids = gids[:-1]
    scores = scores[:-1]
    salaries = salaries[:-1]
    df['GID'] = gids
    df['Position'] = positions
    df['Name'] = names
    df['Score'] = scores
    df["Salary"] = salaries
    df['Team'] = teams
    df['Opponent'] = opps
    return df

In [10]:
def get_betting_lines(date):
    """Parse one saved odds page into per-team spread and over/under values.

    The page ``<cwd>/NBAOddsFiles/<date>.html`` is read and scanned with plain
    string splitting for team-name spans (class suffix ``_3O1Gx``) and opening
    line spans (class ``_3Nv_7 opener``).

    Parameters
    ----------
    date : str
        Date used to build the file name and as the prefix of every game id.

    Returns
    -------
    pandas.DataFrame
        Columns ``Team``, ``GID``, ``Spread`` and ``Over/Under`` with one row
        per team. Teams are taken in page order as pairs; the second team of
        each pair supplies the game id ``<date>0<team>`` for both rows.

    Notes
    -----
    Relies on the module-level ``abbreviate`` and ``city_handles_dict``; team
    names missing from the dictionary become ``None``. Opening values above
    150 are treated as over/under totals, values below 20 as spreads, and
    anything in between is ignored.
    """
    df = pd.DataFrame()
    teams = []
    gid_list = []
    odds_list = []
    spread_list = []
    over_under_list = []
    # Close the handle deterministically instead of leaking it.
    with open(os.getcwd()+("/NBAOddsFiles/"+date+".html")) as handle:
        betting_page = BeautifulSoup(handle, 'html.parser')
    orig_text = str(betting_page)
    text = orig_text
    #list of team names abbreviated
    while '_3O1Gx">' in text:
        abbr = text.split('_3O1Gx">')[1].split('</span></a><span')[0]
        # Continue with the piece that follows the first occurrence of the name.
        text = text.split(abbr)[1]
        teams.append(abbr)
    abbreviated = abbreviate(teams, city_handles_dict)
    df['Team'] = abbreviated
    #GID list
    for name in abbreviated[1::2]:
        # Every second team names the game; both rows of the pair share the id.
        gid = str(date)+'0'+str(name)
        gid_list.append(gid)
        gid_list.append(gid)
    df['GID'] = gid_list
    #Lines List
    text = orig_text
    while '_3Nv_7 opener">' in text:
        odds = text.split('_3Nv_7 opener">')[1].split('</span>')[0]
        if '-<!-- -->' in odds:
            odds = odds.split('-<!-- -->')[1]
        text = text.split('>'+odds+'</span><span',1)[1]
        if len(odds)>5:
            # Longer values carry a nine-character prefix that is not part of the number.
            odds = odds[9:]
        if odds == 'PK':
            # "PK" (pick'em) means no spread.
            odds = 0.0
        else:
            odds = float(odds.replace('½','.5'))
        odds_list.append(odds)
    #get list of spreads for each team
    i = 1
    for odds in odds_list:
        if odds > 150:
            over_under_list.append(odds)
            over_under_list.append(odds)
            i = i+1
            continue
        if odds<20:
            # The running counter decides which team of the pair gets the negated value.
            if i%2 == 1:
                spread_list.append(odds*-1)
                spread_list.append(odds)
                i = i+1
            else:
                spread_list.append(odds)
                spread_list.append(odds*-1)
                i = i+1
    df['Spread'] = spread_list
    df['Over/Under'] = over_under_list
    return df

In [1]:
def get_proj_info(date):
    """Parse one saved projections page into a per-player DataFrame.

    The page ``<cwd>/ProjectionSaves/<date>.html`` is read and split on the
    ``row-pad-xs-1 projections-listing`` marker; each piece is scanned for the
    ``data-*`` attributes of one player.

    Parameters
    ----------
    date : str or int
        Date used to build the file name and as the prefix of every game id.

    Returns
    -------
    pandas.DataFrame
        Columns ``GID``, ``DK Positions``, ``Name``, ``L5 DvP``, ``Rest`` and
        ``Projected Points``. ``GID`` is ``<date>0<home team>``: the opponent
        when the home/away marker contains ``@``, otherwise the player's own
        team.

    Notes
    -----
    Fallbacks when an attribute cannot be parsed: ``Rest`` 3, ``L5 DvP`` 999,
    ``Projected Points`` 999, team ``'999'``. The position, opponent and
    home/away marker fall back to the value of the previous player.
    """
    # Player-name rewrites.
    name_fixes = {
        'Maurice Williams': 'Mo Williams',
        'Louis Williams': 'Lou Williams',
        'Amare Stoudemire': "Amar'e Stoudemire",
        'Tim Hardaway Jr.': 'Tim Hardaway',
        'Perry Jones III': 'Perry Jones',
        'Wes Matthews': 'Wesley Matthews',
        'C.J. McCollum': 'CJ McCollum',
        'Glen Rice Jr.': "Glen Rice",
        'Jose Barea': 'J.J. Barea',
        'Dennis Smith Jr.': 'Dennis Smith',
        'Guillermo Hernangomez': 'Willy Hernangomez',
        # NOTE(review): the trailing period looks like a typo, so this entry
        # probably never matches -- confirm before changing the key.
        'Wes Iwundu.': "Wesley Iwundu",
    }
    # Short team codes rewritten to three-letter codes; the opponent table
    # has no 'NYC' entry, exactly as before.
    team_fixes = {'SA': 'SAS', 'NO': 'NOP', 'NY': 'NYK', 'NYC': 'NYK',
                  'GS': 'GSW', 'CHA': 'CHO', 'BKN': 'BRK'}
    opp_fixes = {'SA': 'SAS', 'NO': 'NOP', 'NY': 'NYK',
                 'GS': 'GSW', 'CHA': 'CHO', 'BKN': 'BRK'}

    # Close the handle deterministically instead of leaking it.
    with open(os.getcwd()+("/ProjectionSaves/"+str(date)+".html")) as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    names = []
    rest = []
    gids = []
    positions = []
    dvp = []
    projections = []
    df = pd.DataFrame()
    orig_text = str(soup)
    # Split once up front; the loop below walks these pieces by index.
    listing_chunks = orig_text.split('row-pad-xs-1 projections-listing')
    text = orig_text.split('visible-block">PROJECTED</span> <i class',1)[1]
    i=1
    while 'data-name="' in text:
        #append name
        name = text.split('data-name="')[1].split('" data-opp')[0]
        name = name_fixes.get(name, name)
        names.append(name)
        #append position
        try:
            pos = text.split('data-position="')[1].split('" data-pp_line')[0]
        except (IndexError, ValueError):
            # Attribute missing: keep the position carried over from the previous player.
            pass
        positions.append(pos)
        #append rest
        rest_days = 3
        try:
            rest_days = int(text.split('data-days_rest="')[1].split('" data-first_name',1)[0])
        except (IndexError, ValueError):
            pass
        rest.append(rest_days)
        #append dvp
        L5_dvp = 999
        try:
            L5_dvp = float(text.split('data-opp_rank="')[1].split('" data-play_time_seconds',1)[0])
        except (IndexError, ValueError):
            pass
        dvp.append(L5_dvp)
        #team (only used to build the GID of home games)
        team = '999'
        try:
            team = text.split('" data-team="',1)[1].split('" data-value="')[0].upper()
        except (IndexError, ValueError):
            pass
        team = team_fixes.get(team, team)
        #append projection
        projection = 999
        try:
            projection = float(text.split('data-ppg_projected="')[1].split('" data-projected_team_score',1)[0])
        except (IndexError, ValueError):
            pass
        projections.append(projection)
        #append GID
        try:
            opp = text.split('data-opp="',1)[1].split('" data-opp_rank="')[0]
            home_away = text.split('style="left:-0.5em">',1)[1].split('</div><span>')[0]
        except (IndexError, ValueError):
            # Keep whatever opponent / marker was carried over from the previous player.
            pass
        if '@' in home_away:
            # Away game: the opponent is the home team and names the game.
            opp = opp_fixes.get(opp, opp)
            gid = str(date)+'0'+opp
        else:
            gid = str(date)+'0'+str(team)
            # Not used for this row; kept so the value carried into a later
            # row whose parse fails is the same as before.
            opp = str(opp)[2:].upper()
        gids.append(gid)
        i = i+1
        if i >= len(listing_chunks):
            break
        text = listing_chunks[i]
    df['GID'] = gids
    df['DK Positions'] = positions
    df['Name'] = names
    df['L5 DvP'] = dvp
    df["Rest"] = rest
    df['Projected Points'] = projections
    return df

In [12]:
# Full franchise name -> three-letter team code.
# The insertion order matters: get_box_score_stats walks .values() in this
# order when it looks for box-score tables.
team_handles_dict = {
    "Toronto Raptors": "TOR",
    "Boston Celtics": "BOS",
    "Philadelphia 76ers": "PHI",
    "Cleveland Cavaliers": "CLE",
    "Indiana Pacers": "IND",
    "Miami Heat": "MIA",
    "Milwaukee Bucks": "MIL",
    "Washington Wizards": "WAS",
    "Detroit Pistons": "DET",
    "Charlotte Hornets": "CHO",
    "Charlotte Bobcats": "CHA",
    "New York Knicks": "NYK",
    "Brooklyn Nets": "BRK",
    "Chicago Bulls": "CHI",
    "Orlando Magic": "ORL",
    "Atlanta Hawks": "ATL",
    "Houston Rockets": "HOU",
    "Golden State Warriors": "GSW",
    "Portland Trail Blazers": "POR",
    "Oklahoma City Thunder": "OKC",
    "Utah Jazz": "UTA",
    "New Orleans Pelicans": "NOP",
    "San Antonio Spurs": "SAS",
    "Minnesota Timberwolves": "MIN",
    "Denver Nuggets": "DEN",
    "Los Angeles Clippers": "LAC",
    "Los Angeles Lakers": "LAL",
    "Sacramento Kings": "SAC",
    "Dallas Mavericks": "DAL",
    "Memphis Grizzlies": "MEM",
    "Phoenix Suns": "PHO",
}

# City / short name as it appears on the odds pages -> three-letter team code.
# Used by get_betting_lines through abbreviate(). The literal previously
# listed 'Charlotte' twice; the redundant entry has been removed.
city_handles_dict = {'Toronto': 'TOR',
                     'Boston': 'BOS',
                     'Philadelphia': 'PHI',
                     'Cleveland': 'CLE',
                     'Indiana': 'IND',
                     'Miami': 'MIA',
                     'Milwaukee': 'MIL',
                     'Washington': 'WAS',
                     'Detroit': 'DET',
                     'Charlotte': 'CHO',
                     'New York': 'NYK',
                     'Brooklyn': 'BRK',
                     'Chicago': 'CHI',
                     'Orlando': 'ORL',
                     'Atlanta': 'ATL',
                     'Houston': 'HOU',
                     'Golden State': 'GSW',
                     'Portland': 'POR',
                     'Oklahoma City': 'OKC',
                     'Utah': 'UTA',
                     'New Orleans': 'NOP',
                     'San Antonio': 'SAS',
                     'Minnesota': 'MIN',
                     'Denver': 'DEN',
                     'L.A. Clippers': 'LAC',
                     'L.A. Lakers': 'LAL',
                     'Sacramento': 'SAC',
                     'Dallas': 'DAL',
                     'Memphis': 'MEM',
                     'Phoenix': 'PHO'}