In [28]:
from datetime import datetime
import csv
import requests, bs4
import re
import pandas as pd
import os


In [104]:
def get_4fac15(team):
    
    team_abbrevs = {
    'Cleveland Cavaliers': 'CLE',
    'Boston Celtics':'BOS',
    'Washington Wizards':'WAS',
    'Charlotte Hornets':'CHO',
    'Minnesota Timberwolves':'MIN',
    'Dallas Mavericks':'DAL',
    'Milwaukee Bucks':'MIL',
    'Philadelphia 76ers':'PHI',
    'Phoenix Suns':'PHO',
    'Los Angeles Lakers':'LAL',
    'Utah Jazz':'UTA',
    'Sacramento Kings':'SAC',
    'New York Knicks':'NYK',
    'New Orleans Pelicans':'NOP',
    'Detroit Pistons':'DET',
    'Atlanta Hawks':'ATL',
    'Chicago Bulls':'CHI',
    'Miami Heat':'MIA',
    'Memphis Grizzlies':'MEM',
    'Golden State Warriors':'GSW',
    'Denver Nuggets':'DEN',
    'Brooklyn Nets':'BRK',
    'Los Angeles Clippers':'LAC',
    'Portland Trail Blazers':'POR',
    'Indiana Pacers':'IND',
    'San Antonio Spurs':'SAS',
    'Houston Rockets':'HOU',
    'Oklahoma City Thunder':'OKC',
    'Toronto Raptors':'TOR',
    'Orlando Magic':'ORL'
    }
    
    # Formats webscraped dates to match bball reference URLs for later scraping
    def date_formatter(date):

        if len(str(date.month)) != 2:
            month = '0'+ str(date.month)
        else:
            month = str(date.month)
        
        if len(str(date.day)) != 2:
            day = '0' + str(date.day)
        else:
            day = str(date.day)
        
        newdate = str(date.year) + month + day
        return newdate


    dates = []
    efgs = []
    tovpct = []
    orbpct = []
    ftr = []
    pace = []
    OEff = []
    DEff = []
        
    # Get team page
    url = f'https://www.basketball-reference.com/teams/{team}/2015_games.html'
    res = requests.get(url, 'html_parser')
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
        
    # Get just regular season stats
    reg_season = soup.find('table',{'id':'games'})
        
    # Pull regular season game dates, generate list, turn it into datetime objects
    datesoup = reg_season.find_all(attrs = {'data-stat':'date_game'})
    gamedates = [entry.get_text() for entry in datesoup if entry.get_text() != 'Date' ]
    game_dates = [datetime.strptime(date, '%a, %b %d, %Y') for date in gamedates]
        
        
    # Find regular season game locations, translate to 1 = home, 0 = away
    souplocs = reg_season.find_all(attrs = {'data-stat':'game_location'})
    locs = [entry.get_text() for entry in souplocs if entry.get_text() == '' or entry.get_text() == '@']
    game_locs = list(map(lambda x: 1 if x =='' else 0, locs))
    
    # Find opponent team names
    oppts = reg_season.find_all(attrs = {'data-stat':'opp_name'})
    opps = [entry.get_text() for entry in oppts if entry.get_text() != "Opponent"]
    opp_abbrev = [team_abbrevs[opp] for opp in opps]
        
    # Get Results
    soupresults = reg_season.find_all(attrs = {'data-stat':'game_result'})
    results = [entry.get_text() for entry in soupresults if entry.get_text() == 'W' or entry.get_text() == 'L']
    rslts = list(map(lambda x: 1 if x =='W' else 0, results))

    souppoints = reg_season.find_all(attrs = {'data-stat':'pts'})
    soupopp = reg_season.find_all(attrs = {'data-stat':'opp_pts'})

    teampoints = [entry.get_text() for entry in souppoints if not entry.get_text().startswith('T')]
    opppoints = [entry.get_text() for entry in soupopp if not entry.get_text().startswith('O')]

    teampoints = list(map(lambda x: int(x), teampoints))
    opppoints = list(map(lambda x: int(x), opppoints))
    
    # Get game stats
    for i, gamedate in enumerate(game_dates):
            
        date = date_formatter(gamedate)
        dates.append(date)
            
        if game_locs[i] == 1:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[1].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[1].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[1].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[1].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[1].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
        else:
            opp = team_abbrevs[opps[i]]
            url = f'https://www.basketball-reference.com/boxscores/{date}0{opp}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[0].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[0].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[0].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[0].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[0].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())

#         print("Date: " + date + "Location: " + str(game_locs[i]) + " Team Score " + 
#               " Opponent: " + team_abbrevs[opps[i]] + " Result: " + 
#               str(rslts[i]) + " EFG%: " + efgs[i] + " TOV%: " + tovpct[i] + 
#               " ORB%: " + orbpct[i] + " FTR: " + ftr[i] + " Pace: " + pace[i])
    
    teamlist= [team] * 82
    data = {'Team': teamlist, "Location":game_locs, "Game Number":list(range(1,83)), "Team Points": teampoints,
            "Opp Points": opppoints, "Result":rslts, "Date": dates, "Opponent": opp_abbrev, 
            "EFG%": efgs, "TOV%" : tovpct, "ORB%":orbpct, "FTR":ftr, "Pace": pace, "OEff": OEff, "DEff":DEff}
    df = pd.DataFrame.from_dict(data)
    df['EFG%'] = df['EFG%'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%'].apply(lambda x: float(x))
    df['ORB%'] = df['ORB%'].apply(lambda x: float(x))
    df['FTR'] = df['FTR'].apply(lambda x: float(x))
    df['Pace'] = df['Pace'].apply(lambda x: float(x))
    df['OEff'] = df['OEff'].apply(lambda x: float(x))
    df['DEff'] = df['DEff'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%']/100.0
    df['ORB%'] = df['ORB%']/100.0
    
    return df

In [33]:
url = f'https://www.basketball-reference.com/boxscores/pbp/201410290IND.html'
res = requests.get(url)
res.raise_for_status()
soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
pbp = soup.find('table',{'id':'pbp'})
PHI = pd.read_html(str(pbp), flavor='bs4')[0]


In [35]:
PHI.to_csv('PHI_pbp_test.csv',index=False)

In [36]:
PHI

Unnamed: 0_level_0,1st Q,1st Q,1st Q,1st Q,1st Q,1st Q
Unnamed: 0_level_1,Time,Philadelphia,Unnamed: 2_level_1,Score,Unnamed: 4_level_1,Indiana
0,12:00.0,Start of 1st quarter,Start of 1st quarter,Start of 1st quarter,Start of 1st quarter,Start of 1st quarter
1,12:00.0,Jump ball: R. Hibbert vs. N. Noel (L. Scola ga...,Jump ball: R. Hibbert vs. N. Noel (L. Scola ga...,Jump ball: R. Hibbert vs. N. Noel (L. Scola ga...,Jump ball: R. Hibbert vs. N. Noel (L. Scola ga...,Jump ball: R. Hibbert vs. N. Noel (L. Scola ga...
2,11:43.0,,,0-0,,Turnover by D. Sloan (bad pass; steal by T. Wr...
3,11:42.0,Personal take foul by L. Scola (drawn by T. Wr...,,0-0,,
4,11:30.0,H. Thompson misses 3-pt jump shot from 26 ft,,0-0,,
5,11:28.0,,,0-0,,Defensive rebound by R. Hibbert
6,11:17.0,,,0-0,,C. Miles misses 2-pt jump shot from 21 ft
7,11:16.0,Defensive rebound by N. Noel,,0-0,,
8,11:10.0,T. Wroten makes 2-pt layup at rim,+2,2-0,,
9,10:53.0,,,2-2,+2,L. Scola makes 2-pt jump shot from 17 ft (assi...


In [13]:
PHI.drop('Unnamed: 0_level_0', level = 0,axis = 1)['Basic Box Score Stats']

Unnamed: 0,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-
0,35:10,9,21,.429,1,4,.250,3,8,.375,1,7,8,7,3,0,1,2,22,+8
1,34:53,2,11,.182,0,0,,2,6,.333,4,6,10,0,0,3,2,4,6,+2
2,32:12,5,10,.500,1,4,.250,1,2,.500,0,1,1,1,1,0,2,1,12,+1
3,28:30,2,5,.400,1,2,.500,0,0,,1,2,3,2,4,0,0,5,5,0
4,23:29,4,9,.444,0,0,,2,2,1.000,1,3,4,0,0,0,1,4,10,-16
5,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-
6,23:56,5,11,.455,3,6,.500,5,6,.833,1,2,3,5,0,0,0,4,18,-16
7,19:42,1,5,.200,0,1,.000,0,2,.000,1,1,2,1,1,1,1,1,2,-15
8,17:58,5,11,.455,0,2,.000,2,2,1.000,4,1,5,1,1,0,1,3,12,+1
9,16:15,1,2,.500,0,0,,2,2,1.000,2,3,5,0,0,1,2,3,4,-12


In [35]:
PHI['Team']='PHI'

In [105]:
def get_4fac16(team):
    
    team_abbrevs = {
    'Cleveland Cavaliers': 'CLE',
    'Boston Celtics':'BOS',
    'Washington Wizards':'WAS',
    'Charlotte Hornets':'CHO',
    'Minnesota Timberwolves':'MIN',
    'Dallas Mavericks':'DAL',
    'Milwaukee Bucks':'MIL',
    'Philadelphia 76ers':'PHI',
    'Phoenix Suns':'PHO',
    'Los Angeles Lakers':'LAL',
    'Utah Jazz':'UTA',
    'Sacramento Kings':'SAC',
    'New York Knicks':'NYK',
    'New Orleans Pelicans':'NOP',
    'Detroit Pistons':'DET',
    'Atlanta Hawks':'ATL',
    'Chicago Bulls':'CHI',
    'Miami Heat':'MIA',
    'Memphis Grizzlies':'MEM',
    'Golden State Warriors':'GSW',
    'Denver Nuggets':'DEN',
    'Brooklyn Nets':'BRK',
    'Los Angeles Clippers':'LAC',
    'Portland Trail Blazers':'POR',
    'Indiana Pacers':'IND',
    'San Antonio Spurs':'SAS',
    'Houston Rockets':'HOU',
    'Oklahoma City Thunder':'OKC',
    'Toronto Raptors':'TOR',
    'Orlando Magic':'ORL'
    }
    
    # Formats webscraped dates to match bball reference URLs for later scraping
    def date_formatter(date):

        if len(str(date.month)) != 2:
            month = '0'+ str(date.month)
        else:
            month = str(date.month)
        
        if len(str(date.day)) != 2:
            day = '0' + str(date.day)
        else:
            day = str(date.day)
        
        newdate = str(date.year) + month + day
        return newdate


    dates = []
    efgs = []
    tovpct = []
    orbpct = []
    ftr = []
    pace = []
    OEff = []
    DEff = []
        
    # Get team page
    url = f'https://www.basketball-reference.com/teams/{team}/2016_games.html'
    res = requests.get(url, 'html_parser')
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    
    # Get just regular season stats
    reg_season = soup.find('table',{'id':'games'})
    
    # Pull regular season game dates, generate list, turn it into datetime objects
    datesoup = reg_season.find_all(attrs = {'data-stat':'date_game'})
    gamedates = [entry.get_text() for entry in datesoup if entry.get_text() != 'Date' ]
    game_dates = [datetime.strptime(date, '%a, %b %d, %Y') for date in gamedates]
        
    
    # Find regular season game locations, translate to 1 = home, 0 = away
    souplocs = reg_season.find_all(attrs = {'data-stat':'game_location'})
    locs = [entry.get_text() for entry in souplocs if entry.get_text() == '' or entry.get_text() == '@']
    game_locs = list(map(lambda x: 1 if x =='' else 0, locs))
    
    # Find opponent team names
    oppts = reg_season.find_all(attrs = {'data-stat':'opp_name'})
    opps = [entry.get_text() for entry in oppts if entry.get_text() != "Opponent"]
    opp_abbrev = [team_abbrevs[opp] for opp in opps]
        
    # Get Results
    soupresults = reg_season.find_all(attrs = {'data-stat':'game_result'})
    results = [entry.get_text() for entry in soupresults if entry.get_text() == 'W' or entry.get_text() == 'L']
    rslts = list(map(lambda x: 1 if x =='W' else 0, results))

    souppoints = reg_season.find_all(attrs = {'data-stat':'pts'})
    soupopp = reg_season.find_all(attrs = {'data-stat':'opp_pts'})

    teampoints = [entry.get_text() for entry in souppoints if not entry.get_text().startswith('T')]
    opppoints = [entry.get_text() for entry in soupopp if not entry.get_text().startswith('O')]

    teampoints = list(map(lambda x: int(x), teampoints))
    opppoints = list(map(lambda x: int(x), opppoints))

    # Get game stats
    for i, gamedate in enumerate(game_dates):
            
        date = date_formatter(gamedate)
        dates.append(date)
            
        if game_locs[i] == 1:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[1].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[1].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[1].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[1].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[1].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
        else:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team_abbrevs[opps[i]]}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[0].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[0].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[0].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[0].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[0].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())

#         print("Date: " + date + "Location: " + str(game_locs[i]) + " Team Score " + 
#               " Opponent: " + team_abbrevs[opps[i]] + " Result: " + 
#               str(rslts[i]) + " EFG%: " + efgs[i] + " TOV%: " + tovpct[i] + 
#               " ORB%: " + orbpct[i] + " FTR: " + ftr[i] + " Pace: " + pace[i])
    
    teamlist= [team] * 82
    data = {'Team': teamlist, "Location":game_locs, "Game Number":list(range(1,83)), "Team Points": teampoints,
            "Opp Points": opppoints, "Result":rslts, "Date": dates, "Opponent": opp_abbrev, 
            "EFG%": efgs, "TOV%" : tovpct, "ORB%":orbpct, "FTR":ftr, "Pace": pace, "OEff": OEff, "DEff":DEff}
    df = pd.DataFrame.from_dict(data)
    df['EFG%'] = df['EFG%'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%'].apply(lambda x: float(x))
    df['ORB%'] = df['ORB%'].apply(lambda x: float(x))
    df['FTR'] = df['FTR'].apply(lambda x: float(x))
    df['Pace'] = df['Pace'].apply(lambda x: float(x))
    df['OEff'] = df['OEff'].apply(lambda x: float(x))
    df['DEff'] = df['DEff'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%']/100.0
    df['ORB%'] = df['ORB%']/100.0
    
    return df

In [106]:
def get_4fac17(team):
    
    team_abbrevs = {
    'Cleveland Cavaliers': 'CLE',
    'Boston Celtics':'BOS',
    'Washington Wizards':'WAS',
    'Charlotte Hornets':'CHO',
    'Minnesota Timberwolves':'MIN',
    'Dallas Mavericks':'DAL',
    'Milwaukee Bucks':'MIL',
    'Philadelphia 76ers':'PHI',
    'Phoenix Suns':'PHO',
    'Los Angeles Lakers':'LAL',
    'Utah Jazz':'UTA',
    'Sacramento Kings':'SAC',
    'New York Knicks':'NYK',
    'New Orleans Pelicans':'NOP',
    'Detroit Pistons':'DET',
    'Atlanta Hawks':'ATL',
    'Chicago Bulls':'CHI',
    'Miami Heat':'MIA',
    'Memphis Grizzlies':'MEM',
    'Golden State Warriors':'GSW',
    'Denver Nuggets':'DEN',
    'Brooklyn Nets':'BRK',
    'Los Angeles Clippers':'LAC',
    'Portland Trail Blazers':'POR',
    'Indiana Pacers':'IND',
    'San Antonio Spurs':'SAS',
    'Houston Rockets':'HOU',
    'Oklahoma City Thunder':'OKC',
    'Toronto Raptors':'TOR',
    'Orlando Magic':'ORL'
    }
    
    # Formats webscraped dates to match bball reference URLs for later scraping
    def date_formatter(date):

        if len(str(date.month)) != 2:
            month = '0'+ str(date.month)
        else:
            month = str(date.month)
        
        if len(str(date.day)) != 2:
            day = '0' + str(date.day)
        else:
            day = str(date.day)
        
        newdate = str(date.year) + month + day
        return newdate


    dates = []
    efgs = []
    tovpct = []
    orbpct = []
    ftr = []
    pace = []
    OEff = []
    DEff = []
        
    # Get team page
    url = f'https://www.basketball-reference.com/teams/{team}/2017_games.html'
    res = requests.get(url, 'html_parser')
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    
    # Get just regular season stats
    reg_season = soup.find('table',{'id':'games'})
    
    # Pull regular season game dates, generate list, turn it into datetime objects
    datesoup = reg_season.find_all(attrs = {'data-stat':'date_game'})
    gamedates = [entry.get_text() for entry in datesoup if entry.get_text() != 'Date' ]
    game_dates = [datetime.strptime(date, '%a, %b %d, %Y') for date in gamedates]
        
    
    # Find regular season game locations, translate to 1 = home, 0 = away
    souplocs = reg_season.find_all(attrs = {'data-stat':'game_location'})
    locs = [entry.get_text() for entry in souplocs if entry.get_text() == '' or entry.get_text() == '@']
    game_locs = list(map(lambda x: 1 if x =='' else 0, locs))
    
    # Find opponent team names
    oppts = reg_season.find_all(attrs = {'data-stat':'opp_name'})
    opps = [entry.get_text() for entry in oppts if entry.get_text() != "Opponent"]
    opp_abbrev = [team_abbrevs[opp] for opp in opps]
        
    # Get Results
    soupresults = reg_season.find_all(attrs = {'data-stat':'game_result'})
    results = [entry.get_text() for entry in soupresults if entry.get_text() == 'W' or entry.get_text() == 'L']
    rslts = list(map(lambda x: 1 if x =='W' else 0, results))

    souppoints = reg_season.find_all(attrs = {'data-stat':'pts'})
    soupopp = reg_season.find_all(attrs = {'data-stat':'opp_pts'})

    teampoints = [entry.get_text() for entry in souppoints if not entry.get_text().startswith('T')]
    opppoints = [entry.get_text() for entry in soupopp if not entry.get_text().startswith('O')]

    teampoints = list(map(lambda x: int(x), teampoints))
    opppoints = list(map(lambda x: int(x), opppoints))

    # Get game stats
    for i, gamedate in enumerate(game_dates):
            
        date = date_formatter(gamedate)
        dates.append(date)
            
        if game_locs[i] == 1:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[1].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[1].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[1].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[1].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[1].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
        else:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team_abbrevs[opps[i]]}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[0].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[0].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[0].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[0].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[0].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())

#         print("Date: " + date + "Location: " + str(game_locs[i]) + " Team Score " + 
#               " Opponent: " + team_abbrevs[opps[i]] + " Result: " + 
#               str(rslts[i]) + " EFG%: " + efgs[i] + " TOV%: " + tovpct[i] + 
#               " ORB%: " + orbpct[i] + " FTR: " + ftr[i] + " Pace: " + pace[i])
    
    teamlist= [team] * 82
    data = {'Team': teamlist, "Location":game_locs, "Game Number":list(range(1,83)), "Team Points": teampoints,
            "Opp Points": opppoints, "Result":rslts, "Date": dates, "Opponent": opp_abbrev, 
            "EFG%": efgs, "TOV%" : tovpct, "ORB%":orbpct, "FTR":ftr, "Pace": pace, "OEff": OEff, "DEff":DEff}
    df = pd.DataFrame.from_dict(data)
    df['EFG%'] = df['EFG%'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%'].apply(lambda x: float(x))
    df['ORB%'] = df['ORB%'].apply(lambda x: float(x))
    df['FTR'] = df['FTR'].apply(lambda x: float(x))
    df['Pace'] = df['Pace'].apply(lambda x: float(x))
    df['OEff'] = df['OEff'].apply(lambda x: float(x))
    df['DEff'] = df['DEff'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%']/100.0
    df['ORB%'] = df['ORB%']/100.0
    
    return df

In [107]:
def get_4fac18(team):
    
    team_abbrevs = {
    'Cleveland Cavaliers': 'CLE',
    'Boston Celtics':'BOS',
    'Washington Wizards':'WAS',
    'Charlotte Hornets':'CHO',
    'Minnesota Timberwolves':'MIN',
    'Dallas Mavericks':'DAL',
    'Milwaukee Bucks':'MIL',
    'Philadelphia 76ers':'PHI',
    'Phoenix Suns':'PHO',
    'Los Angeles Lakers':'LAL',
    'Utah Jazz':'UTA',
    'Sacramento Kings':'SAC',
    'New York Knicks':'NYK',
    'New Orleans Pelicans':'NOP',
    'Detroit Pistons':'DET',
    'Atlanta Hawks':'ATL',
    'Chicago Bulls':'CHI',
    'Miami Heat':'MIA',
    'Memphis Grizzlies':'MEM',
    'Golden State Warriors':'GSW',
    'Denver Nuggets':'DEN',
    'Brooklyn Nets':'BRK',
    'Los Angeles Clippers':'LAC',
    'Portland Trail Blazers':'POR',
    'Indiana Pacers':'IND',
    'San Antonio Spurs':'SAS',
    'Houston Rockets':'HOU',
    'Oklahoma City Thunder':'OKC',
    'Toronto Raptors':'TOR',
    'Orlando Magic':'ORL'
    }
    
    # Formats webscraped dates to match bball reference URLs for later scraping
    def date_formatter(date):

        if len(str(date.month)) != 2:
            month = '0'+ str(date.month)
        else:
            month = str(date.month)
        
        if len(str(date.day)) != 2:
            day = '0' + str(date.day)
        else:
            day = str(date.day)
        
        newdate = str(date.year) + month + day
        return newdate


    dates = []
    efgs = []
    tovpct = []
    orbpct = []
    ftr = []
    pace = []
    OEff = []
    DEff = []
        
    # Get team page
    url = f'https://www.basketball-reference.com/teams/{team}/2018_games.html'
    res = requests.get(url, 'html_parser')
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    
    # Get just regular season stats
    reg_season = soup.find('table',{'id':'games'})
    
    # Pull regular season game dates, generate list, turn it into datetime objects
    datesoup = reg_season.find_all(attrs = {'data-stat':'date_game'})
    gamedates = [entry.get_text() for entry in datesoup if entry.get_text() != 'Date' ]
    game_dates = [datetime.strptime(date, '%a, %b %d, %Y') for date in gamedates]
        
    
    # Find regular season game locations, translate to 1 = home, 0 = away
    souplocs = reg_season.find_all(attrs = {'data-stat':'game_location'})
    locs = [entry.get_text() for entry in souplocs if entry.get_text() == '' or entry.get_text() == '@']
    game_locs = list(map(lambda x: 1 if x =='' else 0, locs))
    
    # Find opponent team names
    oppts = reg_season.find_all(attrs = {'data-stat':'opp_name'})
    opps = [entry.get_text() for entry in oppts if entry.get_text() != "Opponent"]
    opp_abbrev = [team_abbrevs[opp] for opp in opps]
        
    # Get Results
    soupresults = reg_season.find_all(attrs = {'data-stat':'game_result'})
    results = [entry.get_text() for entry in soupresults if entry.get_text() == 'W' or entry.get_text() == 'L']
    rslts = list(map(lambda x: 1 if x =='W' else 0, results))

    souppoints = reg_season.find_all(attrs = {'data-stat':'pts'})
    soupopp = reg_season.find_all(attrs = {'data-stat':'opp_pts'})

    teampoints = [entry.get_text() for entry in souppoints if not entry.get_text().startswith('T')]
    opppoints = [entry.get_text() for entry in soupopp if not entry.get_text().startswith('O')]

    teampoints = list(map(lambda x: int(x), teampoints))
    opppoints = list(map(lambda x: int(x), opppoints))

    # Get game stats
    for i, gamedate in enumerate(game_dates):
            
        date = date_formatter(gamedate)
        dates.append(date)
            
        if game_locs[i] == 1:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[1].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[1].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[1].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[1].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[1].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
        else:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team_abbrevs[opps[i]]}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[0].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[0].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[0].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[0].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[0].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())

#         print("Date: " + date + "Location: " + str(game_locs[i]) + " Team Score " + 
#               " Opponent: " + team_abbrevs[opps[i]] + " Result: " + 
#               str(rslts[i]) + " EFG%: " + efgs[i] + " TOV%: " + tovpct[i] + 
#               " ORB%: " + orbpct[i] + " FTR: " + ftr[i] + " Pace: " + pace[i])
    
    teamlist= [team] * 82
    data = {'Team': teamlist, "Location":game_locs, "Game Number":list(range(1,83)), "Team Points": teampoints,
            "Opp Points": opppoints, "Result":rslts, "Date": dates, "Opponent": opp_abbrev, 
            "EFG%": efgs, "TOV%" : tovpct, "ORB%":orbpct, "FTR":ftr, "Pace": pace, "OEff": OEff, "DEff":DEff}
    df = pd.DataFrame.from_dict(data)
    df['EFG%'] = df['EFG%'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%'].apply(lambda x: float(x))
    df['ORB%'] = df['ORB%'].apply(lambda x: float(x))
    df['FTR'] = df['FTR'].apply(lambda x: float(x))
    df['Pace'] = df['Pace'].apply(lambda x: float(x))
    df['OEff'] = df['OEff'].apply(lambda x: float(x))
    df['DEff'] = df['DEff'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%']/100.0
    df['ORB%'] = df['ORB%']/100.0
    
    return df

In [108]:
def get_4fac19(team):
    
    team_abbrevs = {
    'Cleveland Cavaliers': 'CLE',
    'Boston Celtics':'BOS',
    'Washington Wizards':'WAS',
    'Charlotte Hornets':'CHO',
    'Minnesota Timberwolves':'MIN',
    'Dallas Mavericks':'DAL',
    'Milwaukee Bucks':'MIL',
    'Philadelphia 76ers':'PHI',
    'Phoenix Suns':'PHO',
    'Los Angeles Lakers':'LAL',
    'Utah Jazz':'UTA',
    'Sacramento Kings':'SAC',
    'New York Knicks':'NYK',
    'New Orleans Pelicans':'NOP',
    'Detroit Pistons':'DET',
    'Atlanta Hawks':'ATL',
    'Chicago Bulls':'CHI',
    'Miami Heat':'MIA',
    'Memphis Grizzlies':'MEM',
    'Golden State Warriors':'GSW',
    'Denver Nuggets':'DEN',
    'Brooklyn Nets':'BRK',
    'Los Angeles Clippers':'LAC',
    'Portland Trail Blazers':'POR',
    'Indiana Pacers':'IND',
    'San Antonio Spurs':'SAS',
    'Houston Rockets':'HOU',
    'Oklahoma City Thunder':'OKC',
    'Toronto Raptors':'TOR',
    'Orlando Magic':'ORL'
    }
    
    # Formats webscraped dates to match bball reference URLs for later scraping
    def date_formatter(date):

        if len(str(date.month)) != 2:
            month = '0'+ str(date.month)
        else:
            month = str(date.month)
        
        if len(str(date.day)) != 2:
            day = '0' + str(date.day)
        else:
            day = str(date.day)
        
        newdate = str(date.year) + month + day
        return newdate


    dates = []
    efgs = []
    tovpct = []
    orbpct = []
    ftr = []
    pace = []
    OEff = []
    DEff = []
        
    # Get team page
    url = f'https://www.basketball-reference.com/teams/{team}/2019_games.html'
    res = requests.get(url, 'html_parser')
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    
    # Get just regular season stats
    reg_season = soup.find('table',{'id':'games'})
    
    # Pull regular season game dates, generate list, turn it into datetime objects
    datesoup = reg_season.find_all(attrs = {'data-stat':'date_game'})
    gamedates = [entry.get_text() for entry in datesoup if entry.get_text() != 'Date' ]
    game_dates = [datetime.strptime(date, '%a, %b %d, %Y') for date in gamedates]
        
    
    # Find regular season game locations, translate to 1 = home, 0 = away
    souplocs = reg_season.find_all(attrs = {'data-stat':'game_location'})
    locs = [entry.get_text() for entry in souplocs if entry.get_text() == '' or entry.get_text() == '@']
    game_locs = list(map(lambda x: 1 if x =='' else 0, locs))
    
    # Find opponent team names
    oppts = reg_season.find_all(attrs = {'data-stat':'opp_name'})
    opps = [entry.get_text() for entry in oppts if entry.get_text() != "Opponent"]
    opp_abbrev = [team_abbrevs[opp] for opp in opps]
        
    # Get Results
    soupresults = reg_season.find_all(attrs = {'data-stat':'game_result'})
    results = [entry.get_text() for entry in soupresults if entry.get_text() == 'W' or entry.get_text() == 'L']
    rslts = list(map(lambda x: 1 if x =='W' else 0, results))

    souppoints = reg_season.find_all(attrs = {'data-stat':'pts'})
    soupopp = reg_season.find_all(attrs = {'data-stat':'opp_pts'})

    teampoints = [entry.get_text() for entry in souppoints if not entry.get_text().startswith('T')]
    opppoints = [entry.get_text() for entry in soupopp if not entry.get_text().startswith('O')]

    teampoints = list(map(lambda x: int(x), teampoints))
    opppoints = list(map(lambda x: int(x), opppoints))

    # Get game stats
    for i, gamedate in enumerate(game_dates):
            
        date = date_formatter(gamedate)
        dates.append(date)
            
        if game_locs[i] == 1:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[1].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[1].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[1].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[1].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[1].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
        else:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team_abbrevs[opps[i]]}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[0].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[0].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[0].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[0].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[0].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())

#         print("Date: " + date + "Location: " + str(game_locs[i]) + " Team Score " + 
#               " Opponent: " + team_abbrevs[opps[i]] + " Result: " + 
#               str(rslts[i]) + " EFG%: " + efgs[i] + " TOV%: " + tovpct[i] + 
#               " ORB%: " + orbpct[i] + " FTR: " + ftr[i] + " Pace: " + pace[i])
    
    teamlist= [team] * 82
    data = {'Team': teamlist, "Location":game_locs, "Game Number":list(range(1,83)), "Team Points": teampoints,
            "Opp Points": opppoints, "Result":rslts, "Date": dates, "Opponent": opp_abbrev, 
            "EFG%": efgs, "TOV%" : tovpct, "ORB%":orbpct, "FTR":ftr, "Pace": pace, "OEff": OEff, "DEff":DEff}
    df = pd.DataFrame.from_dict(data)
    df['EFG%'] = df['EFG%'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%'].apply(lambda x: float(x))
    df['ORB%'] = df['ORB%'].apply(lambda x: float(x))
    df['FTR'] = df['FTR'].apply(lambda x: float(x))
    df['Pace'] = df['Pace'].apply(lambda x: float(x))
    df['OEff'] = df['OEff'].apply(lambda x: float(x))
    df['DEff'] = df['DEff'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%']/100.0
    df['ORB%'] = df['ORB%']/100.0
    
    return df

In [8]:
def get_4fac20(team, today = datetime.today()):
    
    team_abbrevs = {
    'Cleveland Cavaliers': 'CLE',
    'Boston Celtics':'BOS',
    'Washington Wizards':'WAS',
    'Charlotte Hornets':'CHO',
    'Minnesota Timberwolves':'MIN',
    'Dallas Mavericks':'DAL',
    'Milwaukee Bucks':'MIL',
    'Philadelphia 76ers':'PHI',
    'Phoenix Suns':'PHO',
    'Los Angeles Lakers':'LAL',
    'Utah Jazz':'UTA',
    'Sacramento Kings':'SAC',
    'New York Knicks':'NYK',
    'New Orleans Pelicans':'NOP',
    'Detroit Pistons':'DET',
    'Atlanta Hawks':'ATL',
    'Chicago Bulls':'CHI',
    'Miami Heat':'MIA',
    'Memphis Grizzlies':'MEM',
    'Golden State Warriors':'GSW',
    'Denver Nuggets':'DEN',
    'Brooklyn Nets':'BRK',
    'Los Angeles Clippers':'LAC',
    'Portland Trail Blazers':'POR',
    'Indiana Pacers':'IND',
    'San Antonio Spurs':'SAS',
    'Houston Rockets':'HOU',
    'Oklahoma City Thunder':'OKC',
    'Toronto Raptors':'TOR',
    'Orlando Magic':'ORL'
    }
    
    # Formats webscraped dates to match bball reference URLs for later scraping
    def date_formatter(date):

        if len(str(date.month)) != 2:
            month = '0'+ str(date.month)
        else:
            month = str(date.month)
        
        if len(str(date.day)) != 2:
            day = '0' + str(date.day)
        else:
            day = str(date.day)
        
        newdate = str(date.year) + month + day
        return newdate


    dates = []
    efgs = []
    tovpct = []
    orbpct = []
    ftr = []
    pace = []
    OEff = []
    DEff = []
        
    # Get team page
    url = f'https://www.basketball-reference.com/teams/{team}/2020_games.html'
    res = requests.get(url, 'html_parser')
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    
    # Get just regular season stats
    reg_season = soup.find('table',{'id':'games'})
    
    # Pull regular season game dates, generate list of links to those games
    links = reg_season.find_all(attrs = {'data-stat':'date_game'})
    
    urls = []
    gamedates = []
    
    for i in links:
        if i.find('a'):
            urls.append(i.find('a').attrs['href'])
            gamedates.append(i.get_text())
    print(urls)
    print(gamedates)
    
#     gamedates = [entry.get_text() for entry in datesoup if entry.get_text() != 'Date' ]
    game_dates = [datetime.strptime(date, '%a, %b %d, %Y') for date in gamedates]
    
    # Find regular season game locations, translate to 1 = home, 0 = away
    souplocs = reg_season.find_all(attrs = {'data-stat':'game_location'})
    locs = [entry.get_text() for entry in souplocs if entry.get_text() == '' or entry.get_text() == '@']
    game_locs = list(map(lambda x: 1 if x =='' else 0, locs))
    
    # Find opponent team names
    oppts = reg_season.find_all(attrs = {'data-stat':'opp_name'})
    opps = [entry.get_text() for entry in oppts if entry.get_text() != "Opponent"]
    opp_abbrev = [team_abbrevs[opp] for opp in opps]
        
    # Get Results
    soupresults = reg_season.find_all(attrs = {'data-stat':'game_result'})
    results = [entry.get_text() for entry in soupresults if entry.get_text() == 'W' or entry.get_text() == 'L']
    rslts = list(map(lambda x: 1 if x =='W' else 0, results))
    
    souppoints = reg_season.find_all(attrs = {'data-stat':'pts'})
    soupopp = reg_season.find_all(attrs = {'data-stat':'opp_pts'})

    teampoints = [entry.get_text() for entry in souppoints if not entry.get_text().startswith('T')]
    opppoints = [entry.get_text() for entry in soupopp if not entry.get_text().startswith('O')]

    teampoints = list(map(lambda x: int(x), teampoints))
    opppoints = list(map(lambda x: int(x), opppoints))

    # Get game stats
    for i, gamedate in enumerate(game_dates):
            
        date = date_formatter(gamedate)
        dates.append(date)
            
        if game_locs[i] == 1:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[1].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[1].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[1].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[1].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[1].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
        else:
            url = f'https://www.basketball-reference.com/boxscores/{date}0{team_abbrevs[opps[i]]}.html'
            res = requests.get(url)
            res.raise_for_status()
            soup = bs4.BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
            fourfac = soup.find('div',{'id':'div_four_factors'})
            efgs.append(fourfac.find_all('td',{'data-stat':'efg_pct'})[0].get_text())
            tovpct.append(fourfac.find_all('td',{'data-stat':'tov_pct'})[0].get_text())
            orbpct.append(fourfac.find_all('td',{'data-stat':'orb_pct'})[0].get_text())
            ftr.append(fourfac.find_all('td',{'data-stat':'ft_rate'})[0].get_text())
            pace.append(fourfac.find_all('td',{'data-stat':'pace'})[0].get_text())
            OEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[0].get_text())
            DEff.append(fourfac.find_all('td',{'data-stat':'off_rtg'})[1].get_text())

#         print("Date: " + date + "Location: " + str(game_locs[i]) + " Team Score " + 
#               " Opponent: " + team_abbrevs[opps[i]] + " Result: " + 
#               str(rslts[i]) + " EFG%: " + efgs[i] + " TOV%: " + tovpct[i] + 
#               " ORB%: " + orbpct[i] + " FTR: " + ftr[i] + " Pace: " + pace[i])
    
    teamlist= [team] * 82
    data = {'Team': teamlist, "Location":game_locs, "Game Number":list(range(1,83)), "Team Points": teampoints,
            "Opp Points": opppoints, "Result":rslts, "Date": dates, "Opponent": opp_abbrev, 
            "EFG%": efgs, "TOV%" : tovpct, "ORB%":orbpct, "FTR":ftr, "Pace": pace, "OEff": OEff, "DEff":DEff}
    df = pd.DataFrame.from_dict(data)
    df['EFG%'] = df['EFG%'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%'].apply(lambda x: float(x))
    df['ORB%'] = df['ORB%'].apply(lambda x: float(x))
    df['FTR'] = df['FTR'].apply(lambda x: float(x))
    df['Pace'] = df['Pace'].apply(lambda x: float(x))
    df['OEff'] = df['OEff'].apply(lambda x: float(x))
    df['DEff'] = df['DEff'].apply(lambda x: float(x))
    df['TOV%'] = df['TOV%']/100.0
    df['ORB%'] = df['ORB%']/100.0
    
    return df

In [2]:
teams = ['CLE', 'BOS', 'WAS', 'CHO', 'MIN', 'DAL', 
         'MIL', 'PHI', 'PHO', 'LAL', 'UTA', 'SAC', 
         'NYK', 'NOP', 'DET', 'ATL', 'CHI', 'MIA', 
         'MEM', 'GSW', 'DEN', 'BRK', 'LAC', 'POR', 
         'IND', 'SAS', 'HOU', 'OKC', 'TOR', 'ORL']

In [99]:
df = pd.DataFrame()
year = '2015'
for team in teams:
    team_db = get_4fac15(team)
    df = pd.concat([df, team_db]).reset_index(drop = True)
    
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

df.to_csv(f'{year}stats.csv', index = False)

In [None]:
df = pd.DataFrame()
year = '2016'
for team in teams:
    team_db = get_4fac16(team)
    df = pd.concat([df, team_db]).reset_index(drop = True)
    
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

df.to_csv(f'{year}stats.csv', index = False)

In [None]:
df = pd.DataFrame()
year = '2017'
for team in teams:
    team_db = get_4fac17(team)
    df = pd.concat([df, team_db]).reset_index(drop = True)
    
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

df.to_csv(f'{year}stats.csv', index = False)

In [110]:
df = pd.DataFrame()
year = '2018'
for team in teams:
    team_db = get_4fac18(team)
    df = pd.concat([df, team_db]).reset_index(drop = True)
    
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

df.to_csv(f'{year}stats.csv', index = False)

In [111]:
df = pd.DataFrame()
year = '2019'
for team in teams:
    team_db = get_4fac19(team)
    df = pd.concat([df, team_db]).reset_index(drop = True)
    
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

df.to_csv(f'{year}stats.csv', index = False)

In [9]:
df = pd.DataFrame()
year = '2020'
for team in teams:
    team_db = get_4fac20(team)
    df = pd.concat([df, team_db]).reset_index(drop = True)
    
path = '/Users/ahelgeso/Documents/GitHub/bball-master/Stat Sheets'
os.chdir(path)

df.to_csv(f'{year}stats.csv', index = False)

['/boxscores/index.cgi?month=10&day=23&year=2019', '/boxscores/index.cgi?month=10&day=26&year=2019', '/boxscores/index.cgi?month=10&day=28&year=2019', '/boxscores/index.cgi?month=10&day=30&year=2019', '/boxscores/index.cgi?month=11&day=1&year=2019', '/boxscores/index.cgi?month=11&day=3&year=2019', '/boxscores/index.cgi?month=11&day=5&year=2019', '/boxscores/index.cgi?month=11&day=8&year=2019', '/boxscores/index.cgi?month=11&day=10&year=2019', '/boxscores/index.cgi?month=11&day=12&year=2019', '/boxscores/index.cgi?month=11&day=14&year=2019', '/boxscores/index.cgi?month=11&day=17&year=2019', '/boxscores/index.cgi?month=11&day=18&year=2019', '/boxscores/index.cgi?month=11&day=20&year=2019', '/boxscores/index.cgi?month=11&day=22&year=2019', '/boxscores/index.cgi?month=11&day=23&year=2019', '/boxscores/index.cgi?month=11&day=25&year=2019', '/boxscores/index.cgi?month=11&day=27&year=2019', '/boxscores/index.cgi?month=11&day=29&year=2019', '/boxscores/index.cgi?month=12&day=3&year=2019', '/bo

ValueError: invalid literal for int() with base 10: ''