In [325]:
import os
import pickle
import time
from datetime import date, timedelta

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

In [301]:
# Lift pandas' display limits so DataFrames are shown without truncating rows or columns.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [13]:
# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to run the notebook on another machine; when it is unset, the
# original machine-specific path below is used.
path_to_chromedriver = os.environ.get(
    'CHROMEDRIVER_PATH',
    '/Users/skylershi/Data Science/chromedriver',
)
browser = webdriver.Chrome(executable_path=path_to_chromedriver)

# Open the scores landing page in the freshly started browser.
url = 'https://stats.nba.com/scores'
browser.get(url)

## Specify Scraping Date Range

In [3]:
# First and last date of the scraping range, and the step used to walk between them.
start_date, end_date = date(2019, 12, 1), date(2020, 1, 1)
delta = timedelta(days=1)

The endpoint **stats.nba.com/scores** accepts dates in the format **stats.nba.com/scores/{month}/{day}/{year}**, where month and day are zero-padded to two digits when necessary (for example, `stats.nba.com/scores/12/07/2019`).

In [10]:
# Step through the range one day at a time (end_date included), formatting each
# day as zero-padded MM/DD/YYYY. The formatted string is not used yet.
date_scrape = start_date
while True:
    if date_scrape > end_date:
        break
    date_str = date_scrape.strftime("%m/%d/%Y")
    date_scrape = date_scrape + delta
    
    

## Scrape Games Off Base Page

In [108]:
# Load the scores page for one sample date (12/07/2019) to develop the scraping against.
url = 'https://stats.nba.com/scores/12/07/2019'
browser.get(url)

In [110]:
# List every link on the current page whose text contains 'Box Score'.
browser.find_elements_by_partial_link_text('Box Score')

[<selenium.webdriver.remote.webelement.WebElement (session="d170af94a5571d2f7689ee0c629aede0", element="724a4893-9ba2-436f-a033-ea19dd6f449f")>,
 <selenium.webdriver.remote.webelement.WebElement (session="d170af94a5571d2f7689ee0c629aede0", element="a7a5d774-3628-4333-99a9-cbb4097eeb61")>,
 <selenium.webdriver.remote.webelement.WebElement (session="d170af94a5571d2f7689ee0c629aede0", element="69a67e1e-59de-4dc7-810f-f15a2b10402d")>,
 <selenium.webdriver.remote.webelement.WebElement (session="d170af94a5571d2f7689ee0c629aede0", element="5c37f24b-58a3-4d23-8bbb-8fdda2c8fa84")>,
 <selenium.webdriver.remote.webelement.WebElement (session="d170af94a5571d2f7689ee0c629aede0", element="b96d6b2f-a1c8-42a1-8d86-28b9da394500")>]

In [45]:
# All direct children of the container addressed by this absolute XPath.
games = browser.find_elements_by_xpath('/html/body/main/div[2]/div/div[2]/div/div[2]/div[1]/div/div/*')

# XPath of the first link inside the child at a given position; {} is that position.
box_score_xpath_template = '''
                                /html/body/main/div[2]/div/div[2]/div/div[2]/div[1]/div/div/
                                div[{}]/div[2]/div[1]/div/div[2]/div/a[1]
                           '''

# XPath positions are 1-based. The walk starts at child 3 and ends at the last child.
child_positions = range(3, len(games) + 1)
for game_id in child_positions:
    game_box_score_xpath = box_score_xpath_template.format(game_id)

    # open the game's box-score page and give it a second to load
    box_score_link = browser.find_element_by_xpath(game_box_score_xpath)
    box_score_link.click()
    time.sleep(1)

    # scrape boxscores

    # return to the list of games before handling the next one
    browser.back()
    time.sleep(1)


## Scrape Boxscore off Game Page

In [198]:
# Load a single game page (game 0021900001) to develop the box-score parsing against.
url = 'https://stats.nba.com/game/0021900001/'
browser.get(url)

In [172]:
# Print the first ten lines of the first stat table's rendered text, numbered,
# to see how the header and the player rows are laid out.
table = browser.find_element_by_xpath('/html/body/main/div[2]/div/div/div[4]/div/div[2]/div/nba-stat-table[1]')
table_text = table.text.replace('SCREEN\nASSISTS', 'SCREEN_ASSISTS')
preview_lines = table_text.split('\n')[:10]
for line_id, line in enumerate(preview_lines):
    print(line_id, '\t', line)

0 	 PLAYER MIN SCREEN ASSISTS SCREEN_ASSISTS PTS DEFLECTIONS OFF LOOSE BALLS
1 	 RECOVERED DEF LOOSE BALLS
2 	 RECOVERED LOOSE BALLS
3 	 RECOVERED CHARGES
4 	 DRAWN CONTESTED
5 	 2PT SHOTS CONTESTED
6 	 3PT SHOTS CONTESTED
7 	 SHOTS OFF BOX OUTS DEF BOX OUTS BOX
8 	 OUTS
9 	 Jrue Holiday F


In [194]:
# Parse the first stat table on the page from its rendered text into
# `column_names` (the header) and `player_stats` (one list of values per player).
table = browser.find_element_by_xpath('/html/body/main/div[2]/div/div/div[4]/div/div[2]/div/nba-stat-table[1]')

column_names = []
player_stats = []
temp_player_stat = []

# Multi-word headers are rendered with spaces or newlines inside them; these
# rules collapse each one into a single underscore-joined token so the header
# can be split on spaces. The rules run in insertion order and the order
# matters: 'SCREEN_ASSISTS PTS' only exists after the two rules above it have
# run, and the OFF/DEF 'LOOSE BALLS' rules must run before the generic one.
text_replacements = {
    # Hustle stats replacement strings
    'SCREEN\nASSISTS': 'SCREEN_ASSISTS',
    'SCREEN ASSISTS' : 'SCREEN_ASSISTS',
    'SCREEN_ASSISTS PTS': 'SCREEN_ASSISTS_PTS',
    'OFF LOOSE BALLS\nRECOVERED': 'OFF_LOOSE_BALLS_RECOVERED',
    'DEF LOOSE BALLS\nRECOVERED': 'DEF_LOOSE_BALLS_RECOVERED',
    'LOOSE BALLS\nRECOVERED': 'LOOSE_BALLS_RECOVERED',
    'CHARGES\nDRAWN': 'CHARGES_DRAWN',
    'CONTESTED\n2PT SHOTS': 'CONTESTED_2PT_SHOTS',
    'CONTESTED\n3PT SHOTS': 'CONTESTED_3PT_SHOTS',
    'CONTESTED\nSHOTS': 'CONTESTED_SHOTS',
    'OFF BOX OUTS' : 'OFF_BOX_OUTS',
    'DEF BOX OUTS' : 'DEF_BOX_OUTS',
    'BOX\nOUTS': 'BOX_OUTS',
    # Advanced stats replacement strings
    ' RATIO': '_RATIO',
    # Defense stats replacement strings
    'DEF\nMIN': 'DEF_MIN',
    'PARTIAL\nPOSS': 'PARTIAL_POSS'
}

table_text = table.text
for orig_str, replace_str in text_replacements.items():
    table_text = table_text.replace(orig_str, replace_str)

# Assumed layout: line 0 is the header, then the lines alternate between a
# player-name line (odd line ids) and that player's stats line (even line ids).
for line_id, line in enumerate(table_text.split('\n')):
    if line_id == 0:
        # the first header token is replaced by the fixed label 'PLAYER'
        column_names = ['PLAYER'] + line.split(' ')[1:]
        continue

    # stop reading once we see totals or DNP/DND line
    if ('Totals' in line) or ('DNP' in line) or ('DND' in line):
        break

    if line_id % 2 == 1:
        # drop a trailing ' F' / ' C' / ' G' from the name line
        # (e.g. 'Jrue Holiday F' -> 'Jrue Holiday')
        line_cleaned = line[:-2] + line[-2:].replace(' F', '').replace(' C', '').replace(' G', '')
        temp_player_stat.append(line_cleaned)
    else:
        temp_player_stat.extend(line.split(' '))
        # Keep the row only when it has exactly one value per column; a
        # mis-split row would otherwise make the DataFrame constructor raise.
        if len(temp_player_stat) == len(column_names):
            player_stats.append(temp_player_stat)
        temp_player_stat = []

In [302]:
# Build a DataFrame from the parsed player rows and show its first five rows.
df = pd.DataFrame(player_stats, columns = column_names)
df.head()

Unnamed: 0,PLAYER,DEF_MIN,PARTIAL_POSS,PTS,DREB,AST,TOV,STL,BLK,DFGM,DFGA,DFG%,D3PM,D3PA,D3P%
0,OG Anunoby,12:59,53.5,24,4,2,2,0,2,8,16,50.0,3,8,37.5
1,Pascal Siakam,13:29,57.8,25,12,2,2,0,1,8,17,47.1,2,3,66.7
2,Marc Gasol,11:20,47.5,21,3,4,2,0,0,9,13,69.2,2,4,50.0
3,Kyle Lowry,16:37,68.9,19,4,4,2,2,0,6,15,40.0,5,9,55.6
4,Fred VanVleet,16:05,65.7,5,5,8,4,2,0,2,12,16.7,1,6,16.7


## Scrape Multiple Boxscore views off of Game Page for Both Teams

In [284]:
# Reload the sample game page so the multi-view scrape below starts from a freshly loaded page.
url = 'https://stats.nba.com/game/0021900001/'
browser.get(url)

In [None]:
# XPath note (not runnable code): /html/body/main/div[2]/div/div/div[4]/div/nav-dropdown/nav/section[2]/div/a

In [None]:
# XPath note (not runnable code): /html/body/main/div[2]/div/div/div[4]/div/nav-dropdown/nav/section[2]/ul/li[1]/a

In [None]:
# XPath note (not runnable code): /html/body/main/div[2]/div/div/div[4]/div/nav-dropdown/nav/section[2]/ul/li[1]/a

In [266]:
# Header fix-ups as (text to find, replacement) pairs. They are applied to the
# table text one after another in exactly this order, so later pairs may rely
# on the result of earlier ones (e.g. 'SCREEN_ASSISTS PTS').
replacement_pairs = [
    # Hustle stats replacement strings
    ('SCREEN\nASSISTS', 'SCREEN_ASSISTS'),
    ('SCREEN ASSISTS', 'SCREEN_ASSISTS'),
    ('SCREEN_ASSISTS PTS', 'SCREEN_ASSISTS_PTS'),
    ('OFF LOOSE BALLS\nRECOVERED', 'OFF_LOOSE_BALLS_RECOVERED'),
    ('DEF LOOSE BALLS\nRECOVERED', 'DEF_LOOSE_BALLS_RECOVERED'),
    ('LOOSE BALLS\nRECOVERED', 'LOOSE_BALLS_RECOVERED'),
    ('CHARGES\nDRAWN', 'CHARGES_DRAWN'),
    ('CONTESTED\n2PT SHOTS', 'CONTESTED_2PT_SHOTS'),
    ('CONTESTED\n3PT SHOTS', 'CONTESTED_3PT_SHOTS'),
    ('CONTESTED\nSHOTS', 'CONTESTED_SHOTS'),
    ('OFF BOX OUTS', 'OFF_BOX_OUTS'),
    ('DEF BOX OUTS', 'DEF_BOX_OUTS'),
    ('BOX\nOUTS', 'BOX_OUTS'),
    # Advanced stats replacement strings
    (' RATIO', '_RATIO'),
    # Defense stats replacement strings
    ('DEF\nMIN', 'DEF_MIN'),
    ('PARTIAL\nPOSS', 'PARTIAL_POSS'),
]
text_replacements = dict(replacement_pairs)

In [267]:
# Stat views to scrape from the game page. Each name is clicked by its link
# text, and the resulting DataFrames are later addressed by position in this
# list, so the order matters.
options = ['Traditional', 'Advanced', 'Usage', 'Player Tracking', 'Hustle', 'Defense']

In [285]:
# keep track of all stats with a list of dfs
team1_stats_dfs = []
team2_stats_dfs = []
team1 = browser.find_elements_by_class_name('nba-stat-table__caption')[0].text
team2 = browser.find_elements_by_class_name('nba-stat-table__caption')[1].text

# scrape multiple options designated above
for i, option in enumerate(options):
    # only select new options item if not first-time loading page
    if i != 0:
        browser.find_element_by_partial_link_text(option).click()
        time.sleep(2)


    # scrape 2 boxscore tables
    table1 = browser.find_element_by_xpath('/html/body/main/div[2]/div/div/div[4]/div/div[2]/div/nba-stat-table[1]')
    table2 = browser.find_element_by_xpath('/html/body/main/div[2]/div/div/div[4]/div/div[2]/div/nba-stat-table[2]')


    
    column_names = []
    player_stats = []
    temp_player_stat = []
    
    for table_idx, table in enumerate([table1, table2]):
        column_names.clear()
        player_stats.clear()
        temp_player_stat.clear()
        
        # replace all the unexpected spaces and newlines strings by our script
        table_text = table.text
        for orig_str, replace_str in text_replacements.items():
            table_text = table_text.replace(orig_str, replace_str)

        # read table text into python list of lists
        for line_id, line in enumerate(table_text.split('\n')):
            if line_id == 0:
                column_names = line.split(' ')[1:]
                column_names.insert(0,'PLAYER')
            else:
                # stop reading once we see totals or DNP/DND line
                if ('Totals' in line) or ('DNP' in line) or ('DND' in line):
                    break

                if line_id % 2 == 1:
                    line_cleaned = line[:-2] + line[-2:].replace(' F', '').replace(' C', '').replace(' G', '')
                    temp_player_stat.append(line_cleaned)
                if line_id % 2 == 0:
                    temp_player_stat.extend(line.split(' '))
                    if (len(temp_player_stat) == len(column_names)):
                        player_stats.append(temp_player_stat)
                    temp_player_stat = []

        # convert list of lists into pandas df
        df = pd.DataFrame(player_stats, columns = column_names)
        if table_idx == 0:
            team1_stats_dfs.append(df)
        else:
            team2_stats_dfs.append(df)

    # scroll back to top of page
    browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
    # open the options menu
    browser.find_element_by_partial_link_text(option).click()
    time.sleep(2)

In [326]:
# Merge each team's six per-view DataFrames into one wide DataFrame per team,
# outer-joined on PLAYER, and prepend a TEAM column. Both teams go through the
# same steps, so the steps are written once and applied to each team in turn.

# For every view after the first: (position in the *_stats_dfs list, columns
# removed from that view before merging). Presumably the removed columns repeat
# ones an earlier view already contributed -- TODO confirm.
merge_plan = [
    (1, ['MIN']),
    (2, ['MIN', 'USG%']),
    (3, ['MIN', 'AST', 'FG%']),
    (4, ['MIN']),
    (5, ['DREB', 'STL', 'BLK', 'DFGM', 'DFGA', 'DFG%']),
]

# Columns of the view at position 5 that are renamed so they do not clash with
# the PTS / AST / TOV columns of the first view.
defense_renames = {
    'PTS' : 'ALLOWED_PTS',
    'AST' : 'ALLOWED_AST',
    'TOV' : 'FORCED_TOV'
}

merged_team_dfs = []
for team_name, stats_dfs in ((team1, team1_stats_dfs), (team2, team2_stats_dfs)):
    # As in the original cell, the renamed frame is stored back into the list.
    # Running the rename again on already-renamed columns changes nothing.
    stats_dfs[5] = stats_dfs[5].rename(columns = defense_renames)

    team_df = stats_dfs[0]
    for view_idx, dropped_columns in merge_plan:
        team_df = pd.merge(team_df, stats_dfs[view_idx].drop(columns = dropped_columns),
                           how = 'outer', on = 'PLAYER')

    team_df.insert(0, 'TEAM', team_name)
    merged_team_dfs.append(team_df)

team1_df, team2_df = merged_team_dfs

In [327]:
# Stack both teams' rows into one table with a fresh 0..n-1 index and display it.
pd.concat([team1_df, team2_df], ignore_index=True)

Unnamed: 0,TEAM,PLAYER,MIN,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,TOV,STL,BLK,PF,PTS,+/-,OFFRTG,DEFRTG,NETRTG,AST%,AST/TO,AST_RATIO,OREB%,DREB%,REB%,TO_RATIO,EFG%,TS%,USG%,PACE,PIE,%FGM,%FGA,%3PM,%3PA,%FTM,%FTA,%OREB,%DREB,%REB,%AST,%TO,%STL,%BLK,%BLKA,%PF,%PFD,%PTS,DIST,SPD,TCHS,PASS,SAST,DFGM,DFGA,DFG%,ORBC,DRBC,RBC,CFGM,CFGA,CFG%,UFGM,UFGA,UFG%,SCREEN_ASSISTS,SCREEN_ASSISTS_PTS,DEFLECTIONS,OFF_LOOSE_BALLS_RECOVERED,DEF_LOOSE_BALLS_RECOVERED,LOOSE_BALLS_RECOVERED,CHARGES_DRAWN,CONTESTED_2PT_SHOTS,CONTESTED_3PT_SHOTS,CONTESTED_SHOTS,OFF_BOX_OUTS,DEF_BOX_OUTS,BOX_OUTS,DEF_MIN,PARTIAL_POSS,ALLOWED_PTS,ALLOWED_AST,FORCED_TOV,D3PM,D3PA,D3P%
0,New Orleans Pelicans,Jrue Holiday,41:05,6,15,40.0,1,6,16.7,0,2,0.0,2,2,4,6,5,0,2,2,13,-14,107.8,122.0,-14.2,20.7,1.2,23.1,5.0,4.3,4.7,19.2,43.3,40.9,20.8,105.74,2.9,17.1,20.0,7.1,20.0,0.0,12.5,33.3,7.1,11.8,25.0,33.3,0.0,33.3,0.0,7.7,11.1,13.4,3.23,4.32,101,77,2,0,2,0.0,2,5,7,5,9,55.6,1,6,16.7,0,0,3,1,1,2,1,4,5,9,0,4,4,14:29,58.4,6,6,1,2,7,28.6
1,New Orleans Pelicans,Brandon Ingram,35:06,8,19,42.1,2,5,40.0,4,4,100.0,0,5,5,5,2,1,2,4,22,-19,102.6,122.5,-19.9,22.7,2.5,18.5,0.0,12.5,6.8,7.4,47.4,53.0,27.2,107.35,11.2,26.7,30.2,18.2,21.7,50.0,36.4,0.0,20.8,17.9,23.8,14.3,100.0,40.0,0.0,19.0,37.5,27.8,2.78,4.33,69,46,2,4,8,50.0,1,9,9,5,9,55.6,3,10,30.0,2,5,3,0,0,0,0,8,4,12,0,0,0,12:37,50.1,16,3,0,0,4,0.0
2,New Orleans Pelicans,Derrick Favors,20:45,3,6,50.0,0,0,0.0,0,0,0.0,1,6,7,2,1,0,1,5,6,-12,108.2,132.7,-24.5,11.8,2.0,22.2,4.8,26.1,15.9,11.1,50.0,50.0,13.7,113.31,5.6,15.0,14.6,0.0,0.0,0.0,0.0,50.0,42.9,43.8,15.4,12.5,0.0,33.3,0.0,38.5,0.0,11.3,1.75,4.6,43,35,0,7,8,87.5,4,8,10,3,3,100.0,0,3,0.0,4,9,3,0,0,0,0,7,2,9,1,3,4,7:34,31.1,26,0,0,3,4,75.0
3,New Orleans Pelicans,JJ Redick,27:02,6,9,66.7,4,6,66.7,0,0,0.0,0,2,2,1,3,0,0,3,16,-14,100.0,122.2,-22.2,5.6,0.33,7.7,0.0,6.7,3.4,23.1,88.9,88.9,17.9,111.86,8.0,25.0,17.0,44.4,30.0,0.0,0.0,0.0,10.0,8.3,6.3,25.0,0.0,0.0,0.0,20.0,18.2,25.4,2.29,4.64,30,17,0,1,3,33.3,0,4,4,1,2,50.0,5,7,71.4,0,0,2,0,0,0,1,1,3,4,0,0,0,9:21,38.7,7,5,1,0,3,0.0
4,New Orleans Pelicans,Lonzo Ball,24:50,2,7,28.6,2,3,66.7,2,2,100.0,0,5,5,5,1,0,0,2,8,-7,112.3,122.4,-10.1,22.7,5.0,35.7,0.0,17.2,9.6,7.1,42.9,50.8,15.3,111.14,8.0,8.3,14.9,20.0,18.8,33.3,33.3,0.0,27.8,25.0,29.4,10.0,0.0,0.0,100.0,14.3,10.0,12.5,2.0,4.44,52,42,1,3,4,75.0,0,9,9,0,2,0.0,2,5,40.0,0,0,1,1,1,2,1,2,2,4,0,2,2,8:46,36.3,13,6,5,3,4,75.0
5,New Orleans Pelicans,Josh Hart,28:10,4,9,44.4,3,5,60.0,4,4,100.0,4,6,10,1,1,0,1,4,15,-1,105.5,101.7,3.7,6.7,1.0,7.7,11.1,16.7,13.9,7.7,61.1,69.7,17.4,96.28,21.3,21.1,16.4,33.3,17.2,36.4,28.6,28.6,31.6,30.3,7.7,11.1,0.0,20.0,50.0,20.0,28.6,25.9,2.22,4.31,42,27,0,2,4,50.0,5,8,13,2,5,40.0,2,4,50.0,0,0,1,1,0,1,1,6,1,7,0,0,0,9:37,36.1,11,3,1,1,1,100.0
6,New Orleans Pelicans,Jahlil Okafor,12:29,3,3,100.0,0,0,0.0,2,3,66.7,2,0,2,0,1,0,1,3,8,-7,91.7,111.5,-19.9,0.0,0.0,0.0,9.5,0.0,5.7,20.0,100.0,92.6,15.2,96.13,16.7,37.5,10.3,0.0,0.0,50.0,42.9,22.2,0.0,12.5,0.0,50.0,0.0,50.0,0.0,30.0,57.1,36.4,0.98,4.26,28,21,2,3,5,60.0,5,1,6,3,3,100.0,0,0,0.0,1,2,0,0,0,0,0,7,1,8,0,2,2,4:02,16.0,9,1,1,0,1,0.0
7,New Orleans Pelicans,E'Twaun Moore,12:06,2,7,28.6,1,3,33.3,0,0,0.0,1,2,3,2,0,0,0,0,5,-1,91.3,88.0,3.3,40.0,0.0,22.2,5.3,11.1,8.1,0.0,35.7,35.7,21.2,95.21,23.1,28.6,26.9,33.3,23.1,0.0,0.0,10.0,25.0,16.7,33.3,0.0,0.0,0.0,0.0,0.0,0.0,23.8,0.0,0.0,0,0,0,0,0,0.0,0,0,0,0,2,0.0,0,-2,0.0,0,0,0,1,1,2,0,2,1,3,0,0,0,3:55,14.0,2,0,0,0,1,0.0
8,New Orleans Pelicans,Nickeil Alexander-Walker,11:55,1,10,10.0,1,7,14.3,0,0,0.0,1,3,4,2,1,2,0,2,3,6,108.7,76.0,32.7,28.6,2.0,15.4,5.3,15.8,10.5,7.7,15.0,15.0,33.3,96.67,-7.7,12.5,37.0,20.0,46.7,0.0,0.0,10.0,33.3,21.1,33.3,25.0,100.0,0.0,100.0,25.0,33.3,12.0,1.01,4.7,40,28,0,1,1,100.0,2,6,8,0,2,0.0,1,8,12.5,0,0,1,0,0,0,1,1,1,2,0,1,1,3:54,14.0,6,0,1,0,0,0.0
9,New Orleans Pelicans,Kenrich Williams,18:03,0,4,0.0,0,2,0.0,3,3,100.0,3,3,6,3,1,1,2,5,3,11,119.4,82.1,37.4,23.1,3.0,33.3,11.5,11.5,11.5,11.1,0.0,28.2,12.5,99.73,6.2,0.0,10.3,0.0,9.1,33.3,33.3,25.0,23.1,24.0,33.3,20.0,33.3,50.0,0.0,38.5,12.5,7.0,1.43,4.41,36,30,0,0,4,0.0,10,5,15,0,2,0.0,0,2,0.0,0,0,2,0,0,0,0,6,3,9,0,3,3,5:55,23.2,9,0,3,1,3,33.3


## Putting All the Scraping Together

In [328]:
# Scores page for the date being scraped. The pipeline below returns to this
# URL after finishing each game.
date_url = 'https://stats.nba.com/scores/12/07/2019'
browser.get(date_url)

In [329]:
# Replacement strings that turn headers containing unexpected spaces or
# newlines into single tokens, grouped by the stat view they belong to.
hustle_replacements = {
    'SCREEN\nASSISTS': 'SCREEN_ASSISTS',
    'SCREEN ASSISTS': 'SCREEN_ASSISTS',
    'SCREEN_ASSISTS PTS': 'SCREEN_ASSISTS_PTS',
    'OFF LOOSE BALLS\nRECOVERED': 'OFF_LOOSE_BALLS_RECOVERED',
    'DEF LOOSE BALLS\nRECOVERED': 'DEF_LOOSE_BALLS_RECOVERED',
    'LOOSE BALLS\nRECOVERED': 'LOOSE_BALLS_RECOVERED',
    'CHARGES\nDRAWN': 'CHARGES_DRAWN',
    'CONTESTED\n2PT SHOTS': 'CONTESTED_2PT_SHOTS',
    'CONTESTED\n3PT SHOTS': 'CONTESTED_3PT_SHOTS',
    'CONTESTED\nSHOTS': 'CONTESTED_SHOTS',
    'OFF BOX OUTS': 'OFF_BOX_OUTS',
    'DEF BOX OUTS': 'DEF_BOX_OUTS',
    'BOX\nOUTS': 'BOX_OUTS',
}
advanced_replacements = {
    ' RATIO': '_RATIO',
}
defense_replacements = {
    'DEF\nMIN': 'DEF_MIN',
    'PARTIAL\nPOSS': 'PARTIAL_POSS',
}

# Merged in this order (hustle, advanced, defense). Key order is kept because
# the replacements are applied one after another in iteration order.
text_replacements = {
    **hustle_replacements,
    **advanced_replacements,
    **defense_replacements,
}

In [330]:
# Stat views to scrape on every box-score page. Each name is clicked by its
# link text, and the merge step addresses the resulting DataFrames by position
# in this list, so the order matters.
options = ['Traditional', 'Advanced', 'Usage', 'Player Tracking', 'Hustle', 'Defense']

In [331]:
def _parse_stat_table(table_text, replacements):
    """Parse the rendered text of one box-score table into a DataFrame.

    Assumed layout: line 0 is the header, then the lines alternate between a
    player-name line (odd line ids) and that player's stats line (even ids).

    Args:
        table_text: the ``.text`` of one stat-table element.
        replacements: mapping of substring -> replacement, applied in
            iteration order to turn multi-word / multi-line headers into
            single tokens before the text is split on spaces.

    Returns:
        A DataFrame with one row per player found before the first line that
        contains 'Totals', 'DNP' or 'DND'. Rows whose number of values differs
        from the number of header columns are skipped.
    """
    for orig_str, replace_str in replacements.items():
        table_text = table_text.replace(orig_str, replace_str)

    column_names = []
    player_stats = []
    temp_player_stat = []
    for line_id, line in enumerate(table_text.split('\n')):
        if line_id == 0:
            # the first header token is replaced by the fixed label 'PLAYER'
            column_names = ['PLAYER'] + line.split(' ')[1:]
            continue

        # stop reading once we see totals or DNP/DND line
        if ('Totals' in line) or ('DNP' in line) or ('DND' in line):
            break

        if line_id % 2 == 1:
            # drop a trailing ' F' / ' C' / ' G' from the name line
            line_cleaned = line[:-2] + line[-2:].replace(' F', '').replace(' C', '').replace(' G', '')
            temp_player_stat.append(line_cleaned)
        else:
            temp_player_stat.extend(line.split(' '))
            # only append if formatting is correct
            if len(temp_player_stat) == len(column_names):
                player_stats.append(temp_player_stat)
            temp_player_stat = []

    return pd.DataFrame(player_stats, columns = column_names)


# XPath of a team's stat table on a box-score page; {} is the 1-based position.
stat_table_xpath = '/html/body/main/div[2]/div/div/div[4]/div/div[2]/div/nba-stat-table[{}]'

# For every view after the first: (position in the per-team list of DataFrames,
# columns removed from that view before merging). Presumably the removed
# columns repeat ones an earlier view already contributed -- TODO confirm.
merge_plan = [
    (1, ['MIN']),
    (2, ['MIN', 'USG%']),
    (3, ['MIN', 'AST', 'FG%']),
    (4, ['MIN']),
    (5, ['DREB', 'STL', 'BLK', 'DFGM', 'DFGA', 'DFG%']),
]

# Columns of the view at position 5 that are renamed so they do not clash with
# the PTS / AST / TOV columns of the first view.
defense_renames = {
    'PTS' : 'ALLOWED_PTS',
    'AST' : 'ALLOWED_AST',
    'TOV' : 'FORCED_TOV'
}

# get all boxscore links (used only for their count)
box_score_links = browser.find_elements_by_partial_link_text('Box Score')

# The game index and the option index use distinct names; the original reused
# `i` for both loops.
for game_idx in range(len(box_score_links)):

    # The date page is reloaded at the end of every iteration, so the links are
    # looked up again here instead of reusing `box_score_links` (presumably to
    # avoid stale element references).
    browser.find_elements_by_partial_link_text('Box Score')[game_idx].click()
    time.sleep(2)

    # browser is within boxscore page now

    # keep track of all stats with a list of dfs
    team1_stats_dfs = []
    team2_stats_dfs = []
    captions = browser.find_elements_by_class_name('nba-stat-table__caption')
    team1 = captions[0].text
    team2 = captions[1].text

    # scrape multiple options designated above
    for option_idx, option in enumerate(options):
        # only select new options item if not first-time loading page
        if option_idx != 0:
            browser.find_element_by_partial_link_text(option).click()
            time.sleep(2)

        # scrape 2 boxscore tables, one per team
        table1 = browser.find_element_by_xpath(stat_table_xpath.format(1))
        table2 = browser.find_element_by_xpath(stat_table_xpath.format(2))
        team1_stats_dfs.append(_parse_stat_table(table1.text, text_replacements))
        team2_stats_dfs.append(_parse_stat_table(table2.text, text_replacements))

        # Open the options menu (by clicking the currently selected option) so
        # the next view can be chosen. Skipped after the last view.
        if option_idx < len(options) - 1:
            browser.find_element_by_partial_link_text(option).click()
            time.sleep(2)

    # combine all the stats per team: outer-join every view on PLAYER
    merged_team_dfs = []
    for team_name, stats_dfs in ((team1, team1_stats_dfs), (team2, team2_stats_dfs)):
        # as in the original cell, the renamed frame is stored back into the list
        stats_dfs[5] = stats_dfs[5].rename(columns = defense_renames)

        team_df = stats_dfs[0]
        for view_idx, dropped_columns in merge_plan:
            team_df = pd.merge(team_df, stats_dfs[view_idx].drop(columns = dropped_columns),
                               how = 'outer', on = 'PLAYER')

        team_df.insert(0, 'TEAM', team_name)
        merged_team_dfs.append(team_df)

    team1_df, team2_df = merged_team_dfs

    # write both teams to one pickle file
    # NOTE(review): the '20200320' tag is hard-coded and is not derived from
    # `date_url` -- confirm which date the file name is meant to carry.
    teams_df = pd.concat([team1_df, team2_df], ignore_index=True)
    teams_df.to_pickle('../Pickles/Test/{}_{}_{}'.format('20200320', team1, team2))

    # finish scraping the stats from all the options, go back to the previous page listing all games in one day
    browser.get(date_url)
    time.sleep(3)