<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Scrape-Player-Data-for-Transition-Plays" data-toc-modified-id="Scrape-Player-Data-for-Transition-Plays-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Scrape Player Data for Transition Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Isolation-Plays" data-toc-modified-id="Scrape-Player-Data-for-Isolation-Plays-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Scrape Player Data for Isolation Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Pick-and-Roll-Plays" data-toc-modified-id="Scrape-Player-Data-for-Pick-and-Roll-Plays-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Scrape Player Data for Pick-and-Roll Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Post-Up-Plays" data-toc-modified-id="Scrape-Player-Data-for-Post-Up-Plays-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Scrape Player Data for Post-Up Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Spot-Up-Plays" data-toc-modified-id="Scrape-Player-Data-for-Spot-Up-Plays-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Scrape Player Data for Spot-Up Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Hand-Off-Plays" data-toc-modified-id="Scrape-Player-Data-for-Hand-Off-Plays-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Scrape Player Data for Hand-Off Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Cutting-Plays" data-toc-modified-id="Scrape-Player-Data-for-Cutting-Plays-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Scrape Player Data for Cutting Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Off-Screen-Plays" data-toc-modified-id="Scrape-Player-Data-for-Off-Screen-Plays-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Scrape Player Data for Off-Screen Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Put-Back-Plays" data-toc-modified-id="Scrape-Player-Data-for-Put-Back-Plays-9"><span 
class="toc-item-num">9&nbsp;&nbsp;</span>Scrape Player Data for Put-Back Plays</a></span></li><li><span><a href="#Scrape-Player-Data-for-Miscellaneous-Play-Types" data-toc-modified-id="Scrape-Player-Data-for-Miscellaneous-Play-Types-10"><span class="toc-item-num">10&nbsp;&nbsp;</span>Scrape Player Data for Miscellaneous Play-Types</a></span></li><li><span><a href="#Merge-and-Store-Dataframes" data-toc-modified-id="Merge-and-Store-Dataframes-11"><span class="toc-item-num">11&nbsp;&nbsp;</span>Merge and Store Dataframes</a></span></li></ul></div>

In [1]:
import numpy as np
import urllib.request
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions
import lxml.html
from lxml import etree
import re
import time
import pandas as pd
from functools import reduce

In [2]:
# Give a scraped play-type dataframe appropriate data types
def ConvertDataFrame(df):
    """Return a typed, de-duplicated version of a scraped play-type dataframe.

    Every column except 'name' is cast to float, 'name' is cast to str and
    'year' is then cast to int. Rows sharing the same ('name', 'year') pair
    are collapsed into one row holding the mean of each numeric column
    (presumably players listed more than once in a season -- TODO confirm).

    The input frame is NOT modified; a new dataframe is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'name' and 'year' columns; every other column must be
        convertible to float.

    Returns
    -------
    pandas.DataFrame
        One row per ('name', 'year') pair, with 'name' and 'year' restored as
        ordinary columns by reset_index().
    """
    # astype() hands back a new frame, so the caller's data stays untouched
    # (assigning the converted columns back into `df` would mutate it).
    typed = df.astype({col: float for col in df.columns if col != 'name'})
    typed['name'] = typed['name'].astype('str')
    # 'year' goes through float first (above) and is narrowed to int here.
    typed['year'] = typed['year'].astype('int')
    return typed.groupby(['name', 'year']).mean().reset_index()


In [4]:
# Scrape play-type data tables from NBA.com webpages for the specified
# seasons, reusing one headless browser session for every URL
def FetchPlayTypeTables(urls, years, button):
    """Scrape one play-type table per URL and return all kept rows as a 2D array.

    Parameters
    ----------
    urls : sequence of str
        Pages to load, one per season.
    years : sequence
        Season labels parallel to ``urls`` (``years[i]`` belongs to
        ``urls[i]``); each is passed through ``int()`` and stored in every
        row scraped from that page.
    button : str
        Visible text of the option to pick in the 'TypeGrouping' drop-down
        (the notebook passes "Offensive" or "Defensive").

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the kept rows. Each row is
        ``[field 0, year, field 3, ..., field 16]`` of a 17-field table row,
        with '%' stripped from the text fields. Because text and ints are
        mixed, NumPy stores all values as strings. If no row is kept the
        result is an empty 1D array.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If either drop-down does not appear within 30 seconds.
    """
    # Create a headless Firefox browser instance
    opt = FirefoxOptions()
    opt.add_argument("--headless")
    driver = webdriver.Firefox(options=opt)

    arr = []
    try:
        wait = WebDriverWait(driver, 30)
        for i, url in enumerate(urls):
            year = years[i]
            print("Fetching play type data from Year", year, "...")

            driver.get(url)

            # Choose the offensive/defensive view once its drop-down exists.
            wait.until(EC.presence_of_element_located((By.XPATH, "//select[@name='TypeGrouping']")))
            Select(driver.find_element(By.NAME, 'TypeGrouping')).select_by_visible_text(button)

            # Show every row on a single page instead of paginating.
            wait.until(EC.presence_of_element_located((By.XPATH, "//select[contains(@class, 'stats-table-pagination__select')]")))
            Select(driver.find_element(By.CLASS_NAME, 'stats-table-pagination__select')).select_by_visible_text("All")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")

            # NOTE(review): the page source is read immediately after the
            # selections above; nothing waits for the table to re-render.
            root = lxml.html.fromstring(driver.page_source)
            results = root.xpath("//*[@class='nba-stat-table__overflow']//table/tbody/tr")

            # Process the table text and break into columns,
            # stripping extraneous newline characters and inserting the season year
            counter = 0
            for result in results:
                texts = [re.sub('\n +', '', x) for x in result.xpath("./td//text()")]
                fields = [x for x in texts if x != '' and x != '\n']
                # Only complete 17-field rows are kept.
                if len(fields) != 17:
                    continue
                # Drop fields 1 and 2 (same effect as two successive
                # `del fields[-15]` on a 17-item list); presumably team and
                # games played -- TODO confirm against the live table.
                del fields[1:3]
                # Percent signs would break the later float conversion.
                fields = [s.strip('%') for s in fields]
                fields.insert(1, int(year))
                arr.append(fields)
                counter += 1

            print("Fetched stats for", counter, "NBA players.")
    finally:
        # Always shut the browser down, even when a wait times out,
        # so no headless Firefox process is left running.
        driver.quit()

    return np.array(arr)

In [5]:
# Scrape play-type data tables from NBA.com webpages for the specified seasons.
# ADDITIONALLY: completely reset the web driver for each URL,
# which is needed for specific webpages.
def FetchPlayTypeTables_ResetPage(urls, years, button):
    """Scrape one play-type table per URL, using a fresh browser for each URL.

    Same scraping steps and return format as ``FetchPlayTypeTables``, except
    that a new headless Firefox driver is started for every URL and quit
    afterwards (the notebook uses this for the ball-handler pages, whose
    URLs contain a ``#!`` fragment).

    Parameters
    ----------
    urls : sequence of str
        Pages to load, one per season.
    years : sequence
        Season labels parallel to ``urls`` (``years[i]`` belongs to
        ``urls[i]``); each is passed through ``int()`` and stored in every
        row scraped from that page.
    button : str
        Visible text of the option to pick in the 'TypeGrouping' drop-down
        (the notebook passes "Offensive" or "Defensive").

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the kept rows. Each row is
        ``[field 0, year, field 3, ..., field 16]`` of a 17-field table row,
        with '%' stripped from the text fields. Because text and ints are
        mixed, NumPy stores all values as strings. If no row is kept the
        result is an empty 1D array.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If either drop-down does not appear within 30 seconds.
    """
    # Options for a headless Firefox browser instance
    opt = FirefoxOptions()
    opt.add_argument("--headless")

    arr = []
    for i, url in enumerate(urls):
        driver = webdriver.Firefox(options=opt)
        try:
            year = years[i]
            print("Fetching play type data from Year", year, "...")

            driver.get(url)
            wait = WebDriverWait(driver, 30)

            # Choose the offensive/defensive view once its drop-down exists.
            wait.until(EC.presence_of_element_located((By.XPATH, "//select[@name='TypeGrouping']")))
            Select(driver.find_element(By.NAME, 'TypeGrouping')).select_by_visible_text(button)

            # Show every row on a single page instead of paginating.
            wait.until(EC.presence_of_element_located((By.XPATH, "//select[contains(@class, 'stats-table-pagination__select')]")))
            Select(driver.find_element(By.CLASS_NAME, 'stats-table-pagination__select')).select_by_visible_text("All")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")

            # NOTE(review): the page source is read immediately after the
            # selections above; nothing waits for the table to re-render.
            root = lxml.html.fromstring(driver.page_source)
            results = root.xpath("//*[@class='nba-stat-table__overflow']//table/tbody/tr")

            # Process the table text and break into columns,
            # stripping extraneous newline characters and inserting the season year
            counter = 0
            for result in results:
                texts = [re.sub('\n +', '', x) for x in result.xpath("./td//text()")]
                fields = [x for x in texts if x != '' and x != '\n']
                # Only complete 17-field rows are kept.
                if len(fields) != 17:
                    continue
                # Drop fields 1 and 2 (same effect as two successive
                # `del fields[-15]` on a 17-item list); presumably team and
                # games played -- TODO confirm against the live table.
                del fields[1:3]
                # Percent signs would break the later float conversion.
                fields = [s.strip('%') for s in fields]
                fields.insert(1, int(year))
                arr.append(fields)
                counter += 1

            print("Fetched stats for", counter, "NBA players.")
        finally:
            # Always shut this URL's browser down, even when a wait times
            # out, so no headless Firefox process is left running.
            driver.quit()

    return np.array(arr)

# Scrape Player Data for Transition Plays

In [6]:
# Base column names for a scraped play-type table. The first two entries
# identify the row; the remaining stat names get an 'OFF'/'DEF' prefix and a
# play-type suffix in the scraping cells below.
# Suffixes scraped on offense: TR, CUT, PB, MISC, ISO, PRBH, PRRM, PU, SU, HO, OS
# Suffixes scraped on defense: ISO, PRBH, PRRM, PU, SU, HO, OS
cols = [
    'name', 'year',
    '_POSS_', '_FREQ_', '_PPP_', '_PTS_',
    '_FGM_', '_FGA_', '_FGP_', '_EFGP_',
    '_FT_FREQ_', '_TO_FREQ_', '_SF_FREQ_', '_AND1_FREQ_',
    '_SCORE_FREQ_', '_PERC_',
]

In [7]:
# Seasons to fetch from NBA.com: `ya` holds the two-digit start year and `yb`
# the two-digit end year of each season (2015-16 through 2019-20); `years`
# labels each season by its four-digit end year.
ya = ["{:02d}".format(n) for n in range(15, 20)]
yb = ["{:02d}".format(n) for n in range(16, 21)]
years = [2000 + int(y) for y in yb]

In [8]:
# Transition plays ("TR"): scrape the offensive table for every season and
# turn the scraped 2D array into a typed dataframe.
play_type = "TR"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/transition/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

np_arr_trans = FetchPlayTypeTables(urls, years, "Offensive")
df_trans = ConvertDataFrame(pd.DataFrame(np_arr_trans, columns=off_cols))
print(df_trans)


Fetching play type data from Year 2016 ...
Fetched stats for 396 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 399 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 423 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 458 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 347 NBA players.
               name  year  OFF_POSS_TR  OFF_FREQ_TR  OFF_PPP_TR  OFF_PTS_TR  \
0      Aaron Brooks  2016          0.9         10.4        0.95         0.8   
1      Aaron Brooks  2017          0.6         10.2        0.72         0.4   
2      Aaron Gordon  2016          1.6         17.4        1.08         1.8   
3      Aaron Gordon  2017          2.6         19.9        1.03         2.7   
4      Aaron Gordon  2018          3.7         20.1        0.96         3.6   
...             ...   ...          ...          ...         ...         ...   
1892  Zach Randolph  2018          0.7          4.5      

# Scrape Player Data for Isolation Plays

In [9]:
# Isolation plays ("ISO"): scrape the offensive and defensive tables for
# every season and turn each scraped 2D array into a typed dataframe.
play_type = "ISO"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/isolation/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_iso = FetchPlayTypeTables(urls, years, "Offensive")
df_iso = ConvertDataFrame(pd.DataFrame(np_arr_iso, columns=off_cols))

# Defensive view
np_arr_iso = FetchPlayTypeTables(urls, years, "Defensive")
df_iso2 = ConvertDataFrame(pd.DataFrame(np_arr_iso, columns=def_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 287 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 288 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 277 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 287 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 165 NBA players.
Fetching play type data from Year 2016 ...
Fetched stats for 380 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 387 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 383 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 411 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 304 NBA players.


# Scrape Player Data for Pick-and-Roll Plays

In [10]:
# Pick-and-roll ball handler plays ("PRBH"): scrape the offensive and
# defensive tables for every season and turn each scraped 2D array into a
# typed dataframe. These URLs use a '#!' fragment, so the variant that
# restarts the browser for every URL is used here.
play_type = "PRBH"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/ball-handler/#!?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_prbh = FetchPlayTypeTables_ResetPage(urls, years, "Offensive")
df_prbh = ConvertDataFrame(pd.DataFrame(np_arr_prbh, columns=off_cols))

# Defensive view
np_arr_prbh = FetchPlayTypeTables_ResetPage(urls, years, "Defensive")
df_prbh2 = ConvertDataFrame(pd.DataFrame(np_arr_prbh, columns=def_cols))


Fetching play type data from Year 2016 ...
https://stats.nba.com/players/ball-handler/#!?SeasonYear=2015-16&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1
Fetched stats for 272 NBA players.
Fetching play type data from Year 2017 ...
https://stats.nba.com/players/ball-handler/#!?SeasonYear=2016-17&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1
Fetched stats for 265 NBA players.
Fetching play type data from Year 2018 ...
https://stats.nba.com/players/ball-handler/#!?SeasonYear=2017-18&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1
Fetched stats for 302 NBA players.
Fetching play type data from Year 2019 ...
https://stats.nba.com/players/ball-handler/#!?SeasonYear=2018-19&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1
Fetched stats for 308 NBA players.
Fetching play type data from Year 2020 ...
https://stats.nba.com/players/ball-handler/#!?SeasonYear=2019-20&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1
Fetched stats for 241 NBA players.
Fetching play type data f

In [11]:
# Pick-and-roll roll man plays ("PRRM"): scrape the offensive and defensive
# tables for every season and turn each scraped 2D array into a typed dataframe.
play_type = "PRRM"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/roll-man/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_prrm = FetchPlayTypeTables(urls, years, "Offensive")
df_prrm = ConvertDataFrame(pd.DataFrame(np_arr_prrm, columns=off_cols))

# Defensive view
np_arr_prrm = FetchPlayTypeTables(urls, years, "Defensive")
df_prrm2 = ConvertDataFrame(pd.DataFrame(np_arr_prrm, columns=def_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 202 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 196 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 190 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 201 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 157 NBA players.
Fetching play type data from Year 2016 ...
Fetched stats for 272 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 282 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 293 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 268 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 161 NBA players.


# Scrape Player Data for Post-Up Plays

In [12]:
# Post-up plays ("PU"): scrape the offensive and defensive tables for every
# season and turn each scraped 2D array into a typed dataframe.
play_type = "PU"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/playtype-post-up/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_pu = FetchPlayTypeTables(urls, years, "Offensive")
df_pu = ConvertDataFrame(pd.DataFrame(np_arr_pu, columns=off_cols))

# Defensive view
np_arr_pu = FetchPlayTypeTables(urls, years, "Defensive")
df_pu2 = ConvertDataFrame(pd.DataFrame(np_arr_pu, columns=def_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 216 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 184 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 192 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 195 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 113 NBA players.
Fetching play type data from Year 2016 ...
Fetched stats for 374 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 358 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 371 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 387 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 250 NBA players.


# Scrape Player Data for Spot-Up Plays

In [13]:
# Spot-up plays ("SU"): scrape the offensive and defensive tables for every
# season and turn each scraped 2D array into a typed dataframe.
play_type = "SU"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/spot-up/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_su = FetchPlayTypeTables(urls, years, "Offensive")
df_su = ConvertDataFrame(pd.DataFrame(np_arr_su, columns=off_cols))

# Defensive view
np_arr_su = FetchPlayTypeTables(urls, years, "Defensive")
df_su2 = ConvertDataFrame(pd.DataFrame(np_arr_su, columns=def_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 392 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 396 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 428 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 458 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 359 NBA players.
Fetching play type data from Year 2016 ...
Fetched stats for 417 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 413 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 438 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 477 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 382 NBA players.


# Scrape Player Data for Hand-Off Plays

In [14]:
# Hand-off plays ("HO"): scrape the offensive and defensive tables for every
# season and turn each scraped 2D array into a typed dataframe.
play_type = "HO"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/hand-off/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_ho = FetchPlayTypeTables(urls, years, "Offensive")
df_ho = ConvertDataFrame(pd.DataFrame(np_arr_ho, columns=off_cols))

# Defensive view
np_arr_ho = FetchPlayTypeTables(urls, years, "Defensive")
df_ho2 = ConvertDataFrame(pd.DataFrame(np_arr_ho, columns=def_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 233 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 235 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 256 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 280 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 198 NBA players.
Fetching play type data from Year 2016 ...
Fetched stats for 269 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 279 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 302 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 340 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 239 NBA players.


# Scrape Player Data for Cutting Plays

In [15]:
# Cutting plays ("CUT"): scrape the offensive table for every season and
# turn the scraped 2D array into a typed dataframe.
play_type = "CUT"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
# def_cols is built for symmetry with the other cells but is not used here:
# only the offensive view is scraped for this play type.
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/cut/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

np_arr_cut = FetchPlayTypeTables(urls, years, "Offensive")
df_cut = ConvertDataFrame(pd.DataFrame(np_arr_cut, columns=off_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 334 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 334 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 340 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 360 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 245 NBA players.


# Scrape Player Data for Off-Screen Plays

In [18]:
# Off-screen plays ("OS"): scrape the offensive and defensive tables for
# every season and turn each scraped 2D array into a typed dataframe.
play_type = "OS"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/off-screen/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

# Offensive view
np_arr_os = FetchPlayTypeTables(urls, years, "Offensive")
df_os = ConvertDataFrame(pd.DataFrame(np_arr_os, columns=off_cols))

# Defensive view
np_arr_os = FetchPlayTypeTables(urls, years, "Defensive")
df_os2 = ConvertDataFrame(pd.DataFrame(np_arr_os, columns=def_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 238 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 246 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 247 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 229 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 172 NBA players.
Fetching play type data from Year 2016 ...
Fetched stats for 331 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 336 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 339 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 351 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 239 NBA players.


# Scrape Player Data for Put-Back Plays

In [20]:
# Put-back plays ("PB"): scrape the offensive table for every season and
# turn the scraped 2D array into a typed dataframe.
play_type = "PB"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
# def_cols is built for symmetry with the other cells but is not used here:
# only the offensive view is scraped for this play type.
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/putbacks/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

np_arr_pb = FetchPlayTypeTables(urls, years, "Offensive")
df_pb = ConvertDataFrame(pd.DataFrame(np_arr_pb, columns=off_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 287 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 280 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 282 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 309 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 202 NBA players.


# Scrape Player Data for Miscellaneous Play-Types

In [21]:
# Miscellaneous plays ("MISC"): scrape the offensive table for every season
# and turn the scraped 2D array into a typed dataframe.
play_type = "MISC"
off_cols = ['name', 'year'] + ['OFF' + stat + play_type for stat in cols[2:]]
# def_cols is built for symmetry with the other cells but is not used here:
# only the offensive view is scraped for this play type.
def_cols = ['name', 'year'] + ['DEF' + stat + play_type for stat in cols[2:]]
urls = [
    "https://stats.nba.com/players/playtype-misc/?SeasonYear=20{0}-{1}&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1".format(start, end)
    for start, end in zip(ya, yb)
]

np_arr_misc = FetchPlayTypeTables(urls, years, "Offensive")
df_misc = ConvertDataFrame(pd.DataFrame(np_arr_misc, columns=off_cols))


Fetching play type data from Year 2016 ...
Fetched stats for 366 NBA players.
Fetching play type data from Year 2017 ...
Fetched stats for 362 NBA players.
Fetching play type data from Year 2018 ...
Fetched stats for 361 NBA players.
Fetching play type data from Year 2019 ...
Fetched stats for 377 NBA players.
Fetching play type data from Year 2020 ...
Fetched stats for 284 NBA players.


# Merge and Store Dataframes

In [22]:
# Build the complete dataframe by folding every per-play-type frame into one,
# left to right, on the ('name', 'year') key. The merge is an outer join so
# that players who only appear in a few play-type tables are not dropped.
df = reduce(
    lambda merged, nxt: pd.merge(merged, nxt, on=['name', 'year'], how='outer'),
    [
        df_trans,
        df_iso, df_iso2,
        df_prbh, df_prbh2,
        df_prrm, df_prrm2,
        df_pu, df_pu2,
        df_su, df_su2,
        df_ho, df_ho2,
        df_cut,
        df_os, df_os2,
        df_pb,
        df_misc,
    ],
)

# Rescale percentage-style columns (matched by name fragment) to the 0-1 range
perc_cols = [
    col for col in df.columns
    if any(tag in col for tag in ('FGP', '3PP', 'FTP', 'PERC', 'PCT', 'FREQ'))
]
df[perc_cols] = df[perc_cols].astype(float) / 100.


In [23]:
# The outer merge leaves NaN wherever a player is absent from a play-type
# table; replace every NaN with 0 (a reasonable stand-in for a missing
# play type) and print the result.
df = df.fillna(value=0)
print(df)

                  name  year  OFF_POSS_TR  OFF_FREQ_TR  OFF_PPP_TR  \
0         Aaron Brooks  2016          0.9        0.104        0.95   
1         Aaron Brooks  2017          0.6        0.102        0.72   
2         Aaron Gordon  2016          1.6        0.174        1.08   
3         Aaron Gordon  2017          2.6        0.199        1.03   
4         Aaron Gordon  2018          3.7        0.201        0.96   
...                ...   ...          ...          ...         ...   
2028    Devin Robinson  2019          0.0        0.000        0.00   
2029  Jared Cunningham  2016          0.0        0.000        0.00   
2030     Jarrod Uthoff  2017          0.0        0.000        0.00   
2031    Alex Stepheson  2016          0.0        0.000        0.00   
2032       Jack Cooley  2018          0.0        0.000        0.00   

      OFF_PTS_TR  OFF_FGM_TR  OFF_FGA_TR  OFF_FGP_TR  OFF_EFGP_TR  ...  \
0            0.8         0.4         0.7       0.392        0.520  ...   
1          

In [24]:
# Save the merged play-type dataframe (row index included) as a .csv file
df.to_csv(path_or_buf="NBAPlayTypeStats.csv")