<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Scrape-Basic-Lineup-Stats" data-toc-modified-id="Scrape-Basic-Lineup-Stats-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Scrape Basic Lineup Stats</a></span></li><li><span><a href="#Scrape-Advanced-Lineup-Stats" data-toc-modified-id="Scrape-Advanced-Lineup-Stats-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Scrape Advanced Lineup Stats</a></span></li><li><span><a href="#Scrape-Misc-Lineup-Stats" data-toc-modified-id="Scrape-Misc-Lineup-Stats-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Scrape Misc Lineup Stats</a></span></li><li><span><a href="#Scrape-Four-Factors-Lineup-Stats" data-toc-modified-id="Scrape-Four-Factors-Lineup-Stats-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Scrape Four Factors Lineup Stats</a></span></li><li><span><a href="#Scrape-Scoring-Lineup-Stats" data-toc-modified-id="Scrape-Scoring-Lineup-Stats-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Scrape Scoring Lineup Stats</a></span></li><li><span><a href="#Scrape-Opponent-(Defensive)-Lineup-Stats" data-toc-modified-id="Scrape-Opponent-(Defensive)-Lineup-Stats-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Scrape Opponent (Defensive) Lineup Stats</a></span></li><li><span><a href="#Merge-and-Store-Dataframes" data-toc-modified-id="Merge-and-Store-Dataframes-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Merge and Store Dataframes</a></span></li></ul></div>

In [1]:
import numpy as np
import urllib.request
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions
import lxml.html
from lxml import etree
import re
import time
import pandas as pd
from functools import reduce
from operator import itemgetter

In [2]:
# Modify dataframe to have the appropriate data types
def ConvertDataFrame(df):
    if 'team' in df.columns:
        cols = df.columns.drop(['lineup_name', 'team', 'code', 'sorted_code'])
    else:
        cols = df.columns.drop(['lineup_name', 'code', 'sorted_code'])
    df[cols] = df[cols].apply(pd.to_numeric, errors='coerce', axis=1)
    df['year'] = df['year'].astype('int')
    df = df.drop_duplicates(subset=['code'], keep=False)
    return df


In [3]:
# Scrape lineup data tables from NBA.com webpages, grabbing only
# the specified columns (by index) and for the specified seasons
def FetchStatsTables(urls, years, col_list, buttons):
    # Create a headless Firefox browser instance
    opt = FirefoxOptions()
    opt.add_argument("--headless")
    driver = webdriver.Firefox(options=opt)
    
    arr = []
    for i,url in enumerate(urls):
        year = years[i]
        driver.get(url)
        time.sleep(5)
        wait = WebDriverWait(driver, 60)
        for button in buttons:
            print("Fetching lineup stats for the", button, "from the", year, "season...")
            
            wait.until(EC.presence_of_element_located((By.XPATH, "//select[@name='TeamID']")))
            time.sleep(1)
            sel = Select(driver.find_element_by_name('TeamID'))
            time.sleep(1)
            sel.select_by_visible_text(button)
            time.sleep(1)
            #driver.execute_script("window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")
            wait.until(EC.presence_of_element_located((By.XPATH, "//select[contains(@class, 'stats-table-pagination__select')]")))
            time.sleep(1)
            sel2 = Select(driver.find_element_by_class_name('stats-table-pagination__select'))
            sel2.select_by_visible_text("All")
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")
            
            retries = 1
            while retries <= 3:
                try:
                    wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='nba-stat-table__overflow']//table/tbody/tr")))
                    break
                except TimeoutException:
                    print('\nRefreshing lineup page due to timeout (retry #', retries,')...')
                    driver.refresh()
                    time.sleep(1)
                    retries += 1
                    
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")
            
            root = lxml.html.fromstring(driver.page_source)
            results = root.xpath("//*[@class='nba-stat-table__overflow']//table/tbody/tr")
            
            counter = 0
            for result in results:
                item = result.xpath("./td//text()")
                item = [re.sub('\n +', '', x) for x in item]
                data = [x for x in item if x != '' and x != '\n']
                team = str(data[1])
                if float(data[2]) == 0. or float(data[3]) == 0.:
                    continue
                data = list(itemgetter(*col_list)(data))
                data = [s.strip('%') for s in data]
                data[0] = data[0].replace('.', '')
                #code_arr = [s.replace(' ', '') for s in data[0].split(',')]
                code_arr = [s.replace(' ', '') for s in data[0].split(', ')]
                sorted_code_arr = [s.replace(' ', '') for s in sorted(data[0].split(', '))]
                code = ''.join(code_arr) + team + str(year)
                sorted_code = ''.join(sorted_code_arr) + team + str(year)              
                data.insert(1, str(code))
                data.insert(2, str(sorted_code))
                data.insert(3, int(year))
                arr.append(data)
                counter += 1
                #print(data)

            print("Fetched stats for", counter, "NBA player lineups.")
            time.sleep(1)
        
        print("Fetched lineup stats for all teams in the", year, "season.\n")
        time.sleep(5)
        
    driver.quit()
    #print(arr)
    return np.array(arr)

# Scrape Basic Lineup Stats

In [4]:
# Establish the years for which we want to fetch lineup data
ya = [str(n).zfill(2) for n in range(16, 19)]
yb = [str(n).zfill(2) for n in range(17, 20)]
years = [int("20"+y) for y in yb]
#teams = ['Atlanta Hawks', 'Brooklyn Nets']
teams = ['Atlanta Hawks', 'Brooklyn Nets', 'Boston Celtics', 'Charlotte Hornets', 'Chicago Bulls', 'Cleveland Cavaliers', 'Dallas Mavericks', 'Denver Nuggets', 'Detroit Pistons', 'Golden State Warriors', 'Houston Rockets', 'Indiana Pacers', 'LA Clippers', 'Los Angeles Lakers', 'Memphis Grizzlies', 'Miami Heat', 'Milwaukee Bucks', 'Minnesota Timberwolves', 'New Orleans Pelicans', 'New York Knicks', 'Oklahoma City Thunder', 'Orlando Magic', 'Philadelphia 76ers', 'Phoenix Suns', 'Portland Trail Blazers', 'Sacramento Kings', 'San Antonio Spurs', 'Toronto Raptors', 'Utah Jazz', 'Washington Wizards']


In [5]:
# Create URLs for the available years of NBA.com traditional/basic lineup data (per 36 minutes), 
# fetch the data in 2D array format, and put into a Pandas dataframe
#urls = [ "https://stats.nba.com/lineups/traditional/?Season=2018-19&SeasonType=Regular%20Season&PerMode=Per36" ]
urls = [ "https://stats.nba.com/lineups/traditional/?Season=20{0}-{1}&SeasonType=Regular%20Season&PerMode=Per36".format(ya[i], yb[i]) for i in range(len(ya)) ]
#print(urls)
np_arr_basic = FetchStatsTables(urls, years, [i for i in np.arange(25)], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 564 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 259 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 436 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 404 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 224 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 373 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 595 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 686 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 449 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 569 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 437 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 589 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 611 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 35

In [6]:
# Convert the dataframe to one with appropriate data types
df_basic = pd.DataFrame(np_arr_basic, columns=['lineup_name', 'code', 'sorted_code', 'year', 'team', 'GPT', 'MPT', 'PTST_PT', 'FGMT_PT', 'FGAT_PT', 'FGPT_PT', '3PMT_PT', '3PAT_PT', '3PPT_PT', 'FTMT_PT', 'FTAT_PT', 'FTPT_PT', 'ORBT_PT', 'DRBT_PT', 'TRBT_PT', 'ASTT_PT', 'TOVT_PT', 'STLT_PT', 'BLKT_PT', 'BLKAT_PT', 'PFT_PT', 'PFDT_PT', 'PMT_PT'])
df_basic = ConvertDataFrame(df_basic)
print(df_basic)


                                             lineup_name  \
0      D Howard, T Sefolosha, P Millsap, K Bazemore, ...   
1      D Howard, P Millsap, K Bazemore, D Schroder, T...   
2      K Korver, D Howard, P Millsap, K Bazemore, D S...   
3      D Howard, T Sefolosha, P Millsap, D Schroder, ...   
4      D Howard, E Ilyasova, D Schroder, T Hardaway J...   
...                                                  ...   
37849  T Ariza, J Green, M Morris, T Satoransky, C Ra...   
37850   J Wall, M Morris, B Beal, T Satoransky, T Bryant   
37851     J Green, J Wall, B Beal, O Porter Jr, T Bryant   
37852  D Howard, M Morris, B Beal, A Rivers, T Satora...   
37853     J Green, J McRae, B Portis, C Randle, T Bryant   

                                                    code  \
0      DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...   
1      DHowardPMillsapKBazemoreDSchroderTHardawayJrAT...   
2        KKorverDHowardPMillsapKBazemoreDSchroderATL2017   
3      DHowardTSefoloshaPMillsapDSchrod

In [7]:
# Create URLs for the available years of NBA.com traditional/basic lineup data (per 100 possessions), 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/traditional/?Season=20{0}-{1}&SeasonType=Regular%20Season&PerMode=Per100Possessions".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_basic2 = FetchStatsTables(urls, years, [i for i in np.arange(25) if i != 1 and i != 2], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 511 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 613 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 271 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 463 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 493 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 519 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 430 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 243 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 630 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 736 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 609 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 635 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 663 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 37

In [8]:
# Convert the dataframe to one with appropriate data types
df_basic2 = pd.DataFrame(np_arr_basic2, columns=['lineup_name', 'code', 'sorted_code', 'year', 'MPT_PH', 'PTST_PH', 'FGMT_PH', 'FGAT_PH', 'FGPT_PH', '3PMT_PH', '3PAT_PH', '3PPT_PH', 'FTMT_PH', 'FTAT_PH', 'FTPT_PH', 'ORBT_PH', 'DRBT_PH', 'TRBT_PH', 'ASTT_PH', 'TOVT_PH', 'STLT_PH', 'BLKT_PH', 'BLKAT_PH', 'PFT_PH', 'PFDT_PH', 'PMT_PH'])
df_basic2 = ConvertDataFrame(df_basic2)
print(df_basic2)


                                             lineup_name  \
0      D Howard, J Calderon, M Muscala, T Hardaway Jr...   
1      K Korver, M Scott, K Bazemore, M Muscala, T Pr...   
2      M Dunleavy, D Howard, P Millsap, K Bazemore, M...   
3      M Dunleavy, K Humphries, M Muscala, T Hardaway...   
4      K Korver, D Howard, T Sefolosha, P Millsap, K ...   
...                                                  ...   
40634  J Green, J Wall, T Satoransky, O Porter Jr, T ...   
40635         T Ariza, J Green, J Wall, B Beal, S Dekker   
40636  T Satoransky, J McRae, J Parker, S Dekker, B P...   
40637  T Ariza, B Beal, T Satoransky, B Portis, T Bryant   
40638  J Green, M Morris, B Beal, O Porter Jr, K Oubr...   

                                                    code  \
0      DHowardJCalderonMMuscalaTHardawayJrDBembryATL2017   
1           KKorverMScottKBazemoreMMuscalaTPrinceATL2017   
2       MDunleavyDHowardPMillsapKBazemoreMDelaneyATL2017   
3      MDunleavyKHumphriesMMuscalaTHard

# Scrape Advanced Lineup Stats

In [9]:
# Create URLs for the available years of NBA.com advanced lineup data, 
# fetch the data in 2D array format, and put into a Pandas dataframe
#urls = [ "https://stats.nba.com/lineups/advanced/?Season=2018-19&SeasonType=Regular%20Season" ]
urls = [ "https://stats.nba.com/lineups/advanced/?Season=20{0}-{1}&SeasonType=Regular%20Season".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_adv = FetchStatsTables(urls, years, [0,4,5,6,7,8,9,10,11,12,13,14,15,16,17], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 564 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 259 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 436 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 404 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 224 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 373 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 595 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 686 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 449 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 569 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 437 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 589 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 611 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 35

In [10]:
# Convert the dataframe to one with appropriate data types
df_adv = pd.DataFrame(np_arr_adv, columns=['lineup_name', 'code', 'sorted_code', 'year', 'OFFRTGT', 'DEFRTGT', 'NETRTGT', 'ASTPT', 'ATRT', 'ASTRT', 'ORBRT', 'DRBRT', 'TRBRT', 'TORT', 'EFGPT', 'TST', 'PACET', 'PIET'])
df_adv = ConvertDataFrame(df_adv)
print(df_adv)


                                             lineup_name  \
0      D Howard, T Sefolosha, P Millsap, K Bazemore, ...   
1      D Howard, P Millsap, K Bazemore, D Schroder, T...   
2      K Korver, D Howard, P Millsap, K Bazemore, D S...   
3      D Howard, T Sefolosha, P Millsap, D Schroder, ...   
4      D Howard, E Ilyasova, D Schroder, T Hardaway J...   
...                                                  ...   
37849  T Satoransky, J McRae, S Dekker, T Bryant, T B...   
37850  T Satoransky, K Oubre Jr, O White, T Bryant, T...   
37851  W Johnson, B Beal, T Satoransky, J Parker, T B...   
37852  W Johnson, J Jenkins, J McRae, C Randle, T Bryant   
37853  W Johnson, J Parker, S Dekker, C Randle, T Bryant   

                                                    code  \
0      DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...   
1      DHowardPMillsapKBazemoreDSchroderTHardawayJrAT...   
2        KKorverDHowardPMillsapKBazemoreDSchroderATL2017   
3      DHowardTSefoloshaPMillsapDSchrod

# Scrape Misc Lineup Stats

In [11]:
# Create URLs for the available years of NBA.com miscellaneous lineup data (per 36 minutes), 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/misc/?Season=20{0}-{1}&SeasonType=Regular%20Season&PerMode=Per36".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_misc = FetchStatsTables(urls, years, [0,4,5,6,7,8,9,10,11], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 564 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 259 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 436 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 404 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 224 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 373 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 595 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 686 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 449 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 569 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 437 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 589 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 611 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 35

In [12]:
df_misc = pd.DataFrame(np_arr_misc, columns=['lineup_name', 'code', 'sorted_code', 'year', 'PTS_OFF_TOVT_PT', 'SEC_CHANCE_PTST_PT', 'FBPST_PT', 'PITPT_PT', 'OPP_PTS_OFF_TOVT_PT', 'OPP_SEC_CHANCE_PTST_PT', 'OPP_FBPST_PT', 'OPP_PITPT_PT'])
df_misc = ConvertDataFrame(df_misc)
print(df_misc)


                                             lineup_name  \
0      D Howard, T Sefolosha, P Millsap, K Bazemore, ...   
1      D Howard, P Millsap, K Bazemore, D Schroder, T...   
2      K Korver, D Howard, P Millsap, K Bazemore, D S...   
3      D Howard, T Sefolosha, P Millsap, D Schroder, ...   
4      D Howard, E Ilyasova, D Schroder, T Hardaway J...   
...                                                  ...   
37849  T Ariza, J Green, M Morris, T Satoransky, C Ra...   
37850   J Wall, M Morris, B Beal, T Satoransky, T Bryant   
37851     J Green, J Wall, B Beal, O Porter Jr, T Bryant   
37852  D Howard, M Morris, B Beal, A Rivers, T Satora...   
37853     J Green, J McRae, B Portis, C Randle, T Bryant   

                                                    code  \
0      DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...   
1      DHowardPMillsapKBazemoreDSchroderTHardawayJrAT...   
2        KKorverDHowardPMillsapKBazemoreDSchroderATL2017   
3      DHowardTSefoloshaPMillsapDSchrod

In [13]:
# Create URLs for the available years of NBA.com miscellaneous lineup data (per 100 poss), 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/misc/?Season=20{0}-{1}&SeasonType=Regular%20Season&PerMode=Per100Possessions".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_misc2 = FetchStatsTables(urls, years, [0,4,5,6,7,8,9,10,11], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 511 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 613 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 271 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 463 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 493 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 519 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 430 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 243 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 630 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 736 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 609 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 635 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 663 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 37

In [14]:
df_misc2 = pd.DataFrame(np_arr_misc2, columns=['lineup_name', 'code', 'sorted_code', 'year', 'PTS_OFF_TOVT_PH', 'SEC_CHANCE_PTST_PH', 'FBPST_PH', 'PITPT_PH', 'OPP_PTS_OFF_TOVT_PH', 'OPP_SEC_CHANCE_PTST_PH', 'OPP_FBPST_PH', 'OPP_PITPT_PH'])
df_misc2 = ConvertDataFrame(df_misc2)
print(df_misc2)


                                             lineup_name  \
0      D Howard, J Calderon, M Muscala, T Hardaway Jr...   
1      K Korver, M Scott, K Bazemore, M Muscala, T Pr...   
2      M Dunleavy, D Howard, P Millsap, K Bazemore, M...   
3      M Dunleavy, K Humphries, M Muscala, T Hardaway...   
4      K Korver, D Howard, T Sefolosha, P Millsap, K ...   
...                                                  ...   
40634  J Green, J Wall, T Satoransky, O Porter Jr, T ...   
40635         T Ariza, J Green, J Wall, B Beal, S Dekker   
40636  T Satoransky, J McRae, J Parker, S Dekker, B P...   
40637  T Ariza, B Beal, T Satoransky, B Portis, T Bryant   
40638  J Green, M Morris, B Beal, O Porter Jr, K Oubr...   

                                                    code  \
0      DHowardJCalderonMMuscalaTHardawayJrDBembryATL2017   
1           KKorverMScottKBazemoreMMuscalaTPrinceATL2017   
2       MDunleavyDHowardPMillsapKBazemoreMDelaneyATL2017   
3      MDunleavyKHumphriesMMuscalaTHard

# Scrape Four Factors Lineup Stats

In [15]:
# Create URLs for the available years of NBA.com four factors lineup data, 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/four-factors/?Season=20{0}-{1}&SeasonType=Regular%20Season".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_ff = FetchStatsTables(urls, years, [0,5,8,9,10,11], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 564 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 259 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 436 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 404 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 224 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 373 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 595 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 686 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 449 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 569 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 437 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 589 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 611 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 35

In [16]:
df_ff = pd.DataFrame(np_arr_ff, columns=['lineup_name', 'code', 'sorted_code', 'year', 'FTRT', 'OPP_EFGPT', 'OPP_FTRT', 'OPP_TORT', 'OPP_ORBRT'])
df_ff = ConvertDataFrame(df_ff)
print(df_ff)


                                             lineup_name  \
0      D Howard, T Sefolosha, P Millsap, K Bazemore, ...   
1      D Howard, P Millsap, K Bazemore, D Schroder, T...   
2      K Korver, D Howard, P Millsap, K Bazemore, D S...   
3      D Howard, T Sefolosha, P Millsap, D Schroder, ...   
4      D Howard, E Ilyasova, D Schroder, T Hardaway J...   
...                                                  ...   
37849  T Satoransky, J McRae, S Dekker, T Bryant, T B...   
37850  T Satoransky, K Oubre Jr, O White, T Bryant, T...   
37851  W Johnson, B Beal, T Satoransky, J Parker, T B...   
37852  W Johnson, J Jenkins, J McRae, C Randle, T Bryant   
37853  W Johnson, J Parker, S Dekker, C Randle, T Bryant   

                                                    code  \
0      DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...   
1      DHowardPMillsapKBazemoreDSchroderTHardawayJrAT...   
2        KKorverDHowardPMillsapKBazemoreDSchroderATL2017   
3      DHowardTSefoloshaPMillsapDSchrod

# Scrape Scoring Lineup Stats

In [17]:
# Create URLs for the available years of NBA.com scoring lineup data, 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/scoring/?Season=20{0}-{1}&SeasonType=Regular%20Season".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_sc = FetchStatsTables(urls, years, [0,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 522 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 630 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 457 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 272 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 472 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 505 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 520 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 441 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 244 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 441 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 636 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 746 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 486 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 617 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 483 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 631 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 666 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 37

In [18]:
df_sc = pd.DataFrame(np_arr_sc, columns=['lineup_name', 'code', 'sorted_code', 'year', 'PERC_2PAT', 'PERC_3PAT', 'PERC_PTS_2PT', 'PERC_PTS_MRT', 'PERC_PTS_3PT', 'PERC_PTS_FBPST', 'PERC_PTS_FTT', 'PERC_PTS_OFF_TOVT', 'PERC_PTS_PITPT', 'PERC_2PM_ASTT', 'PERC_2PM_UASTT', 'PERC_3PM_ASTT', 'PERC_3PM_UASTT', 'PERC_FGM_ASTT', 'PERC_FGM_UASTT'])
df_sc = ConvertDataFrame(df_sc)
print(df_sc)


                                             lineup_name  \
0      D Howard, T Sefolosha, P Millsap, K Bazemore, ...   
1      K Humphries, E Ilyasova, J Calderon, T Prince,...   
2      K Korver, D Howard, P Millsap, K Bazemore, D S...   
3      D Howard, K Bazemore, T Hardaway Jr, M Delaney...   
4      M Dunleavy, T Sefolosha, M Muscala, R Kelly, M...   
...                                                  ...   
41029  J Green, J Wall, T Satoransky, O Porter Jr, T ...   
41030  T Ariza, B Beal, T Satoransky, B Portis, T Bryant   
41031    T Ariza, J Green, B Beal, O Porter Jr, C Randle   
41032         T Ariza, J Green, J Wall, B Beal, S Dekker   
41033  T Ariza, J Green, T Satoransky, C Randle, T Br...   

                                                    code  \
0      DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...   
1      KHumphriesEIlyasovaJCalderonTPrinceDBembryATL2017   
2        KKorverDHowardPMillsapKBazemoreDSchroderATL2017   
3      DHowardKBazemoreTHardawayJrMDela

# Scrape Opponent (Defensive) Lineup Stats

In [19]:
# Create URLs for the available years of NBA.com opponent/defensive lineup data (per 36 min.), 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/opponent/?Season=20{0}-{1}&SeasonType=Regular%20Season&PerMode=Per36".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_opp = FetchStatsTables(urls, years, [0,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 564 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 259 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 436 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 404 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 224 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 373 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 595 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 686 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 449 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 569 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 437 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 589 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 611 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 35

In [20]:
df_opp = pd.DataFrame(np_arr_opp, columns=['lineup_name', 'code', 'sorted_code', 'year', 'OPP_FGMT_PT', 'OPP_FGAT_PT', 'OPP_FGPT_PT', 'OPP_3PMT_PT', 'OPP_3PAT_PT', 'OPP_3PPT_PT', 'OPP_FTMT_PT', 'OPP_FTAT_PT', 'OPP_FTPT_PT', 'OPP_OREBT_PT', 'OPP_DREBT_PT', 'OPP_REBT_PT', 'OPP_ASTT_PT', 'OPP_TOVT_PT', 'OPP_STLT_PT', 'OPP_BLKT_PT', 'OPP_BLKAT_PT', 'OPP_PFT_PT', 'OPP_PFDT_PT', 'OPP_PTST_PT'])
df_opp = ConvertDataFrame(df_opp)
print(df_opp)


                                             lineup_name  \
0      D Howard, T Sefolosha, P Millsap, K Bazemore, ...   
1      D Howard, P Millsap, K Bazemore, D Schroder, T...   
2      K Korver, D Howard, P Millsap, K Bazemore, D S...   
3      D Howard, T Sefolosha, P Millsap, D Schroder, ...   
4      D Howard, E Ilyasova, D Schroder, T Hardaway J...   
...                                                  ...   
37849  T Ariza, J Green, M Morris, T Satoransky, C Ra...   
37850   J Wall, M Morris, B Beal, T Satoransky, T Bryant   
37851     J Green, J Wall, B Beal, O Porter Jr, T Bryant   
37852  D Howard, M Morris, B Beal, A Rivers, T Satora...   
37853     J Green, J McRae, B Portis, C Randle, T Bryant   

                                                    code  \
0      DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...   
1      DHowardPMillsapKBazemoreDSchroderTHardawayJrAT...   
2        KKorverDHowardPMillsapKBazemoreDSchroderATL2017   
3      DHowardTSefoloshaPMillsapDSchrod

In [21]:
# Create URLs for the available years of NBA.com opponent/defensive lineup data (per 100 poss.), 
# fetch the data in 2D array format, and put into a Pandas dataframe
urls = [ "https://stats.nba.com/lineups/opponent/?Season=20{0}-{1}&SeasonType=Regular%20Season&PerMode=Per100Possessions".format(ya[i], yb[i]) for i in range(len(ya)) ]
np_arr_opp2 = FetchStatsTables(urls, years, [0,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23], teams)


Fetching lineup stats for the Atlanta Hawks from the 2017 season...
Fetched stats for 511 NBA player lineups.
Fetching lineup stats for the Brooklyn Nets from the 2017 season...
Fetched stats for 613 NBA player lineups.
Fetching lineup stats for the Boston Celtics from the 2017 season...
Fetched stats for 456 NBA player lineups.
Fetching lineup stats for the Charlotte Hornets from the 2017 season...
Fetched stats for 271 NBA player lineups.
Fetching lineup stats for the Chicago Bulls from the 2017 season...
Fetched stats for 463 NBA player lineups.
Fetching lineup stats for the Cleveland Cavaliers from the 2017 season...
Fetched stats for 493 NBA player lineups.
Fetching lineup stats for the Dallas Mavericks from the 2017 season...
Fetched stats for 519 NBA player lineups.
Fetching lineup stats for the Denver Nuggets from the 2017 season...
Fetched stats for 430 NBA player lineups.
Fetching lineup stats for the Detroit Pistons from the 2017 season...
Fetched stats for 243 NBA player li

Fetching lineup stats for the LA Clippers from the 2019 season...
Fetched stats for 431 NBA player lineups.
Fetching lineup stats for the Los Angeles Lakers from the 2019 season...
Fetched stats for 630 NBA player lineups.
Fetching lineup stats for the Memphis Grizzlies from the 2019 season...
Fetched stats for 736 NBA player lineups.
Fetching lineup stats for the Miami Heat from the 2019 season...
Fetched stats for 479 NBA player lineups.
Fetching lineup stats for the Milwaukee Bucks from the 2019 season...
Fetched stats for 609 NBA player lineups.
Fetching lineup stats for the Minnesota Timberwolves from the 2019 season...
Fetched stats for 476 NBA player lineups.
Fetching lineup stats for the New Orleans Pelicans from the 2019 season...
Fetched stats for 635 NBA player lineups.
Fetching lineup stats for the New York Knicks from the 2019 season...
Fetched stats for 663 NBA player lineups.
Fetching lineup stats for the Oklahoma City Thunder from the 2019 season...
Fetched stats for 37

In [22]:
df_opp2 = pd.DataFrame(np_arr_opp2, columns=['lineup_name', 'code', 'sorted_code', 'year', 'OPP_FGMT_PH', 'OPP_FGAT_PH', 'OPP_FGPT_PH', 'OPP_3PMT_PH', 'OPP_3PAT_PH', 'OPP_3PPT_PH', 'OPP_FTMT_PH', 'OPP_FTAT_PH', 'OPP_FTPT_PH', 'OPP_OREBT_PH', 'OPP_DREBT_PH', 'OPP_REBT_PH', 'OPP_ASTT_PH', 'OPP_TOVT_PH', 'OPP_STLT_PH', 'OPP_BLKT_PH', 'OPP_BLKAT_PH', 'OPP_PFT_PH', 'OPP_PFDT_PH', 'OPP_PTST_PH'])
df_opp2 = ConvertDataFrame(df_opp2)
print(df_opp2)


                                             lineup_name  \
0      D Howard, J Calderon, M Muscala, T Hardaway Jr...   
1      K Korver, M Scott, K Bazemore, M Muscala, T Pr...   
2      M Dunleavy, D Howard, P Millsap, K Bazemore, M...   
3      M Dunleavy, K Humphries, M Muscala, T Hardaway...   
4      K Korver, D Howard, T Sefolosha, P Millsap, K ...   
...                                                  ...   
40634  J Green, J Wall, T Satoransky, O Porter Jr, T ...   
40635         T Ariza, J Green, J Wall, B Beal, S Dekker   
40636  T Satoransky, J McRae, J Parker, S Dekker, B P...   
40637  T Ariza, B Beal, T Satoransky, B Portis, T Bryant   
40638  J Green, M Morris, B Beal, O Porter Jr, K Oubr...   

                                                    code  \
0      DHowardJCalderonMMuscalaTHardawayJrDBembryATL2017   
1           KKorverMScottKBazemoreMMuscalaTPrinceATL2017   
2       MDunleavyDHowardPMillsapKBazemoreMDelaneyATL2017   
3      MDunleavyKHumphriesMMuscalaTHard

# Merge and Store Dataframes

In [23]:
# Merge lineup dataframes on the sorted lineup code
#dfb = df_basic.drop([''], axis=1)
dfb = df_basic.copy()

# Drop duplicate variables
dfb2 = df_basic2.drop(['lineup_name', 'code', 'year'], axis=1)
dfa = df_adv.drop(['lineup_name', 'code', 'year'], axis=1)
dfm = df_misc.drop(['lineup_name', 'code', 'year'], axis=1)
dfm2 = df_misc2.drop(['lineup_name', 'code', 'year'], axis=1)
dfff = df_ff.drop(['lineup_name', 'code', 'year'], axis=1)
dfsc = df_sc.drop(['lineup_name', 'code', 'year'], axis=1)
dfo = df_opp.drop(['lineup_name', 'code', 'year'], axis=1)
dfo2 = df_opp2.drop(['lineup_name', 'code', 'year'], axis=1)

# Merge tables on the sorted lineup code
df = pd.merge(dfb, dfb2, on=['sorted_code'], how='inner')
df = pd.merge(df, dfa, on=['sorted_code'], how='inner')
df = pd.merge(df, dfm, on=['sorted_code'], how='inner')
df = pd.merge(df, dfm2, on=['sorted_code'], how='inner')
df = pd.merge(df, dfff, on=['sorted_code'], how='inner')
df = pd.merge(df, dfsc, on=['sorted_code'], how='inner')
df = pd.merge(df, dfo, on=['sorted_code'], how='inner')
df = pd.merge(df, dfo2, on=['sorted_code'], how='inner')

#print(df.columns)

# Add any extra columns
df['2PAT_PT'] = df['FGAT_PT']-df['3PAT_PT']
df['2PMT_PT'] = df['FGMT_PT']-df['3PMT_PT']
df['2PPT_PT'] = df['2PMT_PT']/df['2PAT_PT']
df['2PAT_PH'] = df['FGAT_PH']-df['3PAT_PH']
df['2PMT_PH'] = df['FGMT_PH']-df['3PMT_PH']
df['2PPT_PH'] = df['2PMT_PH']/df['2PAT_PH']
df['2PPT_PT'].fillna(0, inplace=True)
df['2PPT_PH'].fillna(0, inplace=True)

# Add minutes-per-game played together for lineups
df['MPGT'] = df['MPT']/df['GPT']


In [28]:
df.columns.values

array(['lineup_name', 'code', 'sorted_code', 'year', 'team', 'GPT', 'MPT',
       'PTST_PT', 'FGMT_PT', 'FGAT_PT', 'FGPT_PT', '3PMT_PT', '3PAT_PT',
       '3PPT_PT', 'FTMT_PT', 'FTAT_PT', 'FTPT_PT', 'ORBT_PT', 'DRBT_PT',
       'TRBT_PT', 'ASTT_PT', 'TOVT_PT', 'STLT_PT', 'BLKT_PT', 'BLKAT_PT',
       'PFT_PT', 'PFDT_PT', 'PMT_PT', 'MPT_PH', 'PTST_PH', 'FGMT_PH',
       'FGAT_PH', 'FGPT_PH', '3PMT_PH', '3PAT_PH', '3PPT_PH', 'FTMT_PH',
       'FTAT_PH', 'FTPT_PH', 'ORBT_PH', 'DRBT_PH', 'TRBT_PH', 'ASTT_PH',
       'TOVT_PH', 'STLT_PH', 'BLKT_PH', 'BLKAT_PH', 'PFT_PH', 'PFDT_PH',
       'PMT_PH', 'OFFRTGT', 'DEFRTGT', 'NETRTGT', 'ASTPT', 'ATRT',
       'ASTRT', 'ORBRT', 'DRBRT', 'TRBRT', 'TORT', 'EFGPT', 'TST',
       'PACET', 'PIET', 'PTS_OFF_TOVT_PT', 'SEC_CHANCE_PTST_PT',
       'FBPST_PT', 'PITPT_PT', 'OPP_PTS_OFF_TOVT_PT',
       'OPP_SEC_CHANCE_PTST_PT', 'OPP_FBPST_PT', 'OPP_PITPT_PT',
       'PTS_OFF_TOVT_PH', 'SEC_CHANCE_PTST_PH', 'FBPST_PH', 'PITPT_PH',
       'OPP_PTS_OFF_TOVT_P

In [32]:
# Add opponent 2PT percentages
df['OPP_2PAT_PT'] = df['OPP_FGAT_PT']-df['OPP_3PAT_PT']
df['OPP_2PMT_PT'] = df['OPP_FGMT_PT']-df['OPP_3PMT_PT']
df['OPP_2PPT_PT'] = df['OPP_2PMT_PT']/df['OPP_2PAT_PT']
df['OPP_2PAT_PH'] = df['OPP_FGAT_PH']-df['OPP_3PAT_PH']
df['OPP_2PMT_PH'] = df['OPP_FGMT_PH']-df['OPP_3PMT_PH']
df['OPP_2PPT_PH'] = df['OPP_2PMT_PH']/df['OPP_2PAT_PH']
df['OPP_2PPT_PT'].fillna(0, inplace=True)
df['OPP_2PPT_PH'].fillna(0, inplace=True)

In [50]:
# Convert percentage columns
perc_cols = [col for col in df.columns if 'FGP' in col or '3PP' in col or 'FTP' in col or 'PERC' in col or 'PCT' in col or 'FREQ' in col or 'ASTPT' in col or 'ORBRT' in col or 'DRBRT' in col or 'TRBRT' in col or 'TST' in col]
df[perc_cols] = df[perc_cols].astype(float)/100.

In [55]:
# Store to csv file
#df.to_csv("NBALineupStats.csv")
df.to_csv("NBA_Complete_Lineup_Stats.csv")
df


Unnamed: 0,lineup_name,code,sorted_code,year,team,GPT,MPT,PTST_PT,FGMT_PT,FGAT_PT,...,2PAT_PH,2PMT_PH,2PPT_PH,MPGT,OPP_2PAT_PT,OPP_2PMT_PT,OPP_2PPT_PT,OPP_2PAT_PH,OPP_2PMT_PH,OPP_2PPT_PH
0,"D Howard, T Sefolosha, P Millsap, K Bazemore, ...",DHowardTSefoloshaPMillsapKBazemoreDSchroderATL...,DHowardDSchroderKBazemorePMillsapTSefoloshaATL...,2017,ATL,36.0,425.0,0.726,27.5,60.2,...,62.2,30.5,0.490354,11.805556,41.6,19.3,0.463942,55.8,25.9,0.464158
1,"D Howard, P Millsap, K Bazemore, D Schroder, T...",DHowardPMillsapKBazemoreDSchroderTHardawayJrAT...,DHowardDSchroderKBazemorePMillsapTHardawayJrAT...,2017,ATL,38.0,240.0,0.775,28.8,62.3,...,58.8,30.9,0.525510,6.315789,41.6,20.0,0.480769,55.5,26.7,0.481081
2,"K Korver, D Howard, P Millsap, K Bazemore, D S...",KKorverDHowardPMillsapKBazemoreDSchroderATL2017,DHowardDSchroderKBazemoreKKorverPMillsapATL2017,2017,ATL,21.0,206.0,0.711,28.5,66.2,...,64.2,30.4,0.473520,9.809524,46.6,22.1,0.474249,62.6,29.6,0.472843
3,"D Howard, T Sefolosha, P Millsap, D Schroder, ...",DHowardTSefoloshaPMillsapDSchroderTHardawayJrA...,DHowardDSchroderPMillsapTHardawayJrTSefoloshaA...,2017,ATL,34.0,126.0,0.871,33.8,63.1,...,61.1,33.2,0.543372,3.705882,42.7,19.5,0.456674,56.8,26.0,0.457746
4,"D Howard, E Ilyasova, D Schroder, T Hardaway J...",DHowardEIlyasovaDSchroderTHardawayJrTPrinceATL...,DHowardDSchroderEIlyasovaTHardawayJrTPrinceATL...,2017,ATL,13.0,97.0,0.728,29.9,66.2,...,68.1,31.4,0.461087,7.461538,41.8,20.7,0.495215,55.4,27.5,0.496390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41429,"T Ariza, J Green, M Morris, T Satoransky, C Ra...",TArizaJGreenMMorrisTSatoranskyCRandleWAS2019,CRandleJGreenMMorrisTArizaTSatoranskyWAS2019,2019,WAS,1.0,1.0,0.000,0.0,120.0,...,100.0,0.0,0.000000,1.000000,0.0,0.0,0.000000,0.0,0.0,0.000000
41430,"J Wall, M Morris, B Beal, T Satoransky, T Bryant",JWallMMorrisBBealTSatoranskyTBryantWAS2019,BBealJWallMMorrisTBryantTSatoranskyWAS2019,2019,WAS,1.0,1.0,1.234,0.0,61.7,...,50.0,0.0,0.000000,1.000000,61.7,61.7,1.000000,50.0,50.0,1.000000
41431,"J Green, J Wall, B Beal, O Porter Jr, T Bryant",JGreenJWallBBealOPorterJrTBryantWAS2019,BBealJGreenJWallOPorterJrTBryantWAS2019,2019,WAS,1.0,1.0,1.906,63.5,63.5,...,50.0,50.0,1.000000,1.000000,63.5,63.5,1.000000,50.0,50.0,1.000000
41432,"D Howard, M Morris, B Beal, A Rivers, T Satora...",DHowardMMorrisBBealARiversTSatoranskyWAS2019,ARiversBBealDHowardMMorrisTSatoranskyWAS2019,2019,WAS,1.0,1.0,1.350,67.5,67.5,...,100.0,100.0,1.000000,1.000000,0.0,0.0,0.000000,0.0,0.0,0.000000
