In [1]:
import numpy as np
import urllib.request
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions
import lxml.html
from lxml import etree
import time
import pandas as pd
from functools import reduce

In [2]:
# Return a copy of the dataframe with appropriate data types (the input is left untouched)
def ConvertDataFrame(df):
    """Return a type-converted, de-duplicated copy of a combine dataframe.

    Every column except 'name' is converted to float, 'name' is converted to
    str, and 'draft_year' is then narrowed from float to int. Afterwards every
    row whose 'name' occurs more than once is removed (keep=False drops *all*
    occurrences of a duplicated name, not just the later ones).

    The conversion is done on new frames returned by ``astype``; the frame
    passed in by the caller is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'name' and a 'draft_year' column; all columns other
        than 'name' must hold values convertible to float.

    Returns
    -------
    pandas.DataFrame
        New frame with the converted dtypes and duplicated names removed.

    Raises
    ------
    KeyError
        If 'name' or 'draft_year' is missing.
    ValueError
        If a non-name value cannot be converted to float.
    """
    numeric_cols = [col for col in df.columns if col != 'name']
    # Two-step conversion for draft_year: first to float (with the other numeric
    # columns), then to int, so values such as '2001.0' are accepted as well.
    converted = df.astype({col: float for col in numeric_cols})
    converted = converted.astype({'name': 'str', 'draft_year': 'int'})
    return converted.drop_duplicates(subset=['name'], keep=False)

In [3]:
# Scrape draft combine data from NBA.com
def _ParseCombineRow(row_text, year):
    """Convert the text of one table row into a 9-element record, or None to skip it.

    The row text is split on whitespace. A row is skipped (None is returned) when
    a '-' appears anywhere in its last seven tokens, or when it does not have
    exactly 13 or 15 tokens.

    Returned layout: [name, int(year), v1, v2, v3, m1, weight, m2, m3] where
    v1..v3 are the three tokens following the discarded token after the name
    (kept as strings with '%' and "'" stripped), and m1, m2-in-order are
    "feet, inches" token pairs combined as feet * 12 + inches.
    """
    tokens = row_text.split()
    # A '-' in the trailing measurement tokens is treated as a missing value.
    if '-' in ''.join(tokens[-7:]):
        return None
    # Only these two token counts are understood. NOTE(review): since the
    # trailing 11 tokens are fixed, this effectively only admits names made of
    # two whitespace-separated words - confirm that dropping other names is intended.
    if len(tokens) not in (13, 15):
        return None
    if len(tokens) == 15:
        # Remove the two tokens at positions -7 and -6 so both layouts line up.
        # Presumably this is a second height reading (e.g. with shoes) - TODO confirm.
        del tokens[-7:-5]

    # Everything before the last 11 tokens is the player name; the token right
    # after the name (presumably the position - TODO confirm) is discarded.
    fields = [' '.join(tokens[:-11])] + tokens[-10:]
    fields = [s.strip('%').strip('\'') for s in fields]

    name, v1, v2, v3 = fields[:4]
    a_ft, a_in, b_ft, b_in, weight, c_ft, c_in = [float(v) for v in fields[4:]]
    return [
        name,
        int(year),
        v1,
        v2,
        v3,
        a_ft * 12. + a_in,
        b_ft * 12. + b_in,
        weight,
        c_ft * 12. + c_in,
    ]


def FetchCombineAnthroTables(urls, years):
    """Scrape the combine measurement table from each URL with headless Firefox.

    Parameters
    ----------
    urls : sequence of str
        Pages to load, one per year.
    years : sequence
        years[i] is the label attached to every row scraped from urls[i];
        it must be convertible with int().

    Returns
    -------
    numpy.ndarray
        2-D array of shape (number_of_parsed_rows, 9). Each row is
        [name, year, three string fields, three combined feet/inch values and
        a weight] in the order produced by _ParseCombineRow. Because each row
        mixes strings and numbers, NumPy coerces all cells to a common dtype.
        When nothing could be parsed the shape is (0, 9) so the result can
        still be wrapped in a 9-column DataFrame.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If no table row appears within 30 seconds on a page.
    IndexError
        If `years` is shorter than `urls`.

    The browser is always shut down, even when a page fails to load.
    """
    wait_xpath = "//*[@class='nba-stat-table__overflow']//table/tbody/tr"
    rows_xpath = "//*[@class='nba-stat-table__overflow']/table/tbody/tr"

    # Create a headless Firefox browser instance
    opt = FirefoxOptions()
    opt.add_argument("--headless")
    driver = webdriver.Firefox(options=opt)

    arr = []
    try:
        for i, url in enumerate(urls):
            year = years[i]
            print("Fetching NBA Combine measurements from Year", year, "...")

            driver.get(url)
            # The table is rendered client-side; wait until at least one row exists.
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.XPATH, wait_xpath))
            )
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # find_elements(By.XPATH, ...) replaces find_elements_by_xpath,
            # which no longer exists in Selenium 4.3+.
            rows = driver.find_elements(By.XPATH, rows_xpath)

            counter = 0
            for row in rows:
                record = _ParseCombineRow(row.text, year)
                if record is None:
                    continue
                arr.append(record)
                counter += 1

            print("Fetched stats for", counter, "NBA draft players.")
            # Short pause between page loads.
            time.sleep(1)
    finally:
        # Release the browser process even if a page load or wait raised.
        driver.quit()

    # Explicit 2-D shape keeps an empty result usable as a 9-column table.
    return np.array(arr).reshape(len(arr), 9)

In [4]:
# Seasons to scrape: ya holds the two-digit season start ("00".."19"), yb the two-digit
# season end ("01".."20"), and years the four-digit label attached to each season.
# NOTE(review): each season 20XX-YY is labeled with its ending year 20YY - confirm that
# this is the intended draft year.
ya = ["{:02d}".format(n) for n in range(20)]
yb = ["{:02d}".format(n) for n in range(1, 21)]
years = [2000 + int(suffix) for suffix in yb]

In [5]:
# Build one combine-anthro URL per season (SeasonYear=20<start>-<end>), scrape all of
# them into a 2D array, wrap the array in a Pandas dataframe and print it.
urls = [
    "https://stats.nba.com/draft/combine-anthro/?SeasonYear=20{0}-{1}".format(start, end)
    for start, end in zip(ya, yb)
]
np_arr = FetchCombineAnthroTables(urls, years)
df = pd.DataFrame(
    np_arr,
    columns=[
        'name',
        'draft_year',
        'BFP',
        'hand_length',
        'hand_width',
        'height',
        'reach',
        'weight',
        'wingspan',
    ],
)
print(df)

Fetching NBA Combine measurements from Year 2001 ...
Fetched stats for 64 NBA draft players.
Fetching NBA Combine measurements from Year 2002 ...
Fetched stats for 78 NBA draft players.
Fetching NBA Combine measurements from Year 2003 ...
Fetched stats for 81 NBA draft players.
Fetching NBA Combine measurements from Year 2004 ...
Fetched stats for 78 NBA draft players.
Fetching NBA Combine measurements from Year 2005 ...
Fetched stats for 79 NBA draft players.
Fetching NBA Combine measurements from Year 2006 ...
Fetched stats for 78 NBA draft players.
Fetching NBA Combine measurements from Year 2007 ...
Fetched stats for 76 NBA draft players.
Fetching NBA Combine measurements from Year 2008 ...
Fetched stats for 76 NBA draft players.
Fetching NBA Combine measurements from Year 2009 ...
Fetched stats for 74 NBA draft players.
Fetching NBA Combine measurements from Year 2010 ...
Fetched stats for 46 NBA draft players.
Fetching NBA Combine measurements from Year 2011 ...
Fetched stats for

In [6]:
# Write the scraped table to a CSV file in the current working directory.
# No index argument is given, so pandas' default applies (the row index is written too).
df.to_csv(path_or_buf="NBACombineStats.csv")