In [1]:
# Import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time

In [3]:
# 2018 NBA Draft page; fetched once to work out the table layout before
# scraping several years.
url = 'https://www.basketball-reference.com/draft/NBA_2018.html'
# Bound the wait so a stalled connection raises an exception instead of
# hanging the kernel indefinitely (requests applies no timeout by default).
rec = requests.get(url, timeout=30)

In [4]:
# Display the HTTP status of the response; the parsing cells below only make
# sense when the request succeeded (200).
rec.status_code


200

In [7]:
# Parse the raw response bytes into a navigable tree using the lxml parser.
soup = BeautifulSoup(markup=rec.content, features='lxml')

In [8]:
# Locate the table holding the draft data. Passing a multi-class string makes
# BeautifulSoup match the class attribute's exact string value.
table = soup.find('table', {'class': 'sortable stats_table'})
# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError on None in a later cell.
if table is None:
    raise ValueError("no table with class 'sortable stats_table' found on the page")

In [9]:
# Column labels for the DataFrame, read from the <th> cells of the table's
# second <tr> (index 1) -- presumably row 0 is a grouping header; verify
# against the page if the layout changes.
header_row = table.find_all('tr')[1]
columns = [header_cell.text for header_cell in header_row.find_all('th')]

In [10]:
# Sanity check: text of every <td> cell in the first body row (one player).
first_row = table.find('tbody').find_all('tr')[0]
[cell.text for cell in first_row.find_all('td')]

['1',
 'PHO',
 'Deandre Ayton',
 'Arizona',
 '4',
 '198',
 '6159',
 '3183',
 '2114',
 '328',
 '.591',
 '.186',
 '.745',
 '31.1',
 '16.1',
 '10.7',
 '1.7',
 '19.9',
 '.155',
 '0.7',
 '4.1']

In [11]:
# One entry per table row: the list of that row's <td> cell texts.
draft_list = []

# Scrape one draft page per year, 2008 through 2019 inclusive.
for i in range(2008,2020):
    print(f'Scraping {i} draft')
    # The draft URL embeds the specific year.
    url = f'https://www.basketball-reference.com/draft/NBA_{i}.html'
    # Bound the wait so one stalled request cannot hang the whole scrape.
    rec = requests.get(url, timeout=30)

    # Only parse the page if the request succeeded.
    if rec.status_code == 200:
        soup = BeautifulSoup(rec.content, 'lxml')
        table = soup.find('table', {'class': 'sortable stats_table'})

        if table is None:
            # find() returns None when no table matches; report the year and
            # move on instead of crashing with an AttributeError on None.
            print(f'no draft table found for {i}')
        else:
            for player in table.find('tbody').find_all('tr'):
                # Rows without any <td> cells produce an empty list here, which
                # pandas later pads into an all-null row.
                player_info = [td.text for td in player.find_all('td')]
                draft_list.append(player_info)
    else:
        print('website error')

    # Pause after every request, including failed ones, so an error response
    # is not followed immediately by another hit on the site.
    time.sleep(1)

Scraping 2008 draft
Scraping 2009 draft
Scraping 2010 draft
Scraping 2011 draft
Scraping 2012 draft
Scraping 2013 draft
Scraping 2014 draft
Scraping 2015 draft
Scraping 2016 draft
Scraping 2017 draft
Scraping 2018 draft
Scraping 2019 draft


In [12]:
# Build the DataFrame from the scraped rows. The first header label is skipped
# so the labels line up with the <td>-only row lists -- presumably that first
# column is rendered outside <td> cells; verify against the page.
stat_columns = columns[1:]
draft_df = pd.DataFrame(data=draft_list, columns=stat_columns)

In [13]:
# Preview the first five scraped rows to confirm the columns line up.
draft_df.head()

Unnamed: 0,Pk,Tm,Player,College,Yrs,G,MP,PTS,TRB,AST,...,3P%,FT%,MP.1,PTS.1,TRB.1,AST.1,WS,WS/48,BPM,VORP
0,1,CHI,Derrick Rose,Memphis,13,672,21317,12230,2239,3646,...,0.316,0.83,31.7,18.2,3.3,5.4,44.2,0.099,1.4,18.2
1,2,MIA,Michael Beasley,Kansas State,11,609,13903,7568,2861,788,...,0.349,0.759,22.8,12.4,4.7,1.3,15.6,0.054,-1.7,0.9
2,3,MIN,O.J. Mayo,USC,8,547,16919,7574,1706,1607,...,0.373,0.82,30.9,13.8,3.1,2.9,21.8,0.062,-0.9,4.7
3,4,SEA,Russell Westbrook,UCLA,14,972,33797,22426,7187,8306,...,0.306,0.788,34.8,23.1,7.4,8.5,106.3,0.151,4.5,55.5
4,5,MEM,Kevin Love,UCLA,14,760,23861,13563,8287,1801,...,0.372,0.83,31.4,17.8,10.9,2.4,84.9,0.171,3.3,32.1


In [14]:
# Remove every row that contains a null. Cells scraped as empty strings are
# not null to pandas, so such rows are kept. The original index labels are
# retained (no reset).
draft_df = draft_df.dropna()

In [15]:
# Total count of null cells across the whole frame; expected to be 0 after
# the dropna above. Empty strings are not counted as null.
draft_df.isnull().sum().sum()

0

In [16]:
# (rows, columns) of the cleaned frame.
draft_df.shape

(720, 21)

In [17]:
# Persist the cleaned draft data for use in other notebooks. The row index is
# written as the first, unnamed CSV column (the to_csv default, made explicit).
draft_df.to_csv(path_or_buf='draftpicks.csv', index=True)