# Web Scrapper to Pull NBA Standings from Basketball Reference

### Year (NBA Season)

In [1]:
#Specify the year in which to pull data. For example, the 2019-2020 season should be specified as 2020
year = 2020

### Import Libraries and Configure URL

In [2]:
#Import libraries
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import string

In [3]:
#URL
url = "https://www.basketball-reference.com/leagues/NBA_{}_standings.html".format(year)

#This is the HTML from the given URL
html = urlopen(url, timeout=10)
soup = BeautifulSoup(html)

### Eastern Conference

In [4]:
#Use findALL() to get the column headers
soup.findAll('tr', limit=2)

#Use getText()to extract the text we need into a list
ec_headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
ec_headers = ec_headers[0:8]
ec_headers

['Eastern Conference', 'W', 'L', 'W/L%', 'GB', 'PS/G', 'PA/G', 'SRS']

In [5]:
#Create rows
ec_rows = soup.findAll('tr')[2:17]
ec_standings = [[tr.getText() for tr in ec_rows[i].findAll(['th','td'])]
            for i in range(len(ec_rows))]                         

In [6]:
#Create dataframe
ec = pd.DataFrame(ec_standings, columns=ec_headers)

In [7]:
ec.columns = ['Team','W','L','WL_pct','GB','PPG','OPPG','SRS']
ec['Conference'] = 'Eastern'
ec['Rank'] = ec.index+1
ec['Playoff_Team'] = np.where(ec['Rank'] <=8, 1,0)
ec['Year'] = year
ec['GB'] = np.where(ec['GB']=='—', 0, ec['GB'])
ec['GB'] = np.where(ec['GB']=='-', 0, ec['GB'])

In [8]:
#Clean invalid characters
invalidchar = string.punctuation
ec['Team'] = ec.Team.str.strip(invalidchar)

### Western Conference

In [9]:
#Use findALL() to get the column headers
soup.findAll('tr', limit=2)

#Use getText()to extract the text we need into a list
wc_headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
wc_headers = wc_headers[23:31]
wc_headers

['Western Conference', 'W', 'L', 'W/L%', 'GB', 'PS/G', 'PA/G', 'SRS']

In [10]:
#Create rows
wc_rows = soup.findAll('tr')[18:33]
wc_standings = [[tr.getText() for tr in wc_rows[i].findAll(['th','td'])]
            for i in range(len(wc_rows))]     

In [11]:
#Create dataframe
wc = pd.DataFrame(wc_standings, columns=wc_headers)

In [12]:
wc.columns = ['Team','W','L','WL_pct','GB','PPG','OPPG','SRS']
wc['Conference'] = 'Western'
wc['Rank'] = wc.index+1
wc['Playoff_Team'] = np.where(wc['Rank'] <=8, 1,0)
wc['Year'] = year
wc['GB'] = np.where(wc['GB']=='—', 0, ec['GB'])
wc['GB'] = np.where(wc['GB']=='-', 0, ec['GB'])

In [13]:
#Clean invalid characters
wc['Team'] = wc.Team.str.strip(invalidchar)

In [14]:
ec

Unnamed: 0,Team,W,L,WL_pct,GB,PPG,OPPG,SRS,Conference,Rank,Playoff_Team,Year
0,Milwaukee Bucks,56,17,0.767,0.0,118.7,108.6,9.41,Eastern,1,1,2020
1,Toronto Raptors,53,19,0.736,2.5,112.8,106.5,5.97,Eastern,2,1,2020
2,Boston Celtics,48,24,0.667,7.5,113.7,107.3,5.83,Eastern,3,1,2020
3,Indiana Pacers,45,28,0.616,11.0,109.4,107.5,1.63,Eastern,4,1,2020
4,Miami Heat,44,29,0.603,12.0,112.0,109.1,2.59,Eastern,5,1,2020
5,Philadelphia 76ers,43,30,0.589,13.0,110.7,108.4,2.25,Eastern,6,1,2020
6,Brooklyn Nets,35,37,0.486,20.5,111.8,112.3,-1.01,Eastern,7,1,2020
7,Orlando Magic,33,40,0.452,23.0,107.3,108.3,-0.93,Eastern,8,1,2020
8,Charlotte Hornets,23,42,0.354,29.0,102.9,109.6,-7.03,Eastern,9,0,2020
9,Washington Wizards,25,47,0.347,30.5,114.4,119.1,-5.24,Eastern,10,0,2020


In [15]:
wc

Unnamed: 0,Team,W,L,WL_pct,GB,PPG,OPPG,SRS,Conference,Rank,Playoff_Team,Year
0,Los Angeles Lakers,52,19,0.732,0.0,113.4,107.6,6.28,Western,1,1,2020
1,Los Angeles Clippers,49,23,0.681,2.5,116.3,109.9,6.66,Western,2,1,2020
2,Denver Nuggets,46,27,0.63,7.5,111.3,109.2,2.35,Western,3,1,2020
3,Houston Rockets,44,28,0.611,11.0,117.8,114.8,3.13,Western,4,1,2020
4,Oklahoma City Thunder,44,28,0.611,12.0,110.4,108.4,2.33,Western,5,1,2020
5,Utah Jazz,44,28,0.611,13.0,111.3,108.8,2.52,Western,6,1,2020
6,Dallas Mavericks,43,32,0.573,20.5,117.0,112.1,4.87,Western,7,1,2020
7,Portland Trail Blazers,35,39,0.473,23.0,115.0,116.1,-0.61,Western,8,1,2020
8,Memphis Grizzlies,34,39,0.466,29.0,112.6,113.7,-0.91,Western,9,0,2020
9,Phoenix Suns,34,39,0.466,30.5,113.6,113.4,0.56,Western,10,0,2020
