## Scraping NBA Player Statistics

In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

Set the year range to 1985 through 2019 (`range` excludes its stop value, 2020)

In [2]:
# Seasons to scrape. range() excludes its stop value, so this spans 1985-2019.
FIRST_YEAR, STOP_YEAR = 1985, 2020
year_range = range(FIRST_YEAR, STOP_YEAR)

Define a function to get the NBA player statistics for a given year

In [3]:
def get_nba_stats(year):
    """Scrape the per-game player statistics page for one NBA season.

    Downloads the Basketball-Reference per-game page for ``year``, collects
    every ``<tr>`` element on the page, and builds a DataFrame from the rows
    that contain ``<td>`` cells.

    Parameters
    ----------
    year : int
        Season year interpolated into the page URL
        (``.../leagues/NBA_{year}_per_game.html``).

    Returns
    -------
    pandas.DataFrame
        One row per table row that has data cells, preceded by a ``Year``
        column holding ``year``. Every scraped value is a string; no numeric
        conversion is performed here.

    Raises
    ------
    urllib.error.URLError
        If the page cannot be fetched.
    IndexError
        If the page contains no ``<tr>`` elements.
    """
    url = f'https://www.basketball-reference.com/leagues/NBA_{ year }_per_game.html'
    # Use a context manager so the HTTP response is closed once parsed.
    with urlopen(url) as response:
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed, so results can vary by environment.
        soup = BeautifulSoup(response, 'html.parser')
    # Extract all table rows <tr>
    rows = soup.find_all('tr')
    # The first row supplies the column names; its first <th> is skipped
    # because the data rows below are built from <td> cells only.
    headers = [th.get_text() for th in rows[0].find_all('th')][1:]
    # Collect the <td> text of every remaining row, dropping trailing '*'
    # characters from each cell.
    players = [
        [td.get_text().rstrip('*') for td in row.find_all('td')]
        for row in rows[1:]
    ]
    # Rows without any <td> produce empty lists (presumably repeated header
    # rows inside the table - TODO confirm) and are filtered out here.
    stats = pd.DataFrame([row for row in players if row], columns=headers)
    # Insert the year as the first column
    stats.insert(0, 'Year', year)
    return stats

Get the NBA player statistics for each year in the year range and combine them into a single pandas DataFrame

In [4]:
# Collect each season's frame in a list and concatenate once at the end:
# calling pd.concat inside the loop re-copies every accumulated row on each
# iteration.
yearly_stats = []
for year in year_range:
    stats = get_nba_stats(year)
    yearly_stats.append(stats)
# pd.concat raises on an empty list, so fall back to an empty DataFrame
# when there were no years to scrape.
full_stat = pd.concat(yearly_stats) if yearly_stats else pd.DataFrame()

Save DataFrame to file

In [5]:
# Write the combined statistics to CSV, leaving the DataFrame index out of the file.
output_path = '../data/scraped/NBA_Player_Stats.csv'
full_stat.to_csv(output_path, index=False)