In [96]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [97]:
import warnings
warnings.filterwarnings('ignore')

# Final Project: NBA Quick Stats

In [116]:
def compute_summary_stats(year):
    url = f"https://www.basketball-reference.com/leagues/NBA_{year}.html"
    tables = pd.read_html(url)
    
    east = tables[0].iloc[:,[0, 3]]
    east.rename(columns={'Eastern Conference': 'Team'}, inplace=True)
    
    west = tables[1].iloc[:,[0, 3]]
    west.rename(columns={'Western Conference': 'Team'}, inplace=True)
    
    all_teams = [east, west]
    all_teams = pd.concat(all_teams)
    all_teams = all_teams.reset_index()
    all_teams.drop('index', axis=1, inplace=True)
    
    if year >= 2016:
        df_total = tables[4].sort_values('PTS')
        df_total = df_total.merge(all_teams).sort_values('W/L%')
        df_total['Year'] = year
        return pd.DataFrame(df_total.sort_values('W/L%', ascending=False))
    else:
        df_total = tables[2].sort_values('PTS')
        df_total = df_total.merge(all_teams).sort_values('W/L%')
        df_total['Year'] = year
        return pd.DataFrame(df_total.sort_values('W/L%', ascending=False))

In [118]:
result = compute_summary_stats(2020)
result.head()

Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,W/L%,Year
29,1.0,Milwaukee Bucks*,73,241.0,43.3,90.9,0.476,13.8,38.9,0.355,...,42.2,51.7,25.9,7.2,5.9,15.1,19.6,118.7,0.767,2020
17,13.0,Toronto Raptors*,72,241.4,40.2,87.9,0.458,13.8,37.0,0.374,...,35.9,45.4,25.2,8.8,5.0,14.8,21.7,112.8,0.736,2020
19,11.0,Los Angeles Lakers*,71,240.7,42.3,88.3,0.48,11.0,31.6,0.349,...,35.1,45.7,25.4,8.6,6.6,15.2,20.7,113.4,0.732,2020
26,4.0,Los Angeles Clippers*,72,241.4,41.6,89.2,0.466,12.4,33.5,0.371,...,37.0,47.7,23.7,7.1,4.7,14.6,22.1,116.3,0.681,2020
21,9.0,Boston Celtics*,72,242.1,41.3,89.6,0.461,12.6,34.5,0.364,...,35.4,46.1,23.0,8.3,5.6,13.8,21.6,113.7,0.667,2020


In [120]:
nba_data = []

for year in range(2012, 2022):
    print(f"Working on year {year}")
    nba_data.append(compute_summary_stats(year))
    
nba_data = pd.concat(nba_data)
nba_data

Working on year 2012
Working on year 2013
Working on year 2014
Working on year 2015
Working on year 2016
Working on year 2017
Working on year 2018
Working on year 2019
Working on year 2020
Working on year 2021


Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,W/L%,Year
12,18.0,Chicago Bulls*,66,241.5,37.4,82.8,0.452,6.3,16.9,0.375,...,32.8,46.7,23.1,6.9,5.9,14.0,17.3,96.3,.758,2012
28,2.0,San Antonio Spurs*,66,241.5,39.6,82.8,0.478,8.4,21.3,0.393,...,32.6,43.0,23.2,7.4,4.4,13.6,17.3,103.7,.758,2012
27,3.0,Oklahoma City Thunder*,66,242.3,37.3,79.2,0.471,7.2,20.0,0.358,...,32.7,43.7,18.5,7.5,8.2,16.3,20.5,103.1,.712,2012
23,7.0,Miami Heat*,66,243.0,37.1,79.0,0.469,5.6,15.6,0.359,...,31.2,41.6,20.0,8.9,5.4,15.2,19.4,98.5,.697,2012
17,13.0,Indiana Pacers*,66,242.3,35.7,81.4,0.438,5.9,16.1,0.368,...,31.4,43.9,18.6,7.9,5.4,14.0,21.7,97.7,.636,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,28.0,Oklahoma City Thunder,72,241.0,38.8,88.0,0.441,11.9,35.1,0.339,...,35.7,45.6,22.1,7.0,4.4,16.1,18.1,105.0,0.306,2021
0,30.0,Cleveland Cavaliers,72,242.1,38.6,85.8,0.450,10.0,29.7,0.336,...,32.3,42.8,23.8,7.8,4.5,15.5,18.2,103.8,0.306,2021
1,29.0,Orlando Magic,72,240.7,38.3,89.2,0.429,10.9,31.8,0.343,...,35.1,45.4,21.8,6.9,4.4,12.8,17.2,104.0,0.292,2021
3,27.0,Detroit Pistons,72,242.1,38.7,85.6,0.452,11.6,32.9,0.351,...,33.1,42.7,24.2,7.4,5.2,14.9,20.5,106.6,0.278,2021


Questions

(1) How have the basic stats (ie. 3-point FGs attempted, winning percentage, etc.) of NBA teams changed over the past decade?
Ryan
(2) Playoff team data vs Non-playoff team data over past decade
Chris
(3) What variable has the highest correlation with winning percentage among teams in the top 25% over the past decade? Is there a significant difference with this variable between the top 25% and bottom 25%?
Jovanni