In [175]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [176]:
import warnings
# Silence every warning for the rest of the session: no category, message or
# module is given, so the 'ignore' action applies to all of them.
# NOTE(review): this also hides pandas diagnostics (e.g. chained-assignment
# and deprecation warnings) raised by later cells -- consider narrowing it.
warnings.filterwarnings('ignore')

# Final Project: NBA Quick Stats

## Initial Data Scraping

In [177]:
def compute_summary_stats(year):
    """Scrape one season's team stats table and attach each team's win rate.

    Reads every HTML table on the Basketball-Reference league page for
    ``year``. The first two tables supply the standings (first column = team
    name, fourth column = ``W/L%``); they are stacked and merged onto the
    team stats table by ``Team``.

    Parameters
    ----------
    year : int
        Season to fetch; it is interpolated into the page URL. For seasons
        from 2016 on the stats are read from the fifth table on the page,
        for earlier seasons from the third.

    Returns
    -------
    pandas.DataFrame
        The stats table plus ``W/L%`` (numeric), ``Year`` and ``3P_Rate``
        (``3PA / FGA``), sorted by ``W/L%`` in descending order. Row labels
        are the positions in the ``PTS``-sorted merge result.
    """
    url = f"https://www.basketball-reference.com/leagues/NBA_{year}.html"
    tables = pd.read_html(url)

    # Keep only (team name, W/L%) from each conference table and give the
    # name column a common header. rename() returns a new frame, so nothing
    # is written back onto a slice of the scraped table.
    standings = [
        tables[position].iloc[:, [0, 3]].rename(columns={header: 'Team'})
        for position, header in ((0, 'Eastern Conference'),
                                 (1, 'Western Conference'))
    ]
    all_teams = pd.concat(standings, ignore_index=True)

    # The win-rate column can be parsed as text for some seasons (the combined
    # frame renders values such as `.758` next to `0.306`). Coerce it so that
    # sorting and later arithmetic are numeric; non-numeric cells become NaN.
    all_teams['W/L%'] = pd.to_numeric(all_teams['W/L%'], errors='coerce')

    # The only season-dependent part is where the stats table sits on the page.
    stats_position = 4 if year >= 2016 else 2

    # Sorting by PTS before the merge keeps the row labels the same as before.
    df_total = tables[stats_position].sort_values('PTS').merge(all_teams, on='Team')
    df_total['Year'] = year
    df_total['3P_Rate'] = df_total['3PA'] / df_total['FGA']
    return df_total.sort_values('W/L%', ascending=False)

In [178]:
# Scrape the 2012 through 2021 seasons one at a time, then stack the
# per-season frames into a single table (row labels are kept as returned).
season_frames = []

for year in range(2012, 2022):
    print(f"Working on year {year}")
    season_frame = compute_summary_stats(year)
    season_frames.append(season_frame)

nba_data = pd.concat(season_frames)
nba_data

Working on year 2012
Working on year 2013
Working on year 2014
Working on year 2015
Working on year 2016
Working on year 2017
Working on year 2018
Working on year 2019
Working on year 2020
Working on year 2021


Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,TRB,AST,STL,BLK,TOV,PF,PTS,W/L%,Year,3P_Rate
12,18.0,Chicago Bulls*,66,241.5,37.4,82.8,0.452,6.3,16.9,0.375,...,46.7,23.1,6.9,5.9,14.0,17.3,96.3,.758,2012,0.204106
28,2.0,San Antonio Spurs*,66,241.5,39.6,82.8,0.478,8.4,21.3,0.393,...,43.0,23.2,7.4,4.4,13.6,17.3,103.7,.758,2012,0.257246
27,3.0,Oklahoma City Thunder*,66,242.3,37.3,79.2,0.471,7.2,20.0,0.358,...,43.7,18.5,7.5,8.2,16.3,20.5,103.1,.712,2012,0.252525
23,7.0,Miami Heat*,66,243.0,37.1,79.0,0.469,5.6,15.6,0.359,...,41.6,20.0,8.9,5.4,15.2,19.4,98.5,.697,2012,0.197468
17,13.0,Indiana Pacers*,66,242.3,35.7,81.4,0.438,5.9,16.1,0.368,...,43.9,18.6,7.9,5.4,14.0,21.7,97.7,.636,2012,0.197789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,28.0,Oklahoma City Thunder,72,241.0,38.8,88.0,0.441,11.9,35.1,0.339,...,45.6,22.1,7.0,4.4,16.1,18.1,105.0,0.306,2021,0.398864
0,30.0,Cleveland Cavaliers,72,242.1,38.6,85.8,0.450,10.0,29.7,0.336,...,42.8,23.8,7.8,4.5,15.5,18.2,103.8,0.306,2021,0.346154
1,29.0,Orlando Magic,72,240.7,38.3,89.2,0.429,10.9,31.8,0.343,...,45.4,21.8,6.9,4.4,12.8,17.2,104.0,0.292,2021,0.356502
3,27.0,Detroit Pistons,72,242.1,38.7,85.6,0.452,11.6,32.9,0.351,...,42.7,24.2,7.4,5.2,14.9,20.5,106.6,0.278,2021,0.384346


# Questions

1. How have the basic stats (e.g., 3-point field goal attempts, winning percentage) of NBA teams changed over the past decade? **[Ryan]**

2. How do the stats of playoff teams compare with those of non-playoff teams over the past decade? **[Chris]**

3. Which variable has the highest correlation with winning percentage among teams in the top 25% over the past decade? Is there a significant difference in this variable between the top 25% and the bottom 25%? **[Jovanni]**

## (2) Playoff Team Data vs. Non-Playoff Team Data

In [179]:
# Bucket each season into one of three eras. The bin edges sit on half-years,
# so every integer season falls strictly inside exactly one bin.
nba_data['Year_Bin'] = pd.cut(nba_data['Year'], [2011.5, 2014.5, 2017.5, 2021.5],
                              labels=['2012-2014', '2015-2017', '2018-2021'])

# Teams whose name ends in '*' are labelled 'Playoffs'; all others 'Lottery'.
made_playoffs = nba_data['Team'].str.endswith('*')

# .assign() builds new frames, so Status is never written onto a filtered
# slice of nba_data (which is what triggers pandas' chained-assignment warning).
playoff_data = nba_data[made_playoffs].assign(Status='Playoffs')
lottery_data = nba_data[~made_playoffs].assign(Status='Lottery')

# Recombine so nba_data carries the Status column: playoff rows first.
nba_data = pd.concat([playoff_data, lottery_data])

display(playoff_data.head())
display(lottery_data.head())

Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,STL,BLK,TOV,PF,PTS,W/L%,Year,3P_Rate,Year_Bin,Status
12,18.0,Chicago Bulls*,66,241.5,37.4,82.8,0.452,6.3,16.9,0.375,...,6.9,5.9,14.0,17.3,96.3,0.758,2012,0.204106,2012-2014,Playoffs
28,2.0,San Antonio Spurs*,66,241.5,39.6,82.8,0.478,8.4,21.3,0.393,...,7.4,4.4,13.6,17.3,103.7,0.758,2012,0.257246,2012-2014,Playoffs
27,3.0,Oklahoma City Thunder*,66,242.3,37.3,79.2,0.471,7.2,20.0,0.358,...,7.5,8.2,16.3,20.5,103.1,0.712,2012,0.252525,2012-2014,Playoffs
23,7.0,Miami Heat*,66,243.0,37.1,79.0,0.469,5.6,15.6,0.359,...,8.9,5.4,15.2,19.4,98.5,0.697,2012,0.197468,2012-2014,Playoffs
17,13.0,Indiana Pacers*,66,242.3,35.7,81.4,0.438,5.9,16.1,0.368,...,7.9,5.4,14.0,21.7,97.7,0.636,2012,0.197789,2012-2014,Playoffs


Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,STL,BLK,TOV,PF,PTS,W/L%,Year,3P_Rate,Year_Bin,Status
21,9.0,Houston Rockets,66,243.4,37.7,84.0,0.449,7.2,20.2,0.359,...,7.5,4.8,14.5,20.4,98.1,0.515,2012,0.240476,2012-2014,Lottery
22,8.0,Phoenix Suns,66,240.0,37.8,82.5,0.458,6.7,19.6,0.343,...,6.5,5.5,14.1,18.7,98.4,0.5,2012,0.237576,2012-2014,Lottery
25,5.0,Milwaukee Bucks,66,240.4,37.9,85.6,0.443,6.6,19.2,0.345,...,8.3,5.1,14.1,19.3,99.0,0.47,2012,0.224299,2012-2014,Lottery
14,16.0,Portland Trail Blazers,66,241.9,36.4,82.1,0.443,7.2,20.9,0.346,...,8.0,4.9,14.2,19.0,97.2,0.424,2012,0.254568,2012-2014,Lottery
20,10.0,Minnesota Timberwolves,66,241.5,35.7,82.3,0.433,7.2,21.6,0.332,...,6.6,4.4,15.2,18.4,97.9,0.394,2012,0.262454,2012-2014,Lottery


In [190]:
# Columns kept for the playoff-vs-lottery comparison, in display order.
focus_cols = (
    ['Team']
    + ['FGA', 'FG%', '3PA', '3P%', '2PA', '2P%', 'FTA', 'FT%']
    + ['ORB', 'DRB', 'TRB']
    + ['AST', 'STL', 'BLK', 'TOV', 'PTS']
    # columns added by this notebook rather than taken from the scraped table
    + ['3P_Rate', 'Year', 'Year_Bin', 'Status']
)

In [193]:
# Mean of every numeric focus column for each (Year_Bin, Status) group.
# focus_cols also contains the text column 'Team'; numeric_only=True leaves it
# out explicitly instead of relying on pandas silently dropping it, which
# newer pandas versions turn into a TypeError.
overall_summary = (
    nba_data[focus_cols]
    .groupby(['Year_Bin', 'Status'])
    .mean(numeric_only=True)
)
overall_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,FGA,FG%,3PA,3P%,2PA,2P%,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PTS,3P_Rate,Year
Year_Bin,Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2012-2014,Lottery,82.840476,0.443143,19.347619,0.346167,63.495238,0.472476,22.25,0.754238,11.459524,30.471429,41.916667,21.197619,7.511905,4.811905,14.7,96.895238,0.233213,2013.0
2012-2014,Playoffs,81.55,0.4595,20.502083,0.361833,61.047917,0.492542,23.185417,0.754542,10.885417,31.847917,42.7375,22.152083,7.908333,5.13125,14.5,99.845833,0.251577,2013.0
2015-2017,Lottery,84.678571,0.4455,23.261905,0.344762,61.42619,0.483833,22.895238,0.755833,10.604762,32.72619,43.340476,21.657143,7.561905,4.740476,14.495238,100.790476,0.274415,2016.0
2015-2017,Playoffs,84.375,0.459188,25.58125,0.360229,58.791667,0.5025,23.285417,0.763917,10.36875,33.31875,43.6875,22.885417,7.933333,4.904167,14.00625,104.477083,0.302841,2016.0
2018-2021,Lottery,88.351786,0.454571,31.928571,0.353036,56.421429,0.512286,22.225,0.761857,10.05,33.758929,43.803571,24.1125,7.617857,4.691071,14.507143,108.526786,0.361246,2019.5
2018-2021,Playoffs,87.925,0.467891,32.879687,0.366578,55.035938,0.529281,22.58125,0.779641,9.95,35.0625,45.004688,24.367188,7.6625,5.046875,13.903125,111.903125,0.373739,2019.5
