# What Makes an NBA Champion?

#### Daniel Abboudi and Sean Campi
##### Data Bootcamp, NYU Stern 4/25/2021

In [1]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from sklearn.manifold import TSNE
import plotly.express as px

Data is from [NBA.com](https://www.nba.com/stats/teams/)
<br>
Includes Advanced Stats, Traditional Stats (standardized over 100 possessions), and Opponent Stats (standardized over 100 possessions)
<br>
<br>
*Note: Unfortunately, NBA.com does not allow for scraping or downloading of their data. We copy and pasted the relevant data into three excel files and uploaded them to the project's github repository*

In [95]:
# Import the data sets
adv = pd.read_excel('https://github.com/danielabboudi/DB_Project/raw/main/NBA_Advanced.xlsx')
trad = pd.read_excel('https://github.com/danielabboudi/DB_Project/raw/main/NBA_Traditional_per100.xlsx')
opp = pd.read_excel('https://github.com/danielabboudi/DB_Project/raw/main/NBA_Opponent_per100.xlsx')

In [134]:
# Merge the data sets
merge1 = pd.merge(trad,adv,how='left',left_on=['Season','TEAM'],right_on=['Season','TEAM'])
df = pd.merge(merge1,opp,how='left',left_on=['Season','TEAM'],right_on=['Season','TEAM'])

Unnamed: 0,Season,TEAM,GP_x,W_x,L_x,WIN%,MIN_x,PTS,FGM,FGA,...,OPPREB,OPPAST,OPPTOV,OPPSTL,OPPBLK,OPPBLKA,OPPPF,OPPPFD,OPPPTS,OPP +/-
0,2001,San Antonio Spurs,82,58,24,0.707,52.8,105.0,38.4,83.3,...,45.2,21.5,14.7,8.0,5.7,7.7,25.9,20.6,96.5,-8.5
1,2001,Los Angeles Lakers,82,56,26,0.683,51.6,107.0,40.3,86.7,...,43.9,21.5,13.9,8.0,4.2,6.4,26.0,24.3,103.4,-3.6
2,2001,Philadelphia 76ers,82,56,26,0.683,52.1,101.9,38.1,85.1,...,44.0,23.2,16.6,8.8,6.0,5.4,25.4,22.0,97.3,-4.6
3,2001,Sacramento Kings,82,55,27,0.671,50.1,104.2,39.1,87.0,...,47.0,22.7,16.7,8.3,5.9,5.4,22.0,19.9,98.2,-6.0
4,2001,Dallas Mavericks,82,53,29,0.646,50.8,105.6,39.5,86.1,...,47.3,22.8,16.3,8.0,4.9,6.3,23.7,24.4,101.1,-4.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021,Oklahoma City Thunder,60,20,40,0.333,47.4,103.6,38.2,86.0,...,46.2,24.9,12.8,8.9,5.0,4.4,18.0,17.8,112.6,9.3
622,2021,Orlando Magic,59,18,41,0.305,48.5,105.0,38.8,90.2,...,47.2,26.2,13.2,7.6,5.3,4.3,18.6,17.0,113.7,8.5
623,2021,Detroit Pistons,61,18,43,0.295,48.8,108.2,38.9,86.6,...,44.6,24.8,14.4,8.0,5.9,5.2,20.8,20.9,111.9,3.8
624,2021,Minnesota Timberwolves,61,17,44,0.279,47.2,108.0,39.3,88.5,...,45.0,26.1,14.8,7.5,5.5,5.4,19.2,20.3,115.0,6.9


In [136]:
# Convert percentages into decimals
df['FG%'] = df['FG%']/100
df['3P%'] = df['3P%']/100
df['FT%'] = df['FT%']/100
df['OREB%'] = df['OREB%']/100
df['DREB%'] = df['DREB%']/100
df['TOV%'] = df['TOV%']/100
df['TS%'] = df['TS%']/100
df['STL'] = df['STL']/100
df['BLK'] = df['BLK']/100

df['OPPFG%'] = df['OPPFG%']/100
df['OPP3P%'] = df['OPP3P%']
df['OPPFT%'] = df['OPPFT%']/100
df['OPPTOV'] = df['OPPTOV']/100
df['OPPSTL'] = df['OPPSTL']/100
df['OPPBLK'] = df['OPPBLK']/100

In [137]:
# Calculate new columns for additional metrics
df['FTR'] = df['FTA']/df['FGA']
df['3PFREQ'] = df['3PA']/df['FGA']
df['OPPTS%'] = df['OPPPTS']/(2*(df['OPPFGA']+0.44*df['OPPFTA']))
df['OPPFTR'] = df['OPPFTA']/df['OPPFGA']
df['OPP3PFREQ'] = df['OPP3PA']/df['OPPFGA']
df['OPPOREB%'] = 1-df['DREB%']
df['OPPDREB%'] = 1-df['OREB%']
df['OPPAST/TO'] = df['OPPAST']/(df['OPPTOV']*100)

In [140]:
# Select the columns of data we want to use
remove_col = ['GP_x','W_x','L_x','MIN_x','AST RATIO','EFG%','REB%','AST%','PIE','POSS','GP_y','W_y','L_y','MIN_y','PTS','FGM','3PM',
             'FTM','OREB','DREB','REB','AST','TOV','BLKA','PF','PFD','+/-','Unnamed: 28','GP','W','L','MIN','OPPFGM',
             'OPP3PM','OPPFTM','OPPOREB','OPPDREB','OPPREB','OPPAST','OPPBLKA','OPPPF','OPPPFD','OPPPTS','OPP +/-',
             'FGA','3PA','FTA','OPPFGA','OPP3PA','OPPFTA']

df = df.drop(remove_col,axis=1)
df

Unnamed: 0,Season,TEAM,WIN%,FG%,3P%,FT%,STL,BLK,OFFRTG,DEFRTG,...,OPPSTL,OPPBLK,FTR,3PFREQ,OPPTS%,OPPFTR,OPP3PFREQ,OPPOREB%,OPPDREB%,OPPAST/TO
0,2001,San Antonio Spurs,0.707,0.461,0.407,0.715,0.076,0.077,105.0,96.6,...,0.080,0.057,0.373349,0.175270,0.483351,0.245283,0.154273,0.297,0.703,1.462585
1,2001,Los Angeles Lakers,0.683,0.465,0.344,0.683,0.073,0.064,107.0,103.6,...,0.080,0.042,0.349481,0.190311,0.514653,0.309955,0.170814,0.309,0.662,1.546763
2,2001,Philadelphia 76ers,0.683,0.447,0.326,0.745,0.091,0.054,101.9,97.3,...,0.088,0.060,0.351351,0.123384,0.499015,0.248009,0.186576,0.296,0.666,1.397590
3,2001,Sacramento Kings,0.671,0.449,0.354,0.771,0.099,0.054,104.2,98.5,...,0.083,0.059,0.297701,0.194253,0.498174,0.241302,0.158249,0.321,0.693,1.359281
4,2001,Dallas Mavericks,0.646,0.459,0.381,0.794,0.079,0.063,105.6,101.2,...,0.080,0.049,0.290360,0.202091,0.513803,0.333333,0.170163,0.322,0.728,1.398773
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021,Oklahoma City Thunder,0.333,0.444,0.346,0.732,0.068,0.045,103.6,112.8,...,0.089,0.050,0.238372,0.408140,0.571040,0.214206,0.400666,0.260,0.753,1.945312
622,2021,Orlando Magic,0.305,0.430,0.346,0.775,0.071,0.044,105.0,113.3,...,0.076,0.053,0.230599,0.362528,0.581526,0.212528,0.413870,0.251,0.749,1.984848
623,2021,Detroit Pistons,0.295,0.450,0.355,0.769,0.076,0.051,108.2,112.3,...,0.080,0.059,0.277136,0.386836,0.574506,0.259725,0.358124,0.276,0.726,1.722222
624,2021,Minnesota Timberwolves,0.279,0.444,0.350,0.772,0.086,0.054,108.0,114.9,...,0.075,0.055,0.247458,0.406780,0.591102,0.262615,0.373853,0.285,0.733,1.763514


In [141]:
# Create a dictionary of NBA Champions
champions = {2001: 'Los Angeles Lakers',
             2002: 'Los Angeles Lakers',
             2003: 'San Antonio Spurs',
             2004: 'Detroit Pistons',
             2005: 'San Antonio Spurs',
             2006: 'Miami Heat',
             2007: 'San Antonio Spurs',
             2008: 'Boston Celtics',
             2009: 'Los Angeles Lakers',
             2010: 'Los Angeles Lakers',
             2011: 'Dallas Mavericks',
             2012: 'Miami Heat',
             2013: 'Miami Heat',
             2014: 'San Antonio Spurs',
             2015: 'Golden State Warriors',
             2016: 'Cleveland Cavaliers',
             2017: 'Golden State Warriors',
             2018: 'Golden State Warriors',
             2019: 'Toronto Raptors',
             2020: 'Los Angeles Lakers'}

In [142]:
# Pull Champions from our Dictionary
df['Champion'] = 0
for i in range(0,len(df),1):
    if df['Season'][i] == 2021:
        df['Champion'][i] = 0 
    elif df['TEAM'][i] == champions[df['Season'][i]]:
        df['Champion'][i] = 1
    else:
        df['Champion'][i] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Champion'][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Champion'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Champion'][i] = 0


Unnamed: 0,Season,TEAM,WIN%,FG%,3P%,FT%,STL,BLK,OFFRTG,DEFRTG,...,OPPBLK,FTR,3PFREQ,OPPTS%,OPPFTR,OPP3PFREQ,OPPOREB%,OPPDREB%,OPPAST/TO,Champion
0,2001,San Antonio Spurs,0.707,0.461,0.407,0.715,0.076,0.077,105.0,96.6,...,0.057,0.373349,0.175270,0.483351,0.245283,0.154273,0.297,0.703,1.462585,0
1,2001,Los Angeles Lakers,0.683,0.465,0.344,0.683,0.073,0.064,107.0,103.6,...,0.042,0.349481,0.190311,0.514653,0.309955,0.170814,0.309,0.662,1.546763,1
2,2001,Philadelphia 76ers,0.683,0.447,0.326,0.745,0.091,0.054,101.9,97.3,...,0.060,0.351351,0.123384,0.499015,0.248009,0.186576,0.296,0.666,1.397590,0
3,2001,Sacramento Kings,0.671,0.449,0.354,0.771,0.099,0.054,104.2,98.5,...,0.059,0.297701,0.194253,0.498174,0.241302,0.158249,0.321,0.693,1.359281,0
4,2001,Dallas Mavericks,0.646,0.459,0.381,0.794,0.079,0.063,105.6,101.2,...,0.049,0.290360,0.202091,0.513803,0.333333,0.170163,0.322,0.728,1.398773,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021,Oklahoma City Thunder,0.333,0.444,0.346,0.732,0.068,0.045,103.6,112.8,...,0.050,0.238372,0.408140,0.571040,0.214206,0.400666,0.260,0.753,1.945312,0
622,2021,Orlando Magic,0.305,0.430,0.346,0.775,0.071,0.044,105.0,113.3,...,0.053,0.230599,0.362528,0.581526,0.212528,0.413870,0.251,0.749,1.984848,0
623,2021,Detroit Pistons,0.295,0.450,0.355,0.769,0.076,0.051,108.2,112.3,...,0.059,0.277136,0.386836,0.574506,0.259725,0.358124,0.276,0.726,1.722222,0
624,2021,Minnesota Timberwolves,0.279,0.444,0.350,0.772,0.086,0.054,108.0,114.9,...,0.055,0.247458,0.406780,0.591102,0.262615,0.373853,0.285,0.733,1.763514,0


In [144]:
df[df['Champion']==1][['Season','TEAM']]

Unnamed: 0,Season,TEAM
1,2001,Los Angeles Lakers
30,2002,Los Angeles Lakers
59,2003,San Antonio Spurs
92,2004,Detroit Pistons
118,2005,San Antonio Spurs
150,2006,Miami Heat
178,2007,San Antonio Spurs
206,2008,Boston Celtics
237,2009,Los Angeles Lakers
268,2010,Los Angeles Lakers
