In [212]:
# imports and setup

import scipy as sc
from scipy.stats import norm

import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
from sklearn import linear_model

import matplotlib.pyplot as plt
%matplotlib inline  
plt.rcParams['figure.figsize'] = (10, 6)


In [213]:
# Open Dataframe
df_team = pd.read_csv('Data/Team-stats/stats-00-21')

# Do some initial cleaning to the data
# drop the unnamed column
df_team = df_team.drop(df_team.columns[0], 1)

# Handle changes in team names
df_team.loc[df_team['TEAM'] == 'Vancouver Grizzlies', 'TEAM'] = 'Memphis Grizzlies'
df_team.loc[df_team['TEAM'] == 'Charlotte Bobcats', 'TEAM'] = 'Charlotte Hornets'
df_team.loc[df_team['TEAM'] == 'New Orleans Hornets', 'TEAM'] = 'New Orleans Pelicans'
df_team.loc[df_team['TEAM'] == 'New Orleans/Oklahoma City Hornets', 'TEAM'] = 'New Orleans Pelicans'
df_team.loc[df_team['TEAM'] == 'New Jersey Nets', 'TEAM'] = 'Brooklyn Nets'
df_team.loc[df_team['TEAM'] == 'Seattle SuperSonics', 'TEAM'] = 'Oklahoma City Thunder'

# df_team.loc[df_team['SEASON'] == '2009-10'].tail(30)

In [214]:
# add some categorical columns to indicate NBA champions (1 or 0) *data from wikipedia
dict_champs = { 
    'Los Angeles Lakers': ['2000-01', '2001-02', '2008-09', '2009-10', '2019-20'],
    'Boston Celtics': ['2007-08'],
    'Golden State Warriors': ['2014-15', '2016-17', '2017-18'],
    'San Antonio Spurs': ['2002-03', '2004-05', '2006-07', '2013-14'],
    'Detroit Pistons': ['2003-04'],
    'Miami Heat': ['2005-06', '2011-12', '2012-13'],
    'Cleveland Cavaliers': ['2015-16'],
    'Dallas Mavericks':  ['2010-11'],
    'Toronto Raptors': ['2018-19']
}

# Initiliaze categorical vars we are abouot to update

df_team['Playoffs'] = 0
df_team['Conf Semi'] = 0 
df_team['Conf Final'] = 0
df_team['Finals'] = 0
df_team['Champs'] = 0


# Update Champs column with data from above dict *Note that to be a champ infers all other playoff appearances, update all
for key in dict_champs:
    for champ in dict_champs[key]:
        # identify the row of the data frame that matches the team and season
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Champs'] = 1
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Finals'] = 1
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Conf Final'] = 1
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Conf Semi'] = 1
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Playoffs'] = 1
# df_team[df_team['SEASON'] == '2019-20'].head(30)

In [215]:
# Finals appearance (1 or 0) *data from wikipedia
dict_lost_finals = { 
    'Los Angeles Lakers': ['2003-04', '2007-08'],
    'Boston Celtics': ['2009-10'],
    'Golden State Warriors': ['2015-16', '2018-19'],
    'San Antonio Spurs': ['2012-13'],
    'Philadelphia 76ers': ['2000-01'],
    'Detroit Pistons': ['2004-05'],
    'Miami Heat': ['2010-11', '2013-14', '2019-2020'],
    'Cleveland Cavaliers': ['2006-07', '2014-15', '2016-17', '2017-18'],
    'Oklahoma City Thunder': ['2011-2012'],
    'Dallas Mavericks':  ['2005-06'],
    'Brooklyn Nets': ['2001-02', '2002-03'],
    'Orlando Magic': ['2008-09']
}

# Write above dict into team stats data
for key in dict_lost_finals:
    for champ in dict_lost_finals[key]:
        # identify the row of the data frame that matches the team and season
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Finals'] = 1
# df_team.tail(15)

In [218]:
# Conference Finals appearance(1 or 0) 
dict_cf =  {
    'Atlanta Hawks': ['2014-15'],
    'Boston Celtics': ['2019-20', '2017-18', '2016-17', '2011-12', '2009-10', '2007-08', '2001-02'],
    'Brooklyn Nets': ['2002-03', '2001-02'],
    'Charlotte Hornets': [],
    'Chicago Bulls': ['2010-11'],
    'Clevland Cavaliers': ['2017-18', '2016-17', '2015-16', '2014-15', '2008-09', '2006-07'],
    'Dallas Mavericks': ['2010-11', '2005-06', '2002-03'],
    'Denver Nuggets': ['2019-20', '2008-09'],
    'Detroit Pistons': ['2007-08', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03'],
    'Golden State Warriors': ['2018-19', '2017-18', '2016-17', '2015-16', '2014-15'],
    'Houston Rockets': ['2017-18', '2014-15'],
    'Indiana Pacers': ['2013-14', '2012-13', '2003-04'],
    'Los Angeles Clippers': [],
    'Los Angeles Lakers': ['2019-20', '2009-10', '2008-09', '2007-08', '2003-04', '2001-02', '2000-01'],
    'Memphis Gizzlies': ['2012-13'],
    'Miami Heat': ['2019-20', '2013-14', '2012-13', '2011-12', '2010-11', '2005-06'],
    'Milwaukee Bucks': ['2018-19', '2000-01'],
    'Minnesota Timberwolves': ['2003-04'],
    'New Orleans Pelicans': [],
    'New York Knicks': [],
    'Oklahoma City Thunder': ['2015-16', '2013-14', '2012-13', '2011-12', '2010-11', '2004-05'],
    'Orlando Magic': ['2009-10', '2008-09'],
    'Philadelphia 76ers': ['2000-01'],
    'Phoenix Suns': ['2009-10', '2005-06', '2004-05'],
    'Portland Trail Blazers': ['2018-19'],
    'Sacramento Kings': ['2001-02'],
    'San Antonio Spurs': ['2016-17', '2013-14', '2012-13', '2011-12', '2007-08', '2006-07', '2004-05', '2002-03', '2000-01'],
    'Toronto Raptors': ['2018-19', '2015-16'],
    'Utah Jazz': ['2006-07'],
    'Washington Wizards': []   
}

# Write above dict into team stats data
for key in dict_made_playoffs:
    for champ in dict_made_playoffs[key]:
        # identify the row of the data frame that matches the team and season
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Conf Final'] = 1
# df_team[df_team['SEASON'] == '2019-20'].head(30)

In [249]:
# Helper function to format data properly
def num_to_season_format(list_of_nums):
    list_out = [0]*len(list_of_nums)
    for i, num in enumerate(list_of_nums):
        last_digits = num % 100
        if last_digits < 10:
            list_out[i] = str(num - 1) + '-' + '0' + str(last_digits)
        else: 
            list_out[i] = str(num - 1) + '-' + str(last_digits)
    return list_out
list_in = [2017, 2015, 2014, 2005]
l = num_to_season_format(list_in)
print(l)

['2016-17', '2014-15', '2013-14', '2004-05']


In [250]:
# Conference Semifinals appearance(1 or 0) 
dict_scf =  {
    'Atlanta Hawks': ['2015-16', '2014-15', '2010-11', '2009-10', '2008-09'],
    'Boston Celtics': ['2019-20', '2018-19', '2017-18', '2016-17', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2002-03', '2001-02'], 
    'Brooklyn Nets': ['2013-14', '2006-07', '2005-06', '2003-04', '2002-03', '2001-02'],
    'Charlotte Hornets': ['2001-02', '2000-01'],
    'Chicago Bulls': ['2014-15', '2012-13', '2010-11', '2006-07'],
    'Clevland Cavaliers': ['2017-18', '2016-17', '2015-16', '2014-15', '2009-10', '2008-09', '2007-08', '2006-07', '2005-06'],
    'Dallas Mavericks': ['2010-11', '2008-09', '2005-06', '2004-05', '2002-03', '2001-02', '2000-01'],
    'Denver Nuggets': ['2019-20', '2018-19', '2008-09'],
    'Detroit Pistons': ['2007-08', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02'], 
    'Golden State Warriors': ['2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2012-13', '2006-07'], 
    'Houston Rockets': ['2019-20', '2018-19', '2017-18', '2016-17', '2014-15', '2008-09'], 
    'Indiana Pacers': ['2013-14', '2012-13', '2011-12', '2004-05', '2003-04'],
    'Los Angeles Clippers': ['2019-20', '2014-15', '2013-14', '2011-12', '2005-06'],
    'Los Angeles Lakers': ['2019-20', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2003-04', '2002-03', '2001-02', '2000-01'],
    'Memphis Gizzlies': ['2014-15', '2012-13', '2010-11'],
    'Miami Heat': ['2019-20', '2015-16', '2013-14', '2012-13', '2011-12', '2010-11', '2005-06', '2004-05', '2003-04'],
    'Milwaukee Bucks': ['2019-20', '2018-19', '2000-01'], 
    'Minnesota Timberwolves': ['2003-04'],
    'New Orleans Pelicans': ['2017-18', '2007-08'],
    'New York Knicks': ['2012-13'],
    'Oklahoma City Thunder': ['2015-16', '2013-14', '2012-13', '2011-12', '2010-11', '2004-05'],
    'Orlando Magic': ['2009-10', '2008-09', '2007-08'],
    'Philadelphia 76ers': ['2018-19', '2017-18', '2011-12', '2002-03', '2000-01'],
    'Phoenix Suns': ['2009-10', '2006-07', '2005-06', '2004-05'], 
    'Portland Trail Blazers': ['2018-19', '2015-16', '2013-14'],
    'Sacramento Kings': ['2003-04', '2002-03', '2001-02', '2000-01'],
    'San Antonio Spurs': ['2016-17', '2015-16', '2013-14', '2012-13', '2011-12', '2009-10', '2007-08', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02', '2000-01'],
    'Toronto Raptors': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2000-01'],
    'Utah Jazz': ['2017-18', '2016-17', '2009-10', '2007-08', '2006-07'],
    'Washington Wizards': ['2016-17', '2014-15', '2013-14', '2004-05']
}

# Write above dict into team stats data
for key in dict_made_playoffs:
    for champ in dict_made_playoffs[key]:
        # identify the row of the data frame that matches the team and season
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Conf Semi'] = 1
# df_team[df_team['SEASON'] == '2019-20'].head(30)

In [252]:
# First round Playoff appearance (1 or 0)
dict_made_playoffs = {
    'Atlanta Hawks': ['2016-17', '2015-16', '2014-15', '2013-14', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08'],
    'Boston Celtics': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2004-05', '2003-04', '2002-03', '2001-02'],
    'Brooklyn Nets': ['2019-20', '2018-19', '2014-15', '2013-14', '2012-13', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02'],
    'Charlotte Hornets': ['2015-16', '2013-14', '2009-10', '2001-02', '2000-01'],
    'Chicago Bulls': ['2016-17', '2014-15', '2013-14', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2006-07', '2005-06', '2004-05'],
    'Clevland Cavaliers': ['2017-18', '2016-17', '2015-16', '2014-15', '2009-10', '2008-09', '2007-08', '2006-07', '2005-06'],
    'Dallas Mavericks': ['2019-20', '2015-16', '2014-15', '2013-14', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02', '2000-01'],
    'Denver Nuggets': ['2019-20', '2018-19', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2006-07', '2005-06', '2004-05', '2003-04'], 
    'Detroit Pistons': ['2018-19', '2015-16', '2008-09', '2007-08', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02'],
    'Golden State Warriors': ['2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2013-14', '2012-13', '2006-07'],
    'Houston Rockets': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2013-14', '2012-13', '2008-09', '2007-08', '2006-07', '2004-05', '2003-04'],
    'Indiana Pacers': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2013-14', '2012-13', '2011-12', '2010-11', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02', '2000-01'],
    'Los Angeles Clippers': ['2019-20', '2018-19', '2016-17', '2015-16', '2014-15', '2013-14', '2012-13', '2011-12', '2005-06'],
    'Los Angeles Lakers': ['2019-20', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2006-07', '2005-06', '2003-04', '2002-03', '2001-02', '2000-01'],
    'Memphis Gizzlies': ['2016-17', '2015-16', '2014-15', '2013-14', '2012-13', '2011-12', '2010-11', '2005-06', '2004-05', '2003-04'],
    'Miami Heat': ['2019-20', '2017-18', '2015-16', '2013-14', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2006-07', '2005-06', '2004-05', '2003-04', '2000-01'],
    'Milwaukee Bucks': ['2019-20', '2018-19', '2017-18', '2016-17', '2014-15', '2012-13', '2009-10', '2005-06', '2003-04', '2002-03', '2000-01'],
    'Minnesota Timberwolves': ['2017-18', '2003-04', '2002-03', '2001-02', '2000-01'],
    'New Orleans Pelicans': ['2017-18', '2014-15', '2010-11', '2008-09', '2007-08', '2003-04', '2002-03'],
    'New York Knicks': ['2012-13', '2011-12', '2010-11', '2003-04', '2000-01'],
    'Oklahoma City Thunder': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2013-14', '2012-13', '2011-12', '2010-11', '2009-10', '2004-05', '2001-02'],
    'Orlando Magic': ['2019-20', '2018-19', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2006-07', '2002-03', '2001-02', '2000-01'],
    'Philadelphia 76ers': ['2019-20', '2018-19', '2017-18', '2011-12', '2010-11', '2008-09', '2007-08', '2004-05', '2002-03', '2001-02', '2000-01'],
    'Phoenix Suns': ['2009-10', '2007-08', '2006-07', '2005-06', '2004-05', '2002-03', '2000-01'],
    'Portland Trail Blazers': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2013-14', '2010-11', '2009-10', '2008-09', '2002-03', '2001-02', '2000-01'],
    'Sacramento Kings': ['2005-06', '2004-05', '2003-04', '2002-03', '2001-02', '2000-01'],
    'San Antonio Spurs': ['2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2013-14', '2012-13', '2011-12', '2010-11', '2009-10', '2008-09', '2007-08', '2006-07', '2005-06', '2004-05', '2003-04', '2002-03', '2001-02', '2000-01'],
    'Toronto Raptors': ['2019-20', '2018-19', '2017-18', '2016-17', '2015-16', '2014-15', '2013-14', '2007-08', '2006-07', '2001-02', '2000-01'],
    'Utah Jazz': ['2019-20', '2018-19', '2017-18', '2016-17', '2011-12', '2009-10', '2008-09', '2007-08', '2006-07', '2002-03', '2001-02', '2000-01'],
    'Washington Wizards': ['2017-18', '2016-17', '2014-15', '2013-14', '2007-08', '2006-07', '2005-06', '2004-05']
}

# Write above dict into team stats data
for key in dict_made_playoffs:
    for champ in dict_made_playoffs[key]:
        # identify the row of the data frame that matches the team and season
        df_team.loc[(df_team['TEAM'] == key) & (df_team['SEASON'] == champ), 'Playoffs'] = 1
#df_team[df_team['SEASON'] == '2019-20'].head(30)

In [254]:
df_team[df_team['SEASON'] == '2019-20'].head(30)

Unnamed: 0,TEAM,GP,W,L,WIN%,MIN,PTS,FGM,FGA,FG%,...,BLKA,PF,PFD,+/-,SEASON,Playoffs,Conf Semi,Conf Final,Finals,Champs
30,Atlanta Hawks,67,20,47,0.299,48.6,111.8,40.6,90.6,44.9,...,6.4,23.1,21.0,-8.0,2019-20,0,0,0,0,0
31,Boston Celtics,72,48,24,0.667,48.4,113.7,41.3,89.6,46.1,...,5.5,21.6,20.7,6.3,2019-20,1,1,1,0,0
32,Brooklyn Nets,72,35,37,0.486,48.6,111.8,40.4,90.3,44.8,...,5.3,21.0,21.1,-0.6,2019-20,1,1,1,0,0
33,Charlotte Hornets,65,23,42,0.354,48.5,102.9,37.3,85.9,43.4,...,5.0,18.8,20.6,-6.8,2019-20,0,0,0,0,0
34,Chicago Bulls,65,22,43,0.338,48.2,106.8,39.6,88.6,44.7,...,5.9,21.8,19.2,-3.1,2019-20,0,0,0,0,0
35,Cleveland Cavaliers,65,19,46,0.292,48.4,106.9,40.3,87.9,45.8,...,6.3,18.3,19.6,-7.9,2019-20,0,0,0,0,0
36,Dallas Mavericks,75,43,32,0.573,48.5,117.0,41.7,90.3,46.1,...,4.1,19.5,21.3,4.9,2019-20,1,1,1,0,0
37,Denver Nuggets,73,46,27,0.63,48.6,111.3,42.0,88.9,47.3,...,4.4,20.3,20.5,2.1,2019-20,1,1,1,0,0
38,Detroit Pistons,66,20,46,0.303,48.4,107.2,39.3,85.7,45.9,...,5.6,19.7,19.8,-3.6,2019-20,0,0,0,0,0
39,Golden State Warriors,65,15,50,0.231,48.4,106.3,38.6,88.2,43.8,...,4.9,20.1,20.1,-8.7,2019-20,0,0,0,0,0


In [255]:
df_team.to_csv('Data/Team-stats/Cleaned-dataframe')