In [15]:
import os

import numpy as np
import pandas as pd
# Notebook-wide pandas display options; they affect every DataFrame repr below.
# Cap the number of rows shown: longer frames are rendered in truncated form.
pd.set_option('display.max_rows', 10)
# Render DataFrames as HTML tables in the notebook rather than plain text.
pd.set_option('display.notebook_repr_html', True)
# Cap the number of columns shown: wider frames elide the middle columns ("...").
pd.set_option('display.max_columns', 20)

import seaborn as sns

import statsmodels.api as sm
import statsmodels.formula.api as smf

from sklearn import linear_model

import matplotlib.pyplot as plt
%matplotlib inline
# Apply matplotlib's built-in 'ggplot' style sheet to all figures drawn after this point.
plt.style.use('ggplot')

## Team dataset

In [21]:
def read_dataset(x):
    """Read one CSV file from the relative ``../datasets`` directory.

    Parameters
    ----------
    x : str or path-like
        File name of the CSV, joined onto ``../datasets``. The path is
        relative, so it resolves against the current working directory.

    Returns
    -------
    The object returned by ``pd.read_csv`` for that path, called with
    default options (a ``pandas.DataFrame``).
    """
    dataset_dir = os.path.join('..', 'datasets')
    csv_path = os.path.join(dataset_dir, x)
    return pd.read_csv(csv_path)

# Load the team dataset (top100_nba_historical_teams.csv) through read_dataset,
# which resolves the file name under ../datasets.
df_team_stats = read_dataset('top100_nba_historical_teams.csv')

In [22]:
# Bare expression as the cell's last line, so the notebook renders the frame.
# display.max_rows is set to 10 earlier in this notebook, so only a truncated view is shown.
df_team_stats

Unnamed: 0,Rk,Season,Tm,Lg,G,W,L,W/L%,MP,FG,...,ORtg,DRtg,eFG%,TOV%,ORB%,FT/FGA,oop_eFG%,oop_TOV%,oop_ORB%,oop_FT/FGA
0,1,1970-71,MIL*,NBA,82,66,16,0.805,19780.0,3972,...,,,0.509,,,0.226,0.424,,,0.210
1,2,1995-96,CHI*,NBA,82,72,10,0.878,19730.0,3293,...,115.2,101.8,0.517,13.1,36.9,0.217,0.482,16.1,28.9,0.222
2,3,1971-72,LAL*,NBA,82,69,13,0.841,19755.0,3920,...,,,0.490,,,0.260,0.432,,,0.177
3,4,1971-72,MIL*,NBA,82,63,19,0.768,19780.0,3813,...,,,0.498,,,0.232,0.420,,,0.217
4,5,1996-97,CHI*,NBA,82,69,13,0.841,19730.0,3277,...,114.4,102.4,0.511,12.5,35.9,0.199,0.471,14.8,30.7,0.196
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,1984-85,BOS*,NBA,82,63,19,0.768,19705.0,3721,...,112.8,106.3,0.515,13.8,32.8,0.254,0.481,12.6,29.6,0.198
96,97,1994-95,ORL*,NBA,82,57,25,0.695,19930.0,3460,...,115.1,107.8,0.539,14.0,34.0,0.239,0.490,13.3,31.6,0.220
97,98,2011-12,OKC*,NBA,66,47,19,0.712,15990.0,2462,...,109.8,103.2,0.516,15.3,27.8,0.269,0.465,13.0,27.9,0.207
98,99,2012-13,LAC*,NBA,82,56,26,0.683,19730.0,3160,...,110.6,103.6,0.526,13.9,28.8,0.203,0.492,15.4,26.5,0.229


In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the team dataset. The previous `df` was never defined in this
# notebook, so the cell failed with NameError on a fresh kernel; use the frame
# loaded above instead.
df_team_stats.describe()

Unnamed: 0,Rk,G,W,L,W/L%,MP,FG,FGA,2P,2PA,...,ORtg,DRtg,eFG%,TOV%,ORB%,FT/FGA,oop_eFG%,oop_TOV%,oop_ORB%,oop_FT/FGA
count,100.0,100.0,100.0,100.0,100.0,92.0,100.0,100.0,100.0,100.0,...,80.0,80.0,100.0,80.0,80.0,100.0,87.0,80.0,80.0,87.0
mean,50.5,80.15,59.83,20.32,0.7468,19574.728261,3286.23,6940.97,2954.39,6044.08,...,110.84,102.695,0.49742,13.74625,29.98875,0.24171,0.468483,14.24125,28.565,0.222885
std,29.011492,5.682465,5.608534,4.306162,0.048731,1048.219936,481.778373,829.350086,611.776398,1295.62835,...,3.260954,3.258791,0.044546,1.173003,4.244043,0.033507,0.019041,1.446925,3.24997,0.032442
min,1.0,50.0,37.0,9.0,0.659,12075.0,1723.0,3812.0,1568.0,3291.0,...,98.0,91.3,0.297,10.9,20.5,0.163,0.42,11.7,20.9,0.139
25%,25.75,82.0,57.0,17.0,0.707,,3074.75,6453.0,2507.25,5026.5,...,,,0.49,,,0.21875,,,,
50%,50.5,82.0,60.0,21.0,0.744,,3332.0,6917.0,2795.0,5745.5,...,,,0.509,,,0.244,,,,
75%,75.25,82.0,63.0,24.0,0.76975,,3617.5,7315.25,3556.5,6962.75,...,,,0.52525,,,0.26425,,,,
max,100.0,82.0,73.0,28.0,0.89,19955.0,3972.0,9109.0,3972.0,9109.0,...,115.6,107.8,0.563,17.3,37.2,0.322,0.501,18.2,34.4,0.294


## Individual team dataset

In [23]:
# Load the individual-team dataset through read_dataset (file resolved under ../datasets).
# NOTE(review): the file name suggests the 1995-96 Chicago season — confirm against the data source.
df_indiv_team = read_dataset('95-96_chi.csv')

In [30]:
# Bare expression as the cell's last line, so the notebook renders the frame.
# display.max_rows is set to 10 earlier in this notebook, so only a truncated view is shown.
df_indiv_team

Unnamed: 0,Rk,G,Date,Unnamed: 3,Opp,W/L,Tm,Opp.1,FG,FGA,...,FT.1,FTA.1,FT%.1,ORB.1,TRB.1,AST.1,STL.1,BLK.1,TOV.1,PF.1
0,1,1,11/3/95,,CHH,W,105,91,43,86,...,16,25,0.640,14,43,14,8,4,19,22
1,2,2,11/4/95,,BOS,W,107,85,43,88,...,25,43,0.581,11,44,17,7,4,25,24
2,3,3,11/7/95,,TOR,W,117,108,44,80,...,30,37,0.811,7,32,25,6,3,19,26
3,4,4,11/9/95,@,CLE,W,106,88,41,79,...,19,23,0.826,9,34,27,9,1,9,18
4,5,5,11/11/95,,POR,W,110,106,40,81,...,18,30,0.600,15,40,23,4,1,17,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,78,78,4/14/96,@,CLE,W,98,72,34,70,...,6,10,0.600,11,33,20,7,3,16,15
78,79,79,4/16/96,@,MIL,W,86,80,33,85,...,9,15,0.600,10,42,14,7,1,9,22
79,80,80,4/18/96,,DET,W,110,79,43,87,...,10,20,0.500,11,36,12,12,2,17,21
80,81,81,4/20/96,,IND,L,99,100,40,90,...,26,41,0.634,11,39,15,14,3,17,16
