In [2]:
import datadotworld as dw

#data handling, model creation/evaluation
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from scipy import stats

#make it so that we only show first 4 decimals for floats
np.set_printoptions(precision=4,suppress=True)

# visualization
%matplotlib inline
import seaborn as sns

In [3]:
# Root of the data.world site (not referenced by the cells visible here).
data_world_base_url = "https://data.world/"

# A dataset is addressed by its "<owner>/<dataset-id>" path on data.world.
dataset_key = "jgrosz99/nba-player-data-1978-2016"

In [4]:
# Load dataset (onto the local file system)
# `dw.load_dataset` is given the dataset path stored in `dataset_key`; the
# returned object is what later cells read their dataframes from.
dataset_local = dw.load_dataset(dataset_key)  # cached under ~/.dw/cache

In [11]:
# Pick the 'nba_season_data' table out of the loaded dataset's `dataframes`
# mapping (presumably one row per player-season -- confirm against the data).
yearly_data = dataset_local.dataframes['nba_season_data']

In [13]:
# NOTE: this binds a second name to the SAME object -- it is not a copy, so any
# in-place change made through `yearly_stats` is also visible via `yearly_data`.
yearly_stats = yearly_data

In [28]:
# Display every column name as a plain Python list to see what is available.
yearly_data.columns.tolist()

['year',
 'tm',
 'player',
 'age',
 'g',
 'mp',
 'per',
 'ts',
 '3par',
 'ftr',
 'orb',
 'drb',
 'trb',
 'ast',
 'stl',
 'blk',
 'tov',
 'usg',
 'column_s',
 'ows',
 'dws',
 'ws',
 'ws_48',
 'column_x',
 'obpm',
 'dbpm',
 'bpm',
 'vorp',
 'ows_48',
 'dws_48',
 'shot',
 'player_id',
 'team_mar',
 'team_mp',
 'team_gm',
 'year_3par',
 'team_ts',
 'tm_usg',
 'tm_ts_w_o_plyr',
 'reb_vers',
 'defense',
 'val_shot',
 'offense',
 'mpg_int',
 'raw_spm',
 'min',
 'raw_contrib',
 'tm_sum',
 'tm_adj',
 'bpm_2',
 'stderr',
 'contrib',
 'vorp_2',
 'reb_vers_2',
 'val_shot_2',
 'offense_2',
 'defense_2',
 'mpg',
 'raw_obpm',
 'contrib_2',
 'tm_ortg',
 'tm_sum_2',
 'tm_adj_2',
 'tm_drtg',
 'obpm_2',
 'ostderr',
 'ocontrib',
 'ovorp',
 'dbpm_2',
 'dstderr',
 'dcontrib',
 'dvorp',
 'sum_spm',
 'min_2',
 'mpg_2',
 'rempg',
 'bpm_3',
 'contrib_3',
 'vorp_3',
 'vorp_gm',
 'o_bpm',
 'ocontrib_2',
 'ovorp_2',
 'ovorp_gm',
 'd_bpm',
 'dcontrib_2',
 'dvorp_2',
 'dvorp_gm',
 'production',
 'prod_gm',
 'exp_bpm

In [None]:
# Subset of the `yearly_data` column names to keep when looking at shooting.
shooting_columns = "year tm player age g mp per ts 3par ftr usg".split()

In [39]:
# Five highest 'ts' rows among rows with more than 500 in the 'mp' column.
(
    yearly_stats
    .loc[lambda frame: frame['mp'] > 500, ['player', 'year', 'ts']]
    .sort_values(by='ts', ascending=False)
    .head()
)

Unnamed: 0,player,year,ts
16666,Brandan Wright,2015,0.762
15500,Chris Wilcox,2013,0.725
14738,Tyson Chandler,2012,0.708
1325,Artis Gilmore,1982,0.702
16320,Kyle Korver,2015,0.699


In [63]:
# Keep only the 'year' and 'ts' columns of the rows whose player is Rajon Rondo.
Rondo_shooting = yearly_stats.loc[
    yearly_stats['player'] == "Rajon Rondo",
    ['year', 'ts'],
]

In [66]:
# Index the frame by 'year' while leaving the 'year' column itself in place.
Rondo_shooting = Rondo_shooting.set_index('year', drop=False)

In [93]:
# Remove the 'year' column. The axis is given by keyword (`columns=`) because
# passing it positionally (`drop(labels, 1)`) is rejected by pandas 2.x.
Rondo_shooting = Rondo_shooting.drop(columns=['year'])

In [100]:
# Small scratch frame: integer columns "B" and "C", index named "A" with labels 1 and 4.
df = pd.DataFrame(
    {"B": [2, 5], "C": [3, 6]},
    index=pd.Index([1, 4], name="A"),
)

In [112]:
# This dataset is identified by its full data.world URL rather than a bare
# "<owner>/<dataset-id>" path; the value is handed to `dw.load_dataset` as-is.
rookie_data_key = 'https://data.world/exercises/logistic-regression-exercise-1'
rookie_data_set_local = dw.load_dataset(rookie_data_key) 

In [118]:
# Pick the 'nba_logreg' table out of the loaded rookie dataset's `dataframes` mapping.
rookies = rookie_data_set_local.dataframes['nba_logreg']

In [132]:
# Do not truncate long cell contents when displaying frames.
# 'display.max_rowwidth' is not a pandas option (it raised OptionError);
# 'display.max_colwidth' is presumably what was intended -- confirm. `None`
# means "no limit" (the older `-1` spelling is no longer accepted).
pd.set_option('display.max_colwidth', None)

In [137]:
# Lift the cap on how many columns are shown when a frame is displayed.
pd.options.display.max_columns = None

In [143]:
# Order rookies by 'target_5yrs' from highest to lowest and show the last 100
# rows, i.e. the end of the ordering where the lowest target values sit.
(
    rookies
    .sort_values(by='target_5yrs', ascending=False)
    .tail(100)
)

Unnamed: 0,name,gp,min,pts,fgm,fga,fg,3p_made,3pa,3p,ftm,fta,ft,oreb,dreb,reb,ast,stl,blk,tov,target_5yrs
1022,Joe Crispin,21,7.4,3.8,1.2,3.2,38.2,0.7,1.9,38.5,0.6,0.6,92.3,0.1,0.4,0.5,1.2,0.2,0.0,0.5,0.0
1024,Brandon Armstrong,35,5.6,1.8,0.8,2.4,31.8,0.1,0.5,29.4,0.1,0.3,50.0,0.3,0.2,0.5,0.2,0.2,0.0,0.2,0.0
998,Justin Anderson,55,11.8,3.8,1.3,3.2,40.6,0.4,1.5,26.5,0.8,1.0,80.0,0.3,2.1,2.4,0.5,0.3,0.5,0.4,0.0
231,Anthony Frederick,46,6.8,3.3,1.4,2.7,50.4,0.0,0.1,40.0,0.5,0.7,70.6,0.6,0.6,1.1,0.4,0.3,0.1,0.7,0.0
233,Everette Stephens,35,6.0,1.9,0.7,2.1,31.9,0.1,0.3,20.0,0.5,0.6,77.3,0.3,0.3,0.7,1.1,0.3,0.1,0.8,0.0
992,Adam Harrington,19,5.8,1.6,0.6,1.9,29.7,0.3,0.7,35.7,0.2,0.2,75.0,0.1,0.4,0.4,0.6,0.1,0.1,0.1,0.0
965,Vincent Yarbrough,59,23.4,6.9,2.8,7.3,39.3,0.4,1.3,26.9,0.8,1.1,79.0,0.6,2.1,2.7,2.2,1.0,0.6,1.4,0.0
969,Mike Batiste,75,16.6,6.4,2.6,6.2,42.2,0.2,1.1,22.2,0.9,1.2,78.4,1.1,2.3,3.4,0.7,0.6,0.2,0.9,0.0
972,Jarell Martin,27,14.1,5.7,2.0,4.3,46.6,0.0,0.1,0.0,1.7,2.3,72.6,1.4,1.7,3.2,0.6,0.3,0.3,0.7,0.0
973,Casey Jacobsen,72,15.9,5.1,1.7,4.5,37.3,0.7,2.3,31.5,1.0,1.5,68.6,0.4,0.8,1.2,1.0,0.5,0.1,0.8,0.0


In [148]:
# Base address that endpoint names are appended to.
# NOTE(review): the value carries no scheme ("https://") -- confirm whether
# callers are expected to add one.
br_base_url = "stats.nba.com/stats/"
def create_url(end_module, **kwargs):
    """Build the URL for an endpoint under ``br_base_url``.

    Parameters
    ----------
    end_module
        Endpoint name appended directly to ``br_base_url``.
    **kwargs
        Query parameters. They are URL-encoded and appended after ``?`` in the
        order they were passed.

    Returns
    -------
    str
        ``br_base_url + end_module``, followed by ``?key=value&...`` when at
        least one query parameter was supplied.

    Notes
    -----
    The endpoint and each ``name = value`` pair are still printed, as before,
    so cells that relied on that trace output behave the same.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import urlencode

    print(end_module)
    for name, value in kwargs.items():
        print('{0} = {1}'.format(name, value))

    # format() instead of "+" so a non-string endpoint does not raise here.
    url = '{0}{1}'.format(br_base_url, end_module)
    if kwargs:
        url = '{0}?{1}'.format(url, urlencode(kwargs))
    return url



In [151]:
# `player` is a submodule of nba_py; importing only the top-level package does
# not load it, which is what raised
# "module 'nba_py' has no attribute 'player'". Import it explicitly.
from nba_py import player as nba_player

nba_player.get_player("tim", "duncan")