In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error, r2_score, f1_score
from rfpimp import permutation_importances

from collections import OrderedDict
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

import logistic_regression_functions as lrf
import GradientDescent as GD
GradientDescent = GD.GradientDescent

from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant

In [2]:
from sklearn.cluster import KMeans

In [15]:
from sklearn import preprocessing

In [3]:
# Notebook-wide pandas display settings.
# Allow very wide frames to render without column truncation.
pd.set_option('display.max_columns', 2000)
# Show floats with 2 decimal places. The fully-qualified key is required:
# the bare 'precision' pattern matches more than one option in newer pandas
# (display.precision and styler.format.precision) and raises OptionError.
pd.set_option('display.precision', 2)

In [4]:
# Load the advanced team statistics table from the project's data folder.
tadv = pd.read_csv(
    filepath_or_buffer='data/TEAMstats - AdvancedTeam.csv',
)

In [5]:
# Get defensive votes: execute the defvotes script with the IPython %run magic
# so that the names it defines become available to the following cells.
# NOTE(review): the next cell uses sum_past_yrs, fv_dict2, replace_nans and
# make_df, none of which are defined in this notebook — presumably they all
# come from this script; confirm.
%run defvotes


In [6]:
# Build the d_votes table in three steps; each helper consumes the previous result.
# NOTE(review): helper semantics are not visible here — judging by the names,
# sum_past_yrs aggregates fv_dict2 over prior years, replace_nans fills missing
# values, and make_df converts the result to a DataFrame. Verify against defvotes.
fv = sum_past_yrs(fv_dict2)
sdd = replace_nans(fv)
d_votes = make_df(sdd)

In [7]:
#Get main player data
from process_players import *

In [8]:
# Load the cleaned player table and attach the defensive-vote columns to it
# in a single expression.
pdata = add_defvotes(get_clean_pdata(), d_votes)

In [9]:
# Add an age multiplier column, 'AgeMulti', derived from 'Age':
#   Age > 34        -> 0.67
#   31 < Age <= 34  -> 0.8
#   Age < 21        -> 1.33
#   21 <= Age < 24  -> 1.2
#   otherwise (including missing Age) -> 1.0
# np.select takes the first matching condition, so the stricter bound is listed
# before the looser one in each direction. The column is created as float from
# the start (an int column later assigned fractional values triggers pandas'
# incompatible-dtype warning) and is computed in one idempotent assignment.
pdata['AgeMulti'] = np.select(
    [
        pdata['Age'] > 34,
        pdata['Age'] > 31,
        pdata['Age'] < 21,
        pdata['Age'] < 24,
    ],
    [0.67, 0.8, 1.33, 1.2],
    default=1.0,
)

In [11]:
# Drop rows containing any NaN, then keep only players who appeared in more
# than 33 games (rows with G <= 33 are removed).
# NOTE(review): the earlier comment said "less than 31 games", which did not
# match the code; 33 is presumably half of the 66-game strike-shortened
# season — confirm the intended threshold.
p_wage = pdata.dropna()
p_wage = p_wage[p_wage['G'] > 33]

In [39]:
# Names of the numeric stat columns that get normalized.
cols = [
    'MP', 'PER', 'TS%', '3PAr', 'FTr',
    'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
    'OWS', 'DWS', 'WS', 'WS/48',
    'OBPM', 'DBPM', 'BPM', 'VORP',
    'MPG', 'advotes',
]

In [28]:
# Split p_wage into one DataFrame per position, in the order PF, PG, SF, SG, C.
# reset_index() gives each frame a fresh 0..n-1 index and keeps the old row
# labels in an 'index' column.
dflist = [
    p_wage[p_wage['Pos'] == position].reset_index()
    for position in ['PF', 'PG', 'SF', 'SG', 'C']
]

In [68]:
# Min-max scale the stat columns listed in `cols` separately within each
# position, and append the scaled values to that position's frame as new
# columns named '<original name>s' (e.g. 'MP' -> 'MPs', 'advotes' -> 'advotess').
#
# The scaler is fitted on the very frame the scaled columns are attached to
# (each entry of dflist), so rows always line up; the column names come from
# the single `cols` list instead of being repeated by hand.
scaled_cols = [name + 's' for name in cols]

dfscaledlist = []
for pos_df in dflist:
    # A fresh scaler per position: each stat is mapped to [0, 1] relative to
    # the min/max observed among players of that position only.
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(pos_df[cols])  # returns a numpy array
    scaled_df = pd.DataFrame(x_scaled, columns=scaled_cols, index=pos_df.index)
    dfscaledlist.append(pd.concat([pos_df, scaled_df], axis=1, sort=False))
        
        

In [75]:
#Rejoin positioned dataframes
dfs = dfscaledlist[0].append(dfscaledlist[1])
dfs = dfs.append(dfscaledlist[2])
dfs = dfs.append(dfscaledlist[3])
dfs = dfs.append(dfscaledlist[4])

In [76]:
# Preview the combined frame; only the first rows are shown so the notebook
# does not render the entire table.
dfs.head(10)

Unnamed: 0,index,Player,Pos,Age,G,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,TM,YR,YRprior,2YRprior,MPG,advotes,AgeMulti,MPs,PERs,TS%s,3PArs,FTrs,ORB%s,DRB%s,TRB%s,AST%s,STL%s,BLK%s,TOV%s,USG%s,OWSs,DWSs,WSs,WS/48s,OBPMs,DBPMs,BPMs,VORPs,MPGs,advotess
0,0,Shareef Abdur-Rahim,PF,29.0,72.0,1961.0,17.2,0.59,3.50e-02,0.44,6.4,14.8,10.6,13.2,1.3,1.6,12.5,20.1,4.0,2.2,6.2,1.52e-01,0.4,0.4,0.9,1.4,Sacramento Kings 05-06,6,5.0,4.0,27.24,0.00,1.00,0.58,0.51,0.63,4.18e-02,0.34,0.31,0.23,0.24,0.30,0.34,0.16,0.35,0.46,0.37,0.31,0.37,0.56,0.47,0.49,0.54,0.29,0.64,0.00e+00
1,12,Brian Skinner,PF,29.0,38.0,429.0,10.8,0.54,0.00e+00,0.39,8.3,19.2,13.8,6.0,1.3,3.4,18.2,10.6,0.2,0.6,0.8,9.40e-02,-3.7,2.1,-1.6,0.0,Sacramento Kings 05-06,6,5.0,4.0,11.29,0.00,1.00,0.09,0.29,0.52,0.00e+00,0.30,0.42,0.37,0.39,0.14,0.34,0.35,0.56,0.12,0.15,0.07,0.11,0.41,0.23,0.65,0.43,0.17,0.20,0.00e+00
2,16,Kenny Thomas,PF,28.0,82.0,2293.0,15.0,0.54,2.00e-03,0.34,10.3,21.2,15.8,12.0,1.6,1.2,16.3,16.6,2.2,3.3,5.5,1.15e-01,-1.1,1.8,0.7,1.6,Sacramento Kings 05-06,6,5.0,4.0,27.96,0.00,1.00,0.69,0.43,0.51,2.39e-03,0.25,0.54,0.44,0.49,0.27,0.45,0.12,0.49,0.33,0.27,0.47,0.34,0.46,0.38,0.62,0.53,0.31,0.66,0.00e+00
3,33,Kenny Thomas,PF,29.0,62.0,1412.0,10.2,0.49,3.00e-03,0.25,10.9,20.6,15.7,8.4,1.5,0.9,21.1,13.6,-0.1,1.4,1.2,4.10e-02,-3.1,1.2,-1.8,0.1,Sacramento Kings 06-07,7,6.0,5.0,22.77,0.00,1.00,0.41,0.27,0.39,3.58e-03,0.18,0.58,0.42,0.48,0.19,0.41,0.09,0.67,0.23,0.13,0.19,0.13,0.27,0.26,0.56,0.42,0.18,0.52,0.00e+00
4,35,Corliss Williamson,PF,33.0,68.0,1337.0,12.7,0.56,4.00e-03,0.45,5.4,14.2,9.7,5.2,1.1,0.9,14.9,21.9,0.6,0.9,1.5,5.40e-02,-3.2,-1.9,-5.1,-1.1,Sacramento Kings 06-07,7,6.0,5.0,19.66,0.00,0.80,0.38,0.35,0.56,4.78e-03,0.34,0.26,0.21,0.20,0.12,0.28,0.09,0.44,0.52,0.17,0.12,0.14,0.30,0.26,0.27,0.28,0.08,0.43,0.00e+00
5,49,Mikki Moore,PF,32.0,82.0,2385.0,11.5,0.61,4.00e-03,0.37,8.1,16.2,12.2,5.4,0.7,1.5,16.2,12.7,3.2,1.4,4.6,9.20e-02,-0.7,-0.1,-0.8,0.7,Sacramento Kings 07-08,8,7.0,6.0,29.09,0.00,0.80,0.72,0.31,0.70,4.78e-03,0.28,0.41,0.27,0.32,0.12,0.14,0.15,0.48,0.20,0.33,0.19,0.29,0.40,0.41,0.44,0.46,0.23,0.69,0.00e+00
6,57,Shelden Williams,PF,24.0,36.0,414.0,9.8,0.45,1.00e-02,0.51,9.6,20.5,15.1,4.0,1.7,2.0,10.3,14.9,0.0,0.5,0.5,6.00e-02,-4.6,-0.5,-5.1,-0.3,Atlanta Hawks 07-08,8,7.0,6.0,11.50,0.00,1.00,0.09,0.25,0.26,1.19e-02,0.39,0.50,0.42,0.46,0.09,0.48,0.20,0.26,0.27,0.14,0.06,0.10,0.32,0.17,0.40,0.28,0.14,0.20,0.00e+00
7,80,Mikki Moore,PF,33.0,46.0,746.0,8.6,0.57,0.00e+00,0.35,7.0,16.9,11.9,5.6,0.9,1.4,16.2,10.0,0.6,0.2,0.8,5.20e-02,-3.0,-0.6,-3.6,-0.3,Sacramento Kings 08-09,9,8.0,7.0,16.22,0.00,0.80,0.19,0.21,0.60,0.00e+00,0.26,0.35,0.30,0.31,0.13,0.21,0.14,0.48,0.10,0.17,0.01,0.11,0.30,0.27,0.39,0.34,0.14,0.33,0.00e+00
8,83,Andres Nocioni,PF,29.0,53.0,1278.0,11.4,0.55,4.65e-01,0.28,3.1,16.8,9.9,7.4,1.0,0.9,12.7,20.1,0.8,1.1,1.9,7.20e-02,-0.5,-1.7,-2.2,-0.1,Chicago Bulls 08-09,9,8.0,7.0,24.11,1.50,1.00,0.36,0.31,0.53,5.56e-01,0.20,0.12,0.29,0.21,0.17,0.24,0.09,0.35,0.46,0.18,0.15,0.16,0.35,0.42,0.29,0.40,0.16,0.55,5.46e-03
9,91,Jason Thompson,PF,22.0,82.0,2303.0,13.9,0.54,1.10e-02,0.40,11.8,19.0,15.4,6.8,1.0,1.9,14.8,19.3,1.8,0.9,2.7,5.70e-02,-1.7,-1.1,-2.7,-0.4,Sacramento Kings 08-09,9,8.0,7.0,28.09,0.00,1.20,0.69,0.39,0.51,1.31e-02,0.30,0.63,0.37,0.47,0.15,0.24,0.19,0.43,0.43,0.24,0.12,0.20,0.31,0.35,0.34,0.38,0.14,0.66,0.00e+00


In [60]:
# Preview the first per-position frame (PF); only the first rows are shown.
# NOTE(review): the saved output below labels the scaled columns 0..22, so it
# appears to predate the cell that names them ('MPs', 'PERs', ...) — re-run
# the notebook top to bottom to refresh it.
dfscaledlist[0].head(10)

Unnamed: 0,index,Player,Pos,Age,G,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,TM,YR,YRprior,2YRprior,MPG,advotes,AgeMulti,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22
0,0,Shareef Abdur-Rahim,PF,29.0,72.0,1961.0,17.2,0.59,3.50e-02,0.44,6.4,14.8,10.6,13.2,1.3,1.6,12.5,20.1,4.0,2.2,6.2,1.52e-01,0.4,0.4,0.9,1.4,Sacramento Kings 05-06,6,5.0,4.0,27.24,0.00,1.00,0.58,0.51,0.63,4.18e-02,0.34,0.31,0.23,0.24,0.30,0.34,0.16,0.35,0.46,0.37,0.31,0.37,0.56,0.47,0.49,0.54,0.29,0.64,0.00e+00
1,12,Brian Skinner,PF,29.0,38.0,429.0,10.8,0.54,0.00e+00,0.39,8.3,19.2,13.8,6.0,1.3,3.4,18.2,10.6,0.2,0.6,0.8,9.40e-02,-3.7,2.1,-1.6,0.0,Sacramento Kings 05-06,6,5.0,4.0,11.29,0.00,1.00,0.09,0.29,0.52,0.00e+00,0.30,0.42,0.37,0.39,0.14,0.34,0.35,0.56,0.12,0.15,0.07,0.11,0.41,0.23,0.65,0.43,0.17,0.20,0.00e+00
2,16,Kenny Thomas,PF,28.0,82.0,2293.0,15.0,0.54,2.00e-03,0.34,10.3,21.2,15.8,12.0,1.6,1.2,16.3,16.6,2.2,3.3,5.5,1.15e-01,-1.1,1.8,0.7,1.6,Sacramento Kings 05-06,6,5.0,4.0,27.96,0.00,1.00,0.69,0.43,0.51,2.39e-03,0.25,0.54,0.44,0.49,0.27,0.45,0.12,0.49,0.33,0.27,0.47,0.34,0.46,0.38,0.62,0.53,0.31,0.66,0.00e+00
3,33,Kenny Thomas,PF,29.0,62.0,1412.0,10.2,0.49,3.00e-03,0.25,10.9,20.6,15.7,8.4,1.5,0.9,21.1,13.6,-0.1,1.4,1.2,4.10e-02,-3.1,1.2,-1.8,0.1,Sacramento Kings 06-07,7,6.0,5.0,22.77,0.00,1.00,0.41,0.27,0.39,3.58e-03,0.18,0.58,0.42,0.48,0.19,0.41,0.09,0.67,0.23,0.13,0.19,0.13,0.27,0.26,0.56,0.42,0.18,0.52,0.00e+00
4,35,Corliss Williamson,PF,33.0,68.0,1337.0,12.7,0.56,4.00e-03,0.45,5.4,14.2,9.7,5.2,1.1,0.9,14.9,21.9,0.6,0.9,1.5,5.40e-02,-3.2,-1.9,-5.1,-1.1,Sacramento Kings 06-07,7,6.0,5.0,19.66,0.00,0.80,0.38,0.35,0.56,4.78e-03,0.34,0.26,0.21,0.20,0.12,0.28,0.09,0.44,0.52,0.17,0.12,0.14,0.30,0.26,0.27,0.28,0.08,0.43,0.00e+00
5,49,Mikki Moore,PF,32.0,82.0,2385.0,11.5,0.61,4.00e-03,0.37,8.1,16.2,12.2,5.4,0.7,1.5,16.2,12.7,3.2,1.4,4.6,9.20e-02,-0.7,-0.1,-0.8,0.7,Sacramento Kings 07-08,8,7.0,6.0,29.09,0.00,0.80,0.72,0.31,0.70,4.78e-03,0.28,0.41,0.27,0.32,0.12,0.14,0.15,0.48,0.20,0.33,0.19,0.29,0.40,0.41,0.44,0.46,0.23,0.69,0.00e+00
6,57,Shelden Williams,PF,24.0,36.0,414.0,9.8,0.45,1.00e-02,0.51,9.6,20.5,15.1,4.0,1.7,2.0,10.3,14.9,0.0,0.5,0.5,6.00e-02,-4.6,-0.5,-5.1,-0.3,Atlanta Hawks 07-08,8,7.0,6.0,11.50,0.00,1.00,0.09,0.25,0.26,1.19e-02,0.39,0.50,0.42,0.46,0.09,0.48,0.20,0.26,0.27,0.14,0.06,0.10,0.32,0.17,0.40,0.28,0.14,0.20,0.00e+00
7,80,Mikki Moore,PF,33.0,46.0,746.0,8.6,0.57,0.00e+00,0.35,7.0,16.9,11.9,5.6,0.9,1.4,16.2,10.0,0.6,0.2,0.8,5.20e-02,-3.0,-0.6,-3.6,-0.3,Sacramento Kings 08-09,9,8.0,7.0,16.22,0.00,0.80,0.19,0.21,0.60,0.00e+00,0.26,0.35,0.30,0.31,0.13,0.21,0.14,0.48,0.10,0.17,0.01,0.11,0.30,0.27,0.39,0.34,0.14,0.33,0.00e+00
8,83,Andres Nocioni,PF,29.0,53.0,1278.0,11.4,0.55,4.65e-01,0.28,3.1,16.8,9.9,7.4,1.0,0.9,12.7,20.1,0.8,1.1,1.9,7.20e-02,-0.5,-1.7,-2.2,-0.1,Chicago Bulls 08-09,9,8.0,7.0,24.11,1.50,1.00,0.36,0.31,0.53,5.56e-01,0.20,0.12,0.29,0.21,0.17,0.24,0.09,0.35,0.46,0.18,0.15,0.16,0.35,0.42,0.29,0.40,0.16,0.55,5.46e-03
9,91,Jason Thompson,PF,22.0,82.0,2303.0,13.9,0.54,1.10e-02,0.40,11.8,19.0,15.4,6.8,1.0,1.9,14.8,19.3,1.8,0.9,2.7,5.70e-02,-1.7,-1.1,-2.7,-0.4,Sacramento Kings 08-09,9,8.0,7.0,28.09,0.00,1.20,0.69,0.39,0.51,1.31e-02,0.30,0.63,0.37,0.47,0.15,0.24,0.19,0.43,0.43,0.24,0.12,0.20,0.31,0.35,0.34,0.38,0.14,0.66,0.00e+00


In [38]:
# Scratch cell: print the first three stat column names, one per line.
# The NameError about 'index' saved with this cell cannot come from this loop,
# which never references that name.
for i in ('MP', 'PER', 'TS%'):
    print(i)

NameError: name 'index' is not defined