In [241]:
import pandas as pd
import sklearn.model_selection as model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import metrics
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_predict, cross_validate
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

In [242]:
#Importing NCAA and draft merge data
ncaa_draft_data = pd.read_csv('../static/data/ncaa_model_in.csv')
ncaa_draft_data.head()

Unnamed: 0,Player,Class,Season,Pos,School,Conf,G,MP,FG,FGA,...,Rupp Trophy,Sporting News Player of the Year,UPI Player of the Year,USBWA Freshman of the Year,Year Drafted,Rd,Pk,Tm,Projected Pk,Team Score
0,Joe Adkins,JR,1999,G,Oklahoma State,Big 12,33,27.697,3.242,8.606,...,0,0,0,0,0,0,0,,0,0
1,Brian Montonati,JR,1999,F,Oklahoma State,Big 12,34,18.0,2.5,4.912,...,0,0,0,0,0,0,0,,0,0
2,Jerome Moiso,FR,1999,F,UCLA,Pac-10,29,23.759,4.517,9.276,...,0,0,0,0,0,0,0,,0,0
3,Jason Miskiri,SR,1999,G,George Mason,CAA,29,34.345,5.414,13.552,...,0,0,0,0,0,0,0,,0,0
4,Andre Miller,SR,1999,G,Utah,WAC,33,33.091,5.758,11.727,...,0,0,0,0,0,0,0,,0,0


In [243]:
# Cleaning data -- Replacing undrafted players with pick 61 and team to blank
ncaa_draft_data['Pk'].replace(0, 61,inplace=True)
ncaa_draft_data['Projected Pk'].replace(0, 61,inplace=True)
ncaa_draft_data.Tm = ncaa_draft_data.Tm.fillna('')
ncaa_draft_data.head(8)

Unnamed: 0,Player,Class,Season,Pos,School,Conf,G,MP,FG,FGA,...,Rupp Trophy,Sporting News Player of the Year,UPI Player of the Year,USBWA Freshman of the Year,Year Drafted,Rd,Pk,Tm,Projected Pk,Team Score
0,Joe Adkins,JR,1999,G,Oklahoma State,Big 12,33,27.697,3.242,8.606,...,0,0,0,0,0,0,61,,61,0
1,Brian Montonati,JR,1999,F,Oklahoma State,Big 12,34,18.0,2.5,4.912,...,0,0,0,0,0,0,61,,61,0
2,Jerome Moiso,FR,1999,F,UCLA,Pac-10,29,23.759,4.517,9.276,...,0,0,0,0,0,0,61,,61,0
3,Jason Miskiri,SR,1999,G,George Mason,CAA,29,34.345,5.414,13.552,...,0,0,0,0,0,0,61,,61,0
4,Andre Miller,SR,1999,G,Utah,WAC,33,33.091,5.758,11.727,...,0,0,0,0,0,0,61,,61,0
5,Chris Mihm,SO,1999,C,Texas,Big 12,32,32.094,4.5,10.031,...,0,0,0,0,2000,1,7,CHI,7,0
6,Greg McQuay,JR,1999,C,Purdue,Big Ten,33,25.545,4.182,7.364,...,0,0,0,0,0,0,61,,61,0
7,Dan McClintock,JR,1999,C,Northern Arizona,Big Sky,27,16.741,4.63,6.926,...,0,0,0,0,2000,2,53,DEN,53,0


In [244]:
# number of rows
ncaa_draft_data.shape[0]

31896

In [245]:
# deleting rows on PER less than 0 and undrafted 
delete_perlessthan0 = ncaa_draft_data[(ncaa_draft_data.PER < 0) & (ncaa_draft_data.Pk == 61)].index
ncaa_draft_data.drop(delete_perlessthan0 , inplace=True)
ncaa_draft_data.shape[0]

30625

In [246]:
# deleting rows on PER greater than 50 and undrafted 
delete_pergreaterthan50=ncaa_draft_data[(ncaa_draft_data.PER > 40) & (ncaa_draft_data.Pk == 61)].index
ncaa_draft_data.drop(delete_pergreaterthan50 , inplace=True)
ncaa_draft_data.shape[0]

30362

In [247]:
ncaa_draft_data.head(8)

Unnamed: 0,Player,Class,Season,Pos,School,Conf,G,MP,FG,FGA,...,Rupp Trophy,Sporting News Player of the Year,UPI Player of the Year,USBWA Freshman of the Year,Year Drafted,Rd,Pk,Tm,Projected Pk,Team Score
0,Joe Adkins,JR,1999,G,Oklahoma State,Big 12,33,27.697,3.242,8.606,...,0,0,0,0,0,0,61,,61,0
1,Brian Montonati,JR,1999,F,Oklahoma State,Big 12,34,18.0,2.5,4.912,...,0,0,0,0,0,0,61,,61,0
2,Jerome Moiso,FR,1999,F,UCLA,Pac-10,29,23.759,4.517,9.276,...,0,0,0,0,0,0,61,,61,0
3,Jason Miskiri,SR,1999,G,George Mason,CAA,29,34.345,5.414,13.552,...,0,0,0,0,0,0,61,,61,0
4,Andre Miller,SR,1999,G,Utah,WAC,33,33.091,5.758,11.727,...,0,0,0,0,0,0,61,,61,0
5,Chris Mihm,SO,1999,C,Texas,Big 12,32,32.094,4.5,10.031,...,0,0,0,0,2000,1,7,CHI,7,0
6,Greg McQuay,JR,1999,C,Purdue,Big Ten,33,25.545,4.182,7.364,...,0,0,0,0,0,0,61,,61,0
7,Dan McClintock,JR,1999,C,Northern Arizona,Big Sky,27,16.741,4.63,6.926,...,0,0,0,0,2000,2,53,DEN,53,0


Modeling


In [248]:
#columns:
#Player,Class,Season,Pos,School,Conf,G,MP,FG,FGA,2P,2PA,3P,3PA,FT,FTA,ORB,DRB,TRB,AST,STL,BLK,
#TOV,PF,PTS,PER,eFG%,WS,AP Player of the Year,NABC Defensive Player of the Year,NABC Player of the Year,
#Naismith Award,NCAA Tournament Most Outstanding Player,NIT Most Valuable Player,Rupp Trophy,
#Sporting News Player of the Year,UPI Player of the Year,USBWA Freshman of the Year,Year Drafted,Rd,Pk,Tm,,Projected Pk,Team Score

In [249]:
#features  33
#G,MP,FG,FGA,2P,2PA,3P,3PA,FT,FTA,ORB,DRB,TRB,AST,STL,BLK,
#TOV,PF,PTS,PER,eFG%,WS,AP Player of the Year,NABC Defensive Player of the Year,NABC Player of the Year,
#Naismith Award,NCAA Tournament Most Outstanding Player,NIT Most Valuable Player,Rupp Trophy,
#Sporting News Player of the Year,UPI Player of the Year,USBWA Freshman of the Year,Pk

In [250]:
#features and target
features = data.drop(['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Tm','Projected Pk','Team Score'],1)
target = data['Pk']

In [251]:
#split training and testing data
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size = 0.40)

In [252]:
#standardize the features
scaler = preprocessing.StandardScaler().fit(features_train)
scaler.transform(features_train)
scaler.transform(features_test)

array([[-2.02419678, -1.32514144, -1.11100639, ...,  0.        ,
        -0.02504897,  0.14641655],
       [-1.93129503, -1.36979582, -1.11100639, ...,  0.        ,
        -0.02504897,  0.14641655],
       [-1.46678632, -1.22306153, -1.03514778, ...,  0.        ,
        -0.02504897,  0.14641655],
       ...,
       [-2.02419678, -1.4144502 , -1.11100639, ...,  0.        ,
        -0.02504897,  0.14641655],
       [ 0.66995375,  0.08298978,  0.37433684, ...,  0.        ,
        -0.02504897,  0.14641655],
       [ 0.57705201, -0.92477027, -0.94655767, ...,  0.        ,
        -0.02504897,  0.14641655]])

In [253]:
#Fit model by a multi-layer perceptron neural network using lbfgs optimization
model_MLP = MLPRegressor(hidden_layer_sizes = (33,), activation='identity', solver='lbfgs', alpha = 0.005, max_iter = 500, shuffle=True)
model_MLP.fit(features_train, target_train)

MLPRegressor(activation='identity', alpha=0.005, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(33,), learning_rate='constant',
             learning_rate_init=0.001, max_iter=500, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=None, shuffle=True, solver='lbfgs', tol=0.0001,
             validation_fraction=0.1, verbose=False, warm_start=False)

In [254]:
#Fit a model by Ridge Regression
model_ridge = Ridge(alpha=1.0)
model_ridge.fit(features_train, target_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [255]:
#Fit a model by a linear SVR
model_SVR = SVR(kernel = 'linear')
model_SVR.fit(features_train, target_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [256]:
#Fit a model by Lasso Regression
model_lasso = linear_model.Lasso()
model_lasso.fit(features_train, target_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [257]:
#Fit model by linear regression
model_LR = LinearRegression()
model_LR.fit(features_train, target_train)
print(model_LR.intercept_)
print(model_LR.coef_)

1.5631940186722204e-13
[-4.15567180e-16 -1.94289029e-16  4.98469321e-13  1.59800992e-13
 -4.54147570e-13 -1.59811400e-13 -4.30683484e-13 -1.59604101e-13
  2.18852714e-14  3.47812057e-16  5.83517609e-16  5.45570533e-16
 -1.65058939e-15 -2.16710330e-15  9.96815477e-16  2.52900999e-15
  5.47738938e-16  7.90600224e-16 -2.27439595e-14 -4.52762827e-16
 -1.29968735e-15 -1.92445886e-15 -5.19744613e-14 -3.08274592e-14
  4.10732366e-14 -1.83292111e-14 -2.87319903e-14  3.13420105e-15
 -4.46565172e-15 -1.77132970e-15 -1.05879118e-22 -3.52867429e-14
  1.00000000e+00]


In [258]:
#Predict target on testing data
target_pred_MLP = model_MLP.predict(features_test)
target_pred_LR = model_LR.predict(features_test)
target_pred_ridge = model_ridge.predict(features_test)
target_pred_SVR = model_SVR.predict(features_test)
target_pred_lasso = model_lasso.predict(features_test)

In [259]:
#Importing NCAA and draft merge data
nba_draft_data = ncaa_draft_data
pro_features = nba_draft_data.drop(['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Tm','Projected Pk','Team Score'], 1)
scaler.transform(pro_features)

array([[ 0.94865898,  0.96982576,  0.60880888, ...,  0.        ,
        -0.02504897,  0.14641655],
       [ 1.04156072,  0.10379872,  0.21519292, ...,  0.        ,
        -0.02504897,  0.14641655],
       [ 0.57705201,  0.61812787,  1.28517053, ...,  0.        ,
        -0.02504897,  0.14641655],
       ...,
       [ 0.01964156, -0.43205384, -0.1190093 , ...,  0.        ,
        -0.02504897,  0.14641655],
       [-2.02419678, -1.50375896, -1.11100639, ...,  0.        ,
        -0.02504897,  0.14641655],
       [ 0.66995375,  0.44022482,  0.42738481, ...,  0.        ,
        -0.02504897,  0.14641655]])

In [265]:
#Using the multi-layer perceptron neural network model to predict current prospects
MLPPrediction = model_MLP.predict(pro_features)
nba_draft_data['Projected Pk'] = MLPPrediction
nba_draft_data['Projected Pk'] = nba_draft_data['Projected Pk'].round()
MLPResults = nba_draft_data[['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Pk','Tm','Projected Pk']]
#MLPResults.head(8)
mlpdiff = MLPResults[(MLPResults['Pk'] != MLPResults['Projected Pk'])]
mlpdiff.head()

Unnamed: 0,Player,Class,Season,Pos,School,Conf,Year Drafted,Rd,Pk,Tm,Projected Pk
13737,Emeka Okafor,JR,2011,F,Western Illinois,Summit,2004,1,2,CHA,1.0


In [261]:
#Using the Linear regression perceptron neural network model to predict current prospects
LRPrediction = model_LR.predict(pro_features)
nba_draft_data['Projected Pk'] = LRPrediction
nba_draft_data['Projected Pk'] = nba_draft_data['Projected Pk'].round()
LRResults = nba_draft_data[['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Pk','Tm','Projected Pk']]
lrdiff = LRResults[(LRResults['Pk'] != LRResults['Projected Pk'])]
lrdiff.shape[0]


0

In [262]:
#Using the Ridge perceptron neural network model to predict current prospects
RidgePrediction = model_ridge.predict(pro_features)
nba_draft_data['Projected Pk'] = RidgePrediction
nba_draft_data['Projected Pk'] = nba_draft_data['Projected Pk'].round()
RidgeResults = nba_draft_data[['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Pk','Tm','Projected Pk']]
ridgediff = RidgeResults[(LRResults['Pk'] != RidgeResults['Projected Pk'])]
ridgediff.shape[0]

0

In [263]:
#Using the lasso perceptron neural network model to predict current prospects
lassoPrediction = model_lasso.predict(pro_features)
nba_draft_data['Projected Pk'] = lassoPrediction
nba_draft_data['Projected Pk'] = nba_draft_data['Projected Pk'].round()
lassoResults = nba_draft_data[['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Pk','Tm','Projected Pk']]
lassodiff = lassoResults[(lassoResults['Pk'] != lassoResults['Projected Pk'])]
lassodiff.shape[0]
# lassodiff.head(635)

647

In [264]:
#Using the SVR perceptron neural network model to predict current prospects
SVRPrediction = model_SVR.predict(pro_features)
nba_draft_data['Projected Pk'] = SVRPrediction
nba_draft_data['Projected Pk'] = nba_draft_data['Projected Pk'].round()
SVRResults = nba_draft_data[['Player','Class','Season','Pos','School','Conf','Year Drafted','Rd','Pk','Tm','Projected Pk']]
svrdiff = SVRResults[(SVRResults['Pk'] != SVRResults['Projected Pk'])]
svrdiff.shape[0]

0