In [None]:
# Import necessary packages

%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn import neighbors
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
from scipy.stats import linregress
from scipy.stats import norm
from operator import itemgetter
from sklearn.model_selection import cross_val_score

In [None]:
# Import the datasets
# All four paths are relative, so the CSV files must be in the kernel's working directory.

# dfCollege and dfNBA are plotted against each other row by row further down.
# NOTE(review): that presumably requires both files to list the same players in the same order - confirm.
dfCollege = pd.read_csv('final-college-data.csv')
dfNBA = pd.read_csv('final-nba-data.csv')
# dfCombined is the frame that gets split into train/test sets for the regression models.
dfCombined = pd.read_csv('final-data-combined.csv')

# dfDraftClass holds the prospects the fitted models are later asked to predict for.
dfDraftClass = pd.read_csv('final-draft-class-data.csv')

In [None]:
# Preview the combined dataset (first five rows) to check the columns loaded as expected

dfCombined.head()

In [None]:
# Preview the draft class dataset (first five rows) to check the columns loaded as expected

dfDraftClass.head()

In [None]:
# Prepare draft class dataframe to be plugged into machine learning models by skipping columns that aren't parameters in the model
# Columns are picked by position, not by name.
# NOTE(review): presumably positions 2, 3, 4, 6, 7, 9, 10, 11 line up, in order, with the eight
# feature columns the models are trained on ('Pk', 'Age at Draft', 'Height (in)', 'AST-Col',
# 'TOV-Col', 'AST%-Col', 'TOV%-Col', 'SOS-Col') - verify against the CSV header.

draftClassTest = dfDraftClass.iloc[:, [2, 3, 4, 6, 7, 9, 10, 11]]

# First column is used as the player label when predictions are printed and plotted.
draftClassNames = dfDraftClass.iloc[:, 0]

draftClassTest.head()

# Correlation between college and NBA distribution stats

In [None]:
# College vs. NBA assists per game: scatter plot, mean guide lines, a
# least-squares fit line, and the regression statistics (printed and also
# shown in the subtitle). The figure is written to 'ast-correl.png'.
plt.style.use('fivethirtyeight')

ast, ax = plt.subplots()

college_ast = dfCollege['AST']
nba_ast = dfNBA['AST']

# Artists are added in the original order so z-order and legend entries are unchanged.
ax.scatter(college_ast, nba_ast, color = 'orange')
ax.axvline(x = np.mean(college_ast), color = 'black')
ax.axhline(y = np.mean(nba_ast), label = "Average", color = 'black')

# Degree-1 polynomial fit, evaluated at the sorted unique college values.
fit_x = np.unique(college_ast)
fit_line = np.poly1d(np.polyfit(college_ast, nba_ast, 1))
ax.plot(fit_x, fit_line(fit_x))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

ast.suptitle("Correlation between college and NBA AST/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College AST/G")
ax.set_ylabel("NBA AST/G")

slope, intercept, r_value, p_value, std_err = linregress(college_ast, nba_ast)
print("College and NBA AST: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = ", ".join([
    "r = " + str(round(r_value, 3)),
    "p = " + str(round(p_value, 9)),
    "rsquared = " + str(round(rsqaured, 3)),
])

ax.set_title("http://dribbleanalytics.blogspot.com.\n" + rpString, size = 12, fontname = 'Rockwell')
ast.savefig('ast-correl.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# College vs. NBA turnovers per game: scatter plot, mean guide lines, a
# least-squares fit line, and the regression statistics (printed and also
# shown in the subtitle). The figure is written to 'tov-correl.png'.
plt.style.use('fivethirtyeight')

tov, ax = plt.subplots()

ax.scatter(dfCollege['TOV'], dfNBA['TOV'], color = 'orange')
ax.axvline(x = np.mean(dfCollege['TOV']), color = 'black')
ax.axhline(y = np.mean(dfNBA['TOV']), label = "Average", color = 'black')
tov.suptitle("Correlation between college and NBA TOV/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College TOV/G")
ax.set_ylabel("NBA TOV/G")

# Degree-1 polynomial fit, evaluated at the sorted unique college values.
fit_x = np.unique(dfCollege['TOV'])
fit_line = np.poly1d(np.polyfit(dfCollege['TOV'], dfNBA['TOV'], 1))
ax.plot(fit_x, fit_line(fit_x))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

slope, intercept, r_value, p_value, std_err = linregress(dfCollege['TOV'], dfNBA['TOV'])
# The label previously read "AST" (copied from the assists cell); this cell reports TOV.
print("College and NBA TOV: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = "r = " + str(round(r_value, 3)) + ", p = " + str(round(p_value, 5)) + ", rsquared = " + str(round(rsqaured, 3))

ax.set_title("http://dribbleanalytics.blogspot.com.\n%s"%rpString, size = 12, fontname = 'Rockwell')
tov.savefig('tov-correl.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# College vs. NBA assist-to-turnover ratio: scatter plot, mean guide lines, a
# least-squares fit line, and the regression statistics (printed and also
# shown in the subtitle). The figure is written to 'asttov-correl.png'.
plt.style.use('fivethirtyeight')

asttov, ax = plt.subplots()

ax.scatter(dfCollege['AST/TOV'], dfNBA['AST/TOV'], color = 'orange')
ax.axvline(x = np.mean(dfCollege['AST/TOV']), color = 'black')
ax.axhline(y = np.mean(dfNBA['AST/TOV']), label = "Average", color = 'black')
asttov.suptitle("Correlation between college and NBA AST/TOV", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College AST/TOV")
ax.set_ylabel("NBA AST/TOV")

# Degree-1 polynomial fit, evaluated at the sorted unique college values.
fit_x = np.unique(dfCollege['AST/TOV'])
fit_line = np.poly1d(np.polyfit(dfCollege['AST/TOV'], dfNBA['AST/TOV'], 1))
ax.plot(fit_x, fit_line(fit_x))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

slope, intercept, r_value, p_value, std_err = linregress(dfCollege['AST/TOV'], dfNBA['AST/TOV'])
# The label previously read "AST" (copied from the assists cell); this cell reports AST/TOV.
print("College and NBA AST/TOV: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = "r = " + str(round(r_value, 3)) + ", p = " + str(round(p_value, 13)) + ", rsquared = " + str(round(rsqaured, 3))

ax.set_title("http://dribbleanalytics.blogspot.com.\n%s"%rpString, size = 12, fontname = 'Rockwell')
asttov.savefig('asttov-correl.png', dpi = 400, bbox_inches = 'tight')

# Histogram of assists and turnovers

In [None]:
# Density histogram of the college sample's AST/G with a normal curve of the
# same mean and standard deviation overlaid. Saved to 'ast-hist-college.png'.
plt.style.use('fivethirtyeight')
astHistCollege, ax = plt.subplots()

# `density = True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
ax.hist(dfCollege['AST'], bins = 16, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
astHistCollege.suptitle("Histogram of sample's college AST/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("AST/G")
ax.set_ylabel("Frequency")

overall_mean = dfCollege['AST'].mean()
overall_std = dfCollege['AST'].std()

# Evaluate the normal pdf over the x-range the histogram produced.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Only the colour keyword is kept: combined with the 'k' format string it triggered a
# "color is redundantly defined" warning, and the keyword took precedence anyway.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

astHistCollege.savefig('ast-hist-college.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Density histogram of the NBA sample's AST/G with a normal curve of the
# same mean and standard deviation overlaid. Saved to 'ast-hist-nba.png'.
plt.style.use('fivethirtyeight')
astHistNBA, ax = plt.subplots()

# `density = True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
ax.hist(dfNBA['AST'], bins = 16, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
astHistNBA.suptitle("Histogram of sample's NBA AST/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("AST/G")
ax.set_ylabel("Frequency")

overall_mean = dfNBA['AST'].mean()
overall_std = dfNBA['AST'].std()

# Evaluate the normal pdf over the x-range the histogram produced.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Only the colour keyword is kept: combined with the 'k' format string it triggered a
# "color is redundantly defined" warning, and the keyword took precedence anyway.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

astHistNBA.savefig('ast-hist-nba.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Density histogram of the draft class's AST/G with a normal curve of the
# same mean and standard deviation overlaid. Saved to 'ast-hist-draft-class.png'.
plt.style.use('fivethirtyeight')
astHistDraftClass, ax = plt.subplots()

# `density = True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
ax.hist(dfDraftClass['AST'], bins = 10, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
astHistDraftClass.suptitle("Histogram of draft class's AST/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("AST/G")
ax.set_ylabel("Frequency")

overall_mean = dfDraftClass['AST'].mean()
overall_std = dfDraftClass['AST'].std()

# Evaluate the normal pdf over the x-range the histogram produced.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Only the colour keyword is kept: combined with the 'k' format string it triggered a
# "color is redundantly defined" warning, and the keyword took precedence anyway.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

astHistDraftClass.savefig('ast-hist-draft-class.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Density histogram of the college sample's TOV/G with a normal curve of the
# same mean and standard deviation overlaid. Saved to 'tov-hist-college.png'.
plt.style.use('fivethirtyeight')
tovHistCollege, ax = plt.subplots()

# `density = True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
ax.hist(dfCollege['TOV'], bins = 16, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
tovHistCollege.suptitle("Histogram of sample's college TOV/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("TOV/G")
ax.set_ylabel("Frequency")

overall_mean = dfCollege['TOV'].mean()
overall_std = dfCollege['TOV'].std()

# Evaluate the normal pdf over the x-range the histogram produced.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Only the colour keyword is kept: combined with the 'k' format string it triggered a
# "color is redundantly defined" warning, and the keyword took precedence anyway.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

tovHistCollege.savefig('tov-hist-college.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Density histogram of the NBA sample's TOV/G with a normal curve of the
# same mean and standard deviation overlaid. Saved to 'tov-hist-nba.png'.
plt.style.use('fivethirtyeight')
tovHistNBA, ax = plt.subplots()

# `density = True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
ax.hist(dfNBA['TOV'], bins = 16, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
tovHistNBA.suptitle("Histogram of sample's NBA TOV/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("TOV/G")
ax.set_ylabel("Frequency")

overall_mean = dfNBA['TOV'].mean()
overall_std = dfNBA['TOV'].std()

# Evaluate the normal pdf over the x-range the histogram produced.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Only the colour keyword is kept: combined with the 'k' format string it triggered a
# "color is redundantly defined" warning, and the keyword took precedence anyway.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

tovHistNBA.savefig('tov-hist-nba.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Density histogram of the draft class's TOV/G with a normal curve of the
# same mean and standard deviation overlaid. Saved to 'tov-hist-draft-class.png'.
plt.style.use('fivethirtyeight')
tovHistDraftClass, ax = plt.subplots()

# `density = True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
ax.hist(dfDraftClass['TOV'], bins = 10, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
tovHistDraftClass.suptitle("Histogram of draft class's TOV/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("TOV/G")
ax.set_ylabel("Frequency")

overall_mean = dfDraftClass['TOV'].mean()
overall_std = dfDraftClass['TOV'].std()

# Evaluate the normal pdf over the x-range the histogram produced.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Only the colour keyword is kept: combined with the 'k' format string it triggered a
# "color is redundantly defined" warning, and the keyword took precedence anyway.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

tovHistDraftClass.savefig('tov-hist-draft-class.png', dpi = 400, bbox_inches = 'tight')

# Creating assists model

In [None]:
# Hold out 20% of the combined data as the test set; random_state fixes the split.
train, test = train_test_split(dfCombined, test_size = 0.2, random_state = 65)

# Predictors shared by the train and test frames.
feature_cols = ['Pk', 'Age at Draft', 'Height (in)', 'AST-Col', 'TOV-Col', 'AST%-Col', 'TOV%-Col', 'SOS-Col']

# Targets are kept as one-column DataFrames (double brackets), as the model cells expect.
xtrain = train[feature_cols]
ytrain = train[['AST-NBA']]

xtest = test[feature_cols]
ytest = test[['AST-NBA']]

# 4 consecutive, unshuffled folds. `random_state` is intentionally not passed:
# it has no effect without shuffle=True, and current scikit-learn raises a
# ValueError when it is set while shuffle is False.
kf = KFold(n_splits = 4)

In [None]:
# Linear regression: fit on the training split, then report MSE and R2 on the hold-out.
linReg = linear_model.LinearRegression(fit_intercept = True)
linReg.fit(xtrain, ytrain)
y_predLin = linReg.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_predLin)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_predLin)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreLin = cross_val_score(linReg, xtest, ytest, cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# RBF support vector regression: fit on the training split (target flattened to 1-D),
# then report MSE and R2 on the hold-out.
svr_rbf = SVR(kernel='rbf', gamma=1e-5, C=1000, epsilon=1)
svr_rbf.fit(xtrain, ytrain.values.ravel())
y_rbf = svr_rbf.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_rbf)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_rbf)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreSVM = cross_val_score(svr_rbf, xtest, ytest.values.ravel(), cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreSVM.mean(), cvScoreSVM.std() * 2))

In [None]:
# Random forest (50 trees, fixed seed): fit on the training split (target flattened to 1-D),
# then report MSE and R2 on the hold-out.
rf = RandomForestRegressor(n_estimators = 50, random_state = 7)
rf.fit(xtrain, ytrain.values.ravel())
y_rf = rf.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_rf)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_rf)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreRF = cross_val_score(rf, xtest, ytest.values.ravel(), cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreRF.mean(), cvScoreRF.std() * 2))

In [None]:
# k-nearest-neighbours regression (k = 10): fit on the training split,
# then report MSE and R2 on the hold-out.
knn = neighbors.KNeighborsRegressor(n_neighbors = 10)
knn.fit(xtrain, ytrain)
y_knn = knn.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_knn)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_knn)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreKNN = cross_val_score(knn, xtest, ytest, cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreKNN.mean(), cvScoreKNN.std() * 2))

# R2 and MSE graphs for assists

In [None]:
# Bar chart comparing the hold-out MSE of the four assist models; each bar is
# labelled with its model name written vertically inside the bar.
plt.style.use('fivethirtyeight')
mseAST, ax = plt.subplots()

mseScores = [mean_squared_error(ytest, pred) for pred in (y_predLin, y_rbf, y_rf, y_knn)]
x_pos = np.arange(len(mseScores))

ax.bar(x_pos, mseScores, edgecolor = 'white', linewidth = 3)

mseNames = ["Linear", "Support vector", "Random forest", "k-Nearest Neighbors"]
labels = list(mseNames)

# All labels start at the same fixed height just above the x-axis.
rects = ax.patches
height = .075
for rect, label in zip(rects, labels):
    label_x = rect.get_x() + rect.get_width() / 1.8
    ax.text(label_x, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

mseAST.suptitle("Mean squared error (MSE) of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.set_ylabel("MSE (lower is better)")
# The x ticks carry no meaning here; the bar labels identify the models.
ax.xaxis.set_visible(False)

mseAST.savefig('mse-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart comparing the hold-out R2 of the four assist models; each bar is
# labelled with its model name written vertically inside the bar.
r2ast, ax = plt.subplots()

r2scores = [r2_score(ytest, pred) for pred in (y_predLin, y_rbf, y_rf, y_knn)]
x_pos = np.arange(len(r2scores))

ax.bar(x_pos, r2scores, edgecolor = 'white', linewidth = 3)

r2names = ["Linear", "Support vector", "Random forest", "k-Nearest\nNeighbors"]
labels = list(r2names)

# All labels start at the same fixed height just above the x-axis.
rects = ax.patches
height = .025
for rect, label in zip(rects, labels):
    label_x = rect.get_x() + rect.get_width() / 1.8
    ax.text(label_x, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

r2ast.suptitle("Variance score of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.set_ylabel(r"R$^{\rm 2}$ (higher is better)")
# The x ticks carry no meaning here; the bar labels identify the models.
ax.xaxis.set_visible(False)

r2ast.savefig('r2-ast.png', dpi = 400, bbox_inches = 'tight')

# Standardized residuals test for assists

In [None]:
# Residual diagnostics for the linear model on the assists hold-out set:
# scale the residuals, scatter them, and report the share of points that lie
# within two standard deviations of their mean. Saved to 'lin-resid-ast.png'.
plt.style.use('fivethirtyeight')

# Residual = actual - predicted (one-column DataFrame), flattened to 1-D.
linResid = ytest.subtract(y_predLin)
residValues = linResid.values.ravel()

# NOTE(review): the scale factor is sqrt(sum(|residual|)), not the residual standard
# deviation a textbook "standardized residual" uses - confirm this is intended.
sumResid = np.abs(residValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

linResidList = (residValues / residSqrt).tolist()

linResidAst, ax = plt.subplots()

x = np.arange(len(linResidList))

ax.scatter(x, linResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(linResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Points strictly outside mean +/- 2 std.
outliers = int(np.sum((npList < mean - 2 * std) | (npList > mean + 2 * std)))

# Divide by the actual number of residuals; this was a hard-coded 27, which is
# only right when the hold-out set has exactly 27 rows.
outlierPerc = outliers / len(npList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = mean - 2 * std, color = 'orange')
ax.axhline(y = mean + 2 * std, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 12)
linResidAst.suptitle("Standardized residual analysis of LR", y = 1.005)
linResidAst.savefig('lin-resid-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Residual diagnostics for the SVR model on the assists hold-out set:
# scale the residuals, scatter them, and report the share of points that lie
# within two standard deviations of their mean. Saved to 'svr-resid-ast.png'.

# NOTE(review): this is predicted - actual, the opposite sign to the LR/KNN cells
# (actual - predicted); kept as is, but confirm which convention is wanted.
svrResid = y_rbf - ytest['AST-NBA']
residValues = svrResid.values

# NOTE(review): the scale factor is sqrt(sum(|residual|)), not the residual standard
# deviation a textbook "standardized residual" uses - confirm this is intended.
sumResid = np.abs(residValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

svrResidList = (residValues / residSqrt).tolist()

svrResidAst, ax = plt.subplots()

x = np.arange(len(svrResidList))

ax.scatter(x, svrResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(svrResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Points strictly outside mean +/- 2 std.
outliers = int(np.sum((npList < mean - 2 * std) | (npList > mean + 2 * std)))

# Divide by the actual number of residuals; this was a hard-coded 27, which is
# only right when the hold-out set has exactly 27 rows.
outlierPerc = outliers / len(npList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = mean - 2 * std, color = 'orange')
ax.axhline(y = mean + 2 * std, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 12)
svrResidAst.suptitle("Standardized residual analysis of SVR", y = 1.005)
svrResidAst.savefig('svr-resid-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Residual diagnostics for the random forest on the assists hold-out set:
# scale the residuals, scatter them, and report the share of points that lie
# within two standard deviations of their mean. Saved to 'rf-resid-ast.png'.

# NOTE(review): this is predicted - actual, the opposite sign to the LR/KNN cells
# (actual - predicted); kept as is, but confirm which convention is wanted.
rfResid = y_rf - ytest['AST-NBA']
residValues = rfResid.values

# NOTE(review): the scale factor is sqrt(sum(|residual|)), not the residual standard
# deviation a textbook "standardized residual" uses - confirm this is intended.
sumResid = np.abs(residValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

rfResidList = (residValues / residSqrt).tolist()

rfResidAst, ax = plt.subplots()

x = np.arange(len(rfResidList))

ax.scatter(x, rfResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(rfResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Points strictly outside mean +/- 2 std.
outliers = int(np.sum((npList < mean - 2 * std) | (npList > mean + 2 * std)))

# Divide by the actual number of residuals; this was a hard-coded 27, which is
# only right when the hold-out set has exactly 27 rows.
outlierPerc = outliers / len(npList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = mean - 2 * std, color = 'orange')
ax.axhline(y = mean + 2 * std, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 12)
rfResidAst.suptitle("Standardized residual analysis of RF", y = 1.005)
rfResidAst.savefig('rf-resid-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Residual diagnostics for the k-NN model on the assists hold-out set:
# scale the residuals, scatter them, and report the share of points that lie
# within two standard deviations of their mean. Saved to 'knn-resid-ast.png'.
plt.style.use('fivethirtyeight')

# Residual = actual - predicted (one-column DataFrame), flattened to 1-D.
knnResid = ytest.subtract(y_knn)
residValues = knnResid.values.ravel()

# NOTE(review): the scale factor is sqrt(sum(|residual|)), not the residual standard
# deviation a textbook "standardized residual" uses - confirm this is intended.
sumResid = np.abs(residValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

knnResidList = (residValues / residSqrt).tolist()

knnResidAst, ax = plt.subplots()

x = np.arange(len(knnResidList))

ax.scatter(x, knnResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(knnResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Points strictly outside mean +/- 2 std.
outliers = int(np.sum((npList < mean - 2 * std) | (npList > mean + 2 * std)))

# Divide by the actual number of residuals; this was a hard-coded 27, which is
# only right when the hold-out set has exactly 27 rows.
outlierPerc = outliers / len(npList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = mean - 2 * std, color = 'orange')
ax.axhline(y = mean + 2 * std, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 10)
knnResidAst.suptitle("Standardized residual analysis of KNN", y = 1.005)
knnResidAst.savefig('knn-resid-ast.png', dpi = 400, bbox_inches = 'tight')

# Durbin-Watson test for assists

In [None]:
from statsmodels.stats import stattools as stools

# Durbin-Watson statistic of each model's scaled residuals, printed in the
# order LR, SVR, RF, KNN.
for residuals in (linResidList, svrResidList, rfResidList, knnResidList):
    print(stools.durbin_watson(residuals))

# Jarque-Bera test for assists

In [None]:
# Jarque-Bera test result of each model's scaled residuals, printed in the
# order LR, SVR, RF, KNN. Relies on `stools` imported in the Durbin-Watson cell.
for residuals in (linResidList, svrResidList, rfResidList, knnResidList):
    print(stools.jarque_bera(residuals))

# Predictions for assists

In [None]:
# Linear-model predictions for the draft class, printed next to each player's name.
linear_ast = linReg.predict(draftClassTest)

for prediction, player in zip(linear_ast, draftClassNames):
    print(prediction, player)

In [None]:
# SVR predictions for the draft class, printed next to each player's name.
svr_ast = svr_rbf.predict(draftClassTest)

for prediction, player in zip(svr_ast, draftClassNames):
    print(prediction, player)

In [None]:
# Random-forest predictions for the draft class, printed next to each player's name.
rf_ast = rf.predict(draftClassTest)

for prediction, player in zip(rf_ast, draftClassNames):
    print(prediction, player)

In [None]:
# k-NN predictions for the draft class, printed next to each player's name.
knn_ast = knn.predict(draftClassTest)

for prediction, player in zip(knn_ast, draftClassNames):
    print(prediction, player)

In [None]:
# Bar chart of the linear model's predicted AST/G per draft prospect, sorted
# high to low and coloured by whether the prediction is below the reference
# average. Saved to 'lin-reg-ast.png'.

# NOTE(review): hard-coded reference value, labelled "Average of NBA sample" on the
# plot; presumably dfNBA['AST'].mean() - confirm, and consider computing it instead.
avgAst = 3.071429

plt.style.use('fivethirtyeight')
linRegAst, ax = plt.subplots()

# Flatten before converting to Python floats: the predictions may be 2-D (n, 1)
# because the model was fit on a one-column frame, and calling float() on a
# 1-element array is deprecated in NumPy 1.25+.
linearPlot = np.ravel(linear_ast).astype(float).tolist()

combinedLinear = [[name, value] for name, value in zip(draftClassNames, linearPlot)]

sortedLinear = sorted(combinedLinear, key = itemgetter(1), reverse = True)
print(sortedLinear)

sortedLinearData = [row[1] for row in sortedLinear]
x_pos = np.arange(len(sortedLinearData))

# One colour per bar. A value exactly equal to avgAst used to get no colour at all,
# leaving the list shorter than the bars; it now falls in the 'lightgreen' group.
colorList = ['lightcoral' if value < avgAst else 'lightgreen' for value in sortedLinearData]

ax.bar(x_pos, sortedLinearData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedLinear]

# Player names are written vertically inside the bars, all starting at the same height.
rects = ax.patches
height = .25
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
    ha='center', va='bottom', rotation = 'vertical', color = 'black')

linRegAst.suptitle("Linear regression predicted AST/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgAst, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted AST")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

linRegAst.savefig('lin-reg-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the SVR model's predicted AST/G per draft prospect, sorted
# high to low and coloured by whether the prediction is below the reference
# average. Saved to 'svr-ast.png'.

# NOTE(review): hard-coded reference value, labelled "Average of NBA sample" on the
# plot; presumably dfNBA['AST'].mean() - confirm, and consider computing it instead.
avgAst = 3.071429

plt.style.use('fivethirtyeight')
svrAst, ax = plt.subplots()

# Flatten and convert the predictions to plain Python floats.
svrPlot = np.ravel(svr_ast).astype(float).tolist()

combinedSvr = [[name, value] for name, value in zip(draftClassNames, svrPlot)]

sortedSvr = sorted(combinedSvr, key = itemgetter(1), reverse = True)
print(sortedSvr)

sortedSvrData = [row[1] for row in sortedSvr]
x_pos = np.arange(len(sortedSvrData))

# One colour per bar. A value exactly equal to avgAst used to get no colour at all,
# leaving the list shorter than the bars; it now falls in the 'lightgreen' group.
colorList = ['lightcoral' if value < avgAst else 'lightgreen' for value in sortedSvrData]

ax.bar(x_pos, sortedSvrData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedSvr]

# Player names are written vertically inside the bars, all starting at the same height.
rects = ax.patches
height = .25
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
    ha='center', va='bottom', rotation = 'vertical', color = 'black')

svrAst.suptitle("SVR predicted AST/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgAst, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted AST")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

svrAst.savefig('svr-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the random forest's predicted AST/G per draft prospect, sorted
# high to low and coloured by whether the prediction is below the reference
# average. Saved to 'rf-ast.png'.

# NOTE(review): hard-coded reference value, labelled "Average of NBA sample" on the
# plot; presumably dfNBA['AST'].mean() - confirm, and consider computing it instead.
avgAst = 3.071429

plt.style.use('fivethirtyeight')
rfAst, ax = plt.subplots()

# Flatten and convert the predictions to plain Python floats.
rfPlot = np.ravel(rf_ast).astype(float).tolist()

combinedRf = [[name, value] for name, value in zip(draftClassNames, rfPlot)]

sortedRf = sorted(combinedRf, key = itemgetter(1), reverse = True)
print(sortedRf)

sortedRfData = [row[1] for row in sortedRf]
x_pos = np.arange(len(sortedRfData))

# One colour per bar. A value exactly equal to avgAst used to get no colour at all,
# leaving the list shorter than the bars; it now falls in the 'lightgreen' group.
colorList = ['lightcoral' if value < avgAst else 'lightgreen' for value in sortedRfData]

ax.bar(x_pos, sortedRfData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedRf]

# Player names are written vertically inside the bars, all starting at the same height.
rects = ax.patches
height = .25
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
    ha='center', va='bottom', rotation = 'vertical', color = 'black')

rfAst.suptitle("Random forest predicted AST/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgAst, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted AST")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

rfAst.savefig('rf-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the k-NN model's predicted AST/G per draft prospect, sorted
# high to low and coloured by whether the prediction is below the reference
# average. Saved to 'knn-ast.png'.

# NOTE(review): hard-coded reference value, labelled "Average of NBA sample" on the
# plot; presumably dfNBA['AST'].mean() - confirm, and consider computing it instead.
avgAst = 3.071429

plt.style.use('fivethirtyeight')
knnAst, ax = plt.subplots()

# Flatten before converting to Python floats: the predictions may be 2-D (n, 1)
# because the model was fit on a one-column frame, and calling float() on a
# 1-element array is deprecated in NumPy 1.25+.
knnPlot = np.ravel(knn_ast).astype(float).tolist()

combinedKnn = [[name, value] for name, value in zip(draftClassNames, knnPlot)]

sortedKnn = sorted(combinedKnn, key = itemgetter(1), reverse = True)
print(sortedKnn)

sortedKnnData = [row[1] for row in sortedKnn]
x_pos = np.arange(len(sortedKnnData))

# One colour per bar. A value exactly equal to avgAst used to get no colour at all,
# leaving the list shorter than the bars; it now falls in the 'lightgreen' group.
colorList = ['lightcoral' if value < avgAst else 'lightgreen' for value in sortedKnnData]

ax.bar(x_pos, sortedKnnData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedKnn]

# Player names are written vertically inside the bars, all starting at the same height.
rects = ax.patches
height = .25
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
    ha='center', va='bottom', rotation = 'vertical', color = 'black')

knnAst.suptitle("k-NN predicted AST/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgAst, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted AST")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

knnAst.savefig('knn-ast.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the four-model average predicted AST/G per draft prospect,
# sorted high to low and coloured by whether the average is below the
# reference value. Saved to 'avg-ast.png'.

# NOTE(review): hard-coded reference value, labelled "Average of NBA sample" on the
# plot; presumably dfNBA['AST'].mean() - confirm, and consider computing it instead.
avgAst = 3.071429

avgAstGraph, ax = plt.subplots()

# Element-wise mean of the four prediction vectors. Each is flattened first because
# some may be 2-D (n, 1), and calling float() on a 1-element array is deprecated
# in NumPy 1.25+.
averagePred = (
    (np.ravel(linear_ast) + np.ravel(svr_ast) + np.ravel(rf_ast) + np.ravel(knn_ast)) / 4
).astype(float).tolist()

combinedAvg = [[name, value] for name, value in zip(draftClassNames, averagePred)]

sortedAvg = sorted(combinedAvg, key = itemgetter(1), reverse = True)
print(sortedAvg)

sortedAvgData = [row[1] for row in sortedAvg]
x_pos = np.arange(len(sortedAvgData))

# One colour per bar. A value exactly equal to avgAst used to get no colour at all,
# leaving the list shorter than the bars; it now falls in the 'lightgreen' group.
colorList = ['lightcoral' if value < avgAst else 'lightgreen' for value in sortedAvgData]

ax.bar(x_pos, sortedAvgData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedAvg]

# Player names are written vertically inside the bars, all starting at the same height.
rects = ax.patches
height = .25
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
    ha='center', va='bottom', rotation = 'vertical', color = 'black')

avgAstGraph.suptitle("4-model average predicted AST/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgAst, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted AST")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

avgAstGraph.savefig('avg-ast.png', dpi = 400, bbox_inches = 'tight')

# Creating turnover models

In [None]:
# Hold out 20% of the combined data as the test set; random_state fixes the split.
# From here on the target is NBA turnovers ('TOV-NBA'), and the names xtrain, ytrain,
# xtest, ytest and kf are rebound.
train, test = train_test_split(dfCombined, test_size = 0.2, random_state = 65)

# Predictors shared by the train and test frames.
feature_cols = ['Pk', 'Age at Draft', 'Height (in)', 'AST-Col', 'TOV-Col', 'AST%-Col', 'TOV%-Col', 'SOS-Col']

# Targets are kept as one-column DataFrames (double brackets), as the model cells expect.
xtrain = train[feature_cols]
ytrain = train[['TOV-NBA']]

xtest = test[feature_cols]
ytest = test[['TOV-NBA']]

# 4 consecutive, unshuffled folds. `random_state` is intentionally not passed:
# it has no effect without shuffle=True, and current scikit-learn raises a
# ValueError when it is set while shuffle is False.
kf = KFold(n_splits = 4)

In [None]:
# Linear regression: fit on the training split, then report MSE and R2 on the hold-out.
# Rebinds linReg and y_predLin.
linReg = linear_model.LinearRegression(fit_intercept = True)
linReg.fit(xtrain, ytrain)
y_predLin = linReg.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_predLin)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_predLin)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreLin = cross_val_score(linReg, xtest, ytest, cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# RBF support vector regression: fit on the training split (target flattened to 1-D),
# then report MSE and R2 on the hold-out. Rebinds svr_rbf and y_rbf.
svr_rbf = SVR(kernel='rbf', gamma=1e-5, C=1000, epsilon=1)
svr_rbf.fit(xtrain, ytrain.values.ravel())
y_rbf = svr_rbf.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_rbf)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_rbf)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreSVM = cross_val_score(svr_rbf, xtest, ytest.values.ravel(), cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreSVM.mean(), cvScoreSVM.std() * 2))

In [None]:
# Random forest (50 trees, fixed seed): fit on the training split (target flattened to 1-D),
# then report MSE and R2 on the hold-out. Rebinds rf and y_rf.
rf = RandomForestRegressor(n_estimators = 50, random_state = 7)
rf.fit(xtrain, ytrain.values.ravel())
y_rf = rf.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_rf)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_rf)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreRF = cross_val_score(rf, xtest, ytest.values.ravel(), cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreRF.mean(), cvScoreRF.std() * 2))

In [None]:
# k-nearest-neighbours regression (k = 10): fit on the training split,
# then report MSE and R2 on the hold-out. Rebinds knn and y_knn.
knn = neighbors.KNeighborsRegressor(n_neighbors = 10)
knn.fit(xtrain, ytrain)
y_knn = knn.predict(xtest)

print("Mean squared error: {:.3f}".format(mean_squared_error(ytest, y_knn)))
print("R2 score: {:.3f}".format(r2_score(ytest, y_knn)))

# NOTE(review): cross-validation is run on xtest/ytest (clones are re-fit on folds of the
# hold-out set), and the score is explained variance, not accuracy - confirm this is intended.
cvScoreKNN = cross_val_score(knn, xtest, ytest, cv = kf, scoring = 'explained_variance')
print("Accuracy (cross validation score): {:0.2f} (+/- {:0.2f})".format(cvScoreKNN.mean(), cvScoreKNN.std() * 2))

# R2 and MSE graphs for turnovers

In [None]:
# Bar chart comparing the hold-out MSE of the four turnover models; each bar is
# labelled with its model name written vertically inside the bar.
plt.style.use('fivethirtyeight')
mseTOV, ax = plt.subplots()

mseScores = [mean_squared_error(ytest, pred) for pred in (y_predLin, y_rbf, y_rf, y_knn)]
x_pos = np.arange(len(mseScores))

ax.bar(x_pos, mseScores, edgecolor = 'white', linewidth = 3)

mseNames = ["Linear", "Support vector", "Random forest", "k-Nearest Neighbors"]
labels = list(mseNames)

# All labels start at the same fixed height just above the x-axis.
rects = ax.patches
height = .025
for rect, label in zip(rects, labels):
    label_x = rect.get_x() + rect.get_width() / 1.8
    ax.text(label_x, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

mseTOV.suptitle("Mean squared error (MSE) of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.set_ylabel("MSE (lower is better)")
# The x ticks carry no meaning here; the bar labels identify the models.
ax.xaxis.set_visible(False)

mseTOV.savefig('mse-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart comparing the hold-out R2 of the four turnover models; each bar is
# labelled with its model name written vertically inside the bar.
r2tov, ax = plt.subplots()

r2scores = [r2_score(ytest, pred) for pred in (y_predLin, y_rbf, y_rf, y_knn)]
x_pos = np.arange(len(r2scores))

ax.bar(x_pos, r2scores, edgecolor = 'white', linewidth = 3)

r2names = ["Linear", "Support vector", "Random\nforest", "k-Nearest\nNeighbors"]
labels = list(r2names)

# All labels start at the same fixed height just above the x-axis.
rects = ax.patches
height = .025
for rect, label in zip(rects, labels):
    label_x = rect.get_x() + rect.get_width() / 1.8
    ax.text(label_x, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

r2tov.suptitle("Variance score of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.set_ylabel(r"R$^{\rm 2}$ (higher is better)")
# The x ticks carry no meaning here; the bar labels identify the models.
ax.xaxis.set_visible(False)

r2tov.savefig('r2-tov.png', dpi = 400, bbox_inches = 'tight')

# Standardized residuals test for turnovers

In [None]:
# Scaled-residual scatter plot for the linear regression on the turnover test set,
# with the share of points that fall within two standard deviations of the mean.
plt.style.use('fivethirtyeight')

# Residual = actual - predicted, as a frame shaped like ytest.
linResid = ytest.subtract(y_predLin)

# Only the first column is used, giving one residual per test row.
linResidValues = linResid.values[:, 0]

# Scale factor used throughout this notebook: the square root of the summed
# absolute residuals.
# NOTE(review): this is not the conventional standardization (dividing by the
# residual standard deviation); it is kept so the plotted values do not change.
sumResid = np.abs(linResidValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

# Plain list of floats; later cells pass this list to the residual tests.
linResidList = (linResidValues / residSqrt).tolist()

linResidTov, ax = plt.subplots()

x = np.arange(len(linResidList))

ax.scatter(x, linResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(linResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Count the points strictly outside the mean +/- 2 std band.
lowerBound = mean - 2 * std
upperBound = mean + 2 * std
outliers = int(np.count_nonzero((npList < lowerBound) | (npList > upperBound)))

# Divide by the actual number of residuals instead of a hard-coded sample size,
# so the percentage stays correct if the train/test split changes.
outlierPerc = outliers / len(linResidList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = lowerBound, color = 'orange')
ax.axhline(y = upperBound, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 12)
linResidTov.suptitle("Standardized residual analysis of LR", y = 1.005)

linResidTov.savefig('lin-resid-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Scaled-residual scatter plot for the support vector regression on the turnover
# test set, with the share of points within two standard deviations of the mean.
# NOTE(review): the residual here is predicted - actual, whereas the linear and
# k-NN residual cells use actual - predicted; confirm which sign is intended.
svrResid = y_rbf - ytest['TOV-NBA']
svrResidValues = svrResid.values

# Scale factor used throughout this notebook: the square root of the summed
# absolute residuals.
# NOTE(review): this is not the conventional standardization (dividing by the
# residual standard deviation); it is kept so the plotted values do not change.
sumResid = np.abs(svrResidValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

# Plain list of floats; later cells pass this list to the residual tests.
svrResidList = (svrResidValues / residSqrt).tolist()

svrResidTov, ax = plt.subplots()

x = np.arange(len(svrResidList))

ax.scatter(x, svrResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(svrResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Count the points strictly outside the mean +/- 2 std band.
lowerBound = mean - 2 * std
upperBound = mean + 2 * std
outliers = int(np.count_nonzero((npList < lowerBound) | (npList > upperBound)))

# Divide by the actual number of residuals instead of a hard-coded sample size,
# so the percentage stays correct if the train/test split changes.
outlierPerc = outliers / len(svrResidList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = lowerBound, color = 'orange')
ax.axhline(y = upperBound, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 12)
svrResidTov.suptitle("Standardized residual analysis of SVR", y = 1.005)

svrResidTov.savefig('svr-resid-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Scaled-residual scatter plot for the random forest on the turnover test set,
# with the share of points within two standard deviations of the mean.
# NOTE(review): the residual here is predicted - actual, whereas the linear and
# k-NN residual cells use actual - predicted; confirm which sign is intended.
rfResid = y_rf - ytest['TOV-NBA']
rfResidValues = rfResid.values

# Scale factor used throughout this notebook: the square root of the summed
# absolute residuals.
# NOTE(review): this is not the conventional standardization (dividing by the
# residual standard deviation); it is kept so the plotted values do not change.
sumResid = np.abs(rfResidValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

# Plain list of floats; later cells pass this list to the residual tests.
rfResidList = (rfResidValues / residSqrt).tolist()

rfResidTov, ax = plt.subplots()

x = np.arange(len(rfResidList))

ax.scatter(x, rfResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(rfResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Count the points strictly outside the mean +/- 2 std band.
lowerBound = mean - 2 * std
upperBound = mean + 2 * std
outliers = int(np.count_nonzero((npList < lowerBound) | (npList > upperBound)))

# Divide by the actual number of residuals instead of a hard-coded sample size,
# so the percentage stays correct if the train/test split changes.
outlierPerc = outliers / len(rfResidList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = lowerBound, color = 'orange')
ax.axhline(y = upperBound, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 12)
rfResidTov.suptitle("Standardized residual analysis of RF", y = 1.005)

rfResidTov.savefig('rf-resid-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Scaled-residual scatter plot for the k-nearest-neighbors regression on the
# turnover test set, with the share of points within two standard deviations
# of the mean.
plt.style.use('fivethirtyeight')

# Residual = actual - predicted, as a frame shaped like ytest.
knnResid = ytest.subtract(y_knn)

# Only the first column is used, giving one residual per test row.
knnResidValues = knnResid.values[:, 0]

# Scale factor used throughout this notebook: the square root of the summed
# absolute residuals.
# NOTE(review): this is not the conventional standardization (dividing by the
# residual standard deviation); it is kept so the plotted values do not change.
sumResid = np.abs(knnResidValues).sum()
print(sumResid)
residSqrt = sumResid ** (1/2)
print(residSqrt)

# Plain list of floats; later cells pass this list to the residual tests.
knnResidList = (knnResidValues / residSqrt).tolist()

knnResidTov, ax = plt.subplots()

x = np.arange(len(knnResidList))

ax.scatter(x, knnResidList)

ax.xaxis.set_visible(False)
ax.set_ylabel("Standardized residuals")

npList = np.array(knnResidList)
std = np.std(npList)
mean = np.mean(npList)

print(std, mean)

# Count the points strictly outside the mean +/- 2 std band.
lowerBound = mean - 2 * std
upperBound = mean + 2 * std
outliers = int(np.count_nonzero((npList < lowerBound) | (npList > upperBound)))

# Divide by the actual number of residuals instead of a hard-coded sample size,
# so the percentage stays correct if the train/test split changes.
outlierPerc = outliers / len(knnResidList)
distPerc = 1 - outlierPerc

ax.axhline(y = mean, label = "Average")
ax.axhline(y = lowerBound, color = 'orange')
ax.axhline(y = upperBound, label = "2 stdev from mean", color = 'orange')
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("Percentage of data within 2 standard deviations of the mean: %.3f" % distPerc, fontname = 'Rockwell', size = 10)
knnResidTov.suptitle("Standardized residual analysis of KNN", y = 1.005)

knnResidTov.savefig('knn-resid-tov.png', dpi = 400, bbox_inches = 'tight')

# Durbin-Watson test for turnovers

In [None]:
# Print the Durbin-Watson statistic of each model's scaled residuals, in the
# order linear, SVR, random forest, k-NN.
# NOTE(review): `stools` is not imported in the notebook's first import cell;
# presumably statsmodels.stats.stattools imported in an earlier cell -- confirm.
for modelResiduals in (linResidList, svrResidList, rfResidList, knnResidList):
    print(stools.durbin_watson(modelResiduals))

# Jarque-Bera test for turnovers

In [None]:
# Print the Jarque-Bera test result for each model's scaled residuals, in the
# order linear, SVR, random forest, k-NN.
# NOTE(review): `stools` is not imported in the notebook's first import cell;
# presumably statsmodels.stats.stattools imported in an earlier cell -- confirm.
for modelResiduals in (linResidList, svrResidList, rfResidList, knnResidList):
    print(stools.jarque_bera(modelResiduals))

# Predictions for turnovers

In [None]:
# Predict turnovers for the draft class with the linear regression and list
# each prediction next to the player it belongs to.
linear_tov = linReg.predict(draftClassTest)

for prediction, playerName in zip(linear_tov, draftClassNames):
    print(prediction, playerName)

In [None]:
# Predict turnovers for the draft class with the support vector regression and
# list each prediction next to the player it belongs to.
svr_tov = svr_rbf.predict(draftClassTest)

for prediction, playerName in zip(svr_tov, draftClassNames):
    print(prediction, playerName)

In [None]:
# Predict turnovers for the draft class with the random forest and list each
# prediction next to the player it belongs to.
rf_tov = rf.predict(draftClassTest)

for prediction, playerName in zip(rf_tov, draftClassNames):
    print(prediction, playerName)

In [None]:
# Predict turnovers for the draft class with the k-nearest-neighbors model and
# list each prediction next to the player it belongs to.
knn_tov = knn.predict(draftClassTest)

for prediction, playerName in zip(knn_tov, draftClassNames):
    print(prediction, playerName)

In [None]:
# Bar chart of the linear regression's predicted TOV/G for the draft class,
# sorted from highest to lowest and colored against a reference average.

# Hard-coded reference value; the legend below labels it "Average of NBA sample".
avgTov = 1.555639

plt.style.use('fivethirtyeight')
linRegTov, ax = plt.subplots()

# Flatten first so every prediction becomes a plain float whether predict()
# returned shape (n,) or (n, 1); calling float() on a 1-element array is
# deprecated in recent NumPy.
linearPlot = np.ravel(linear_tov).tolist()

# [name, prediction] pairs, highest predicted turnovers first.
combinedLinear = [[name, pred] for name, pred in zip(draftClassNames, linearPlot)]

sortedLinear = sorted(combinedLinear, key = itemgetter(1), reverse = True)
print(sortedLinear)

sortedLinearData = [row[1] for row in sortedLinear]
x_pos = np.arange(len(sortedLinearData))

# Exactly one color per bar: red above the average, green otherwise. A value
# equal to the average is treated as green, so the color list can never end up
# shorter than the data and shift the colors of the remaining bars.
colorList = ['lightcoral' if value > avgTov else 'lightgreen' for value in sortedLinearData]

ax.bar(x_pos, sortedLinearData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedLinear]

# The x-axis is hidden below, so each bar carries the player's name as vertical
# text anchored at a fixed height.
rects = ax.patches
height = .15
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'black')

linRegTov.suptitle("Linear regression predicted TOV/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgTov, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted TOV")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

linRegTov.savefig('lin-reg-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the support vector regression's predicted TOV/G for the draft
# class, sorted from highest to lowest and colored against a reference average.

# Hard-coded reference value; the legend below labels it "Average of NBA sample".
avgTov = 1.555639

plt.style.use('fivethirtyeight')
svrTov, ax = plt.subplots()

# Flatten so every prediction is a plain float regardless of the array's shape.
svrPlot = np.ravel(svr_tov).tolist()

# [name, prediction] pairs, highest predicted turnovers first.
combinedSvr = [[name, pred] for name, pred in zip(draftClassNames, svrPlot)]

sortedSvr = sorted(combinedSvr, key = itemgetter(1), reverse = True)
print(sortedSvr)

sortedSvrData = [row[1] for row in sortedSvr]
x_pos = np.arange(len(sortedSvrData))

# Exactly one color per bar: red above the average, green otherwise. A value
# equal to the average is treated as green, so the color list can never end up
# shorter than the data and shift the colors of the remaining bars.
colorList = ['lightcoral' if value > avgTov else 'lightgreen' for value in sortedSvrData]

ax.bar(x_pos, sortedSvrData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedSvr]

# The x-axis is hidden below, so each bar carries the player's name as vertical
# text anchored at a fixed height.
rects = ax.patches
height = .15
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'black')

svrTov.suptitle("SVR predicted TOV/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgTov, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted TOV")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

svrTov.savefig('svr-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the random forest's predicted TOV/G for the draft class, sorted
# from highest to lowest and colored against a reference average.

# Hard-coded reference value; the legend below labels it "Average of NBA sample".
avgTov = 1.555639

plt.style.use('fivethirtyeight')
rfTov, ax = plt.subplots()

# Flatten so every prediction is a plain float regardless of the array's shape.
rfPlot = np.ravel(rf_tov).tolist()

# [name, prediction] pairs, highest predicted turnovers first.
combinedRf = [[name, pred] for name, pred in zip(draftClassNames, rfPlot)]

sortedRf = sorted(combinedRf, key = itemgetter(1), reverse = True)
print(sortedRf)

sortedRfData = [row[1] for row in sortedRf]
x_pos = np.arange(len(sortedRfData))

# Exactly one color per bar: red above the average, green otherwise. A value
# equal to the average is treated as green, so the color list can never end up
# shorter than the data and shift the colors of the remaining bars.
colorList = ['lightcoral' if value > avgTov else 'lightgreen' for value in sortedRfData]

ax.bar(x_pos, sortedRfData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedRf]

# The x-axis is hidden below, so each bar carries the player's name as vertical
# text anchored at a fixed height.
rects = ax.patches
height = .15
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'black')

rfTov.suptitle("Random forest predicted TOV/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgTov, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted TOV")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

rfTov.savefig('rf-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the k-nearest-neighbors model's predicted TOV/G for the draft
# class, sorted from highest to lowest and colored against a reference average.

# Hard-coded reference value; the legend below labels it "Average of NBA sample".
avgTov = 1.555639

plt.style.use('fivethirtyeight')
knnTov, ax = plt.subplots()

# Flatten first so every prediction becomes a plain float whether predict()
# returned shape (n,) or (n, 1); calling float() on a 1-element array is
# deprecated in recent NumPy.
knnPlot = np.ravel(knn_tov).tolist()

# [name, prediction] pairs, highest predicted turnovers first.
combinedKnn = [[name, pred] for name, pred in zip(draftClassNames, knnPlot)]

sortedKnn = sorted(combinedKnn, key = itemgetter(1), reverse = True)
print(sortedKnn)

sortedKnnData = [row[1] for row in sortedKnn]
x_pos = np.arange(len(sortedKnnData))

# Exactly one color per bar: red above the average, green otherwise. A value
# equal to the average is treated as green, so the color list can never end up
# shorter than the data and shift the colors of the remaining bars.
colorList = ['lightcoral' if value > avgTov else 'lightgreen' for value in sortedKnnData]

ax.bar(x_pos, sortedKnnData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedKnn]

# The x-axis is hidden below, so each bar carries the player's name as vertical
# text anchored at a fixed height.
rects = ax.patches
height = .15
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'black')

knnTov.suptitle("k-NN predicted TOV/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgTov, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted TOV")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

knnTov.savefig('knn-tov.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart of the four-model average predicted TOV/G for the draft class,
# sorted from highest to lowest and colored against a reference average.

# Hard-coded reference value; the legend below labels it "Average of NBA sample".
avgTov = 1.555639

avgTovGraph, ax = plt.subplots()

# Per-player mean of the four models' predictions. Each array is flattened
# first so (n,) and (n, 1) prediction arrays combine element by element and the
# result is a list of plain floats.
averagePred = (
    (np.ravel(linear_tov) + np.ravel(svr_tov) + np.ravel(rf_tov) + np.ravel(knn_tov)) / 4
).tolist()

# [name, prediction] pairs, highest predicted turnovers first.
combinedAvg = [[name, pred] for name, pred in zip(draftClassNames, averagePred)]

sortedAvg = sorted(combinedAvg, key = itemgetter(1), reverse = True)
print(sortedAvg)

sortedAvgData = [row[1] for row in sortedAvg]
x_pos = np.arange(len(sortedAvgData))

# Exactly one color per bar: red above the average, green otherwise. A value
# equal to the average is treated as green, so the color list can never end up
# shorter than the data and shift the colors of the remaining bars.
colorList = ['lightcoral' if value > avgTov else 'lightgreen' for value in sortedAvgData]

ax.bar(x_pos, sortedAvgData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedAvg]

# The x-axis is hidden below, so each bar carries the player's name as vertical
# text anchored at a fixed height.
rects = ax.patches
height = .15
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'black')

avgTovGraph.suptitle("4-model average predicted TOV/G", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgTov, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Predicted TOV")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

avgTovGraph.savefig('avg-tov.png', dpi = 400, bbox_inches = 'tight')

# Predicted assists / predicted turnovers

In [None]:
# Bar chart of predicted assist-to-turnover ratio for the draft class: the
# four-model average predicted AST divided by the four-model average predicted
# TOV, sorted from highest to lowest and colored against a reference ratio.

# Hard-coded reference value; the legend below labels it "Average of NBA sample".
avgAstTov = 1.872334

avgAstTovGraph, ax = plt.subplots()

# Per-player mean of the four models' assist predictions. Each array is
# flattened first so (n,) and (n, 1) prediction arrays combine element by
# element and the result is a list of plain floats.
avgAst = (
    (np.ravel(linear_ast) + np.ravel(svr_ast) + np.ravel(rf_ast) + np.ravel(knn_ast)) / 4
).tolist()

# Same for turnovers.
# NOTE: this rebinds avgTov, which the earlier turnover chart cells use as a
# scalar constant, to a per-player list; those cells reassign the constant at
# their top, so they still work when re-run.
avgTov = (
    (np.ravel(linear_tov) + np.ravel(svr_tov) + np.ravel(rf_tov) + np.ravel(knn_tov)) / 4
).tolist()

# Ratio of the two averages per player.
avgAstTovDC = [astPred / tovPred for astPred, tovPred in zip(avgAst, avgTov)]

# [name, ratio] pairs, best ratio first.
combinedAvg = [[name, ratio] for name, ratio in zip(draftClassNames, avgAstTovDC)]

sortedAvg = sorted(combinedAvg, key = itemgetter(1), reverse = True)
print(sortedAvg)

sortedAvgData = [row[1] for row in sortedAvg]
x_pos = np.arange(len(sortedAvgData))

# Exactly one color per bar: red below the reference ratio, green otherwise. A
# value equal to the reference is treated as green, so the color list can never
# end up shorter than the data and shift the colors of the remaining bars.
colorList = ['lightcoral' if value < avgAstTov else 'lightgreen' for value in sortedAvgData]

ax.bar(x_pos, sortedAvgData, color = colorList, edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedAvg]

# The x-axis is hidden below, so each bar carries the player's name as vertical
# text anchored at a fixed height.
rects = ax.patches
height = .15
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + rect.get_width() / 1.75, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'black')

avgAstTovGraph.suptitle("4-model average predicted AST/TOV", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.axhline(y = avgAstTov, color = 'black', label = "Average of NBA sample", linestyle = 'dotted', alpha = .25)
ax.set_ylabel("Pred. AST / Pred. TOV")
ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

avgAstTovGraph.savefig('avg-ast-tov.png', dpi = 400, bbox_inches = 'tight')