In [None]:
# Import necessary packages

%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import linregress
from scipy.stats import norm
from operator import itemgetter
from sklearn.model_selection import cross_val_score

In [None]:
# Import the datasets: the historical sample and the current draft class, each
# as one combined file plus one file per position.

dfAllPlayers, dfGuards, dfForwards, dfCenters = [
    pd.read_csv(csvPath)
    for csvPath in ('all-players-final-data.csv',
                    'guards-final-data.csv',
                    'forwards-final-data.csv',
                    'centers-final-data.csv')
]

dfDCallPlayers, dfDCguards, dfDCforwards, dfDCcenters = [
    pd.read_csv(csvPath)
    for csvPath in ('all-draft-class-final-data.csv',
                    'draft-class-guards-final-data.csv',
                    'draft-class-forwards-final-data.csv',
                    'draft-class-centers-final-data.csv')
]

In [None]:
# Preview the all players dataset: display its first rows so the available
# columns can be checked before plotting and modelling.

dfAllPlayers.head()

In [None]:
# Preview one of the positional datasets (guards); the forwards and centers
# frames are loaded the same way from their own files.

dfGuards.head()

In [None]:
# Preview one of the draft class datasets (guards) to see which columns it
# provides before selecting model inputs from it by position.

dfDCguards.head()

In [None]:
# Prepare draft class dataframes to be plugged into machine learning models by
# keeping only the columns (selected by position) that are model parameters.
# NOTE(review): the positions are assumed to line up with the order of the
# training feature list used in the model cells - confirm against the CSVs.

draftFeaturePositions = [2, 3, 7, 8, 9, 10, 11, 12, 14, 15]

guardsTest, forwardsTest, centersTest = (
    draftFrame.iloc[:, draftFeaturePositions]
    for draftFrame in (dfDCguards, dfDCforwards, dfDCcenters)
)

# The first column of each draft frame is paired with the predictions later on.
guardsNames, forwardsNames, centersNames = (
    draftFrame.iloc[:, 0]
    for draftFrame in (dfDCguards, dfDCforwards, dfDCcenters)
)

guardsTest.head()

# Correlation between college and NBA DBPM 

In [None]:
# Let's look at the basic correlation between college DBPM and NBA DBPM for all positions:
# scatter plot, mean lines, a degree-1 least-squares fit and the linregress statistics.

plt.style.use('fivethirtyeight')

dbpmAllPlayers, ax = plt.subplots()

collegeDbpm = dfAllPlayers['DBPM-Col']
nbaDbpm = dfAllPlayers['DBPM-NBA']

ax.scatter(collegeDbpm, nbaDbpm, color = 'orange', label = "All positions")
ax.axvline(x = np.mean(collegeDbpm), color = 'black')
ax.axhline(y = np.mean(nbaDbpm), label = "Average", color = 'black')
dbpmAllPlayers.suptitle("Correlation between college and NBA DBPM", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College DBPM")
ax.set_ylabel("NBA DBPM")

# Evaluate the fitted line at the sorted distinct college values.
fitLine = np.poly1d(np.polyfit(collegeDbpm, nbaDbpm, 1))
xFit = np.unique(collegeDbpm)
ax.plot(xFit, fitLine(xFit))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

slope, intercept, r_value, p_value, std_err = linregress(collegeDbpm, nbaDbpm)
print("College DBPM and NBA DBPM: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = "r = %s, p = %s, rsquared = %s" % (round(r_value, 3), round(p_value, 3), round(rsqaured, 3))

ax.set_title("http://dribbleanalytics.blogspot.com.\n%s"%rpString, size = 12, fontname = 'Rockwell')
dbpmAllPlayers.savefig('dbpm-all-players.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Like with DWS, the regression looks pretty good. Let's look at it for each position.
# This cell covers guards: scatter plot, mean lines, degree-1 fit and linregress statistics.

plt.style.use('fivethirtyeight')

dbpmGuards, ax = plt.subplots()

collegeDbpm = dfGuards['DBPM-Col']
nbaDbpm = dfGuards['DBPM-NBA']

ax.scatter(collegeDbpm, nbaDbpm, color = 'orange', label = "Guards")
ax.axvline(x = np.mean(collegeDbpm), color = 'black')
ax.axhline(y = np.mean(nbaDbpm), label = "Average", color = 'black')
dbpmGuards.suptitle("Correlation between college and NBA DBPM", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College DBPM")
ax.set_ylabel("NBA DBPM")

# Evaluate the fitted line at the sorted distinct college values.
fitLine = np.poly1d(np.polyfit(collegeDbpm, nbaDbpm, 1))
xFit = np.unique(collegeDbpm)
ax.plot(xFit, fitLine(xFit))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

slope, intercept, r_value, p_value, std_err = linregress(collegeDbpm, nbaDbpm)
print("College DBPM and NBA DBPM for guards: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = "r = %s, p = %s, rsquared = %s" % (round(r_value, 3), round(p_value, 3), round(rsqaured, 3))

ax.set_title("http://dribbleanalytics.blogspot.com.\n%s"%rpString, size = 12, fontname = 'Rockwell')
dbpmGuards.savefig('dbpm-guards.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# College vs. NBA DBPM for forwards: scatter plot, mean lines, degree-1 fit
# and the linregress statistics shown in the subtitle.
plt.style.use('fivethirtyeight')

dbpmForwards, ax = plt.subplots()

collegeDbpm = dfForwards['DBPM-Col']
nbaDbpm = dfForwards['DBPM-NBA']

ax.scatter(collegeDbpm, nbaDbpm, color = 'orange', label = "Forwards")
ax.axvline(x = np.mean(collegeDbpm), color = 'black')
ax.axhline(y = np.mean(nbaDbpm), label = "Average", color = 'black')
dbpmForwards.suptitle("Correlation between college and NBA DBPM", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College DBPM")
ax.set_ylabel("NBA DBPM")

# Evaluate the fitted line at the sorted distinct college values.
fitLine = np.poly1d(np.polyfit(collegeDbpm, nbaDbpm, 1))
xFit = np.unique(collegeDbpm)
ax.plot(xFit, fitLine(xFit))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

slope, intercept, r_value, p_value, std_err = linregress(collegeDbpm, nbaDbpm)
print("College DBPM and NBA DBPM for forwards: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = "r = %s, p = %s, rsquared = %s" % (round(r_value, 3), round(p_value, 3), round(rsqaured, 3))

ax.set_title("http://dribbleanalytics.blogspot.com.\n%s"%rpString, size = 12, fontname = 'Rockwell')
dbpmForwards.savefig('dbpm-forwards.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# College vs. NBA DBPM for centers: scatter plot, mean lines, degree-1 fit
# and the linregress statistics shown in the subtitle.
plt.style.use('fivethirtyeight')

dbpmCenters, ax = plt.subplots()

collegeDbpm = dfCenters['DBPM-Col']
nbaDbpm = dfCenters['DBPM-NBA']

ax.scatter(collegeDbpm, nbaDbpm, color = 'orange', label = "Centers")
ax.axvline(x = np.mean(collegeDbpm), color = 'black')
ax.axhline(y = np.mean(nbaDbpm), label = "Average", color = 'black')
dbpmCenters.suptitle("Correlation between college and NBA DBPM", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("College DBPM")
ax.set_ylabel("NBA DBPM")

# Evaluate the fitted line at the sorted distinct college values.
fitLine = np.poly1d(np.polyfit(collegeDbpm, nbaDbpm, 1))
xFit = np.unique(collegeDbpm)
ax.plot(xFit, fitLine(xFit))

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})

slope, intercept, r_value, p_value, std_err = linregress(collegeDbpm, nbaDbpm)
print("College DBPM and NBA DBPM for centers: slope =", slope, ", intercept =", intercept, ", r_value =", r_value,
    ", p_value =", p_value, ", std_err =", std_err)
rsqaured = r_value ** 2
rpString = "r = %s, p = %s, rsquared = %s" % (round(r_value, 3), round(p_value, 3), round(rsqaured, 3))

ax.set_title("http://dribbleanalytics.blogspot.com.\n%s"%rpString, size = 12, fontname = 'Rockwell')
dbpmCenters.savefig('dbpm-centers.png', dpi = 400, bbox_inches = 'tight')

# Histograms of steals and blocks

In [None]:
# Histogram of college STL/G for the historical sample, overlaid with a normal
# curve built from the sample mean and standard deviation.
plt.style.use('fivethirtyeight')
stlHistCollege, ax = plt.subplots()

# density=True (the replacement for the removed `normed` flag) scales the bars
# to unit area so they share a scale with the pdf drawn below.
ax.hist(dfAllPlayers['STL-Col'], bins = 16, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
stlHistCollege.suptitle("Histogram of sample's college STL/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("STL/G")
ax.set_ylabel("Frequency")

overall_mean = dfAllPlayers['STL-Col'].mean()
overall_std = dfAllPlayers['STL-Col'].std()

# Evaluate the normal pdf across the x-range the histogram occupies.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Colour is given by keyword only; adding a format-string colour as well makes
# matplotlib warn about a redundant definition.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

stlHistCollege.savefig('stl-hist-college.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Histogram of STL/G for the draft class, overlaid with a normal curve built
# from the draft class mean and standard deviation.
plt.style.use('fivethirtyeight')
stlHistDC, ax = plt.subplots()

# density=True (the replacement for the removed `normed` flag) scales the bars
# to unit area so they share a scale with the pdf drawn below.
ax.hist(dfDCallPlayers['STL'], bins = 12, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
stlHistDC.suptitle("Histogram of draft class's STL/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("STL/G")
ax.set_ylabel("Frequency")

overall_mean = dfDCallPlayers['STL'].mean()
overall_std = dfDCallPlayers['STL'].std()

# Evaluate the normal pdf across the x-range the histogram occupies.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Colour is given by keyword only; adding a format-string colour as well makes
# matplotlib warn about a redundant definition.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

stlHistDC.savefig('stl-hist-dc.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Histogram of college BLK/G for the historical sample, overlaid with a normal
# curve built from the sample mean and standard deviation.
plt.style.use('fivethirtyeight')
blkHistCollege, ax = plt.subplots()

# density=True (the replacement for the removed `normed` flag) scales the bars
# to unit area so they share a scale with the pdf drawn below.
ax.hist(dfAllPlayers['BLK-Col'], bins = 16, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
blkHistCollege.suptitle("Histogram of sample's college BLK/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("BLK/G")
ax.set_ylabel("Frequency")

overall_mean = dfAllPlayers['BLK-Col'].mean()
overall_std = dfAllPlayers['BLK-Col'].std()

# Evaluate the normal pdf across the x-range the histogram occupies.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Colour is given by keyword only; adding a format-string colour as well makes
# matplotlib warn about a redundant definition.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

blkHistCollege.savefig('blk-hist-college.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Histogram of BLK/G for the draft class, overlaid with a normal curve built
# from the draft class mean and standard deviation.
plt.style.use('fivethirtyeight')
blkHistDC, ax = plt.subplots()

# density=True (the replacement for the removed `normed` flag) scales the bars
# to unit area so they share a scale with the pdf drawn below.
ax.hist(dfDCallPlayers['BLK'], bins = 12, edgecolor = 'white', linewidth = 3, density = True, label = "Actual distribution")
blkHistDC.suptitle("Histogram of draft class's BLK/G", weight = 'bold', size = 18, y = 1.05)
ax.set_xlabel("BLK/G")
ax.set_ylabel("Frequency")

overall_mean = dfDCallPlayers['BLK'].mean()
overall_std = dfDCallPlayers['BLK'].std()

# Evaluate the normal pdf across the x-range the histogram occupies.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, overall_mean, overall_std)
# Colour is given by keyword only; adding a format-string colour as well makes
# matplotlib warn about a redundant definition.
ax.plot(x, p, linewidth=5, color='orange', label = "Normal distribution")

ax.legend(loc='best', prop={'size': 9, "family": "Rockwell"})
ax.set_title("http://dribbleanalytics.blogspot.com.\nmean = %.3f, std = %.3f"%(overall_mean, overall_std), size = 12, fontname = 'Rockwell')

blkHistDC.savefig('blk-hist-dc.png', dpi = 400, bbox_inches = 'tight')

# Create machine learning model for guards

In [None]:
# Hold out 20% of the guards sample (fixed seed) and separate the model inputs
# from the NBA DBPM target.
train, test = train_test_split(dfGuards, test_size = 0.2, random_state = 99)

featureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
targetCol = ['DBPM-NBA']

xtrain, ytrain = train[featureCols], train[targetCol]
xtest, ytest = test[featureCols], test[targetCol]

In [None]:
# Ordinary least squares without an intercept, fit on xtrain/ytrain.
linReg = linear_model.LinearRegression(fit_intercept = False)
linReg.fit(xtrain, ytrain)

y_predLin = linReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLin))
print('Variance score: %.3f' % r2_score(ytest, y_predLin))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLin = cross_val_score(linReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# Ridge regression (alpha = 5, no intercept), fit on xtrain/ytrain.
ridgeReg = linear_model.Ridge(alpha = 5, fit_intercept = False)
ridgeReg.fit(xtrain, ytrain)

y_predRidge = ridgeReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predRidge))
print('Variance score: %.3f' % r2_score(ytest, y_predRidge))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreRidge = cross_val_score(ridgeReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreRidge.mean(), cvScoreRidge.std() * 2))

In [None]:
# Lasso regression (alpha = .1, no intercept), fit on xtrain/ytrain.
lassoReg = linear_model.Lasso(alpha = .1, max_iter = 100000, fit_intercept = False)
lassoReg.fit(xtrain, ytrain)

y_predLasso = lassoReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLasso))
print('Variance score: %.3f' % r2_score(ytest, y_predLasso))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLasso = cross_val_score(lassoReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLasso.mean(), cvScoreLasso.std() * 2))

In [None]:
# Bar chart comparing the test-split MSE of the three regressions currently
# held in y_predLin / y_predRidge / y_predLasso.
plt.style.use('fivethirtyeight')
mseGuards, ax = plt.subplots()

mseNames = ["Linear regression", "Ridge regression", "Lasso regression"]
mseScores = [mean_squared_error(ytest, predicted) for predicted in (y_predLin, y_predRidge, y_predLasso)]
x_pos = np.arange(len(mseScores))

ax.bar(x_pos, mseScores, edgecolor = 'white', linewidth = 3)

# Write each model name vertically inside its bar, starting just above zero.
labelBase = .025
for bar, modelName in zip(ax.patches, mseNames):
    ax.text(bar.get_x() + bar.get_width() / 1.8, labelBase, modelName,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

mseGuards.suptitle("Mean squared error (MSE) of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.set_ylabel("MSE (lower is better)")

mseGuards.savefig('mse-guards.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's graph the variance score of all three models (R^2 on the test split).

r2guards, ax = plt.subplots()

r2names = ["Linear regression", "Ridge regression", "Lasso regression"]
r2scores = [r2_score(ytest, predicted) for predicted in (y_predLin, y_predRidge, y_predLasso)]
x_pos = np.arange(len(r2scores))

ax.bar(x_pos, r2scores, edgecolor = 'white', linewidth = 3)

# Write each model name vertically inside its bar, starting just above zero.
labelBase = .025
for bar, modelName in zip(ax.patches, r2names):
    ax.text(bar.get_x() + bar.get_width() / 1.8, labelBase, modelName,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

r2guards.suptitle("Variance score of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.set_ylabel(r"R$^{\rm 2}$ (higher is better)")

r2guards.savefig('r2-guards.png', dpi = 400, bbox_inches = 'tight')

# Predict defense for guards using the models

In [None]:
# Linear-regression predictions for the draft-class guards, printed as
# "prediction name" pairs.
linear_guards = linReg.predict(guardsTest)

for playerName, predicted in zip(guardsNames, linear_guards):
    print(predicted, playerName)

In [None]:
# Ridge-regression predictions for the draft-class guards, printed as
# "prediction name" pairs.
ridge_guards = ridgeReg.predict(guardsTest)

for playerName, predicted in zip(guardsNames, ridge_guards):
    print(predicted, playerName)

In [None]:
# Lasso-regression predictions for the draft-class guards, printed as
# "prediction name" pairs.
lasso_guards = lassoReg.predict(guardsTest)

for playerName, predicted in zip(guardsNames, lasso_guards):
    print(predicted, playerName)

In [None]:
# Let's plot the linear regression predictions as a horizontal bar chart,
# sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
linRegGuards, ax = plt.subplots()

# Flatten before converting: the predictions may come back as one-element rows,
# and calling float() on such arrays is deprecated in recent NumPy.
linearPlot = np.ravel(linear_guards).astype(float).tolist()

combinedLinear = [[name, predicted] for name, predicted in zip(guardsNames, linearPlot)]

sortedLinear = sorted(combinedLinear, key = itemgetter(1))
print(sortedLinear)

labels = [row[0] for row in sortedLinear]
sortedLinearData = [row[1] for row in sortedLinear]
y_pos = np.arange(len(sortedLinearData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedLinearData]

ax.barh(y_pos, sortedLinearData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedLinearData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

linRegGuards.suptitle("Linear regression predicted guards DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

linRegGuards.savefig('lin-reg-guards.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's plot the ridge regression predictions as a horizontal bar chart,
# sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
ridgeRegGuards, ax = plt.subplots()

# Flatten before converting: the predictions may come back as one-element rows,
# and calling float() on such arrays is deprecated in recent NumPy.
ridgePlot = np.ravel(ridge_guards).astype(float).tolist()

combinedRidge = [[name, predicted] for name, predicted in zip(guardsNames, ridgePlot)]

sortedRidge = sorted(combinedRidge, key = itemgetter(1))
print(sortedRidge)

labels = [row[0] for row in sortedRidge]
sortedRidgeData = [row[1] for row in sortedRidge]
y_pos = np.arange(len(sortedRidgeData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedRidgeData]

ax.barh(y_pos, sortedRidgeData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedRidgeData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

ridgeRegGuards.suptitle("Ridge regression predicted guards DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

ridgeRegGuards.savefig('ridge-reg-guards.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's plot the lasso regression predictions as a horizontal bar chart,
# sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
lassoRegGuards, ax = plt.subplots()

# Flatten and convert to plain Python floats in one step, whatever the shape
# of the prediction array.
lassoPlot = np.ravel(lasso_guards).astype(float).tolist()

combinedLasso = [[name, predicted] for name, predicted in zip(guardsNames, lassoPlot)]

sortedLasso = sorted(combinedLasso, key = itemgetter(1))
print(sortedLasso)

labels = [row[0] for row in sortedLasso]
sortedLassoData = [row[1] for row in sortedLasso]
y_pos = np.arange(len(sortedLassoData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedLassoData]

ax.barh(y_pos, sortedLassoData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedLassoData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

lassoRegGuards.suptitle("Lasso regression predicted guards DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

lassoRegGuards.savefig('lasso-reg-guards.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's see who has the highest average DBPM among the three models: average
# the linear, ridge and lasso predictions per player and plot them sorted.

avgGuards, ax = plt.subplots()

# Flatten each prediction array first so the three line up element-wise
# regardless of whether a model returned a column or a flat array; this also
# avoids float() on one-element arrays, which recent NumPy deprecates.
averagePred = ((np.ravel(linear_guards) + np.ravel(ridge_guards) + np.ravel(lasso_guards)) / 3).astype(float).tolist()

combinedAvg = [[name, predicted] for name, predicted in zip(guardsNames, averagePred)]

sortedAvg = sorted(combinedAvg, key = itemgetter(1))
print(sortedAvg)

labels = [row[0] for row in sortedAvg]
sortedAvgData = [row[1] for row in sortedAvg]
y_pos = np.arange(len(sortedAvgData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedAvgData]

ax.barh(y_pos, sortedAvgData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedAvgData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

avgGuards.suptitle("3-model average predicted guards DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

avgGuards.savefig('avg-guards.png', dpi = 400, bbox_inches = 'tight')

# Create machine learning model for forwards

In [None]:
# Hold out 25% of the forwards sample (fixed seed) and separate the model
# inputs from the NBA DBPM target.
train, test = train_test_split(dfForwards, test_size = 0.25, random_state = 0)

featureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
targetCol = ['DBPM-NBA']

xtrain, ytrain = train[featureCols], train[targetCol]
xtest, ytest = test[featureCols], test[targetCol]

In [None]:
# Ordinary least squares without an intercept, fit on xtrain/ytrain.
linReg = linear_model.LinearRegression(fit_intercept = False)
linReg.fit(xtrain, ytrain)

y_predLin = linReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLin))
print('Variance score: %.3f' % r2_score(ytest, y_predLin))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLin = cross_val_score(linReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# Ridge regression (alpha = .01, no intercept), fit on xtrain/ytrain.
ridgeReg = linear_model.Ridge(alpha = .01, fit_intercept = False)
ridgeReg.fit(xtrain, ytrain)

y_predRidge = ridgeReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predRidge))
print('Variance score: %.3f' % r2_score(ytest, y_predRidge))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreRidge = cross_val_score(ridgeReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreRidge.mean(), cvScoreRidge.std() * 2))

In [None]:
# Lasso regression (alpha = .0001, no intercept), fit on xtrain/ytrain.
lassoReg = linear_model.Lasso(alpha = .0001, max_iter = 10000000, fit_intercept = False)
lassoReg.fit(xtrain, ytrain)

y_predLasso = lassoReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLasso))
print('Variance score: %.3f' % r2_score(ytest, y_predLasso))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLasso = cross_val_score(lassoReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLasso.mean(), cvScoreLasso.std() * 2))

# Let's split it up into SF and PF. Create machine learning model for SF

In [None]:
# Load the SF-only sample, hold out 25% of it (fixed seed) and separate the
# model inputs from the NBA DBPM target.
dfSF = pd.read_csv('sf-final-data.csv')

train, test = train_test_split(dfSF, test_size = 0.25, random_state = 0)

featureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
targetCol = ['DBPM-NBA']

xtrain, ytrain = train[featureCols], train[targetCol]
xtest, ytest = test[featureCols], test[targetCol]

In [None]:
# Ordinary least squares without an intercept, fit on xtrain/ytrain.
linReg = linear_model.LinearRegression(fit_intercept = False)
linReg.fit(xtrain, ytrain)

y_predLin = linReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLin))
print('Variance score: %.3f' % r2_score(ytest, y_predLin))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLin = cross_val_score(linReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# Ridge regression (alpha = 5, no intercept), fit on xtrain/ytrain.
ridgeReg = linear_model.Ridge(alpha = 5, fit_intercept = False)
ridgeReg.fit(xtrain, ytrain)

y_predRidge = ridgeReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predRidge))
print('Variance score: %.3f' % r2_score(ytest, y_predRidge))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreRidge = cross_val_score(ridgeReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreRidge.mean(), cvScoreRidge.std() * 2))

In [None]:
# Lasso regression (alpha = .15, no intercept), fit on xtrain/ytrain.
lassoReg = linear_model.Lasso(alpha = .15, max_iter = 100000, fit_intercept = False)
lassoReg.fit(xtrain, ytrain)

y_predLasso = lassoReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLasso))
print('Variance score: %.3f' % r2_score(ytest, y_predLasso))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLasso = cross_val_score(lassoReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLasso.mean(), cvScoreLasso.std() * 2))

In [None]:
# Bar chart comparing the test-split MSE of the three regressions currently
# held in y_predLin / y_predRidge / y_predLasso.
plt.style.use('fivethirtyeight')
mseSF, ax = plt.subplots()

mseNames = ["Linear regression", "Ridge\nregression", "Lasso\nregression"]
mseScores = [mean_squared_error(ytest, predicted) for predicted in (y_predLin, y_predRidge, y_predLasso)]
x_pos = np.arange(len(mseScores))

ax.bar(x_pos, mseScores, edgecolor = 'white', linewidth = 3)

# Write each model name vertically inside its bar, starting just above zero.
labelBase = .045
for bar, modelName in zip(ax.patches, mseNames):
    ax.text(bar.get_x() + bar.get_width() / 1.8, labelBase, modelName,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

mseSF.suptitle("Mean squared error (MSE) of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.set_ylabel("MSE (lower is better)")

mseSF.savefig('mse-sf.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's graph the variance score of all three models (R^2 on the test split).

r2sf, ax = plt.subplots()

r2names = ["Linear regression", "Ridge regression", "Lasso regression"]
r2scores = [r2_score(ytest, predicted) for predicted in (y_predLin, y_predRidge, y_predLasso)]
x_pos = np.arange(len(r2scores))

ax.bar(x_pos, r2scores, edgecolor = 'white', linewidth = 3)

# Write each model name vertically inside its bar, starting just above zero.
labelBase = .025
for bar, modelName in zip(ax.patches, r2names):
    ax.text(bar.get_x() + bar.get_width() / 1.8, labelBase, modelName,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

r2sf.suptitle("Variance score of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)
ax.set_ylabel(r"R$^{\rm 2}$ (higher is better)")

r2sf.savefig('r2-sf.png', dpi = 400, bbox_inches = 'tight')

# Create machine learning model for PF

In [None]:
# Load the PF-only sample, hold out 25% of it (fixed seed) and separate the
# model inputs from the NBA DBPM target.
dfPF = pd.read_csv('pf-final-data.csv')

train, test = train_test_split(dfPF, test_size = 0.25, random_state = 1)

featureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
targetCol = ['DBPM-NBA']

xtrain, ytrain = train[featureCols], train[targetCol]
xtest, ytest = test[featureCols], test[targetCol]

In [None]:
# Ordinary least squares with an intercept, fit on xtrain/ytrain.
linReg = linear_model.LinearRegression(fit_intercept = True)
linReg.fit(xtrain, ytrain)

y_predLin = linReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLin))
print('Variance score: %.3f' % r2_score(ytest, y_predLin))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLin = cross_val_score(linReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# Ridge regression (alpha = 5, with intercept), fit on xtrain/ytrain.
ridgeReg = linear_model.Ridge(alpha = 5, fit_intercept = True)
ridgeReg.fit(xtrain, ytrain)

y_predRidge = ridgeReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predRidge))
print('Variance score: %.3f' % r2_score(ytest, y_predRidge))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreRidge = cross_val_score(ridgeReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreRidge.mean(), cvScoreRidge.std() * 2))

In [None]:
# Lasso regression (alpha = .15, with intercept), fit on xtrain/ytrain.
lassoReg = linear_model.Lasso(alpha = .15, max_iter = 100000, fit_intercept = True)
lassoReg.fit(xtrain, ytrain)

y_predLasso = lassoReg.predict(xtest)

# Hold-out metrics on the test split.
print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLasso))
print('Variance score: %.3f' % r2_score(ytest, y_predLasso))

# Cross-validate on the training split. cross_val_score refits the estimator
# on each fold, so running it on the small test split would score models
# trained on a fraction of the held-out rows instead of the modelling data.
cvScoreLasso = cross_val_score(lassoReg, xtrain, ytrain, cv = 4, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLasso.mean(), cvScoreLasso.std() * 2))

# The PF model is really bad. Let's make predictions for forwards using the SF model.

In [None]:
# Predict the draft-class forwards with the SF linear model. By this point
# `linReg` has been re-fit on the PF data, so the SF model is rebuilt here
# under its own name, using the same split and settings as the SF section
# (dfSF, test_size 0.25, random_state 0, no intercept).
sfFeatureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
sfTrain, _sfHeldOut = train_test_split(dfSF, test_size = 0.25, random_state = 0)

sfLinReg = linear_model.LinearRegression(fit_intercept = False)
sfLinReg.fit(sfTrain[sfFeatureCols], sfTrain[['DBPM-NBA']])

linear_sf = sfLinReg.predict(forwardsTest)

for i, j in zip(linear_sf, forwardsNames):
    print(i, j)

In [None]:
# Predict the draft-class forwards with the SF ridge model. By this point
# `ridgeReg` has been re-fit on the PF data, so the SF model is rebuilt here
# under its own name, using the same split and settings as the SF section
# (dfSF, test_size 0.25, random_state 0, alpha 5, no intercept).
sfFeatureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
sfTrain, _sfHeldOut = train_test_split(dfSF, test_size = 0.25, random_state = 0)

sfRidgeReg = linear_model.Ridge(alpha = 5, fit_intercept = False)
sfRidgeReg.fit(sfTrain[sfFeatureCols], sfTrain[['DBPM-NBA']])

ridge_sf = sfRidgeReg.predict(forwardsTest)

for i, j in zip(ridge_sf, forwardsNames):
    print(i, j)

In [None]:
# Predict the draft-class forwards with the SF lasso model. By this point
# `lassoReg` has been re-fit on the PF data, so the SF model is rebuilt here
# under its own name, using the same split and settings as the SF section
# (dfSF, test_size 0.25, random_state 0, alpha .15, no intercept).
sfFeatureCols = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col', 'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
sfTrain, _sfHeldOut = train_test_split(dfSF, test_size = 0.25, random_state = 0)

sfLassoReg = linear_model.Lasso(alpha = .15, max_iter = 100000, fit_intercept = False)
sfLassoReg.fit(sfTrain[sfFeatureCols], sfTrain[['DBPM-NBA']])

lasso_sf = sfLassoReg.predict(forwardsTest)

for i, j in zip(lasso_sf, forwardsNames):
    print(i, j)

In [None]:
# Let's plot the linear regression predictions as a horizontal bar chart,
# sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
linRegSF, ax = plt.subplots()

# Flatten before converting: the predictions may come back as one-element rows,
# and calling float() on such arrays is deprecated in recent NumPy.
linearPlot = np.ravel(linear_sf).astype(float).tolist()

combinedLinear = [[name, predicted] for name, predicted in zip(forwardsNames, linearPlot)]

sortedLinear = sorted(combinedLinear, key = itemgetter(1))
print(sortedLinear)

labels = [row[0] for row in sortedLinear]
sortedLinearData = [row[1] for row in sortedLinear]
y_pos = np.arange(len(sortedLinearData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedLinearData]

ax.barh(y_pos, sortedLinearData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedLinearData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

linRegSF.suptitle("Linear regression predicted SF DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

linRegSF.savefig('lin-reg-sf.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's plot the ridge regression predictions as a horizontal bar chart,
# sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
ridgeRegSF, ax = plt.subplots()

# Flatten before converting: the predictions may come back as one-element rows,
# and calling float() on such arrays is deprecated in recent NumPy.
ridgePlot = np.ravel(ridge_sf).astype(float).tolist()

combinedRidge = [[name, predicted] for name, predicted in zip(forwardsNames, ridgePlot)]

sortedRidge = sorted(combinedRidge, key = itemgetter(1))
print(sortedRidge)

labels = [row[0] for row in sortedRidge]
sortedRidgeData = [row[1] for row in sortedRidge]
y_pos = np.arange(len(sortedRidgeData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedRidgeData]

ax.barh(y_pos, sortedRidgeData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedRidgeData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

ridgeRegSF.suptitle("Ridge regression predicted SF DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

ridgeRegSF.savefig('ridge-reg-sf.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Let's plot the lasso regression predictions as a horizontal bar chart,
# sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
lassoRegSF, ax = plt.subplots()

# Flatten and convert to plain Python floats in one step, whatever the shape
# of the prediction array.
lassoPlot = np.ravel(lasso_sf).astype(float).tolist()

combinedLasso = [[name, predicted] for name, predicted in zip(forwardsNames, lassoPlot)]

sortedLasso = sorted(combinedLasso, key = itemgetter(1))
print(sortedLasso)

labels = [row[0] for row in sortedLasso]
sortedLassoData = [row[1] for row in sortedLasso]
y_pos = np.arange(len(sortedLassoData))

# Exactly one colour per bar (a prediction of exactly 0 counts as non-negative),
# so the colour list always stays aligned with the data.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedLassoData]

ax.barh(y_pos, sortedLassoData, color = colorList, edgecolor = 'white', linewidth = 3)

# Choose the label side from the sign of the prediction rather than a fixed bar
# index, so placement stays right when the number of negative bars changes.
# Negative bars get a left-aligned label just past the bar's x origin; the
# rest get a right-aligned label just before it.
for rect, label, value in zip(ax.patches, labels, sortedLassoData):
    if value < 0:
        ax.text(rect.get_x() + .05, rect.get_y(), label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, rect.get_y(), label,
                ha='right', va='bottom', color = 'black')

lassoRegSF.suptitle("Lasso regression predicted SF DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

lassoRegSF.savefig('lasso-reg-sf.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Average the linear, ridge and lasso predictions for each forward and plot the
# result as a horizontal bar chart, sorted from lowest to highest average DBPM.

avgSF, ax = plt.subplots()

# Flatten each prediction array before adding them. Mixing (n, 1) and (n,) arrays
# would otherwise broadcast, and float() on a one-element array is deprecated in
# recent NumPy releases. The sum keeps the original order: (linear + ridge + lasso) / 3.
linearFlat = np.asarray(linear_sf, dtype = float).ravel()
ridgeFlat = np.asarray(ridge_sf, dtype = float).ravel()
lassoFlat = np.asarray(lasso_sf, dtype = float).ravel()
averagePred = ((linearFlat + ridgeFlat + lassoFlat) / 3).tolist()

combinedAvg = [[name, pred] for name, pred in zip(forwardsNames, averagePred)]

sortedAvg = sorted(combinedAvg, key = itemgetter(1))
print(sortedAvg)

sortedAvgData = [row[1] for row in sortedAvg]
y_pos = np.arange(len(sortedAvgData))

# Exactly one colour per bar: red for negative averages, green otherwise.
# A separate "< 0" / "> 0" test would skip an average of exactly 0 and leave
# the colour list shorter than the list of bars.
colorList = ['lightcoral' if value < 0 else 'lightgreen' for value in sortedAvgData]

ax.barh(y_pos, sortedAvgData, color = colorList, edgecolor = 'white', linewidth = 2.75)

labels = [row[0] for row in sortedAvg]

# Names of negative bars are written to the right of the bar's anchor, all other
# names to the left. The side is picked from the sign of the plotted value, not
# from the bar's position in the sorted list.
rects = ax.patches
for rect, value, label in zip(rects, sortedAvgData, labels):
    height = rect.get_y()
    if value < 0:
        ax.text(rect.get_x() + .05, height, label,
                ha='left', va='bottom', color = 'black')
    else:
        ax.text(rect.get_x() - .05, height, label,
                ha='right', va='bottom', color = 'black')

avgSF.suptitle("3-model average predicted SF DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)  # player names are drawn as text, so the y ticks are hidden
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

avgSF.savefig('avg-sf.png', dpi = 400, bbox_inches = 'tight')

# Create machine learning model for centers

In [None]:
# Hold out 10% of the centers as a test split; the fixed random_state keeps the
# split the same on every run.
train, test = train_test_split(dfCenters, test_size = 0.1, random_state = 99)

# Model inputs (measurements and college stats) and the single target column.
centerFeatures = ['Wingspan (in)', 'Height (in)', 'STL-Col', 'BLK-Col', 'PF-Col',
                  'STL%-Col', 'BLK%-Col', 'SOS-Col', 'DWS/40-Col', 'DBPM-Col']
centerTarget = ['DBPM-NBA']

# The target is selected with a list, so ytrain / ytest are one-column DataFrames.
xtrain, ytrain = train[centerFeatures], train[centerTarget]
xtest, ytest = test[centerFeatures], test[centerTarget]

In [None]:
# Ordinary least squares without an intercept term, fit on the centers training split.
linReg = linear_model.LinearRegression(fit_intercept = False)
linReg.fit(xtrain, ytrain)

# Score the fitted model on the held-out rows.
y_predLin = linReg.predict(xtest)

print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLin))
print('Variance score: %.3f' % r2_score(ytest, y_predLin))

# Cross-validate on the training split. Running it on xtest / ytest would refit
# the model on halves of the 10% hold-out and reuse the rows already scored above.
# cross_val_score works on clones, so the fitted linReg is left untouched.
cvScoreLin = cross_val_score(linReg, xtrain, ytrain, cv = 2, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLin.mean(), cvScoreLin.std() * 2))

In [None]:
# Ridge regression (alpha = .25, no intercept term), fit on the centers training split.
ridgeReg = linear_model.Ridge(alpha = .25, fit_intercept = False)
ridgeReg.fit(xtrain, ytrain)

# Score the fitted model on the held-out rows.
y_predRidge = ridgeReg.predict(xtest)

print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predRidge))
print('Variance score: %.3f' % r2_score(ytest, y_predRidge))

# Cross-validate on the training split. Running it on xtest / ytest would refit
# the model on halves of the 10% hold-out and reuse the rows already scored above.
# cross_val_score works on clones, so the fitted ridgeReg is left untouched.
cvScoreRidge = cross_val_score(ridgeReg, xtrain, ytrain, cv = 2, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreRidge.mean(), cvScoreRidge.std() * 2))

In [None]:
# Lasso regression (alpha = .001, no intercept term), fit on the centers training
# split; max_iter is raised to 100000 iterations.
lassoReg = linear_model.Lasso(alpha = .001, max_iter = 100000, fit_intercept = False)
lassoReg.fit(xtrain, ytrain)

# Score the fitted model on the held-out rows.
y_predLasso = lassoReg.predict(xtest)

print("Mean squared error: %.3f" % mean_squared_error(ytest, y_predLasso))
print('Variance score: %.3f' % r2_score(ytest, y_predLasso))

# Cross-validate on the training split. Running it on xtest / ytest would refit
# the model on halves of the 10% hold-out and reuse the rows already scored above.
# cross_val_score works on clones, so the fitted lassoReg is left untouched.
cvScoreLasso = cross_val_score(lassoReg, xtrain, ytrain, cv = 2, scoring = 'explained_variance')
print("Accuracy (cross validation score): %0.2f (+/- %0.2f)" % (cvScoreLasso.mean(), cvScoreLasso.std() * 2))

In [None]:
# Bar chart comparing the test-split mean squared error of the three centers models.

plt.style.use('fivethirtyeight')
mseCenters, ax = plt.subplots()

# Same order as the bar names below: linear, ridge, lasso.
mseScores = [mean_squared_error(ytest, pred) for pred in (y_predLin, y_predRidge, y_predLasso)]
x_pos = np.arange(len(mseScores))

ax.bar(x_pos, mseScores, edgecolor = 'white', linewidth = 3)

mseNames = ["Linear regression", "Ridge regression", "Lasso regression"]
labels = list(mseNames)

# Write each model name vertically in white, near the bar's horizontal centre
# and anchored at y = .025.
height = .025
rects = ax.patches
for rect, label in zip(rects, labels):
    text_x = rect.get_x() + rect.get_width() / 1.8
    ax.text(text_x, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

mseCenters.suptitle("Mean squared error (MSE) of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)  # the model names are drawn as text instead of x ticks
ax.set_ylabel("MSE (lower is better)")

mseCenters.savefig('mse-centers.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Bar chart comparing the test-split R^2 ("variance score") of the three centers models.

r2centers, ax = plt.subplots()

# Same order as the bar names below: linear, ridge, lasso.
r2scores = [r2_score(ytest, pred) for pred in (y_predLin, y_predRidge, y_predLasso)]
x_pos = np.arange(len(r2scores))

ax.bar(x_pos, r2scores, edgecolor = 'white', linewidth = 3)

r2names = ["Linear regression", "Ridge regression", "Lasso regression"]
labels = list(r2names)

# Write each model name vertically in white, near the bar's horizontal centre
# and anchored at y = .025.
height = .025
rects = ax.patches
for rect, label in zip(rects, labels):
    text_x = rect.get_x() + rect.get_width() / 1.8
    ax.text(text_x, height, label,
            ha='center', va='bottom', rotation = 'vertical', color = 'white', size = 16)

r2centers.suptitle("Variance score of regressions", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.xaxis.set_visible(False)  # the model names are drawn as text instead of x ticks
ax.set_ylabel(r"R$^{\rm 2}$ (higher is better)")

r2centers.savefig('r2-centers.png', dpi = 400, bbox_inches = 'tight')

# Predict defense for centers using the models 

In [None]:
# Linear regression predictions for the draft-class centers, printed one per line
# next to the matching entry of centersNames.
linear_centers = linReg.predict(centersTest)

for prediction, player in zip(linear_centers, centersNames):
    print(prediction, player)

In [None]:
# Ridge regression predictions for the draft-class centers, printed one per line
# next to the matching entry of centersNames.
ridge_centers = ridgeReg.predict(centersTest)

for prediction, player in zip(ridge_centers, centersNames):
    print(prediction, player)

In [None]:
# Lasso regression predictions for the draft-class centers, printed one per line
# next to the matching entry of centersNames.
lasso_centers = lassoReg.predict(centersTest)

for prediction, player in zip(lasso_centers, centersNames):
    print(prediction, player)

In [None]:
# Plot the linear regression predictions for the draft-class centers as a
# horizontal bar chart, sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
linRegCenters, ax = plt.subplots()

# linReg was fit on a one-column DataFrame target, so predict() returns one
# one-element row per player. Flatten instead of calling float() on each row,
# which is deprecated for one-element arrays in recent NumPy releases.
linearPlot = np.asarray(linear_centers, dtype = float).ravel().tolist()

combinedLinear = [[name, pred] for name, pred in zip(centersNames, linearPlot)]

sortedLinear = sorted(combinedLinear, key = itemgetter(1))
print(sortedLinear)

sortedLinearData = [row[1] for row in sortedLinear]
y_pos = np.arange(len(sortedLinearData))

ax.barh(y_pos, sortedLinearData, color = 'lightgreen', edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedLinear]

# Write each name just to the right of the bar's anchor, nudged up by .15 from
# the bar's lower edge.
rects = ax.patches
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + .025, rect.get_y() + .15, label,
            ha='left', va='bottom', color = 'black', size = 13.75)


linRegCenters.suptitle("Linear regression predicted centers DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)  # player names are drawn as text, so the y ticks are hidden
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

linRegCenters.savefig('lin-reg-centers.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Plot the ridge regression predictions for the draft-class centers as a
# horizontal bar chart, sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
ridgeRegCenters, ax = plt.subplots()

# ridgeReg was fit on a one-column DataFrame target, so predict() returns one
# one-element row per player. Flatten instead of calling float() on each row,
# which is deprecated for one-element arrays in recent NumPy releases.
ridgePlot = np.asarray(ridge_centers, dtype = float).ravel().tolist()

combinedRidge = [[name, pred] for name, pred in zip(centersNames, ridgePlot)]

sortedRidge = sorted(combinedRidge, key = itemgetter(1))
print(sortedRidge)

sortedRidgeData = [row[1] for row in sortedRidge]
y_pos = np.arange(len(sortedRidgeData))

ax.barh(y_pos, sortedRidgeData, color = 'lightgreen', edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedRidge]

# Write each name just to the right of the bar's anchor, nudged up by .15 from
# the bar's lower edge.
rects = ax.patches
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + .05, rect.get_y() + .15, label,
            ha='left', va='bottom', color = 'black')

ridgeRegCenters.suptitle("Ridge regression predicted centers DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)  # player names are drawn as text, so the y ticks are hidden
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

ridgeRegCenters.savefig('ridge-reg-centers.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Plot the lasso regression predictions for the draft-class centers as a
# horizontal bar chart, sorted from lowest to highest predicted DBPM.

plt.style.use('fivethirtyeight')
lassoRegCenters, ax = plt.subplots()

# Plain Python floats, one per predicted player.
lassoPlot = [float(pred) for pred in lasso_centers]

# Pair every name with its prediction, then order the pairs by prediction.
combinedLasso = [[name, pred] for name, pred in zip(centersNames, lassoPlot)]
sortedLasso = sorted(combinedLasso, key = itemgetter(1))
print(sortedLasso)

sortedLassoData = [pair[1] for pair in sortedLasso]
y_pos = np.arange(len(sortedLassoData))

ax.barh(y_pos, sortedLassoData, color = 'lightgreen', edgecolor = 'white', linewidth = 3)

labels = [pair[0] for pair in sortedLasso]

# Write each name just to the right of the bar's anchor, nudged up by .15 from
# the bar's lower edge.
rects = ax.patches
for rect, label in zip(rects, labels):
    text_y = rect.get_y() + .15
    ax.text(rect.get_x() + .05, text_y, label,
            ha='left', va='bottom', color = 'black')

lassoRegCenters.suptitle("Lasso regression predicted centers DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)  # player names are drawn as text, so the y ticks are hidden
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

lassoRegCenters.savefig('lasso-reg-centers.png', dpi = 400, bbox_inches = 'tight')

In [None]:
# Average the linear, ridge and lasso predictions for each draft-class center and
# plot the result as a horizontal bar chart, sorted from lowest to highest average DBPM.

avgCenters, ax = plt.subplots()

# Flatten each prediction array before adding them. Mixing (n, 1) and (n,) arrays
# would otherwise broadcast, and float() on a one-element array is deprecated in
# recent NumPy releases. The sum keeps the original order: (linear + ridge + lasso) / 3.
linearFlat = np.asarray(linear_centers, dtype = float).ravel()
ridgeFlat = np.asarray(ridge_centers, dtype = float).ravel()
lassoFlat = np.asarray(lasso_centers, dtype = float).ravel()
averagePred = ((linearFlat + ridgeFlat + lassoFlat) / 3).tolist()

combinedAvg = [[name, pred] for name, pred in zip(centersNames, averagePred)]

sortedAvg = sorted(combinedAvg, key = itemgetter(1))
print(sortedAvg)

sortedAvgData = [row[1] for row in sortedAvg]
y_pos = np.arange(len(sortedAvgData))

ax.barh(y_pos, sortedAvgData, color = 'lightgreen', edgecolor = 'white', linewidth = 3)

labels = [row[0] for row in sortedAvg]

# Write each name just to the right of the bar's anchor, nudged up by .15 from
# the bar's lower edge.
rects = ax.patches
for rect, label in zip(rects, labels):
    ax.text(rect.get_x() + .05, rect.get_y() + .15, label,
            ha='left', va='bottom', color = 'black')

avgCenters.suptitle("3-model average predicted centers DBPM", weight = 'bold', size = 18, y = 1.005)
ax.set_title("http://dribbleanalytics.blogspot.com.", size = 12, fontname = 'Rockwell')
ax.yaxis.set_visible(False)  # player names are drawn as text, so the y ticks are hidden
ax.set_xlabel("Predicted DBPM")
ax.grid(alpha = .5)

avgCenters.savefig('avg-centers.png', dpi = 400, bbox_inches = 'tight')