In [None]:
# Import necessary packages

%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from sklearn.neural_network import MLPClassifier
import sklearn.metrics as metrics
from sklearn.model_selection import KFold
from operator import itemgetter
from sklearn.model_selection import cross_val_score

In [None]:
# Load both CSV inputs from the working directory: the historical data used to
# train/evaluate the models and the current data the models will score.

dfHistorical, dfCurrent = [
    pd.read_csv(csv_path)
    for csv_path in ('historical-all-nba.csv', 'current-all-nba.csv')
]

In [None]:
# Preview the first rows of the historical data to check that it loaded as expected.
dfHistorical.head()

# Create models

In [None]:
# Hold out 25% of the historical rows for testing; the fixed random_state keeps
# the split identical across reruns.
train, test = train_test_split(dfHistorical, test_size=0.25, random_state=36)

# Predictor columns shared by both splits, and the label column (kept as a
# one-column DataFrame; it is flattened where a 1-D array is needed).
feature_cols = ['Team Wins', 'Overall Seed', 'PTS', 'TRB', 'AST', 'VORP', 'WS', 'All-Star']
target_cols = ['All-NBA']

xtrain, ytrain = train[feature_cols], train[target_cols]
xtest, ytest = test[feature_cols], test[target_cols]

for split_label, split_frame in (("Training", xtrain), ("Testing", xtest)):
    print(split_label + " set size: %.0f" % len(split_frame))

In [None]:
# Create function that gives accuracy scores for each model

def scores(model):
    """Fit ``model`` on the training split and print its evaluation metrics.

    Uses the notebook-level ``xtrain``/``ytrain`` and ``xtest``/``ytest``
    frames. The model is fitted in place on the training split; accuracy,
    recall, precision, F1, log loss and ROC AUC are then printed for the
    held-out test split, followed by a 3-fold cross-validated accuracy
    computed on the training split.

    Parameters
    ----------
    model : estimator
        Must provide ``fit``, ``predict`` and ``predict_proba``.

    Returns
    -------
    The labels predicted for ``xtest``.
    """
    # The targets are one-column DataFrames; flatten them once to 1-D arrays.
    y_train = ytrain.values.ravel()
    y_true = ytest.values.ravel()

    model.fit(xtrain, y_train)
    y_pred = model.predict(xtest)

    print("Accuracy score: %.3f" % metrics.accuracy_score(y_true, y_pred))
    print("Recall: %.3f" % metrics.recall_score(y_true, y_pred))
    print("Precision: %.3f" % metrics.precision_score(y_true, y_pred))
    print("F1: %.3f" % metrics.f1_score(y_true, y_pred))

    proba = model.predict_proba(xtest)
    print("Log loss: %.3f" % metrics.log_loss(y_true, proba))

    # Column 1 is the probability of the second class, used as the ROC score.
    pos_prob = proba[:, 1]
    print("Area under ROC curve: %.3f" % metrics.roc_auc_score(y_true, pos_prob))

    # Cross-validate on the training split, not the test split: refitting on
    # test rows would defeat the purpose of holding them out, and the test
    # split is only a quarter of the data. cross_val_score works on clones of
    # the estimator, so the model fitted above is left untouched.
    cv = cross_val_score(model, xtrain, y_train, cv=3, scoring='accuracy')
    print("Accuracy (cross validation score): %0.3f (+/- %0.3f)" % (cv.mean(), cv.std() * 2))

    return y_pred

In [None]:
# RBF-kernel support vector classifier. probability=True is needed because
# scores() and make_pred() call predict_proba. The probability calibration
# shuffles the data internally, so random_state is fixed (same seed as the
# other seeded models) to make the reported probabilities reproducible.
svc = SVC(kernel='rbf', gamma=1e-3, C=100, probability=True, random_state=999)

y_svc = scores(svc)

In [None]:
# Random forest of 100 trees using the Gini split criterion; seeded so reruns
# build the same forest.
rf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    random_state=999,
)

y_rf = scores(rf)

In [None]:
# k-nearest-neighbours classifier: 12 neighbours, each with an equal vote.
knn = neighbors.KNeighborsClassifier(
    n_neighbors=12,
    weights='uniform',
)

y_knn = scores(knn)

In [None]:
# Multi-layer perceptron with 100 hidden units and ReLU activation, trained
# with the L-BFGS solver; seeded so the weight initialisation is repeatable.
dnn = MLPClassifier(
    hidden_layer_sizes=100,
    activation='relu',
    solver='lbfgs',
    random_state=999,
)

y_dnn = scores(dnn)

# Prediction

In [None]:
# Inputs for prediction: the same predictor columns the models were trained on,
# plus the first column of the current data, which labels each printed row.
predict_cols = ['Team Wins', 'Overall Seed', 'PTS', 'TRB', 'AST', 'VORP', 'WS', 'All-Star']
dfCurrentPredict = dfCurrent[predict_cols]
dfCurrentNames = dfCurrent.iloc[:, 0]

dfCurrent.head()

In [None]:
def make_pred(model):
    """Print the current data ranked by ``model``'s predicted probability.

    Calls ``model.predict_proba`` on the notebook-level ``dfCurrentPredict``,
    takes column 1 of the result, pairs each value with the matching entry of
    ``dfCurrentNames`` and prints the ``[name, probability]`` pairs from
    highest to lowest probability.

    Parameters
    ----------
    model : fitted estimator providing ``predict_proba``.

    Returns
    -------
    The column-1 probabilities, in the original (unsorted) row order.
    """
    positive_probs = model.predict_proba(dfCurrentPredict)[:, 1]

    ranking = [[name, prob] for name, prob in zip(dfCurrentNames, positive_probs)]
    # Stable in-place sort, highest probability first.
    ranking.sort(key=lambda row: row[1], reverse=True)

    for row in ranking:
        print(row)

    return positive_probs

In [None]:
# Print the SVC's ranked probabilities and keep the unsorted values for the ensemble average.
svc_prob = make_pred(svc)

In [None]:
# Print the random forest's ranked probabilities and keep the unsorted values for the ensemble average.
rf_prob = make_pred(rf)

In [None]:
# Print the KNN model's ranked probabilities and keep the unsorted values for the ensemble average.
knn_prob = make_pred(knn)

In [None]:
# Print the MLP's ranked probabilities and keep the unsorted values for the ensemble average.
dnn_prob = make_pred(dnn)

In [None]:
# Ensemble estimate: the unweighted mean of the four models' probabilities for
# each row, computed row by row.
avg_prob = [
    (p_svc + p_rf + p_knn + p_dnn) / 4
    for p_svc, p_rf, p_knn, p_dnn in zip(svc_prob, rf_prob, knn_prob, dnn_prob)
]

# Pair every name with its averaged probability and rank from highest to lowest.
avg_list = [[name, prob] for name, prob in zip(dfCurrentNames, avg_prob)]
avg_list.sort(key=lambda row: row[1], reverse=True)

for row in avg_list:
    print(row)