In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import AdaBoostClassifier
import joblib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Kept next to its first use; the rest of the imports live at the top of the
# cell.
from sklearn.model_selection import GridSearchCV

# --- Load the data ----------------------------------------------------------
# The spreadsheet is read as-is: every column except the last is used as a
# feature and the last column is used as the label. Selection is positional
# (iloc) so it does not depend on column names being unique.
df = pd.read_excel('nfl.xlsx')
x = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# --- Train / test split -----------------------------------------------------
# 80% train / 20% test; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=16
)

# --- Hyperparameter search --------------------------------------------------
# Every combination of neighbour count, vote weighting and neighbour-search
# algorithm is scored with 5-fold cross-validated accuracy on the training
# set only; the test set is not touched until the final evaluation.
hp_grid = [{
    'n_neighbors': [5, 7, 9],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}]

# Base estimator whose hyperparameters are being searched.
model = KNeighborsClassifier()

grid_search = GridSearchCV(
    model,
    hp_grid,
    cv=5,
    scoring='accuracy',
    return_train_score=False,
)
grid_search.fit(X_train, y_train)

# Per-candidate cross-validation results.
results = grid_search.cv_results_

# Print the mean CV accuracy next to the parameters that produced it.
for mean_score, params in zip(results['mean_test_score'], results['params']):
    print(mean_score, params)

# --- Final model and evaluation ---------------------------------------------
# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# refitted on the whole training set with the winning parameters; fitting it
# a second time would only repeat that work.
the_best = grid_search.best_estimator_
classifier = the_best

# Predictions on the held-out test set.
y_hat = classifier.predict(X_test)

print("y_hat:", y_hat)
print("y_test:", y_test)
# accuracy_score expects (y_true, y_pred). Accuracy itself is symmetric, but
# keeping the documented order avoids silently wrong numbers if this line is
# ever switched to an asymmetric metric such as precision or recall.
print(accuracy_score(y_test, y_hat))

# Target path for persisting the fitted model; saving is currently disabled.
filename = 'KNN_model.sav'
#joblib.dump(classifier, filename)

0.6103896103896103 {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'uniform'}
0.5926406926406926 {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
0.6294372294372295 {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'uniform'}
0.6116883116883116 {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
0.6116883116883116 {'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'uniform'}
0.6116883116883116 {'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'distance'}
0.6103896103896103 {'algorithm': 'ball_tree', 'n_neighbors': 5, 'weights': 'uniform'}
0.5926406926406926 {'algorithm': 'ball_tree', 'n_neighbors': 5, 'weights': 'distance'}
0.6294372294372295 {'algorithm': 'ball_tree', 'n_neighbors': 7, 'weights': 'uniform'}
0.6116883116883116 {'algorithm': 'ball_tree', 'n_neighbors': 7, 'weights': 'distance'}
0.6116883116883116 {'algorithm': 'ball_tree', 'n_neighbors': 9, 'weights': 'uniform'}
0.6116883116883116 {'algorithm': 'ball_tree', 'n_neighbors': 9, 'weights': 'dis