In [21]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import AdaBoostClassifier
import joblib
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

#read in the data
df = pd.read_excel('nfl.xlsx')
x = df[df.columns[:-1]].to_numpy()
y = df[df.columns[-1]].to_numpy()

#split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=10)

from sklearn.model_selection import GridSearchCV

# hyperparameter grid
hp_grid = [{'criterion':["gini", "entropy"], 'min_samples_split':[2, 3, 4, 5], 'max_features':['auto', 'sqrt', 'log2']}]

# create the model
model = DecisionTreeClassifier()

# create the grid object
grid_search = GridSearchCV(model, hp_grid, cv=5, scoring='accuracy', return_train_score=False)


# grid search
grid_search.fit(X_train, y_train)
results = grid_search.cv_results_

#print results
for mean_score, params in zip(results['mean_test_score'], results['params']):
       print(mean_score, params)

#store the best model into the variable the_best
the_best = grid_search.best_estimator_
classifier = the_best.fit(X_train, y_train)

#store predictions in y_hat
y_hat = classifier.predict(X_test)

print("y_hat:", y_hat)
print("y_test:", y_test)
print(accuracy_score(y_hat, y_test))

filename = 'Decison_Tree.sav'
#joblib.dump(classifier, filename)

0.5852813852813853 {'criterion': 'gini', 'max_features': 'auto', 'min_samples_split': 2}
0.6181818181818182 {'criterion': 'gini', 'max_features': 'auto', 'min_samples_split': 3}
0.603030303030303 {'criterion': 'gini', 'max_features': 'auto', 'min_samples_split': 4}
0.5458874458874459 {'criterion': 'gini', 'max_features': 'auto', 'min_samples_split': 5}
0.5567099567099567 {'criterion': 'gini', 'max_features': 'sqrt', 'min_samples_split': 2}
0.5367965367965367 {'criterion': 'gini', 'max_features': 'sqrt', 'min_samples_split': 3}
0.5848484848484847 {'criterion': 'gini', 'max_features': 'sqrt', 'min_samples_split': 4}
0.5835497835497835 {'criterion': 'gini', 'max_features': 'sqrt', 'min_samples_split': 5}
0.6307359307359308 {'criterion': 'gini', 'max_features': 'log2', 'min_samples_split': 2}
0.5740259740259741 {'criterion': 'gini', 'max_features': 'log2', 'min_samples_split': 3}
0.5554112554112554 {'criterion': 'gini', 'max_features': 'log2', 'min_samples_split': 4}
0.6034632034632035 {'c

['Decison_Tree.sav']