# CPSC 322
## Final Project
### Classifier Tests and Evaluation

In [36]:
# some useful mysklearn package import statements and reloads
import importlib

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

# MyPyTable: requires mypytable.py to be present in the mysklearn package
import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

# classifiers: requires myclassifiers.py to be present in the mysklearn package
import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDummyClassifier, MyNaiveBayesClassifier, MyDecisionTreeClassifier, MyRandomForestClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation


### Creating dataset and classifiers

In [37]:
# Creating data and classifiers 

my_dataset = MyPyTable().load_from_file("input_data/NFL_regseason_data_clean.csv")

X_data = []
for row in my_dataset.data: # Creating X_data
    new_row = []
    new_row.append(row[my_dataset.column_names.index('WinPercentage')])
    new_row.append(row[my_dataset.column_names.index('RushYards')])
    new_row.append(row[my_dataset.column_names.index('PassYards')])
    new_row.append(row[my_dataset.column_names.index('Scoring')])
    new_row.append(row[my_dataset.column_names.index('RushYardsAllowed')])
    new_row.append(row[my_dataset.column_names.index('PassYardsAllowed')])
    new_row.append(row[my_dataset.column_names.index('DefenseScoringAllowed')])
    new_row.append(row[my_dataset.column_names.index('KickingPercentage')])
    new_row.append(row[my_dataset.column_names.index('TurnoverMargin')])

    X_data.append(new_row)

y_data = my_dataset.get_column('Winner') #Creating y_data

NFL_Bayes_Classifier = MyNaiveBayesClassifier()
NFL_Knn_Classifier = MyKNeighborsClassifier()
NFL_Tree_Classifier = MyDecisionTreeClassifier()
NFL_Forest_Classifier = MyRandomForestClassifier()

### Using K-fold Cross Validation on Classifiers

In [38]:
X_folds = myevaluation.kfold_split(X_data, 10)
y_folds = myevaluation.kfold_split(y_data, 10)
total_knn_accuracy, total_bayes_accuracy, total_tree_accuracy, total_forest_accuracy = 0, 0, 0, 0
knn_precision, bayes_precision, tree_precision, forest_precision = 0, 0, 0,0
knn_recall, bayes_recall, tree_recall, forest_recall = 0, 0, 0,0
knn_F1, bayes_F1, tree_F1, forest_F1 = 0, 0, 0,0
knn_predictions, bayes_predictions, tree_predictions, forest_predictions = [], [], [], []


for i in range(len(X_folds)): # loop for kfold cross validation (through all three classifiers)
    X_train, X_test, y_train, y_test = myutils.create_data(X_folds[i], y_folds[i], X_data, y_data)
    NFL_Bayes_Classifier.fit(X_train, y_train)
    NFL_Knn_Classifier.fit(X_train, y_train, type="discrete")
    NFL_Tree_Classifier.fit(X_train, y_train)
    NFL_Forest_Classifier.fit(X_train, y_train, 50, 5, 40)

    y_knn_pred = NFL_Knn_Classifier.predict(X_test)
    y_bayes_pred = NFL_Bayes_Classifier.predict(X_test)
    y_tree_pred = NFL_Tree_Classifier.predict(X_test)
    y_forest_pred = NFL_Forest_Classifier.predict(X_test)

    # Accuracy Calculations
    total_knn_accuracy += myevaluation.accuracy_score(y_knn_pred, y_test) / 10
    total_bayes_accuracy += myevaluation.accuracy_score(y_bayes_pred, y_test) / 10
    total_tree_accuracy += myevaluation.accuracy_score(y_tree_pred, y_test) / 10
    total_forest_accuracy += myevaluation.accuracy_score(y_forest_pred, y_test) / 10

    # Precision Calculations
    knn_precision += myevaluation.binary_precision_score(y_knn_pred, y_test, pos_label="H") / 10
    bayes_precision += myevaluation.binary_precision_score(y_bayes_pred, y_test, pos_label="H") / 10
    tree_precision += myevaluation.binary_precision_score(y_tree_pred, y_test, pos_label="H") / 10
    forest_precision += myevaluation.binary_precision_score(y_forest_pred, y_test, pos_label="H") / 10

    # Recall Calculations 
    knn_recall += myevaluation.binary_recall_score(y_knn_pred, y_test, pos_label="H") / 10
    bayes_recall += myevaluation.binary_recall_score(y_bayes_pred, y_test, pos_label="H") / 10
    tree_recall += myevaluation.binary_recall_score(y_tree_pred, y_test, pos_label="H") / 10
    forest_recall += myevaluation.binary_recall_score(y_forest_pred, y_test, pos_label="H") / 10

    # F1 Calculations
    knn_F1 += myevaluation.binary_f1_score(y_knn_pred, y_test, pos_label="H") / 10
    bayes_F1 += myevaluation.binary_f1_score(y_bayes_pred, y_test, pos_label="H") / 10
    tree_F1 += myevaluation.binary_f1_score(y_tree_pred, y_test, pos_label="H") / 10
    forest_F1 += myevaluation.binary_f1_score(y_forest_pred, y_test, pos_label="H") / 10
    # Building Confusion Matrices
    for prediction in y_knn_pred:
        knn_predictions.append(prediction)
    for prediction in y_bayes_pred:
        bayes_predictions.append(prediction)
    for prediction in y_tree_pred:
        tree_predictions.append(prediction)
    for prediction in y_forest_pred:
        forest_predictions.append(prediction)


### Showing Classifier Performance

In [None]:
print("------- Knn Classifier -------")
print("Accuracy: ", round(total_knn_accuracy, 2), "| Error Rate: ", round(1 - total_knn_accuracy, 2))
print("Precision : ", round(knn_precision, 2), "| Recall: ", round(knn_recall, 2), "| F1 measure: ", round(knn_F1, 2))
print("Knn Confusion Matrix: ")
myutils.print_matrix(myevaluation.confusion_matrix(y_data, knn_predictions, ["H", "A"]), ["H", "A"])
print()
print("------- Bayes Classifier -------")
print("Accuracy: ", round(total_bayes_accuracy, 2), "| Error Rate: ", round(1 - total_bayes_accuracy, 2))
print("Precision : ", round(bayes_precision, 2), "| Recall: ", round(bayes_recall, 2), "| F1 measure: ", round(bayes_F1, 2))
print("Bayes Confusion Matrix: ")
myutils.print_matrix(myevaluation.confusion_matrix(y_data, bayes_predictions, ["H", "A"]), ["H", "A"])
print()
print("------- Decision Tree Classifier -------")
print("Accuracy: ", round(total_tree_accuracy, 2), "| Error Rate: ", round(1 - total_tree_accuracy, 2))
print("Precision : ", round(tree_precision, 2), "| Recall: ", round(tree_recall, 2), "| F1 measure: ", round(tree_F1, 2))
print("Tree Confusion Matrix: ")
myutils.print_matrix(myevaluation.confusion_matrix(y_data, tree_predictions, ["H", "A"]), ["H", "A"])
print()
print(total_forest_accuracy)
print("------- Random Forest Classifier -------")
print("Accuracy: ", round(total_forest_accuracy, 2), "| Error Rate: ", round(1 - total_forest_accuracy, 2))
print("Precision : ", round(forest_precision, 2), "| Recall: ", round(forest_recall, 2), "| F1 measure: ", round(tree_F1, 2))
print("Tree Confusion Matrix: ")
myutils.print_matrix(myevaluation.confusion_matrix(y_data, forest_predictions, ["H", "A"]), ["H", "A"])


------- Knn Classifier -------
Accuracy:  0.54 | Error Rate:  0.46
Precision :  0.84 | Recall:  0.55 | F1 measure:  0.64
Knn Confusion Matrix: 
+-------+------+-----+-------+
|       | H    | A   | total |
+-------+------+-----+-------+
| H     | 806  | 143 | 949   |
+-------+------+-----+-------+
| A     | 664  | 149 | 813   |
+-------+------+-----+-------+
| total | 1470 | 292 | 1762  |
+-------+------+-----+-------+

------- Bayes Classifier -------
Accuracy:  0.6 | Error Rate:  0.4
Precision :  0.69 | Recall:  0.61 | F1 measure:  0.65
Bayes Confusion Matrix: 
+-------+------+-----+-------+
|       | H    | A   | total |
+-------+------+-----+-------+
| H     | 653  | 296 | 949   |
+-------+------+-----+-------+
| A     | 414  | 399 | 813   |
+-------+------+-----+-------+
| total | 1067 | 695 | 1762  |
+-------+------+-----+-------+

------- Decision Tree Classifier -------
Accuracy:  0.67 | Error Rate:  0.33
Precision :  0.67 | Recall:  0.7 | F1 measure:  0.68
Tree Confusion Matri

### Decision Tree Ruleset

In [40]:
NFL_Tree_Classifier.fit(X_train, y_train)
#NFL_Tree_Classifier.print_decision_rules(class_name="Winner")
NFL_Forest_Classifier.fit(X=X_train, y=y_train)
print(NFL_Forest_Classifier.forest)

#print(NFL_Tree_Classifier.tree)

[<mysklearn.myclassifiers.MyDecisionTreeClassifier object at 0x7f0b0896ede0>]
