In [None]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import roc_curve, accuracy_score, auc, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split


In [73]:
def _load_model(path):
    """Unpickle and return the object stored in the file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of being left open until garbage
    collection.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising. Only load model files that were produced by this project.
    """
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# Previously fitted models, one per classifier family. The cell below reads
# ``best_params_`` from each, so these are presumably fitted hyper-parameter
# search objects (e.g. GridSearchCV) -- confirm against the training notebooks.
knn = _load_model("models/knn-high-res.p")
rf = _load_model("models/rf-grid-search.p")
svm = _load_model("models/svm.p")
nn = _load_model("models/nn.p")

In [98]:
# Show the winning hyper-parameter combination of each loaded model,
# in the fixed order knn -> rf -> svm -> nn.
for fitted_search in (knn, rf, svm, nn):
    print(fitted_search.best_params_)

{'n_neighbors': 29}
{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 2000}
{'C': 1, 'kernel': 'linear'}
{'alpha': 0.1, 'hidden_layer_sizes': (50,), 'max_iter': 100}


In [87]:
# Load the processed data set; the 'class' column is the target and every
# other column is used as a feature.
data = pd.read_csv('data/processed/mushrooms_pca.csv')
y = data['class']
X = data.drop('class', axis=1)

# Hold out 10% of the rows for evaluation. The fixed random_state makes the
# split reproducible across kernel restarts.
# `y.values` hands scikit-learn the underlying 1-D array; `Series.ravel()` did
# the same but is deprecated in recent pandas releases and emits a
# FutureWarning there.
X_train, X_test, y_train, y_test = train_test_split(
    X, y.values, test_size=.1, random_state=42
)


In [89]:
# Ground-truth labels of the held-out split, under the name used by the
# metric cells.
y_true = y_test

# Hard class-label predictions of every loaded model on the held-out split,
# keyed by a short model name. Insertion order (knn, rf, svm, nn) is the order
# in which later cells iterate over the results.
# The former `_, ... = y_test, ...` tuple assignments were dropped: they did
# no useful work and overwrote IPython's `_` (last output) variable.
y_preds = {
    'knn': knn.predict(X_test),
    'rf': rf.predict(X_test),
    'svm': svm.predict(X_test),
    'nn': nn.predict(X_test),
}

In [100]:
def _positive_class_scores(model, X, pos_label=1):
    """Return one continuous score per row of ``X`` for class ``pos_label``.

    A ROC curve needs a ranking score, not hard class labels: with 0/1
    predictions the curve degenerates to a single operating point joined to
    the corners by straight lines, and its area is not a ranking AUC.

    Resolution order:
      1. ``predict_proba``  -> probability column belonging to ``pos_label``
      2. ``decision_function`` -> signed margin (binary problems), negated if
         ``pos_label`` is not the class the positive margin refers to
      3. ``predict`` -> hard labels, only as a last resort

    Assumes a binary problem and that ``model.classes_`` lists the labels in
    the column order of ``predict_proba`` (scikit-learn convention).
    """
    classes = list(getattr(model, "classes_", (0, 1)))
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:, classes.index(pos_label)]
    if hasattr(model, "decision_function"):
        margins = model.decision_function(X)
        # A positive margin votes for classes[-1] in the binary case.
        return margins if classes[-1] == pos_label else -margins
    return model.predict(X)


# Name -> fitted model, mirroring the keys of `y_preds`, so the ROC curve of
# each classifier can be built from its continuous scores.
models = {'knn': knn, 'rf': rf, 'svm': svm, 'nn': nn}

reports = {}   # classifier name -> text classification report
tprs = {}      # classifier name -> true-positive rates of the ROC curve
fprs = {}      # classifier name -> false-positive rates of the ROC curve
roc_aucs = {}  # classifier name -> area under that ROC curve
for classifier, y_pred in y_preds.items():
    reports[classifier] = classification_report(y_true, y_pred)

    # Fall back to the hard predictions only when no model is registered
    # under this name.
    model = models.get(classifier)
    y_score = y_pred if model is None else _positive_class_scores(model, X_test)

    fprs[classifier], tprs[classifier], _thresholds = roc_curve(
        y_true, y_score, pos_label=1
    )
    roc_aucs[classifier] = auc(fprs[classifier], tprs[classifier])

    print(reports[classifier])
    print(accuracy_score(y_true, y_pred))


              precision    recall  f1-score   support

           0       0.87      0.92      0.89       433
           1       0.90      0.84      0.87       380

   micro avg       0.88      0.88      0.88       813
   macro avg       0.89      0.88      0.88       813
weighted avg       0.88      0.88      0.88       813

0.8831488314883149
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       433
           1       1.00      1.00      1.00       380

   micro avg       1.00      1.00      1.00       813
   macro avg       1.00      1.00      1.00       813
weighted avg       1.00      1.00      1.00       813

1.0
              precision    recall  f1-score   support

           0       0.88      0.88      0.88       433
           1       0.87      0.87      0.87       380

   micro avg       0.87      0.87      0.87       813
   macro avg       0.87      0.87      0.87       813
weighted avg       0.87      0.87      0.87       8

In [None]:
## ROC curve comparison of the tuned classifiers

In [108]:
# Overlay one ROC curve per classifier (with its AUC in the legend) on top of
# the diagonal that represents a random classifier, then save and show it.

# Repo-relative output location, matching the relative `models/` and
# `data/processed/` paths used when loading. The previous absolute path into
# one user's Desktop made this cell fail on every other machine.
FIGURE_PATH = Path("reports/figures/model_comparison.png")

plt.figure()

lw = 2
# Iterate over the stored curves directly; the predictions are not needed here.
for classifier in fprs:
    plt.plot(fprs[classifier], tprs[classifier],
             lw=lw, label='%s (area = %0.2f)' % (classifier, roc_aucs[classifier]))
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')  # chance level
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # small head-room so curves touching TPR = 1 stay visible
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")

# Create the target directory on a fresh checkout; save before show(), because
# show() may leave an empty current figure behind.
FIGURE_PATH.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(str(FIGURE_PATH))
plt.show()