# 01: Classification Part 3

### Load variables

In [None]:
import pickle
import pandas as pd
def transform_series(x):
    """Return ``x`` unchanged unless it is a pandas Series.

    A Series is wrapped in a one-column DataFrame and that column is
    selected back out positionally, so the result is again a Series.
    Every other object (e.g. a DataFrame or the classifier) passes through
    untouched.
    """
    if isinstance(x, pd.Series):
        return pd.DataFrame(x).iloc[:, 0]
    return x


# NOTE(security): unpickling executes code embedded in the file. Only load
# "classification.pickle" if it was produced by a trusted source.
with open("classification.pickle", "rb") as f:
    # The first seven pickled items are the data splits followed by the
    # classifier; anything stored after them is ignored.
    X_train, y_train, X_val, y_val, X_test, y_test, clf = map(
        transform_series, pickle.load(f)[0:7]
    )

## Plot metrics

### Confusion Matrix

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

# function to plot a confusion matrix based on inputs y_true and y_pred
def plot_confusion_matrix(y_true, y_pred, normalize=False, class_names=('No', 'Yes')):
    """Plot the confusion matrix of ``y_pred`` against ``y_true`` as a heat map.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels; passed together with ``y_true`` to
        ``sklearn.metrics.confusion_matrix``.
    normalize : bool, default False
        If true, every cell is divided by ``len(y_true)`` and rounded to
        three decimals, so cells show the fraction of all samples instead
        of raw counts.
    class_names : sequence of str, default ('No', 'Yes')
        Tick labels for the classes. They are used only when their number
        matches the size of the computed matrix; otherwise the class index
        is shown, so a non-binary matrix is still drawn completely.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    cm = confusion_matrix(y_true, y_pred)

    if normalize:
        cm = cm.astype('float') / len(y_true)
        cm = np.round(cm, decimals=3)

    fig, ax = plt.subplots()
    ax.imshow(cm, cmap=plt.cm.Blues)

    # Size everything from the matrix itself instead of assuming 2x2: with a
    # single class present the matrix is 1x1 and fixed indices would fail.
    n_classes = cm.shape[0]
    ticks = np.arange(n_classes)
    if len(class_names) == n_classes:
        tick_labels = list(class_names)
    else:
        tick_labels = [str(tick) for tick in ticks]

    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(tick_labels)
    ax.set_yticklabels(tick_labels)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

    # Write each cell's value at its centre; x is the column (prediction),
    # y is the row (true label).
    for i, j in np.ndindex(cm.shape):
        ax.text(j, i, cm[i, j], ha="center", va="center", color="black")

    ax.set_title('Confusion Matrix')
    fig.tight_layout()
    plt.show()

In [None]:
# Predict labels for the test set with the loaded classifier.
y_pred = clf.predict(X_test)
# Draw the test-set confusion matrix, normalized by the number of samples.
plot_confusion_matrix(y_true=y_test, y_pred=y_pred, normalize=True)

In [None]:
# Predict labels for the training set with the loaded classifier.
y_pred_train = clf.predict(X_train)
# Draw the training-set confusion matrix, normalized by the number of samples.
plot_confusion_matrix(y_true=y_train, y_pred=y_pred_train, normalize=True)

### ROC Curves

In [None]:
from sklearn.metrics import cohen_kappa_score, roc_curve, roc_auc_score, accuracy_score, classification_report, make_scorer

# Function to plot a ROC curve from false/true positive rates.
def plot_roc_cur(fper, tper):
    """Draw a ROC curve on the current axes and display it.

    ``fper`` holds the false positive rates (x values) and ``tper`` the
    matching true positive rates (y values). A dashed diagonal from (0, 0)
    to (1, 1) is drawn as a reference line.
    """
    axes = plt.gca()
    axes.plot(fper, tper, color='orange', label='ROC')
    # Reference diagonal; it carries no label, so it stays out of the legend.
    axes.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    axes.set(
        xlabel='False Positive Rate',
        ylabel='True Positive Rate',
        title='Receiver Operating Characteristic (ROC) Curve',
    )
    axes.legend()
    plt.show()

# ROC curve for the test set.
# Column 1 of predict_proba is used as the score handed to roc_curve.
class_probabilities = clf.predict_proba(X_test)
probs = class_probabilities[:, 1]
roc_points = roc_curve(y_test, probs)
fper, tper, thresholds = roc_points
plot_roc_cur(fper, tper)

### Precision-Recall Curves

In [None]:
from sklearn.metrics import precision_recall_curve, roc_curve
import matplotlib.pyplot as plt

# Score the test set with the loaded classifier; column 1 of predict_proba
# is used as the score.
test_probabilities = clf.predict_proba(X_test)
y_score = test_probabilities[:, 1]

# Precision and recall at each threshold returned by scikit-learn.
pr_points = precision_recall_curve(y_test, y_score)
precision, recall, thresholds = pr_points

# Draw the curve with recall on the x-axis and precision on the y-axis.
fig, ax = plt.subplots()
ax.plot(recall, precision, color='purple')

# Title and axis labels in a single call.
ax.set(
    title='Precision-Recall Curve',
    ylabel='Precision',
    xlabel='Recall',
)

# Display the plot.
plt.show()