# Classification Accuracy Measures

In [None]:
# Use minimal packages to expose and explain accuracy measures
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Use a simple and small data set to keep calculations understandable
# data to work with:
#    T = target labels (0 = negative, 1 = positive)
#    y = probability output of the classifier, compared element by element with T
# The thresholded class predictions are derived from y in a later cell.
T = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
y = np.array([0,0.15,0.1,0.9,0.8,0.02,0.79,0.75,0.55,0.25,0.15,0.12,0.17,0.25,0.19,0.06,0.97,0.13,0.25,0.04,0.03,0.02,0.07,0.08,0.96,0.26,0.24,0.22,0.09,0.96,0.91,0.08,0.94,0.84,0.45,0.35,0.9,0.85,0.75,0.72,0.89,.75,0.4,0.91,0.42,0.44,0.89,0.86,0.83,1])

## Confusion Matrix
 The most common confusion matrix is a 2-by-2 matrix that contains the following 4 numbers:  
- True Positive (TP)
- True Negative (TN)
- False Positive (FP)
- False Negative (FN).  

<br/>The correct (True) predictions are on the main diagonal, from the upper left to the lower right.  True Positive is sometimes placed in the upper left and sometimes in the lower right of the matrix.  We will place True Positive in the lower right to be consistent with sklearn.  

In [None]:
# method creates a 2-by-2 confusion matrix for binary labels
# method is designed to return the same result as sklearn.metrics.confusion_matrix
# Confusion matrix:
# [[TN FP]
#  [FN TP]]
# TP is True Positive; TN is True Negative; FP is False Positive; FN is False Negative
# input parameters
#    actual     the actual observations (array-like of binary labels)
#    predicted  the predicted labels (array-like with the same shape as actual)
#    positive   optional label that counts as positive; when omitted, the largest
#               label found in actual or predicted is used.  In that default mode,
#               inputs that contain no positive label at all are counted as if the
#               negative label were positive, so pass positive explicitly to avoid it.
# output
#    numpy array [[TN, FP], [FN, TP]]; ravel() yields tn, fp, fn, tp
# raises
#    ValueError when the inputs are empty or their shapes differ
def confmat(actual, predicted, positive=None):
    # accept lists and tuples as well as numpy arrays, so the comparisons
    # below are always element-wise
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)
    if actual.shape != predicted.shape:
        raise ValueError("actual and predicted must have the same shape")
    if actual.size == 0:
        raise ValueError("actual and predicted must not be empty")
    YES = max(actual.max(), predicted.max()) if positive is None else positive
    correct = predicted == actual
    predicted_yes = predicted == YES
    true_positive  = np.count_nonzero(correct & predicted_yes)
    true_negative  = np.count_nonzero(correct & ~predicted_yes)
    false_positive = np.count_nonzero(~correct & predicted_yes)
    false_negative = np.count_nonzero(~correct & ~predicted_yes)
    actual_negative = [true_negative, false_positive]
    actual_positive = [false_negative, true_positive]
    return np.array([actual_negative, actual_positive])

To calculate a confusion matrix we need to threshold the predicted probabilities (`y`).  The typical threshold value is 0.5. 

In [None]:
# threshold y at 0.5
# Rounding to 0 decimals maps probabilities above 0.5 to 1.0 and those below 0.5 to 0.0.
# np.round rounds exact halves to the nearest even value, so a probability of exactly 0.5 becomes 0.0.
Y = np.round(y, 0)

In [None]:
# Confusion Matrix
# https://en.wikipedia.org/wiki/Precision_and_recall
CM = confmat(T, Y) # sklearn.metrics.confusion_matrix(T, Y)
print("Confusion matrix:")
print(CM)
# CM is laid out as [[TN, FP], [FN, TP]], so ravel() yields the counts in this order
tn, fp, fn, tp = CM.ravel()
print("TP:{}; TN:{}; FP:{}; FN:{}".format(tp, tn, fp, fn))

In [None]:
# Evaluate Model
# Compute and plot the confusion matrix for T and Y with sklearn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
conmat = confusion_matrix(T, Y)
cmd = ConfusionMatrixDisplay(confusion_matrix=conmat, display_labels=["0", "1"])
font = {'weight': 'bold', 'size': 24}
# Apply the large bold font only while this figure is built.  rc_context restores
# the rc settings that were active before the block, rather than resetting every
# setting to the Matplotlib defaults.
with plt.rc_context({'font.weight': font['weight'], 'font.size': font['size']}):
    cmd.plot()

#### Accuracy measures from the confusion matrix
The four numbers in a confusion matrix are used to calculate many accuracy measures:  
- Accuracy (Accuracy rate)
- Precision
- Recall
- F1

<br/>We cannot calculate an ROC from a single confusion matrix.  

In [None]:
# Derive the four accuracy measures from the confusion-matrix counts (tp, tn, fp, fn)
Accuracy = (tp + tn)/(tp + fp + fn + tn)   # same as sklearn.metrics.accuracy_score(T, Y)
Precision = tp/(tp + fp)                   # same as sklearn.metrics.precision_score(T, Y)
Recall = tp/(tp + fn)                      # same as sklearn.metrics.recall_score(T, Y)
F1 = 2./(1./Recall + 1./Precision)         # harmonic mean; same as sklearn.metrics.f1_score(T, Y)

# Report every measure rounded to two decimals
print("\nAccuracy (Accuracy rate):", np.round(Accuracy, 2))
print("Precision:", np.round(Precision, 2))
print("Recall:", np.round(Recall, 2))
print("F1 score:", np.round(F1, 2))

## ROC Curve
The gold standard for accuracy in a binary classification is the ROC curve.  

In [None]:
# method returns the data for an ROC curve
# method is designed to emulate sklearn.metrics.roc_curve
# method returns superfluous thresholds as compared to sklearn.metrics.roc_curve
# thresholds are evaluated in ascending order; for probabilities in [0, 1] both
# rates therefore start at 1 (everything predicted positive) and end at 0
# input parameters
#    actual  the actual observations.  Must be binary.  Typically 1/0 or True/False
#    predicted_probabilities   the probabilities from a predicted model in range [0, 1]
# output parameters as a tuple (roc_values)
#    false_positive_rate is the proportion of false positives among all actual negatives: FP / (FP + TN)
#    true_positive_rate is the proportion of true positives among all actual positives: TP / (TP + FN)
#    thresholds are the values used to threshold the confusion matrix
def roc(actual, predicted_probabilities):
    # accept lists as well as numpy arrays so the comparison below is element-wise
    actual = np.asarray(actual)
    predicted_probabilities = np.asarray(predicted_probabilities)
    # np.unique already returns the distinct values in ascending order.
    # -1 adds a threshold below every probability; 1 adds a threshold that no
    # probability in [0, 1] exceeds, because the comparison is strict.
    thresholds = np.unique(np.append(predicted_probabilities, [-1, 1]))
    false_positive_rate = []
    true_positive_rate = []
    for threshold in thresholds:
        CM = confmat(actual, predicted_probabilities > threshold)
        true_negative, false_positive, false_negative, true_positive = CM.ravel()
        true_positive_rate.append(true_positive/(true_positive + false_negative))
        false_positive_rate.append(false_positive/(false_positive + true_negative))
    roc_values = (false_positive_rate, true_positive_rate, thresholds)
    return roc_values

In [None]:
# draw an ROC curve together with the diagonal of a random classifier
# the area under the curve is computed with sklearn's auc and shown in the legend
# input parameters
#    false_positive_rate   x values of the curve
#    true_positive_rate    y values of the curve
def roc_display(false_positive_rate, true_positive_rate):
    from sklearn.metrics import auc
    area = auc(false_positive_rate, true_positive_rate)
    curve_label = 'ROC curve (area = %0.2f)' % area
    axis_range = [-0.01, 1.01]  # small margin so lines on the border stay visible

    plt.figure()
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.grid()
    plt.xlim(axis_range)
    plt.ylim(axis_range)
    plt.xlabel('FALSE Positive Rate')
    plt.ylabel('TRUE Positive Rate')
    # reference line for random classifier
    plt.plot([0, 1], [0, 1], color='gold', linewidth=2, linestyle='--')
    plt.plot(false_positive_rate, true_positive_rate, color='purple', linewidth=2, label=curve_label)
    plt.legend(loc="lower right")
    plt.show()

In [None]:
# Plot of ROC
# roc returns (false positive rates, true positive rates, thresholds);
# only the two rate lists are needed for the plot
false_positive_rate, true_positive_rate, thresholds = roc(T, y)
roc_display(false_positive_rate, true_positive_rate)

In [None]:
# ROC calculations with sklearn
from sklearn.metrics import roc_curve, roc_auc_score, RocCurveDisplay
fpr, tpr, th = roc_curve(y_true = T, y_score = y) # False Positive Rate, True Positive Rate, probability thresholds
auc_of_roc = round(roc_auc_score(y_true = T, y_score = y), 2) # area under the ROC curve, rounded to 2 decimals
RocCurveDisplay(fpr = fpr, tpr = tpr, roc_auc = auc_of_roc).plot()
plt.grid()
plt.show()

### Switch our definition of positive and negative
The definition of positive or negative is arbitrary.  When we are looking for a legitimate credit card transaction, then we define positive as a legitimate transaction and negative as a fraudulent transaction.  When we are looking for credit card fraud, then we define positive as a fraudulent transaction and negative as a legitimate transaction.  We can use the same classification model in both cases.

In [None]:
# Switch positive with negative
# Labels, probabilities and thresholded predictions are each complemented (x -> 1 - x)
T_neg = 1 - T
y_neg = 1 - y
Y_neg = 1 - Y # alternative: np.round(y_neg, 0); np.round rounds exact halves to even, so the two differ only for a probability of exactly 0.5

We will show the previous results of the 4 accuracy measures from our confusion matrix

In [None]:
# Previous results shown again:
# Accuracy, Precision, Recall and F1 still hold the values computed for the
# original definition of positive
print("Previous results:")
print("   Accuracy (Accuracy rate):", np.round(Accuracy, 2))
print("   Precision:", np.round(Precision, 2))
print("   Recall:", np.round(Recall, 2))
print("   F1:", np.round(F1, 2))

In [None]:
# Confusion Matrix with positive and negative switched
# https://en.wikipedia.org/wiki/Precision_and_recall
# Note: this cell overwrites CM, tn, fp, fn, tp and the four accuracy measures
CM = confmat(T_neg, Y_neg) # sklearn.metrics.confusion_matrix(T_neg, Y_neg)
print("Confusion matrix:")
print(CM)
# CM is laid out as [[TN, FP], [FN, TP]], so ravel() yields the counts in this order
tn, fp, fn, tp = CM.ravel()
print("TP:{}; TN:{}; FP:{}; FN:{}".format(tp, tn, fp, fn))
Accuracy = (tp + tn)/(tp + fp + fn + tn) # sklearn.metrics.accuracy_score(T_neg, Y_neg)
print("\nAccuracy (Accuracy rate):", np.round(Accuracy, 2))
Precision = tp/(tp + fp) # sklearn.metrics.precision_score(T_neg, Y_neg)
print("Precision:", np.round(Precision, 2))
Recall = tp/(tp + fn) # sklearn.metrics.recall_score(T_neg, Y_neg)
print("Recall:", np.round(Recall, 2))
F1 = 2./(1./Recall + 1./Precision) # sklearn.metrics.f1_score(T_neg, Y_neg)
print("F1 score:", np.round(F1, 2))

**Important:  When we switch our definition, then precision, recall, and F1 have different results.**

#### Compare ROC curves before and after changing the definition of positive

In [None]:
# Plot of ROC for positive case
# uses the original labels T and probabilities y
false_positive_rate, true_positive_rate, thresholds = roc(T, y)
roc_display(false_positive_rate, true_positive_rate)

In [None]:
# Plot of ROC for negative case
# uses the complemented labels T_neg and probabilities y_neg
false_positive_rate, true_positive_rate, thresholds = roc(T_neg, y_neg)
roc_display(false_positive_rate, true_positive_rate)

**Important:  When we switch our definition, then the AUC of the ROC is the same.**

The ROC has another very important advantage over a single confusion matrix.  The ROC evaluates a model's full range of probabilities.  Sometimes bad models can have high accuracy scores if their probabilities are all near 0.5.  Conversely, some usable models can have very bad accuracy scores.  The ROC and its AUC take these effects into account.