# How do we evaluate the following systems?

## Credit Approval (Yes/No)
## Covid detection (Positive/Negative)
## Court guilty judgement (Guilty/Not Guilty)
## Spam detection (Spam/Ham)
## Anomaly detection (Anomaly/Normal)


In [14]:
# Ground-truth labels generated by the classification process under study.
# Only 2 of the 10 samples carry label 1, so the classes are skewed.
Y =  [1, 0, 1, 0, 0, 0, 0, 0, 0, 0]

# Final predictions of two classification models that try to approximate the process above.
Y_hat1 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Y_hat2 = [1, 0, 1, 1, 1, 0, 0, 0, 0, 0]

# TBD: Which is the better model?
# By raw error count Y_hat1 looks better: it disagrees with Y on 1 sample,
# whereas Y_hat2 disagrees on 2.

In [15]:
# TBD: Compute accuracy of the models
# accuracy_score compares the true labels against each model's predictions.
from sklearn.metrics import accuracy_score

accuracy1, accuracy2 = (
    accuracy_score(y_true=Y, y_pred=predicted) for predicted in (Y_hat1, Y_hat2)
)

print(f"accuracy of model1 = {accuracy1}. {accuracy1*100}% accurate")
print(f"accuracy of model2 = {accuracy2}. {accuracy2*100}% accurate")

accuracy of model1 = 0.9. 90.0% accurate
accuracy of model2 = 0.8. 80.0% accurate


In [16]:
# TBD: Compute confusion matrix for the models
import numpy as np

# Positive/Negative refers to the class:
#     1 is the Positive class (usually the rare class is made positive, because
#       metrics like precision and recall are defined from the perspective of
#       the positive class)
#     0 is the Negative class
#
# True/False refers to the classification:
#     True means classified correctly
#     False means classified incorrectly
#
# True Positives (TP)  = classified Positive and correct
# False Positives (FP) = classified Positive and incorrect
# False Negatives (FN) = classified Negative and incorrect
# True Negatives (TN)  = classified Negative and correct
#
# Layout used for the hand-built matrices below
# (rows = predicted class, columns = actual class):
#
# +-------------------------------------------------+
# |                 Confusion Matrix                |
# +------------+-----------------+------------------+
# |            | Actual Positive | Actual Negative  |
# +------------+-----------------+------------------+
# | Predicted  | True Positive   | False Positive   |
# | Positive   |                 |                  |
# +------------+-----------------+------------------+
# | Predicted  | False Negative  | True Negative    |
# | Negative   |                 |                  |
# +------------+-----------------+------------------+

# Model 1, counted by hand from:
#   Y      = [1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
#   Y_hat1 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
confusion1 = np.array(
    [
        [1, 0],  # predicted positive: TP = 1, FP = 0
        [1, 8],  # predicted negative: FN = 1, TN = 8
    ]
)

# Model 2, counted by hand from:
#   Y      = [1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
#   Y_hat2 = [1, 0, 1, 1, 1, 0, 0, 0, 0, 0]
confusion2 = np.array(
    [
        [2, 2],  # predicted positive: TP = 2, FP = 2
        [0, 6],  # predicted negative: FN = 0, TN = 6
    ]
)

print(f"confusion1 = \n{confusion1}")
print(f"confusion2 = \n{confusion2}")


confusion1 = 
[[1 0]
 [1 8]]
confusion2 = 
[[2 2]
 [0 6]]


In [17]:
# TBD: Compute precision, recall and f1 score for the models

# Counts taken from the hand-built confusion matrices:
#   model 1: TP = 1, FP = 0, FN = 1
#   model 2: TP = 2, FP = 2, FN = 0
tp1, fp1, fn1 = 1, 0, 1
tp2, fp2, fn2 = 2, 2, 0

# Precision = TP / (TP + FP)
# Of everything we classify as positive, the fraction that is actually positive.
precision1 = tp1 / (tp1 + fp1)
print(f"precision1 = {precision1}")
precision2 = tp2 / (tp2 + fp2)
print(f"precision2 = {precision2}")

# Recall = TP / (TP + FN)
# Of everything that is actually positive, the fraction we classify as positive.
recall1 = tp1 / (tp1 + fn1)
print(f"recall1 = {recall1}")
recall2 = tp2 / (tp2 + fn2)
print(f"recall2 = {recall2}")


# ML systems usually trade off precision against recall: higher precision
# may lead to lower recall, and vice versa. For example, in a serious-disease detector
# we may favour recall over precision, because we do not want to miss any case.
# Some judicial systems tilt heavily towards precision at the cost of recall:
# it does not matter if 100 guilty people are acquitted, but not one innocent should be punished.
# Similarly, when designing an anomaly detection system you would favour recall for a
# mission-critical system, but for an ordinary system you might favour precision to avoid
# waking engineers at night.


# F1 score: with skewed classes, metrics like accuracy are not trustworthy.
# Precision and recall serve the purpose, but they are two separate metrics,
# and for evaluating ML systems it is usually recommended to have a single-value metric.
# F1 combines both: it is the harmonic mean of precision and recall.

# f1 = 2 * precision * recall / (precision + recall)

def f1(precision, recall):
    """Return the F1 score, the harmonic mean of precision and recall.

    Args:
        precision: TP / (TP + FP) for the positive class.
        recall: TP / (TP + FN) for the positive class.

    Returns:
        2 * precision * recall / (precision + recall), or 0.0 when
        precision and recall are both zero.
    """
    denominator = precision + recall
    # With no true positives both inputs are 0 and the harmonic mean is
    # undefined; return 0.0 instead of raising ZeroDivisionError (sklearn's
    # f1_score likewise acts like 0 in this case under its default
    # zero_division="warn").
    if denominator == 0:
        return 0.0
    return 2 * precision * recall / denominator

# Combine each model's precision and recall into its F1 score.
f1_1, f1_2 = f1(precision1, recall1), f1(precision2, recall2)
print(f"F1 score of model1 = {f1_1}")
print(f"F1 score of model2 = {f1_2}")

precision1 = 1.0
precision2 = 0.5
recall1 = 0.5
recall2 = 1.0
F1 score of model1 = 0.6666666666666666
F1 score of model2 = 0.6666666666666666


In [18]:
# The same precision, recall and F1 metrics, this time computed by scikit-learn
# from the label lists.
from sklearn.metrics import precision_score, recall_score, f1_score

precision1, precision2 = (
    precision_score(y_true=Y, y_pred=Y_hat1),
    precision_score(y_true=Y, y_pred=Y_hat2),
)
print(f"precision1 = {precision1}")
print(f"precision2 = {precision2}")

recall1, recall2 = (
    recall_score(y_true=Y, y_pred=Y_hat1),
    recall_score(y_true=Y, y_pred=Y_hat2),
)
print(f"recall1 = {recall1}")
print(f"recall2 = {recall2}")

f1_1, f1_2 = (
    f1_score(y_true=Y, y_pred=Y_hat1),
    f1_score(y_true=Y, y_pred=Y_hat2),
)
print(f"F1 score of model1 = {f1_1}")
print(f"F1 score of model2 = {f1_2}")

# Precision, recall and F1 are all defined relative to a positive class.
# Label 1 is the default positive class; pass pos_label to treat a different
# label as positive.
model1_precision_wrt_0_as_positive_class = precision_score(
    y_true=Y, y_pred=Y_hat1, pos_label=0
)
print(f"model1 precision with 0 as positive class = {model1_precision_wrt_0_as_positive_class}")

precision1 = 1.0
precision2 = 0.5
recall1 = 0.5
recall2 = 1.0
F1 score of model1 = 0.6666666666666666
F1 score of model2 = 0.6666666666666666
model1 precision with 0 as positive class = 0.8888888888888888


In [19]:
#Note: Confusion matrix using sklearn has Actual Class in Rows and Predicted class in columns (i.e axes are other way round)
'''
+-----------------------------------------------------+
|              Confusion Matrix (sklearn)             |
+----------+--------------------+---------------------+
|          | Predicted Positive | Predicted Negative  |
+----------+--------------------+---------------------+
| Actual   | True Positive      | False Negative      |
| Positive |                    |                     |
+----------+--------------------+---------------------+
| Actual   | False Positive     | True Negative       |
| Negative |                    |                     |
+----------+--------------------+---------------------+
'''
#Positive and Negative Labels can be specified in labels arguments 
from sklearn.metrics import confusion_matrix
confusion1_sklearn = confusion_matrix(Y, Y_hat1, labels = [1, 0]) #Order in which we specify labels will decide which will be positive class, [1, 0] means 1 is positive class, 0 is negative class
confusion2_sklearn = confusion_matrix(Y, Y_hat2, labels = [1, 0])
print(f"confusion1_sklearn = \n{confusion1_sklearn}")
print(f"confusion2_skelarn = \n{confusion2_sklearn}")

confusion1_sklearn = 
[[1 1]
 [0 8]]
confusion2_skelarn = 
[[2 0]
 [2 6]]
