# Classification metrics

To learn about popular methods of classifier evaluation, we will generate example sequences of class labels. For this purpose, we will use the numpy library and draw random numbers from the normal and logistic distributions.

In [1]:
import numpy as np
import pandas as pd
from numpy.random import RandomState
from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score

Let's generate an example to help us better understand classification metrics.

In [34]:
# Seeded generator so that every run of this cell yields the same labels.
random = RandomState(30)

# The draws are order-dependent: each call advances the generator state.
random_1 = random.normal(loc=0.0, size=100)
# random_2 is not used in this cell, but the draw still consumes random
# numbers, so removing it would change the values of random_3.
random_2 = random.normal(loc=1, size=100)
random_3 = random.logistic(size=100)

# Threshold at zero to turn the continuous draws into 0/1 class labels
# (plain Python ints, as lists).
y_test = (random_1 >= 0).astype(int).tolist()
y_pred = (random_3 >= 0).astype(int).tolist()

print(y_test[:10])
print(y_pred[:10])


[0, 1, 0, 1, 0, 1, 0, 1, 1, 0]
[1, 1, 1, 0, 0, 1, 0, 1, 0, 1]


To illustrate the meaning of the numbers we have generated, the table below shows each real class next to the predicted class:

In [35]:
# Side-by-side view of the real and predicted labels.
example = pd.DataFrame({
    'Real Class': y_test,
    'Class Predicted by some classification model': y_pred,
})

# A label sum of exactly 1 means one label is 0 and the other is 1, i.e. the
# prediction missed; any other sum is reported as correct.
example['Correct prediction?'] = [
    'False' if label_sum == 1 else 'True'
    for label_sum in np.add(y_test, y_pred)
]

example

Unnamed: 0,Real Class,Class Predicted by some classification model,Correct prediction?
0,0,1,False
1,1,1,True
2,0,1,False
3,1,0,False
4,0,0,True
...,...,...,...
95,0,1,False
96,1,1,True
97,0,1,False
98,1,0,False


In [36]:
# Layout of a 2x2 confusion matrix, for reference:
# True Positives  # False Positives
# False Negatives # True Negatives

def confusion_matrix(y_test, y_pred):
    """Count the four outcomes of a binary classification.

    NOTE: this definition shadows ``confusion_matrix`` imported from
    ``sklearn.metrics`` at the top of the notebook; after this cell runs,
    the name refers to this function.

    Parameters
    ----------
    y_test : iterable
        Real class labels. A truthy label (e.g. 1) is treated as the
        positive class, a falsy one (e.g. 0) as the negative class.
    y_pred : iterable
        Predicted class labels, one per element of ``y_test``.

    Returns
    -------
    dict
        Counts under the keys ``'TP'``, ``'TN'``, ``'FP'`` and ``'FN'``.

    Raises
    ------
    ValueError
        If ``y_test`` and ``y_pred`` do not have the same length.
    """
    # Materialise both inputs so that lengths can be compared (and so that
    # generators are accepted). A bare zip() would silently drop the extra
    # items of the longer sequence and report counts for a truncated sample.
    y_test, y_pred = list(y_test), list(y_pred)
    if len(y_test) != len(y_pred):
        raise ValueError(
            "y_test and y_pred must have the same length, got "
            f"{len(y_test)} and {len(y_pred)}"
        )

    tp, fp, tn, fn = 0, 0, 0, 0
    for real, predicted in zip(y_test, y_pred):
        if real == predicted:
            # Correct prediction: which class was it?
            if predicted:  # i.e. predicted == 1
                tp += 1
            else:
                tn += 1
        else:
            # Wrong prediction: the predicted class tells us the error type.
            if predicted:
                fp += 1
            else:
                fn += 1

    return {'TP': tp, 'TN': tn, 'FP': fp, 'FN': fn}

        

In [37]:
# Uses the hand-written confusion_matrix defined in this notebook, which returns a dict of counts.
confusion_matrix(y_test, y_pred)

{'TP': 28, 'TN': 32, 'FP': 25, 'FN': 15}

In [38]:
# Accuracy = (TP+TN)/(TP+TN+FP+FN)

def accuracy(y_test, y_pred):
    """Return the share of predictions that match the real labels.

    The outcome counts come from ``confusion_matrix(y_test, y_pred)``;
    accuracy is the number of correct predictions (TP + TN) divided by
    the number of all counted predictions.

    Raises ``ZeroDivisionError`` when no predictions are counted
    (e.g. both inputs are empty).
    """
    counts = confusion_matrix(y_test, y_pred)
    correct = counts['TP'] + counts['TN']
    total = correct + counts['FP'] + counts['FN']
    return correct / total
    

In [39]:
# Accuracy of the generated predictions against the generated real labels.
accuracy(y_test, y_pred)

0.6

In [None]:
# Toy problem: predicting earthquakes on a heavily imbalanced dataset.
#   No earthquake: 999 observations -> label 0
#   Earthquake:      1 observation  -> label 1

def model(input):
    """Trivial baseline that ignores ``input`` and always predicts label 0."""
    constant_prediction = 0
    return constant_prediction



In [None]:
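# accuracy = correct predictions / all observations

# The model above always predicts 0 ("no earthquake"), so it is right on 999 of the 1000 observations:
# accuracy = 999/1000 = 0.999

# Accuracy does not evaluate models well on imbalanced datasets:
# this model scores 0.999 even though it never detects the earthquake.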