<a href="https://colab.research.google.com/github/issondl/from-data-to-solution-2021/blob/main/4_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Metrics

## Imports

In [None]:
import numpy as np
# Seed NumPy's global RNG so any NumPy-based randomness is repeatable across runs.
np.random.seed(2021)
import random
# Seed the stdlib RNG as well; the toy datasets further down are drawn with random.uniform.
random.seed(2021)

from IPython.display import Markdown, display

def printmd(string):
    """Render `string` as Markdown in the notebook output area."""
    rendered = Markdown(string)
    display(rendered)

## Create Toy Datasets

In [None]:
def pc(db):  # print count
    """Print how many 0 (negative) and 1 (positive) entries `db` contains."""
    negatives, positives = db.count(0), db.count(1)
    print("Database contains {} negative and {} positive samples".format(negatives, positives))

length = 100

# Balanced: half negatives, half positives (the remainder goes to the
# positives so the total is exactly `length` even for an odd length).
db_balanced = [0] * (length//2) + [1] * (length - length//2)
pc(db_balanced)

# More positives: a random 90-99% share of the samples is positive.
# The minority size is derived from the remainder rather than from
# int(length*(1-amount)+1): that expression yields length+1 samples whenever
# length*amount is a whole number, and later cells compare against
# np.ones(length) / np.zeros(length), which requires exactly `length` samples.
amount = random.uniform(0.9, 0.99)
db_positives = [1] * int(length*amount) + [0] * (length - int(length*amount))
pc(db_positives)

# More negatives: same construction with the classes swapped.
amount = random.uniform(0.9, 0.99)
db_negatives = [0] * int(length*amount) + [1] * (length - int(length*amount))
pc(db_negatives)

## Dummy model

In [None]:
top_no = 95  # last index (inclusive) whose prediction the dummy model gets wrong

def dummy_model(data, threshold):
    """Simulate a classifier by copying the labels and flipping a fixed window.

    Samples at index ``i < threshold`` or ``i > top_no`` are returned unchanged
    (a "correct" prediction). Samples with ``threshold <= i <= top_no`` are
    replaced by ``abs(1 - d)``, which turns a 0 into 1 and a 1 into 0 (a
    "wrong" prediction).

    Args:
        data: sequence of 0/1 labels.
        threshold: first index whose prediction is flipped.

    Returns:
        A new list of predictions with the same length as ``data``.
    """
    # The original kept a `correct` counter that was never read; it is dropped.
    return [
        d if (i < threshold or i > top_no) else abs(1 - d)
        for i, d in enumerate(data)
    ]

### *Balanced dataset*

In [None]:
# Predictions for indices below this threshold are left correct by the dummy model.
balanced_threshold = 80
out_balanced = dummy_model(data=db_balanced, threshold=balanced_threshold)

In [None]:
# The bold middle segment is the window the dummy model flips:
# indices balanced_threshold..top_no inclusive. The slice must therefore end
# at top_no+1; ending it at top_no silently dropped element top_no from the output.
print('Labels:')
printmd('{}**{}**{}'.format(db_balanced[:balanced_threshold], db_balanced[balanced_threshold:top_no+1], db_balanced[top_no+1:]))
print('Predictions:')
printmd('{}**{}**{}'.format(out_balanced[:balanced_threshold], out_balanced[balanced_threshold:top_no+1], out_balanced[top_no+1:]))

### *More positives*

In [None]:
# Predictions for indices below this threshold are left correct by the dummy model.
positives_threshold = 80
out_positives = dummy_model(data=db_positives, threshold=positives_threshold)

In [None]:
# The bold middle segment is the window the dummy model flips:
# indices positives_threshold..top_no inclusive. The slice must therefore end
# at top_no+1; ending it at top_no silently dropped element top_no from the output.
print('Labels:')
printmd('{}**{}**{}'.format(db_positives[:positives_threshold], db_positives[positives_threshold:top_no+1], db_positives[top_no+1:]))
print('Predictions:')
printmd('{}**{}**{}'.format(out_positives[:positives_threshold], out_positives[positives_threshold:top_no+1], out_positives[top_no+1:]))

### *More negatives*

In [None]:
# Predictions for indices below this threshold are left correct by the dummy model.
negatives_threshold = 80
out_negatives = dummy_model(data=db_negatives, threshold=negatives_threshold)

In [None]:
# The bold middle segment is the window the dummy model flips:
# indices negatives_threshold..top_no inclusive. The slice must therefore end
# at top_no+1; ending it at top_no silently dropped element top_no from the output.
print('Labels:')
printmd('{}**{}**{}'.format(db_negatives[:negatives_threshold], db_negatives[negatives_threshold:top_no+1], db_negatives[top_no+1:]))
print('Predictions:')
# The tail of the predictions line must come from out_negatives (it previously
# printed the labels list db_negatives by mistake).
printmd('{}**{}**{}'.format(out_negatives[:negatives_threshold], out_negatives[negatives_threshold:top_no+1], out_negatives[top_no+1:]))

## Metrics

### **Accuracy**

Tasks:

* Create a function that implements the accuracy metric

*Balanced dataset*


In [None]:
from sklearn.metrics import accuracy_score

In [None]:
## Implement a function that computes the accuracy metric

def acc(labels, predictions):
    """Return the fraction of predictions that equal their label.

    accuracy = (number of matching positions) / (number of samples)

    Args:
        labels: sequence of ground-truth class values.
        predictions: sequence of predicted class values, same length as labels.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 for empty input.

    Raises:
        ValueError: if the two sequences differ in length.
    """
    ## START
    if len(labels) != len(predictions):
        raise ValueError("labels and predictions must have the same length")
    if len(labels) == 0:
        # No samples: avoid a division by zero.
        return 0.0
    matches = sum(1 for y, p in zip(labels, predictions) if y == p)
    return matches / len(labels)
    ## END

In [None]:
# Compare the hand-written accuracy with scikit-learn's on the balanced set.
printmd('Accuracy custom  {}'.format(acc(labels=db_balanced, predictions=out_balanced)))
printmd('Accuracy sklearn {}'.format(accuracy_score(y_true=db_balanced, y_pred=out_balanced)))

*More positives*

In [None]:
# Compare the hand-written accuracy with scikit-learn's on the mostly-positive set.
printmd('Accuracy custom  {}'.format(acc(labels=db_positives, predictions=out_positives)))
printmd('Accuracy sklearn {}'.format(accuracy_score(y_true=db_positives, y_pred=out_positives)))

*More negatives*

In [None]:
# Compare the hand-written accuracy with scikit-learn's on the mostly-negative set.
printmd('Accuracy custom  {}'.format(acc(labels=db_negatives, predictions=out_negatives)))
printmd('Accuracy sklearn {}'.format(accuracy_score(y_true=db_negatives, y_pred=out_negatives)))

*More positives - all positive predictions*

In [None]:
# Baseline: predict the positive class (1) for every sample.
printmd('Accuracy {}'.format(accuracy_score(y_true=db_positives, y_pred=np.ones(length))))

*More negatives - all negative predictions*

In [None]:
# Baseline: predict the negative class (0) for every sample.
printmd('Accuracy {}'.format(accuracy_score(y_true=db_negatives, y_pred=np.zeros(length))))

### **Confusion Matrix**

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

*Balanced dataset*

In [None]:
# Confusion matrix for the balanced set (rows: true class, columns: predicted class).
cmd = ConfusionMatrixDisplay(confusion_matrix(db_balanced, out_balanced), display_labels=[0,1])
# Trailing ';' keeps the notebook from echoing the display object's repr above the figure.
cmd.plot();

*More positives*

In [None]:
# Confusion matrix for the mostly-positive set (rows: true class, columns: predicted class).
cmd = ConfusionMatrixDisplay(confusion_matrix(db_positives, out_positives), display_labels=[0,1])
# Trailing ';' keeps the notebook from echoing the display object's repr above the figure.
cmd.plot();

*More negatives*

In [None]:
# Confusion matrix for the mostly-negative set (rows: true class, columns: predicted class).
cmd = ConfusionMatrixDisplay(confusion_matrix(db_negatives, out_negatives), display_labels=[0,1])
# Trailing ';' keeps the notebook from echoing the display object's repr above the figure.
cmd.plot();

*More positives - all positive predictions*

In [None]:
# Confusion matrix when every sample of the mostly-positive set is predicted as 1.
cmd = ConfusionMatrixDisplay(confusion_matrix(db_positives, np.ones(length)), display_labels=[0,1])
# Trailing ';' keeps the notebook from echoing the display object's repr above the figure.
cmd.plot();

*More negatives - all negative predictions*

In [None]:
# Confusion matrix when every sample of the mostly-negative set is predicted as 0.
cmd = ConfusionMatrixDisplay(confusion_matrix(db_negatives, np.zeros(length)), display_labels=[0,1])
# Trailing ';' keeps the notebook from echoing the display object's repr above the figure.
cmd.plot();

### **Precision**

Tasks:

* Create a function that implements the precision metric

In [None]:
from sklearn.metrics import precision_score

In [None]:
## Create a function that computes the precision metric

def precision(labels, predictions):
    """Return the precision of the positive class (value 1).

    precision = TP / (TP + FP), i.e. the share of samples predicted as 1
    whose label is really 1.

    Args:
        labels: sequence of ground-truth 0/1 values.
        predictions: sequence of predicted 0/1 values, same length as labels.

    Returns:
        Precision as a float in [0, 1]; 0.0 when nothing was predicted
        positive (the ratio is undefined in that case).

    Raises:
        ValueError: if the two sequences differ in length.
    """
    ## START
    if len(labels) != len(predictions):
        raise ValueError("labels and predictions must have the same length")
    true_pos = 0
    predicted_pos = 0
    for y, p in zip(labels, predictions):
        if p == 1:
            predicted_pos += 1
            if y == 1:
                true_pos += 1
    if predicted_pos == 0:
        # No positive predictions: avoid a division by zero.
        return 0.0
    return true_pos / predicted_pos
    ## END

*Balanced dataset*

In [None]:
# Compare the hand-written precision with scikit-learn's on the balanced set.
printmd('Precision custom  {}'.format(precision(labels=db_balanced, predictions=out_balanced)))
printmd('Precision sklearn {}'.format(precision_score(y_true=db_balanced, y_pred=out_balanced)))

*More positives*

In [None]:
# Compare the hand-written precision with scikit-learn's on the mostly-positive set.
printmd('Precision custom  {}'.format(precision(labels=db_positives, predictions=out_positives)))
printmd('Precision sklearn {}'.format(precision_score(y_true=db_positives, y_pred=out_positives)))

*More negatives*

In [None]:
# Compare the hand-written precision with scikit-learn's on the mostly-negative set.
printmd('Precision custom  {}'.format(precision(labels=db_negatives, predictions=out_negatives)))
printmd('Precision sklearn {}'.format(precision_score(y_true=db_negatives, y_pred=out_negatives)))

*More positives - all positive predictions*

In [None]:
# Baseline: predict the positive class (1) for every sample.
printmd('Precision custom  {}'.format(precision(labels=db_positives, predictions=np.ones(length))))
printmd('Precision sklearn {}'.format(precision_score(y_true=db_positives, y_pred=np.ones(length))))

*More negatives - all negative predictions*

In [None]:
# Baseline: predict the negative class (0) for every sample, so there are
# no positive predictions at all and TP + FP is zero.
printmd('Precision custom  {}'.format(precision(labels=db_negatives, predictions=np.zeros(length))))
printmd('Precision sklearn {}'.format(precision_score(y_true=db_negatives, y_pred=np.zeros(length))))

### **Recall**

Tasks:

* Create a function that implements the recall metric

In [None]:
from sklearn.metrics import recall_score

In [None]:
## Create a function that computes the recall metric

def recall(labels, predictions):
    """Return the recall of the positive class (value 1).

    recall = TP / (TP + FN), i.e. the share of samples whose label is 1
    that were also predicted as 1.

    Args:
        labels: sequence of ground-truth 0/1 values.
        predictions: sequence of predicted 0/1 values, same length as labels.

    Returns:
        Recall as a float in [0, 1]; 0.0 when the labels contain no
        positives (the ratio is undefined in that case).

    Raises:
        ValueError: if the two sequences differ in length.
    """
    ## START
    if len(labels) != len(predictions):
        raise ValueError("labels and predictions must have the same length")
    true_pos = 0
    actual_pos = 0
    for y, p in zip(labels, predictions):
        if y == 1:
            actual_pos += 1
            if p == 1:
                true_pos += 1
    if actual_pos == 0:
        # No positive labels: avoid a division by zero.
        return 0.0
    return true_pos / actual_pos
    ## END

*Balanced dataset*

In [None]:
# Compare the hand-written recall with scikit-learn's on the balanced set.
printmd('Recall custom  {}'.format(recall(labels=db_balanced, predictions=out_balanced)))
printmd('Recall sklearn {}'.format(recall_score(y_true=db_balanced, y_pred=out_balanced)))

*More positives*


In [None]:
# Compare the hand-written recall with scikit-learn's on the mostly-positive set.
printmd('Recall custom  {}'.format(recall(labels=db_positives, predictions=out_positives)))
printmd('Recall sklearn {}'.format(recall_score(y_true=db_positives, y_pred=out_positives)))

*More negatives*

In [None]:
# Compare the hand-written recall with scikit-learn's on the mostly-negative set.
printmd('Recall custom  {}'.format(recall(labels=db_negatives, predictions=out_negatives)))
printmd('Recall sklearn {}'.format(recall_score(y_true=db_negatives, y_pred=out_negatives)))

*More positives - all positive predictions*

In [None]:
# Baseline: predict the positive class (1) for every sample.
printmd('Recall custom  {}'.format(recall(labels=db_positives, predictions=np.ones(length))))
printmd('Recall sklearn {}'.format(recall_score(y_true=db_positives, y_pred=np.ones(length))))

*More negatives - all negative predictions*

In [None]:
# Baseline: predict the negative class (0) for every sample.
printmd('Recall custom  {}'.format(recall(labels=db_negatives, predictions=np.zeros(length))))
printmd('Recall sklearn {}'.format(recall_score(y_true=db_negatives, y_pred=np.zeros(length))))

### **False Positive Rate = 1 − Specificity**

In [None]:
def fpr(labels, predictions):
    """Return the false positive rate FP / (FP + TN) for 0/1 data.

    A false positive is a prediction of 1 that differs from its label; a true
    negative is a prediction of 0 that equals its label. Returns 0 when there
    are neither false positives nor true negatives.
    """
    assert len(labels)==len(predictions)
    pairs = list(zip(labels, predictions))
    false_pos = sum(1 for y, p in pairs if p == 1 and p != y)
    true_neg = sum(1 for y, p in pairs if p == 0 and p == y)
    negatives = false_pos + true_neg
    return false_pos / negatives if negatives else 0

*Balanced dataset*

In [None]:
# False positive rate of the dummy model on the balanced set.
printmd('fpr {}'.format(fpr(labels=db_balanced, predictions=out_balanced)))

*More positives*

In [None]:
# False positive rate of the dummy model on the mostly-positive set.
printmd('fpr {}'.format(fpr(labels=db_positives, predictions=out_positives)))

*More negatives*

In [None]:
# False positive rate of the dummy model on the mostly-negative set.
printmd('fpr {}'.format(fpr(labels=db_negatives, predictions=out_negatives)))

*More positives - all positive predictions*

In [None]:
# Baseline: predict the positive class (1) for every sample.
printmd('fpr {}'.format(fpr(labels=db_positives, predictions=np.ones(length))))

*More negatives - all negative predictions*

### **True Positive Rate = Recall = Sensitivity**

### **F1 Score**

In [None]:
from sklearn.metrics import f1_score

In [None]:
def f1():
    """Placeholder for a custom F1 implementation; currently does nothing and returns None."""
    return None

*Balanced dataset*

In [None]:
# scikit-learn F1 score of the dummy model on the balanced set.
printmd('F1 sklearn {}'.format(f1_score(y_true=db_balanced, y_pred=out_balanced)))

*More positives*

In [None]:
# F1 of the positive class, then the 'weighted' average over both classes,
# for the dummy model on the mostly-positive set.
printmd('F1 sklearn {}'.format(f1_score(y_true=db_positives, y_pred=out_positives)))
printmd('F1 sklearn weighted {}'.format(f1_score(y_true=db_positives, y_pred=out_positives, average='weighted')))

*More negatives*

In [None]:
# F1 of the positive class, then the 'weighted' average over both classes,
# for the dummy model on the mostly-negative set.
printmd('F1 sklearn {}'.format(f1_score(y_true=db_negatives, y_pred=out_negatives)))
printmd('F1 sklearn weighted {}'.format(f1_score(y_true=db_negatives, y_pred=out_negatives, average='weighted')))

*More positives - all positive predictions*

In [None]:
# Baseline: predict the positive class (1) for every sample.
printmd('F1 sklearn {}'.format(f1_score(y_true=db_positives, y_pred=np.ones(length))))
printmd('F1 sklearn weighted {}'.format(f1_score(y_true=db_positives, y_pred=np.ones(length), average='weighted')))

*More negatives - all negative predictions*

In [None]:
# Baseline: predict the negative class (0) for every sample.
printmd('F1 sklearn {}'.format(f1_score(y_true=db_negatives, y_pred=np.zeros(length))))
printmd('F1 sklearn weighted {}'.format(f1_score(y_true=db_negatives, y_pred=np.zeros(length), average='weighted')))