In [1]:
import numpy as np
import pandas as pd

# Scoring and Metrics

In [2]:
# Toy data obeying a single rule: y is True exactly when x is positive.
df = pd.DataFrame({
    "x": [15, -2, 99, -10, 8, 5, -1000, -1],
    "y": [True, False, True, False, True, True, False, False],
})
# Rows 0-3 are used for fitting; rows 4-7 are held out for scoring.
train = df.iloc[:4]
test = df.iloc[4:]

In [3]:
# the four rows (index 0-3) the model is fitted on
train

Unnamed: 0,x,y
0,15,True
1,-2,False
2,99,True
3,-10,False


In [4]:
# the four held-out rows (index 4-7) used for scoring
test

Unnamed: 0,x,y
4,8,True
5,5,True
6,-1000,False
7,-1,False


In [5]:
from sklearn.linear_model import LogisticRegression
# Step 1: train a LogisticRegression on the training data.
# (Step 2, scoring the model on the test data, happens in the cells below.)
# The feature is passed as a one-column DataFrame (train[["x"]]) and the
# target as a Series (train["y"]).

lr = LogisticRegression()
lr.fit(train[["x"]], train["y"])

In [6]:
# predicted y for each test row, in row order (x = 8, 5, -1000, -1)
predictions = lr.predict(test[["x"]])
predictions

array([ True, False, False, False])

In [7]:
# fraction of test rows whose predicted y matches the true y (3 of 4 here)
lr.score(test[["x"]], test["y"]) # uses accuracy_score by default

0.75

In [8]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [9]:
# accuracy = fraction of positions where the true labels (first argument)
# and the predictions (second argument) agree -- here 1 of 2
accuracy_score(["A", "B"], ["A", "C"])

0.5

In [10]:
# Same value as lr.score(...): compare the true test labels with the
# predictions already computed by lr.predict on the test rows.
accuracy_score(test["y"], predictions)

0.75

# Confusion Matrices

In [11]:
# Nine samples grouped by true class: 4 dogs, 3 cats, 2 mice.
actual = ["dog"] * 4 + ["cat"] * 3 + ["mouse"] * 2
# Predictions in the same order; one dog is called "cat" and one cat is called "dog".
predicted = (
    ["dog", "dog", "dog", "cat"]   # the four dogs
    + ["cat", "dog", "cat"]        # the three cats
    + ["mouse", "mouse"]           # the two mice
)

In [12]:
# row: what it actually is
# col: what it was predicted to be
# `labels` fixes the row/column order; "horse" never occurs in actual or
# predicted, so its row and column are all zeros
labels = ["dog", "cat", "mouse", "horse"]
cm = confusion_matrix(actual, predicted, labels=labels)
cm

array([[3, 1, 0, 0],
       [1, 2, 0, 0],
       [0, 0, 2, 0],
       [0, 0, 0, 0]])

In [13]:
# wrap the raw array so rows (actual) and columns (predicted) carry the class names
pd.DataFrame(cm, index=labels, columns=labels)

Unnamed: 0,dog,cat,mouse,horse
dog,3,1,0,0
cat,1,2,0,0
mouse,0,0,2,0
horse,0,0,0,0


In [14]:
# Confusion matrix for the logistic regression on the test rows, built directly
# as a labelled DataFrame (rows: actual y, columns: predicted y).
labels = [False, True]
cm = pd.DataFrame(
    confusion_matrix(test["y"], lr.predict(test[["x"]]), labels=labels),
    index=labels,
    columns=labels,
)
cm

Unnamed: 0,False,True
False,2,0
True,1,1


In [15]:
# cm is indexed as cm.at[actual, predicted]; True is the positive class
print("True Positive", cm.at[True, True])
print("True Negative", cm.at[False, False])

# false positive: actually False but predicted True; false negative: the reverse
print("False Positive", cm.at[False, True])
print("False Negative", cm.at[True, False])

True Positive 1
True Negative 2
False Positive 0
False Negative 1


# Multi-Class Metrics

In [19]:
# An imbalanced dataset: a 10-sample dog/cat/mouse pattern repeated 199 times
# (1990 samples) followed by 10 horses, for 2000 samples in total.
actual = (
    ["dog"] * 4 + ["cat"] * 4 + ["mouse"] * 2
) * 199 + ["horse"] * 10
# Within each pattern every other cat is called "dog"; nine of the ten horses
# are called "dog" and only the last one is recognised.
predicted = (
    ["dog"] * 4 + ["cat", "dog", "cat", "dog"] + ["mouse"] * 2
) * 199 + ["dog"] * 9 + ["horse"]

# Labelled confusion matrix (rows: actual class, columns: predicted class),
# built in one expression so `cm` is only ever bound to the DataFrame.
labels = ["dog", "cat", "mouse", "horse"]
cm = pd.DataFrame(
    confusion_matrix(actual, predicted, labels=labels),
    index=labels,
    columns=labels,
)
cm

Unnamed: 0,dog,cat,mouse,horse
dog,796,0,0,0
cat,398,398,0,0
mouse,0,0,398,0
horse,9,0,0,1


In [21]:
# cat recall (from scratch):
# correctly predicted cats / all actual cats (the sum of the "cat" row)
cm.at["cat", "cat"] / cm.loc["cat", :].sum()

0.5

In [22]:
# horse recall (from scratch):
# correctly predicted horses / all actual horses (the sum of the "horse" row)
cm.at["horse", "horse"] / cm.loc["horse", :].sum()

0.1

In [23]:
# overall accuracy: every sample counts equally, so the 10 horses barely move it
accuracy_score(actual, predicted)

0.7965

In [34]:
from sklearn.metrics import recall_score, balanced_accuracy_score, precision_score
# average=None returns one recall per class, in the order given by `labels`
# (printed first so each number can be matched to its class name)
print(labels)
recall_score(actual, predicted, average=None, labels=labels)

['dog', 'cat', 'mouse', 'horse']


array([1. , 0.5, 1. , 0.1])

In [29]:
# unweighted mean of the per-class recalls: each class counts equally, whatever its size
recall_score(actual, predicted, average=None, labels=labels).mean() # higher or lower than accuracy_score?

0.65

In [32]:
# mean of the recall scores -- useful when there's high skew (class imbalance) in the data;
# gives the same value as averaging the per-class recall_score results by hand
balanced_accuracy_score(actual, predicted)

0.65

In [37]:
# per-class precision, in `labels` order: of the samples predicted as a class,
# the fraction that actually belong to it
print(labels)
precision_score(actual, predicted, labels=labels, average=None)

['dog', 'cat', 'mouse', 'horse']


array([0.66167914, 1.        , 1.        , 1.        ])

In [38]:
# confusion matrix again for reference: a class's precision is its diagonal
# entry divided by its column sum (e.g. dog: 796 / (796 + 398 + 9))
cm

Unnamed: 0,dog,cat,mouse,horse
dog,796,0,0,0
cat,398,398,0,0
mouse,0,0,398,0
horse,9,0,0,1


# Binary Classification Metrics

Unless otherwise specified, "precision" and "recall" refer to those metrics for the positive class when we're doing binary classification.

In [39]:
# 13 binary samples: 3 actual negatives (False) and 10 actual positives (True)
actual = [False, True, True, True, True, False, False, True, True, True, True, True, True]
predicted = [False, True, True, True, True, True, True, False, False, False, True, True, True]
# rows are actual, columns are predicted, both ordered False then True,
# so the result reads [[TN, FP], [FN, TP]]
confusion_matrix(actual, predicted)

array([[1, 2],
       [3, 7]])

In [40]:
# recall for each class, in [False, True] order
recall_score(actual, predicted, labels=[False, True], average=None)

array([0.33333333, 0.7       ])

In [42]:
# with no labels/average arguments, only the positive (True) class's recall is returned
recall_score(actual, predicted) # by default, "recall" means "True recall" when doing binary classification

0.7

In [43]:
# precision for each class, in [False, True] order
precision_score(actual, predicted, labels=[False, True], average=None)

array([0.25      , 0.77777778])

In [44]:
# as with recall, the default reports precision for the positive (True) class only
precision_score(actual, predicted)

0.7777777777777778