In [3]:
import numpy as np
import pandas as pd

# Scoring and Metrics

In [7]:
# Toy dataset: the label y is True exactly when the feature x is positive.
df = pd.DataFrame({
    "x": [15, -2, 99, -10, 8, 5, -1000, -1],
    "y": [True, False, True, False, True, True, False, False],
})

# The first four rows are used for training; the remaining rows are held out for testing.
N_TRAIN = 4
train = df.iloc[:N_TRAIN]
test = df.iloc[N_TRAIN:]

In [8]:
# training split: the first four rows of df
train

Unnamed: 0,x,y
0,15,True
1,-2,False
2,99,True
3,-10,False


In [6]:
# test split: the last four rows of df
test

Unnamed: 0,x,y
4,8,True
5,5,True
6,-1000,False
7,-1,False


In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import *
# Fit a logistic-regression classifier on the training rows (feature "x",
# label "y"), then report its score on the held-out test rows.
model = LogisticRegression().fit(X=train[["x"]], y=train["y"])
model.score(X=test[["x"]], y=test["y"])

0.75

In [12]:
# accuracy_score(ACTUAL, PREDICTED)
# fraction of positions where the prediction equals the actual label;
# here only the first of the two positions matches
accuracy_score(["A", "B"], ["A", "C"])

0.5

In [13]:
# actual labels of the test rows
test["y"]

4     True
5     True
6    False
7    False
Name: y, dtype: bool

In [14]:
# labels the fitted model predicts for the test rows
model.predict(test[["x"]])

array([ True, False, False, False])

In [16]:
# accuracy computed by hand from actual vs. predicted test labels
accuracy_score(test["y"], model.predict(test[["x"]]))

0.75

# Confusion Matrices

In [18]:
# toy example with three classes; position i of `predicted` is the guess
# for the sample at position i of `actual`
actual =    ["dog", "dog", "dog", "dog", "cat", "cat", "cat", "mouse", "mouse"]
predicted = ["dog", "dog", "dog", "cat", "cat", "dog", "cat", "mouse", "mouse"]

In [22]:
# Rows index the actual category and columns index the predicted category,
# both in the order given by `labels`.
labels = [
    "dog",
    "cat",
    "mouse",
    "horse",
]
cm = confusion_matrix(y_true=actual, y_pred=predicted, labels=labels)
cm

array([[3, 1, 0, 0],
       [1, 2, 0, 0],
       [0, 0, 2, 0],
       [0, 0, 0, 0]])

In [24]:
# attach the class names to the rows (actual) and columns (predicted)
pd.DataFrame(cm, index=labels, columns=labels)

Unnamed: 0,dog,cat,mouse,horse
dog,3,1,0,0
cat,1,2,0,0
mouse,0,0,2,0
horse,0,0,0,0


In [27]:
# Confusion matrix for the fitted model on the test rows, wrapped in a
# DataFrame so rows (actual) and columns (predicted) carry their labels.
labels = [False, True]
cm = pd.DataFrame(
    confusion_matrix(y_true=test["y"], y_pred=model.predict(test[["x"]]), labels=labels),
    index=labels,
    columns=labels,
)
cm

Unnamed: 0,False,True
False,2,0
True,1,1


In [31]:
# Name each cell of the binary confusion matrix; cells are addressed as
# cm.at[actual, predicted].
for outcome, row, col in [
    ("True Positive", True, True),
    ("False Positive", False, True),
    ("False Negative", True, False),
    ("True Negative", False, False),
]:
    print(outcome, cm.at[row, col])

True Positive 1
False Positive 0
False Negative 1
True Negative 2


# Multi-Class Metrics

In [33]:
# A 10-sample pattern of actual/predicted labels, repeated 199 times, plus
# 10 "horse" samples of which 9 are predicted as "dog" and 1 as "horse".
base_actual =    ["dog", "dog", "dog", "dog", "cat", "cat", "cat", "cat", "mouse", "mouse"]
base_predicted = ["dog", "dog", "dog", "dog", "cat", "dog", "cat", "dog", "mouse", "mouse"]
actual = base_actual * 199 + ["horse"] * 10
predicted = base_predicted * 199 + ["dog"] * 9 + ["horse"]

# Tabulate actual (rows) against predicted (columns) classes as a labelled
# DataFrame, with both axes in `labels` order.
labels = ["dog", "cat", "mouse", "horse"]
cm = pd.DataFrame(
    confusion_matrix(y_true=actual, y_pred=predicted, labels=labels),
    index=labels,
    columns=labels,
)
cm

Unnamed: 0,dog,cat,mouse,horse
dog,796,0,0,0
cat,398,398,0,0
mouse,0,0,398,0
horse,9,0,0,1


In [37]:
cm.loc["cat"].sum() # cat row total: number of samples whose actual class is cat

796

In [38]:
# cat recall is 50%: correctly predicted cats divided by all actual cats
# (the diagonal cell over the cat row total)
cm.at["cat", "cat"] / cm.loc["cat"].sum()

0.5

In [39]:
# horse recall: correctly predicted horses divided by all actual horses
cm.at["horse", "horse"] / cm.loc["horse"].sum()

0.1

In [42]:
# class order currently stored in `labels`
labels

['dog', 'cat', 'mouse', 'horse']

In [43]:
# average=None returns one recall value per class, in `labels` order
recall_score(actual, predicted, average=None, labels=labels)

array([1. , 0.5, 1. , 0.1])

In [44]:
# one precision value per class, in `labels` order: of the samples predicted
# as a class (column total), the share that actually belong to it
precision_score(actual, predicted, average=None, labels=labels)

array([0.66167914, 1.        , 1.        , 1.        ])

In [45]:
# unweighted mean of the per-class recalls (every class counts equally)
recall_score(actual, predicted, average=None, labels=labels).mean()

0.65

In [47]:
# balanced accuracy: for this data it equals the mean of the per-class recalls
balanced_accuracy_score(actual, predicted)

0.65

In [46]:
# plain accuracy weights every sample equally, so the 10 poorly predicted
# horse samples barely lower it
accuracy_score(actual, predicted)

0.7965

# Binary Classification Metrics

In binary classification, unless otherwise specified, "precision" and "recall" refer to those metrics computed for the positive (True) class.

In [50]:
# Binary example: 3 actual negatives (False) and 10 actual positives (True).
actual = [False, True, True, True, True, False, False, True, True, True, True, True, True]
predicted = [False, True, True, True, True, True, True, False, False, False, True, True, True]
labels = [False, True]
# Pass `labels` explicitly so the matrix's row/column order is guaranteed to
# match the index/columns attached below (rows = actual, columns = predicted)
# instead of relying on confusion_matrix's default label ordering.
cm = confusion_matrix(actual, predicted, labels=labels)
cm = pd.DataFrame(cm, index=labels, columns=labels)
cm

Unnamed: 0,False,True
False,1,2
True,3,7


In [52]:
# recall for each class separately, in `labels` order ([False, True])
recall_score(actual, predicted, labels=labels, average=None)

array([0.33333333, 0.7       ])

In [53]:
# unless otherwise specified, "recall" means recall of the positive (True)
# class; with no `average`/`labels` arguments this returns that single value
recall_score(actual, predicted)

0.7