### Lab 08: The Confusion Matrix

By definition a confusion matrix is such that 
$C_{i,j}$ is equal to the number of observations known to be in group $i$ and predicted to be in group $j$. Thus in binary classification, where 0=negative, and 1=positive, we have:

$C_{0,0}$ are the True Negatives (TN)

$C_{1,1}$ are the True Positives (TP)

$C_{0,1}$ are the False Positives (FP)

$C_{1,0}$ are the False Negatives (FN)


In [1]:
# Your own confusion matrix calculation
import numpy as np

def confusion_matrix(y_true, y_pred):
    """ Generate a confusion matrix.
    y = actual outcomes (0, 1, 2, ...)
    y_pred = predicted outcomes (0, 1, 2, ...)
    return confusion matrix as a numpy array
    """
    
    # Find unique identifiers
    unique_classes = set(y_true) | set(y_pred)
    n_classes = len(unique_classes)
    
    # Create matrix (all zeros)
    matrix = np.zeros(shape=(n_classes, n_classes), dtype=int)
    
    # Pair up each actual outcome with the corresponding prediction
    actual_prediction = list(zip(y_true, y_pred))
    
    # For each pair, increment the correct position in the matrix
    for i,j in actual_prediction:
        matrix[i,j] += 1
        
    return matrix

In [2]:
# perfect prediction
y_true = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
print("\nPerfect Accuracy")
print(confusion_matrix(y_true, y_pred))

# Note we are sticking to the scikit convention that ROWS=ACTUAL, COLUMNS=PREDICTIONS
# Let's introduce a single false positive (One healthy patient predicted to be sick.)
y_true = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 0, 1, 0, 0, 0, 1, 1, 1, 1]
print("\nOne False Positive")
print(confusion_matrix(y_true, y_pred))

# And 3/4 of our sick patients are mistakenly treated as healthy!
y_true = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 0, 1, 0, 0, 0, 1, 0, 0, 0]
print("\nAnd Three False negatives:")
print(confusion_matrix(y_true, y_pred))

# It works for multiple class labels too
# but now True/False Positives/Negatives doesn't apply

print("\nFOUR CLASSES: ")
y_true = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3]
y_pred = [0, 0, 2, 0, 0, 0, 1, 3, 3, 2, 2, 2, 2, 2, 2, 1, 2, 2, 0]
C = confusion_matrix(y_true, y_pred)
print(C)
C.diagonal().sum() / C.sum()


Perfect Accuracy
[[6 0]
 [0 4]]

One False Positive
[[5 1]
 [0 4]]

And Three False negatives:
[[5 1]
 [3 1]]

FOUR CLASSES: 
[[5 0 1 0]
 [0 1 1 2]
 [0 0 5 0]
 [1 1 2 0]]


0.5789473684210527

In [3]:
def metrics(y_true, y_pred, places=4):
    """ Generate accuracy scores for classifier.
    Round each score to <places> decimal places """
    
    scores = {}
    C = confusion_matrix(y_true, y_pred)
    
    
    scores['accuracy'] = C.diagonal().sum() / C.sum()
    
    # Implement scores for binary classification
    # Here is a start....
    
    if C.shape == (2,2):
        TN, FP, FN, TP = C.ravel() #  ravel flattens the array row by row
        scores['sensitivity'] = TP / (TP + FN)
    else:
        pass # do later
        
    return scores  
    

In [4]:
y_true = [0,0,0,0,1,1,1]
y_pred = [1,0,0,0,0,0,1]

scores = metrics(y_true, y_pred)
scores

{'accuracy': 0.5714285714285714, 'sensitivity': 0.3333333333333333}

In [5]:
# Import the advertising dataset and describe

import pandas as pd
adv = pd.read_csv('Advertising.csv')
adv = adv.iloc[:, 1:]
adv.describe()


Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [6]:
# Set a cutoff for HIGH sales
adv.sales = (adv.sales>=14).astype(int)



In [7]:
from sklearn import linear_model as lm

def perceptron_scikit(data):
    """ Run the scikit-learn Perceptron model on the data """

    X = data[data.columns[:-1]]
    y = data.iloc[:, -1]
    clf = lm.Perceptron()
    clf.fit(X,y)
    w = list(clf.intercept_) + list(clf.coef_[0])
    return w, clf.predict(X)



In [8]:
w, sales_pred = perceptron_scikit(adv)

In [9]:
confusion_matrix(adv.sales, sales_pred)

array([[50, 60],
       [ 8, 82]])

In [10]:
# How does perceptron do on this data?
metrics(adv.sales, sales_pred)

{'accuracy': 0.66, 'sensitivity': 0.9111111111111111}

In [11]:
# What if we always predict LOW sales?
# Accuracy drops to the frequency of low sales and sensitivity is 0%

metrics(adv.sales, [0]*len(adv.sales))

{'accuracy': 0.55, 'sensitivity': 0.0}

In [12]:
# What if we always predict HIGH sales?
# Accuracy drops to the frequency of high sales and sensitivity is 100%!

metrics(adv.sales, [1]*len(adv.sales))

{'accuracy': 0.45, 'sensitivity': 1.0}