# The Confusion Matrix

In [1]:
import pandas as pd
from sklearn.metrics import confusion_matrix

# Mock results from a hypothetical model that predicts whether a person likes
# coffee, paired with whether that person actually likes coffee.
# Each record is (actual, prediction) for one person.
observations = [
    ('coffee', 'no coffee'),
    ('no coffee', 'no coffee'),
    ('no coffee', 'coffee'),
    ('coffee', 'coffee'),
    ('coffee', 'coffee'),
    ('coffee', 'coffee'),
    ('no coffee', 'no coffee'),
    ('coffee', 'no coffee'),
]
df = pd.DataFrame(observations, columns=['actual', 'prediction'])

df

Unnamed: 0,actual,prediction
0,coffee,no coffee
1,no coffee,no coffee
2,no coffee,coffee
3,coffee,coffee
4,coffee,coffee
5,coffee,coffee
6,no coffee,no coffee
7,coffee,no coffee


In [2]:
# Rows are the actual labels, columns are the predicted labels.
# Always pass actual first and prediction second so the cells of the table
# keep the same meaning everywhere.
pd.crosstab(index=df['actual'], columns=df['prediction'])



prediction,coffee,no coffee
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
coffee,3,2
no coffee,1,2


In [12]:
# Count each cell of the confusion matrix directly from df, treating 'coffee'
# as the positive class. Deriving the counts (rather than typing them in by
# hand from the crosstab output) keeps them in sync if the data ever changes.
# The data is binary, so "not coffee" is the same as 'no coffee'.
actually_likes = df.actual == 'coffee'
predicted_likes = df.prediction == 'coffee'

# int(...) keeps the counts as plain Python ints so the metrics computed from
# them display as ordinary numbers.

## [0,0]: true positive, we predicted they like coffee, they actually like coffee
tp = int((actually_likes & predicted_likes).sum())

## [0,1]: false negative, we predicted they do not like coffee, they actually like coffee
fn = int((actually_likes & ~predicted_likes).sum())

## [1,0]: false positive, we predicted they like coffee, they actually do not like coffee
fp = int((~actually_likes & predicted_likes).sum())

## [1,1]: true negative, we predicted they do not like coffee, they actually do not like coffee
tn = int((~actually_likes & ~predicted_likes).sum())


In [13]:
## The labels argument fixes the row/column order. With 'no coffee' listed first,
## the array is flipped relative to the crosstab above and reads:
##   [[tn, fp],
##    [fn, tp]]
## Which class counts as "positive" is arbitrary and chosen by the practitioner,
## but one value MUST be positive and the other negative.
label_order = ('no coffee', 'coffee')
confusion_matrix(df.actual, df.prediction, labels=label_order)


array([[2, 1],
       [2, 3]])

# Baseline

In [14]:
# How often each actual class occurs; the most common one is used as the baseline.
df['actual'].value_counts()

coffee       5
no coffee    3
Name: actual, dtype: int64

In [15]:
# Baseline model: always predict the most common actual class.
# The class is looked up from the data (here it is 'coffee') instead of being
# hard-coded, so the baseline stays correct if the data changes.
df['baseline_prediction'] = df.actual.value_counts().idxmax()

In [16]:
# Show the frame again, now including the baseline_prediction column.
df

Unnamed: 0,actual,prediction,baseline_prediction
0,coffee,no coffee,coffee
1,no coffee,no coffee,coffee
2,no coffee,coffee,coffee
3,coffee,coffee,coffee
4,coffee,coffee,coffee
5,coffee,coffee,coffee
6,no coffee,no coffee,coffee
7,coffee,no coffee,coffee


# Common Evaluation Metrics

## Accuracy

In [17]:
# Accuracy: the percentage of predictions that were correct,
# i.e. the number of correct predictions divided by the total number of predictions.
correct_predictions = tp + tn
total_predictions = tp + tn + fp + fn
accuracy = correct_predictions / total_predictions

accuracy

0.625

## Precision

In [18]:
# Precision: true positive predictions divided by all positive predictions,
# i.e. the percentage of positive predictions that were actually correct.
#
# Use it when the cost of acting on a positive prediction is high,
# so false positives MUST be avoided.
# Example: a spam folder.
positive_predictions = tp + fp
precision = tp / positive_predictions

precision

0.75

## Recall

In [19]:
# Recall: true positive predictions divided by all actually positive occurrences,
# i.e. the percentage of actual positives the model correctly predicted as positive.
#
# Use it when missing a positive occurrence is costly,
# so false negatives MUST be avoided.
# Example: fraud prevention.
actual_positives = tp + fn
recall = tp / actual_positives

recall

0.6

# Evaluation

In [20]:
# Accuracy computed straight from the data: the fraction of rows where the
# prediction matches the actual label, for the model and for the baseline.
model_accuracy = df['prediction'].eq(df['actual']).mean()
baseline_accuracy = df['baseline_prediction'].eq(df['actual']).mean()

print(f'   model accuracy: {model_accuracy:.2%}')
print(f'baseline accuracy: {baseline_accuracy:.2%}')

   model accuracy: 62.50%
baseline accuracy: 62.50%


In [21]:
# Recall only considers rows whose actual label is the positive class ('coffee'):
# among those rows, how often did each set of predictions match?
subset = df.loc[df['actual'] == 'coffee']

model_recall = subset['prediction'].eq(subset['actual']).mean()
baseline_recall = subset['baseline_prediction'].eq(subset['actual']).mean()

print(f'   model recall: {model_recall:.2%}')
print(f'baseline recall: {baseline_recall:.2%}')

   model recall: 60.00%
baseline recall: 100.00%


In [22]:
# Precision only considers rows that were predicted positive ('coffee'):
# among those rows, how often was the actual label a match?
subset = df.loc[df['prediction'] == 'coffee']
model_precision = subset['prediction'].eq(subset['actual']).mean()

# Same idea for the baseline, using the rows the baseline predicted as 'coffee'.
subset = df.loc[df['baseline_prediction'] == 'coffee']
baseline_precision = subset['baseline_prediction'].eq(subset['actual']).mean()

print(f'model precision: {model_precision:.2%}')
print(f'baseline precision: {baseline_precision:.2%}')

model precision: 75.00%
baseline precision: 62.50%
