# Confusion Matrix

A confusion matrix is a table that is often used to describe the performance of a classification model (or "classifier") on a set of data for which the true values are known.

In this notebook, we will see how to build a confusion matrix and how to derive the usual performance metrics from it.

In [26]:
import numpy as np
import sklearn.metrics as metrics
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [27]:
# Calculate Accuracy of the model

def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, one per entry of ``y_true``.

    Returns
    -------
    float
        Number of matching positions divided by ``len(y_true)``.
    """
    # Coerce to arrays so ``==`` is element-wise. Comparing two plain Python
    # lists yields a single bool, which silently produced a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(y_true == y_pred) / len(y_true)

# Calculate Error of the model 

def error(y_true, y_pred):
    """Return the fraction of predictions that differ from the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, one per entry of ``y_true``.

    Returns
    -------
    float
        Number of mismatching positions divided by ``len(y_true)``.
    """
    # Coerce to arrays so ``!=`` is element-wise. On plain Python lists it
    # collapses to a single bool and the rate comes out as 0 or 1/len.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(y_true != y_pred) / len(y_true)


In [28]:
#Calculate Confusion Matrix of the two classes model

def confusion_matrix(y_true, y_pred):
    """Build the 2x2 confusion matrix of a binary (0/1) classifier.

    Label ``1`` is the positive class and label ``0`` the negative class.
    Entries with any other label are not counted in any cell.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, one per entry of ``y_true``.

    Returns
    -------
    numpy.ndarray
        ``[[TN, FP], [FN, TP]]`` -- rows are the true class, columns the
        predicted class.
    """
    # Coerce to arrays so the comparisons below are element-wise. With plain
    # Python lists ``y_true == 1`` is just ``False`` and every count becomes 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    true_positive = np.sum((y_true == 1) & (y_pred == 1))
    true_negative = np.sum((y_true == 0) & (y_pred == 0))
    false_positive = np.sum((y_true == 0) & (y_pred == 1))
    false_negative = np.sum((y_true == 1) & (y_pred == 0))
    return np.array([[true_negative, false_positive], [false_negative, true_positive]])

# Calculate Precision of the model

def precision(y_true, y_pred):
    """Precision of the positive class: TP / (TP + FP).

    The counts are read from ``confusion_matrix(y_true, y_pred)``, whose
    flattened order is TN, FP, FN, TP. There is no guard against a zero
    denominator.
    """
    _, false_pos, _, true_pos = confusion_matrix(y_true, y_pred).ravel()
    return true_pos / (true_pos + false_pos)

# Calculate Recall of the model

def recall(y_true, y_pred):
    """Recall of the positive class: TP / (TP + FN).

    Uses the second row of ``confusion_matrix(y_true, y_pred)``, which holds
    the samples whose true label is 1 as ``[FN, TP]``. There is no guard
    against a zero denominator.
    """
    false_neg, true_pos = confusion_matrix(y_true, y_pred)[1]
    return true_pos / (true_pos + false_neg)

# Positive Predictive Value

def positive_predictive_value(y_true, y_pred):
    """Positive predictive value: TP / (TP + FP).

    This is the same formula as ``precision``: the share of samples
    predicted as 1 whose true label is also 1. There is no guard against a
    zero denominator.
    """
    matrix = confusion_matrix(y_true, y_pred)
    hits = matrix[1, 1]            # true positives
    false_alarms = matrix[0, 1]    # false positives
    return hits / (hits + false_alarms)

#True Positive Rate

def true_positive_rate(y_true, y_pred):
    """True positive rate (sensitivity): TP / (TP + FN).

    This is the same formula as ``recall``. The confusion matrix is unpacked
    row by row as ``[[TN, FP], [FN, TP]]``; only the second row is needed.
    There is no guard against a zero denominator.
    """
    (_, _), (missed, caught) = confusion_matrix(y_true, y_pred)
    return caught / (caught + missed)

#True Negative Rate

def true_negative_rate(y_true, y_pred):
    """True negative rate (specificity): TN / (TN + FP).

    Uses the first row of ``confusion_matrix(y_true, y_pred)``, which holds
    the samples whose true label is 0 as ``[TN, FP]``. There is no guard
    against a zero denominator.
    """
    true_neg, false_pos = confusion_matrix(y_true, y_pred)[0]
    return true_neg / (true_neg + false_pos)

#False Positive Rate

def false_positive_rate(y_true, y_pred):
    """False positive rate (fall-out): FP / (FP + TN).

    The share of samples whose true label is 0 that were predicted as 1.
    There is no guard against a zero denominator.
    """
    counts = confusion_matrix(y_true, y_pred)
    false_pos = counts[0, 1]
    true_neg = counts[0, 0]
    actual_negatives = false_pos + true_neg
    return false_pos / actual_negatives

#False Negative Rate

def false_negative_rate(y_true, y_pred):
    """False negative rate (miss rate): FN / (FN + TP).

    The share of samples whose true label is 1 that were predicted as 0.
    Counts come from the flattened confusion matrix (TN, FP, FN, TP). There
    is no guard against a zero denominator.
    """
    _, _, false_neg, true_pos = confusion_matrix(y_true, y_pred).ravel()
    return false_neg / (false_neg + true_pos)

#F1 Score

def f1_score(y_true, y_pred):
    """F1 score of the positive class: the harmonic mean of precision and recall.

    Computed directly from the confusion-matrix counts as
    ``2*TP / (2*TP + FP + FN)``, which is algebraically equal to
    ``2*P*R / (P + R)`` but stays well defined when precision and recall are
    both 0 (the harmonic-mean form evaluates 0/0 and yields ``nan`` there).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 = negative, 1 = positive).
    y_pred : array-like
        Predicted labels, one per entry of ``y_true``.

    Returns
    -------
    float
        The F1 score, or ``0.0`` when there are no true positives, false
        positives or false negatives at all. That fallback matches the value
        scikit-learn returns under its default ``zero_division`` setting.
    """
    cm = confusion_matrix(y_true, y_pred)
    true_pos, false_pos, false_neg = cm[1, 1], cm[0, 1], cm[1, 0]

    denominator = 2 * true_pos + false_pos + false_neg
    if denominator == 0:
        # No positive samples and no positive predictions: F1 is undefined.
        return 0.0
    return 2 * true_pos / denominator


In [None]:
# Test functionality of the functions
#
# Fits a logistic regression on the breast-cancer dataset, prints every
# hand-written metric next to its sklearn.metrics counterpart, and then
# asserts that the two agree so a regression cannot slip by unnoticed.

# Importing the dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Fixed random_state keeps the split (and the numbers below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)
model = LogisticRegression(max_iter=200000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Testing the functions
print("---------- Testing the functions ----------")
print("Accuracy: ", accuracy(y_test, y_pred))
print("Error: ", error(y_test, y_pred))
print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
print("Precision: ", precision(y_test, y_pred))
print("Recall: ", recall(y_test, y_pred))
print("Positive Predictive Value: ", positive_predictive_value(y_test, y_pred))
print("True Positive Rate: ", true_positive_rate(y_test, y_pred))
print("True Negative Rate: ", true_negative_rate(y_test, y_pred))
print("False Positive Rate: ", false_positive_rate(y_test, y_pred))
print("False Negative Rate: ", false_negative_rate(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

# Reference values from sklearn.metrics (imported in the import cell)
print("---------- Testing sklearn.metrics functions ----------")
print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
print("Error: ", 1 - metrics.accuracy_score(y_test, y_pred))
print("Confusion Matrix: ", metrics.confusion_matrix(y_test, y_pred))
print("Precision: ", metrics.precision_score(y_test, y_pred))
print("Recall: ", metrics.recall_score(y_test, y_pred))
print("Positive Predictive Value: ", metrics.precision_score(y_test, y_pred))
print("True Positive Rate: ", metrics.recall_score(y_test, y_pred))
print("True Negative Rate: Not directly available in sklearn.metrics")
print("False Positive Rate: Not directly available in sklearn.metrics")
fnr = 1 - metrics.recall_score(y_test, y_pred)
print("False Negative Rate: ", fnr)
print("F1 Score: ", metrics.f1_score(y_test, y_pred))

# Cross-check: every hand-written metric must agree with sklearn.
# assert_allclose tolerates the last-digit float noise visible in the printout.
sk_precision = metrics.precision_score(y_test, y_pred)
sk_recall = metrics.recall_score(y_test, y_pred)
# sklearn has no dedicated specificity function, but the recall of class 0
# is the same quantity: TN / (TN + FP).
sk_specificity = metrics.recall_score(y_test, y_pred, pos_label=0)

np.testing.assert_allclose(accuracy(y_test, y_pred), metrics.accuracy_score(y_test, y_pred))
np.testing.assert_allclose(error(y_test, y_pred), 1 - metrics.accuracy_score(y_test, y_pred))
np.testing.assert_array_equal(confusion_matrix(y_test, y_pred), metrics.confusion_matrix(y_test, y_pred))
np.testing.assert_allclose(precision(y_test, y_pred), sk_precision)
np.testing.assert_allclose(recall(y_test, y_pred), sk_recall)
np.testing.assert_allclose(positive_predictive_value(y_test, y_pred), sk_precision)
np.testing.assert_allclose(true_positive_rate(y_test, y_pred), sk_recall)
np.testing.assert_allclose(true_negative_rate(y_test, y_pred), sk_specificity)
np.testing.assert_allclose(false_positive_rate(y_test, y_pred), 1 - sk_specificity)
np.testing.assert_allclose(false_negative_rate(y_test, y_pred), fnr)
np.testing.assert_allclose(f1_score(y_test, y_pred), metrics.f1_score(y_test, y_pred))



---------- Testing the functions ----------
Accuracy:  0.9590643274853801
Error:  0.04093567251461988
Confusion Matrix:  [[ 62   1]
 [  6 102]]
Precision:  0.9902912621359223
Recall:  0.9444444444444444
Positive Predictive Value:  0.9902912621359223
True Positive Rate:  0.9444444444444444
True Negative Rate:  0.9841269841269841
False Positive Rate:  0.015873015873015872
False Negative Rate:  0.05555555555555555
F1 Score:  0.966824644549763
---------- Testing sklearn.metrics functions ----------
Accuracy:  0.9590643274853801
Error:  0.040935672514619936
Confusion Matrix:  [[ 62   1]
 [  6 102]]
Precision:  0.9902912621359223
Recall:  0.9444444444444444
Positive Predictive Value:  0.9902912621359223
True Positive Rate:  0.9444444444444444
True Negative Rate: Not directly available in sklearn.metrics
False Positive Rate: Not directly available in sklearn.metrics
False Negative Rate:  0.05555555555555558
F1 Score:  0.966824644549763
