# Matriz de confusão

In [1]:
%load_ext autoreload
%autoreload 2

# Imports

In [2]:
from IPython.core.display import display, HTML

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn import metrics
from sklearn.preprocessing import binarize

from skanalytics.reporting.binary_confusion_matrix import BinaryConfusionMatrix

# Modelo de exemplo

In [3]:
X, y = datasets.make_classification(n_samples=100000, n_features=20,
                                    n_informative=2, n_redundant=2)

train_samples = 100  # Samples used for training the models

X_train = X[:train_samples]
X_test = X[train_samples:]
y_train = y[:train_samples]
y_test = y[train_samples:]

lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

# Criação de matriz de confusão

In [4]:
cm = BinaryConfusionMatrix(y_test, lr.predict(X_test))

## Padrão

In [5]:
HTML(cm.html())

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,"Total Population 99,900","Condition Positive (CP) 49,957","Condition Negative (CN) 49,943",Prevalence = ΣCP  Total  50.0%,Accuracy = ΣTP  + ΣTN  Total  94.3%,Accuracy = ΣTP  + ΣTN  Total  94.3%
Predicted Condition,"Predicted Condition Positive (PCP) 48,183","True Positive (TP)  46,201","False Positive (FP) Type I error  1,982",Positive Predictive Value (PPV) = ΣTP  ΣPCP  95.9%,False Discovery Rate (FDR) = ΣFP  ΣPCP  4.1%,False Discovery Rate (FDR) = ΣFP  ΣPCP  4.1%
Predicted Condition,"Predicted Condition Negative (PCN) 51,717","False Negative (FN) Type II error  3,756","True Negative (TN)  47,961",False Omission Rate (FOR) = ΣFN  ΣPCN  7.3%,Negative Predictive Value (NPV) = ΣTN  ΣPCN  92.7%,Negative Predictive Value (NPV) = ΣTN  ΣPCN  92.7%
,,True Positive Rate (TPR) = ΣTP  ΣCP  92.5%,False Positive Rate (FPR) = ΣFP  ΣCN  4.0%,Positive Likelihood Ratio (PLR) = ΣTPR  ΣFPR  23.304,Diagnostic Odds Ratio (DOR) = ΣPLR  ΣNLR  297.653,F1 score = 2  1  TPR  + 1  PPV  94.2%
,,False Negative Rate (FNR) = ΣFN  ΣCP  7.5%,True Negative Rate (TNR) = ΣTN  ΣCN  96.0%,Negative Likelihood Ratio (NLR) = FNR  TNR  0.078,Diagnostic Odds Ratio (DOR) = ΣPLR  ΣNLR  297.653,F1 score = 2  1  TPR  + 1  PPV  94.2%


## Sem rótulos

In [6]:
HTML(cm.html(label=False))

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,"Total 99,900","CP 49,957","CN 49,943",Prev = ΣCP  Total  50.0%,Acc = ΣTP  + ΣTN  Total  94.3%,Acc = ΣTP  + ΣTN  Total  94.3%
Predicted Condition,"PCP 48,183","TP 46,201","FP 1,982",PPV = ΣTP  ΣPCP  95.9%,FDR = ΣFP  ΣPCP  4.1%,FDR = ΣFP  ΣPCP  4.1%
Predicted Condition,"PCN 51,717","FN 3,756","TN 47,961",FOR = ΣFN  ΣPCN  7.3%,NPV = ΣTN  ΣPCN  92.7%,NPV = ΣTN  ΣPCN  92.7%
,,TPR = ΣTP  ΣCP  92.5%,FPR = ΣFP  ΣCN  4.0%,PLR = ΣTPR  ΣFPR  23.304,DOR = ΣPLR  ΣNLR  297.653,F1 = 2  1  TPR  + 1  PPV  94.2%
,,FNR = ΣFN  ΣCP  7.5%,TNR = ΣTN  ΣCN  96.0%,NLR = FNR  TNR  0.078,DOR = ΣPLR  ΣNLR  297.653,F1 = 2  1  TPR  + 1  PPV  94.2%


## Sem rótulos e sem legenda

In [7]:
HTML(cm.html(label=False, legend=False))

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,"Total 99,900","CP 49,957","CN 49,943",Prev = ΣCP  Total  50.0%,Acc = ΣTP  + ΣTN  Total  94.3%,Acc = ΣTP  + ΣTN  Total  94.3%
Predicted Condition,"PCP 48,183","TP 46,201","FP 1,982",PPV = ΣTP  ΣPCP  95.9%,FDR = ΣFP  ΣPCP  4.1%,FDR = ΣFP  ΣPCP  4.1%
Predicted Condition,"PCN 51,717","FN 3,756","TN 47,961",FOR = ΣFN  ΣPCN  7.3%,NPV = ΣTN  ΣPCN  92.7%,NPV = ΣTN  ΣPCN  92.7%
,,TPR = ΣTP  ΣCP  92.5%,FPR = ΣFP  ΣCN  4.0%,PLR = ΣTPR  ΣFPR  23.304,DOR = ΣPLR  ΣNLR  297.653,F1 = 2  1  TPR  + 1  PPV  94.2%
,,FNR = ΣFN  ΣCP  7.5%,TNR = ΣTN  ΣCN  96.0%,NLR = FNR  TNR  0.078,DOR = ΣPLR  ΣNLR  297.653,F1 = 2  1  TPR  + 1  PPV  94.2%


## Sem rótulos e equações

In [8]:
HTML(cm.html(label=False, equation=False))

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,"Total 99,900","CP 49,957","CN 49,943",Prev 50.0%,Acc 94.3%,Acc 94.3%
Predicted Condition,"PCP 48,183","TP 46,201","FP 1,982",PPV 95.9%,FDR 4.1%,FDR 4.1%
Predicted Condition,"PCN 51,717","FN 3,756","TN 47,961",FOR 7.3%,NPV 92.7%,NPV 92.7%
,,TPR 92.5%,FPR 4.0%,PLR 23.304,DOR 297.653,F1 94.2%
,,FNR 7.5%,TNR 96.0%,NLR 0.078,DOR 297.653,F1 94.2%


## Sem rótulos, sem equações e sem legenda

In [9]:
HTML(cm.html(label=False, equation=False, legend=False))

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,"Total 99,900","CP 49,957","CN 49,943",Prev 50.0%,Acc 94.3%,Acc 94.3%
Predicted Condition,"PCP 48,183","TP 46,201","FP 1,982",PPV 95.9%,FDR 4.1%,FDR 4.1%
Predicted Condition,"PCN 51,717","FN 3,756","TN 47,961",FOR 7.3%,NPV 92.7%,NPV 92.7%
,,TPR 92.5%,FPR 4.0%,PLR 23.304,DOR 297.653,F1 94.2%
,,FNR 7.5%,TNR 96.0%,NLR 0.078,DOR 297.653,F1 94.2%


## Mínimo possível

In [10]:
HTML(cm.html(label=False, acronym=False, equation=False, legend=False))

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,99900.0,49957,49943,50.0%,94.3%,94.3%
Predicted Condition,48183.0,46201,1982,95.9%,4.1%,4.1%
Predicted Condition,51717.0,3756,47961,7.3%,92.7%,92.7%
,,92.5%,4.0%,23.304,297.653,94.2%
,,7.5%,96.0%,0.078,297.653,94.2%


# Apenas a tabela, sem valores 

Útil para fazer consulta ou deixar os nomes separados.

In [11]:
HTML(cm.html(value=False))

0,1,2,3,4,5,6
,,True Condition,True Condition,,,
,Total Population,Condition Positive (CP),Condition Negative (CN),Prevalence = ΣCP  Total,Accuracy = ΣTP  + ΣTN  Total,Accuracy = ΣTP  + ΣTN  Total
Predicted Condition,Predicted Condition Positive (PCP),True Positive (TP),False Positive (FP) Type I error,Positive Predictive Value (PPV) = ΣTP  ΣPCP,False Discovery Rate (FDR) = ΣFP  ΣPCP,False Discovery Rate (FDR) = ΣFP  ΣPCP
Predicted Condition,Predicted Condition Negative (PCN),False Negative (FN) Type II error,True Negative (TN),False Omission Rate (FOR) = ΣFN  ΣPCN,Negative Predictive Value (NPV) = ΣTN  ΣPCN,Negative Predictive Value (NPV) = ΣTN  ΣPCN
,,True Positive Rate (TPR) = ΣTP  ΣCP,False Positive Rate (FPR) = ΣFP  ΣCN,Positive Likelihood Ratio (PLR) = ΣTPR  ΣFPR,Diagnostic Odds Ratio (DOR) = ΣPLR  ΣNLR,F1 score = 2  1  TPR  + 1  PPV
,,False Negative Rate (FNR) = ΣFN  ΣCP,True Negative Rate (TNR) = ΣTN  ΣCN,Negative Likelihood Ratio (NLR) = FNR  TNR,Diagnostic Odds Ratio (DOR) = ΣPLR  ΣNLR,F1 score = 2  1  TPR  + 1  PPV


# Customização dos rótulos

In [12]:
portuguese_labels = {
    'true_condition': 'Esperado',
    'predicted_condition': 'Predito',
    'positive_predictive_value': 'Precisão',
    'true_positive_rate': 'Recall',
    'true_positive': 'Verdadeiro Positivo',
    'false_positive': 'Falso Positivo',
    'false_negative': 'Falso Negativo',
    'true_negative': 'Verdadeiro Negativo',
    'true_negative_rate': 'Especificidade',
    'false_negative_rate': '<i>Miss Rate</i>',
    'accuracy': 'Acurácia',
    'total_population': 'Total',
    'condition_positive': 'Positivo',
    'condition_negative': 'Negativo',
    'predicted_condition_negative': 'Predito Negativo',
    'predicted_condition_positive': 'Predito Positivo',
    'false_positive_rate': 'Alarme Falso',
    'prevalence': 'Proporção de Positivos'
}

cm = BinaryConfusionMatrix(
    expected=y_test, 
    predicted=lr.predict(X_test),
    labels=portuguese_labels)

HTML(cm.html(
    label=True, acronym=False, legend=False, equation=False,
))

0,1,2,3,4,5,6
,,Esperado,Esperado,,,
,"Total 99,900","Positivo 49,957","Negativo 49,943",Proporção de Positivos 50.0%,Acurácia 94.3%,Acurácia 94.3%
Predito,"Predito Positivo 48,183","Verdadeiro Positivo 46,201","Falso Positivo 1,982",Precisão 95.9%,False Discovery Rate 4.1%,False Discovery Rate 4.1%
Predito,"Predito Negativo 51,717","Falso Negativo 3,756","Verdadeiro Negativo 47,961",False Omission Rate 7.3%,Negative Predictive Value 92.7%,Negative Predictive Value 92.7%
,,Recall 92.5%,Alarme Falso 4.0%,Positive Likelihood Ratio 23.304,Diagnostic Odds Ratio 297.653,F1 score 94.2%
,,Miss Rate 7.5%,Especificidade 96.0%,Negative Likelihood Ratio 0.078,Diagnostic Odds Ratio 297.653,F1 score 94.2%
