# Evaluation - Generate Results

The main evaluation metrics used to report the performance of the tested methods.

In [None]:
from copy import deepcopy
from glob import glob
import json
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    auc,
    classification_report,
    multilabel_confusion_matrix,
    roc_auc_score,
    roc_curve,
)

%matplotlib inline

In [None]:
# Mapping from integer label index to finding name (14 entries, indices 0-13).
# NOTE(review): presumably the index matches the column order of the
# ground-truth / probability vectors stored in the reports -- confirm.
LABELS = dict(
    enumerate(
        (
            "Atelectasis",
            "Cardiomegaly",
            "Consolidation",
            "Edema",
            "Enlarged Cardiomediastinum",
            "Fracture",
            "Lung Lesion",
            "Lung Opacity",
            "No Finding",
            "Pleural Effusion",
            "Pleural Other",
            "Pneumonia",
            "Pneumothorax",
            "Support Devices",
        )
    )
)

# Probability cut-off: print_stats treats a score strictly greater than this
# value as a positive prediction.
THRESHOLD = 0.5

In [None]:
def multilabelConfussionMatrix(y_test, predictions):
    """
    Per-label confusion-matrix counts for binary multilabel data.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Ground-truth indicators. After truncation to int, an entry equal
        to 1 is a positive; every other value is treated as a negative.
    predictions: sparse or dense matrix (n_samples, n_labels)
        Predicted indicators, interpreted the same way as ``y_test``.
    Returns
    =======
    TP, FP, TN, FN : four float arrays of length n_labels
        True positives, false positives, true negatives and false
        negatives for each label column. For every label the four
        counts add up to n_samples.
    """
    # Objects exposing ``toarray`` (e.g. scipy sparse matrices) are made
    # dense first so that the element-wise comparisons below work.
    if hasattr(y_test, "toarray"):
        y_test = y_test.toarray()
    if hasattr(predictions, "toarray"):
        predictions = predictions.toarray()

    # astype(int) truncates like the int() cast used on each entry before.
    truth = np.asarray(y_test).astype(int)
    predicted = np.asarray(predictions).astype(int)

    actual_pos = truth == 1
    predicted_pos = predicted == 1

    # FP = predicted positive but actually negative;
    # FN = actually positive but predicted negative.
    TP = np.sum(actual_pos & predicted_pos, axis=0).astype(float)
    FP = np.sum(~actual_pos & predicted_pos, axis=0).astype(float)
    TN = np.sum(~actual_pos & ~predicted_pos, axis=0).astype(float)
    FN = np.sum(actual_pos & ~predicted_pos, axis=0).astype(float)

    return TP, FP, TN, FN

def multilabelMicroConfussionMatrix(TP, FP, TN, FN):
    """
    Collapse per-label confusion counts into global (micro) totals.

    Params
    ======
    TP, FP, TN, FN : indexable sequences of per-label counts
        Only the first ``len(TP)`` entries of each sequence are summed.
    Returns
    =======
    TPMicro, FPMicro, TNMicro, FNMicro : tuple of four numbers
        Sum of each sequence, accumulated left-to-right from 0.0.
    """
    n_labels = len(TP)
    TPMicro, FPMicro, TNMicro, FNMicro = (
        sum((counts[idx] for idx in range(n_labels)), 0.0)
        for counts in (TP, FP, TN, FN)
    )
    return TPMicro, FPMicro, TNMicro, FNMicro

In [None]:
def accuracy_per_class(y_test, predictions):
    """
    Accuracy of each label column taken separately.

    Params
    ======
    y_test : dense matrix (n_samples, n_labels)
        Matrix of ground-truth labels
    predictions: dense matrix (n_samples, n_labels)
        Matrix of predicted labels, same shape as ``y_test``
    Returns
    =======
    accuracy : list of float, one entry per label column
        Fraction of samples whose prediction equals the ground truth.
    """
    truth = np.asarray(y_test)
    predicted = np.asarray(predictions)

    # Compare column-wise (axis 0 runs over samples) so each score belongs
    # to one label, and take the label count from the data itself instead
    # of assuming 14.
    return [float(score) for score in np.mean(truth == predicted, axis=0)]


def accuracyMacro(y_test, predictions):
    """
    Macro-averaged accuracy: per-label accuracy averaged over all labels.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    Returns
    =======
    accuracymacro : float
        Mean over labels of (TP + TN) / (TP + FP + TN + FN), using the
        counts returned by multilabelConfussionMatrix.
    """
    tp, fp, tn, fn = multilabelConfussionMatrix(y_test, predictions)
    per_label = (
        (hits + rejections) / (hits + false_alarms + rejections + misses)
        for hits, false_alarms, rejections, misses in zip(tp, fp, tn, fn)
    )
    return float(sum(per_label, 0.0) / len(tp))


def precisionMacro(y_test, predictions):
    """
    Macro-averaged precision.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    Returns
    =======
    precisionmacro : float
        Sum over labels of TP / (TP + FP), divided by the number of
        labels. Labels with TP + FP == 0 add nothing to the sum but are
        still counted in the divisor.
    """
    tp, fp, _, _ = multilabelConfussionMatrix(y_test, predictions)
    per_label = (
        hits / (hits + false_alarms)
        for hits, false_alarms in zip(tp, fp)
        if hits + false_alarms != 0
    )
    return float(sum(per_label, 0.0) / len(tp))


def precisionMicro(y_test, predictions):
    """
    Micro-averaged precision.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    Returns
    =======
    precisionmicro : float
        TP / (TP + FP) computed on counts summed over all labels;
        0.0 when TP + FP is zero.
    """
    per_label_counts = multilabelConfussionMatrix(y_test, predictions)
    tp_total, fp_total, _, _ = multilabelMicroConfussionMatrix(*per_label_counts)

    denominator = tp_total + fp_total
    if denominator == 0:
        return 0.0
    return float(tp_total / denominator)


def recallMacro(y_test, predictions):
    """
    Macro-averaged recall.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    Returns
    =======
    recallmacro : float
        Sum over labels of TP / (TP + FN), divided by the number of
        labels. Labels with TP + FN == 0 add nothing to the sum but are
        still counted in the divisor.
    """
    tp, _, _, fn = multilabelConfussionMatrix(y_test, predictions)
    per_label = (
        hits / (hits + misses)
        for hits, misses in zip(tp, fn)
        if hits + misses != 0
    )
    return sum(per_label, 0.0) / len(tp)


def recallMicro(y_test, predictions):
    """
    Micro-averaged recall.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    Returns
    =======
    recallmicro : float
        TP / (TP + FN) computed on counts summed over all labels;
        0.0 when TP + FN is zero.
    """
    per_label_counts = multilabelConfussionMatrix(y_test, predictions)
    tp_total, _, _, fn_total = multilabelMicroConfussionMatrix(*per_label_counts)

    denominator = tp_total + fn_total
    if denominator == 0:
        return 0.0
    return float(tp_total / denominator)


def fbetaMacro(y_test, predictions, beta=1):
    """
    Macro-averaged F-beta score.

    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    beta : number, default 1
        Weight of the F-beta score; the default gives F1.
    Returns
    =======
    fbetamacro : float
        Sum over labels of
        (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP),
        divided by the number of labels. Labels whose denominator is zero
        add nothing to the sum but are still counted in the divisor.
    """
    tp, fp, _, fn = multilabelConfussionMatrix(y_test, predictions)
    beta_sq = pow(beta, 2)

    running_total = 0.0
    for hits, false_alarms, misses in zip(tp, fp, fn):
        numerator = float((1 + beta_sq) * hits)
        denominator = float((1 + beta_sq) * hits + beta_sq * misses + false_alarms)
        if denominator == 0:
            continue
        running_total += numerator / denominator

    return running_total / len(tp)


def fbetaMicro(y_test, predictions, beta=1):
    """
    FBeta Micro of our model
    Params
    ======
    y_test : sparse or dense matrix (n_samples, n_labels)
        Matrix of labels used in the test phase
    predictions: sparse or dense matrix (n_samples, n_labels)
        Matrix of predicted labels given by our model
    beta : number, default 1
        Weight of the F-beta score; the default gives F1.
    Returns
    =======
    fbetamicro : float
        (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP)
        computed on counts summed over all labels; 0.0 when that
        denominator is zero.
    """
    fbetamicro = 0.0
    TP, FP, TN, FN = multilabelConfussionMatrix(y_test, predictions)
    TPMicro, FPMicro, _, FNMicro =\
        multilabelMicroConfussionMatrix(TP, FP, TN, FN)

    beta_sq = pow(beta, 2)
    num = float((1 + beta_sq) * TPMicro)
    den = float((1 + beta_sq) * TPMicro + beta_sq * FNMicro + FPMicro)

    # Guard the division like fbetaMacro / precisionMicro / recallMicro do:
    # with no TP, FP or FN at all the score is reported as 0.0 instead of
    # raising ZeroDivisionError.
    if den != 0:
        fbetamicro = float(num / den)

    return fbetamicro

In [None]:
def calc_roc(ground_truth, predictions):
    """
    Macro- and micro-averaged area under the ROC curve.

    Params
    ======
    ground_truth : array (n_samples, n_labels)
        Ground-truth labels; must support ``.ravel()``, ``.shape`` and
        ``[:, i]`` column indexing.
    predictions : array (n_samples, n_labels)
        Predicted scores, same layout as ``ground_truth``.
    Returns
    =======
    macro_roc_auc, micro_roc_auc : tuple
        Macro: mean of the per-label AUCs. Micro: AUC of the single ROC
        curve built from all (sample, label) pairs flattened together.
    """
    # Micro: flatten every label column into one long binary problem.
    micro_fpr, micro_tpr, _ = roc_curve(ground_truth.ravel(), predictions.ravel())
    micro_roc_auc = auc(micro_fpr, micro_tpr)

    # Macro: one ROC curve per label column. The number of labels comes
    # from the data rather than being fixed at 14.
    n_labels = ground_truth.shape[1]
    macro_roc_auc = 0
    for label_idx in range(n_labels):
        fpr, tpr, _ = roc_curve(ground_truth[:, label_idx], predictions[:, label_idx])
        macro_roc_auc += auc(fpr, tpr)

    macro_roc_auc /= n_labels

    return macro_roc_auc, micro_roc_auc
    

def print_stats(ground_truth, predictions):
    """
    Print the thresholded metrics and the AUROC scores, rounded to 3 decimals.

    Params
    ======
    ground_truth : matrix (n_samples, n_labels)
        Ground-truth labels, passed unchanged to the metric functions.
    predictions : array-like (n_samples, n_labels)
        Predicted scores. Scores strictly greater than THRESHOLD become 1
        (else 0) for the accuracy / precision / recall / F1 lines; the raw
        scores are used for the AUROC lines.
    """
    scores = np.array(predictions)
    binarized = (scores > THRESHOLD).astype(int)

    # (printed prefix, metric function) in output order.
    thresholded_metrics = (
        ("Accuracy:", accuracyMacro),
        ("Precision Macro:", precisionMacro),
        ("Precision Micro:", precisionMicro),
        ("Recall Macro:", recallMacro),
        ("Recall Micro:", recallMicro),
        ("F1 Macro:", fbetaMacro),
        ("F1 Micro:", fbetaMicro),
    )
    for prefix, metric in thresholded_metrics:
        print(prefix + str(np.round(metric(ground_truth, binarized), 3)))

    macro_roc_auc, micro_roc_auc = calc_roc(ground_truth, scores)
    # np.average leaves a scalar unchanged; kept so the printed value is
    # produced exactly as before.
    print("AUROC Macro:" + str(np.round(np.average(macro_roc_auc), 3)))
    print("AUROC Micro:" + str(np.round(micro_roc_auc, 3)))

In [None]:
# Root directory searched for report files.
PREFIX = "../training"

# Walk every JSON report matching <PREFIX>/*/*/reports/*.json and print its
# metrics. sorted() keeps the report order stable between runs.
for report_path in sorted(glob(os.path.join(PREFIX, "*", "*", "reports", "*.json"))):
    # The path component four levels from the end (the first wildcard
    # directory under PREFIX) is printed as the section title. Splitting on
    # os.sep after normpath also works where the separator is not "/".
    print(os.path.normpath(report_path).split(os.sep)[-4])
    print("-" * 80)

    with open(report_path, "r") as f:
        results = json.load(f)

    # Each entry of the report is read for its 'probabilities' and
    # 'ground_truth' fields.
    predictions = [entry['probabilities'] for entry in results]
    ground_truth = np.array([entry['ground_truth'] for entry in results])

    print_stats(ground_truth, predictions)

    print("=" * 80)