# Evaluation - Class Statistics

Some helper methods to visualise the predictions and calculate per-class
statistics.

In [None]:
from glob import glob
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    classification_report,
    multilabel_confusion_matrix,
    roc_auc_score,
)

%matplotlib inline

In [None]:
# Class index -> display name. The index is the column position of the class
# in each sample's 'ground_truth' / 'probabilities' vector (return_statistics
# looks names up with LABELS[i]).
LABELS = dict(enumerate((
    "Atelectasis",
    "Cardiomegaly",
    "Consolidation",
    "Edema",
    "Enlarged Cardiomediastinum",
    "Fracture",
    "Lung Lesion",
    "Lung Opacity",
    "No Finding",
    "Pleural Effusion",
    "Pleural Other",
    "Pneumonia",
    "Pneumothorax",
    "Support Devices",
)))

# Decision threshold: a probability strictly greater than this value is
# counted as a positive prediction.
THRESHOLD = 0.5

### Statistics

In [None]:
def print_confusion_matrix(confusion_matrix, axes, class_label,
                           class_names, fontsize=14):
    """Draw a single confusion matrix as an annotated heatmap.

    Args:
        confusion_matrix: Square matrix of integer counts; it is rendered
            with the ``"d"`` (integer) annotation format.
        axes: Matplotlib axes the heatmap is drawn on.
        class_label: Text used as the title of the plot.
        class_names: Names used for both the row and the column ticks.
        fontsize: Font size of the tick labels.
    """
    frame = pd.DataFrame(
        confusion_matrix, index=class_names, columns=class_names,
    )
    plot = sns.heatmap(frame, annot=True, fmt="d", cbar=False, ax=axes)

    # Row labels stay horizontal, column labels are tilted by 45 degrees.
    for axis, angle in ((plot.yaxis, 0), (plot.xaxis, 45)):
        axis.set_ticklabels(axis.get_ticklabels(), rotation=angle,
                            ha='right', fontsize=fontsize)

    axes.set_xlabel("Prediction")
    axes.set_ylabel("Truth")
    axes.set_title(class_label)

In [None]:
def return_statistics(results):
    """Plot per-class confusion matrices and print per-class metrics.

    Three things are produced for the given results: a grid of 2x2
    confusion matrices (one per class), scikit-learn's classification
    report, and the ROC AUC of every class followed by their mean.

    Args:
        results: Sequence of dicts, one per sample. Each dict provides a
            ``'probabilities'`` and a ``'ground_truth'`` entry holding one
            value per class; column ``i`` is reported under ``LABELS[i]``.

    Returns:
        None. The figure is shown and the reports are printed.
    """
    predictions = [(
        np.array(i['probabilities']) > THRESHOLD).astype(int) for i in results]
    ground_truth = [i['ground_truth'] for i in results]
    probabilities = [i['probabilities'] for i in results]

    # Confusion matrix.
    cm_image = multilabel_confusion_matrix(ground_truth, predictions)

    # Size the grid from the number of classes instead of assuming 4x4, so
    # no class is silently dropped when there are more than 16 of them.
    n_cols = 4
    n_rows = max(1, -(-len(cm_image) // n_cols))  # ceiling division
    fig, ax = plt.subplots(n_rows, n_cols, figsize=(12, 7))
    panels = ax.flatten()

    drawn = 0
    for axes, cfs_matrix, label in zip(panels, cm_image, LABELS.values()):
        print_confusion_matrix(cfs_matrix, axes, label, ["N", "Y"])
        drawn += 1

    # Hide the panels that received no matrix (e.g. 2 of 16 for 14 classes)
    # rather than showing empty axes.
    for spare in panels[drawn:]:
        spare.set_visible(False)

    fig.tight_layout()
    plt.show()

    # Classification report.
    print("Classification Report")
    print("-" * 80)
    print(classification_report(ground_truth, predictions,
                                target_names=list(LABELS.values())))

    # AUROC.
    print("Area Under an ROC Curve")
    print("-" * 80)

    scores = []

    # Transposing turns the per-sample rows into per-class columns.
    for idx, (truth_col, prob_col) in enumerate(
            zip(np.array(ground_truth).T, np.array(probabilities).T)):
        score = roc_auc_score(truth_col, prob_col)
        scores.append(score)
        print(LABELS[idx], "%.4f" % score)

    # Average over the classes that were actually scored, not a fixed 14.
    if scores:
        print("Average:", "%.4f" % float(sum(scores) / len(scores)))

### Results

In [None]:
# Root directory that is searched for evaluation reports.
PREFIX = "../training"

# Every JSON report below <PREFIX>/*training*/mimic*/reports/ is evaluated.
# sorted() makes the output order reproducible; glob() itself returns the
# matches in arbitrary order.
for report_path in sorted(glob(os.path.join(PREFIX, "*training*",
                                            "mimic*", "reports", "*json"))):
    print("*" * 80)
    # Identify the report by its path relative to PREFIX. Indexing the
    # "/"-split path at a fixed position (7) raised IndexError, because the
    # paths matched here only have 6 components.
    print(os.path.relpath(report_path, PREFIX))
    print("*" * 80)

    with open(report_path, "r", encoding="utf-8") as f:
        results = json.load(f)

    return_statistics(results)