# Results Analysis

## Useful links

- [Multi Label Model Evaluation](https://www.kaggle.com/code/kmkarakaya/multi-label-model-evaluation)

## Code preparation

### Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import roc_auc_score, multilabel_confusion_matrix, classification_report, accuracy_score, jaccard_score, f1_score
import os
import warnings; warnings.filterwarnings('ignore')
from IPython.display import display
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sys import path
import ast
from sklearn.preprocessing import MultiLabelBinarizer
path.append("./code/")
from utils.utils import compute_label_aggregations

In [None]:
# Select the first installed matplotlib style whose name contains
# "whitegrid" (matched by substring rather than by exact name).
whitegrid_styles = [name for name in plt.style.available if 'whitegrid' in name]
plt.style.use(whitegrid_styles[0])

# Default canvas size and resolution for the figures drawn below.
plt.rcParams.update({'figure.figsize': [16, 9], 'figure.dpi': 100})

### Utility functions

In [None]:
def list_folders(path):
    """Return the names of the sub-directories directly inside ``path``.

    Args:
        path: Directory to inspect.

    Returns:
        A list with the names (not full paths) of the entries of ``path``
        that are directories, in the order reported by ``os.listdir``.
        An empty list is returned when ``path`` does not exist or is not a
        directory, so the result can always be iterated safely.
    """
    # Callers loop over the result; returning an error string here would be
    # iterated character by character. Report the problem and return an
    # empty list so those loops simply do nothing.
    if not os.path.isdir(path):
        print(f"The specified path does not exist: {path}")
        return []

    return [
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]

def experiments(name:str):
    """Translate an experiment identifier into its label-set name.

    Args:
        name: Experiment identifier, e.g. ``'exp0'`` or ``'exp1.1'``.

    Returns:
        The label-set name paired with ``name``, or ``None`` when ``name``
        is not one of the known identifiers.
    """
    known_pairs = (
        ('exp0', 'all'),
        ('exp1', 'diagnostic'),
        ('exp1.1', 'subdiagnostic'),
        ('exp1.1.1', 'superdiagnostic'),
        ('exp2', 'form'),
        ('exp3', 'rhythm'),
    )
    # First matching identifier wins; fall back to None when nothing matches.
    return next((label for key, label in known_pairs if name == key), None)

### Variables

In [None]:
# Experiment folder name -> name of the label set used in that experiment.
# NOTE: this rebinds `experiments`, replacing the helper function of the
# same name defined earlier in the notebook.
experiments = {
    'exp0': 'all',
    'exp1': 'diagnostic',
    'exp1.1': 'subdiagnostic',
    'exp1.1.1': 'superdiagnostic',
    'exp2': 'form',
    'exp3': 'rhythm',
}

# Per-split holders, filled by the loading loops below:
# `datas` receives the `y_<split>.npy` arrays and `predictions` the
# `y_<split>_pred.npy` arrays. Key order (test, train, val) is the
# iteration order of those loops.
datas = dict.fromkeys(("test", "train", "val"))
predictions = dict.fromkeys(("test", "train", "val"))

# Root folder that contains one sub-folder per experiment.
path_out = "./output/"

# Predictions strictly above this value are treated as positive.
threshold = 0.5

## Analysis

In [None]:
# Sanity-check every stored prediction array: print its shape, whether it
# holds exactly two distinct values, and whether its values lie in [0, 1].
for exp in list_folders(path_out):
    print("_"*100)
    print(exp)

    # Load the target arrays of this experiment into the shared holder.
    for split in datas:
        datas[split] = np.load(f'{path_out}/{exp}/data/y_{split}.npy', allow_pickle=True)

    for model in list_folders(f'{path_out}/{exp}/models'):
        print(f'\n\t{model}')

        for split in predictions:
            scores = np.load(f'{path_out}/{exp}/models/{model}/y_{split}_pred.npy', allow_pickle=True)
            predictions[split] = scores

            print(f'\t\t{split}')
            print(f'\t\t\t- Shape: {scores.shape}')
            print(f'\t\t\t- Binary values: {len(np.unique(scores))==2}')

            # Compute each extreme once and reuse it for the range check.
            lowest = np.min(scores)
            print(f'\t\t\t- Min value: {lowest}')
            highest = np.max(scores)
            print(f'\t\t\t- Max value: {highest}')
            print(f'\t\t\t- Range 0-1: {(lowest >= 0) and (highest <= 1)}')

    print("\n\n")

## Metrics

In [None]:
# Build two structures:
#   res     - nested dict: res[exp]["data"][split] holds the targets and
#             res[exp][model][split] the raw model outputs
#   results - long-format table with one row per
#             (experiment, model, metric, split)

# Metrics evaluated on the thresholded predictions, as
# (name, scorer, extra keyword arguments). Together with the leading AUC
# row, the list order fixes the row order inside `results`.
thresholded_metrics = [
    ("Accuracy", accuracy_score, {}),
    ("Jaccard", jaccard_score, {"average": "weighted"}),
    ("F1", f1_score, {"average": "weighted"}),
    ("Precision", metrics.precision_score, {"average": "weighted"}),
    ("Recall", metrics.recall_score, {"average": "weighted"}),
]

results = []

res = {}

for exp in sorted(list_folders(path_out)):
    res[exp] = {}
    for dt in datas:
        datas[dt] = np.load(f'{path_out}/{exp}/data/y_{dt}.npy', allow_pickle=True)
    # Shallow copy: `datas` is re-filled on the next experiment.
    res[exp]["data"] = datas.copy()

    for model in list_folders(f'{path_out}/{exp}/models'):
        for pred in predictions:
            predictions[pred] = np.load(f'{path_out}/{exp}/models/{model}/y_{pred}_pred.npy', allow_pickle=True)

            y_true = datas[pred]
            y_score = predictions[pred]
            # Binarise once and share the mask across all thresholded metrics.
            y_hat = y_score > threshold

            # AUC is the only metric computed on the raw scores.
            scored = [("AUC", roc_auc_score(y_true, y_score, average="weighted"))]
            scored.extend(
                (name, scorer(y_true, y_hat, **kwargs))
                for name, scorer, kwargs in thresholded_metrics
            )

            results.extend(
                {
                    "experiment": experiments[exp],
                    "model": model,
                    "metric": name,
                    "set": pred,
                    "value": value,
                }
                for name, value in scored
            )

        # Snapshot after all splits are loaded; `predictions` is re-filled
        # for the next model.
        res[exp][model] = predictions.copy()

results = pd.DataFrame(results)
results

In [None]:
# Test-set AUC of every model for the "all" label set.
results.loc[results["experiment"].eq("all") & results["metric"].eq("AUC") & results["set"].eq("test")]

In [None]:
# Test-set accuracy of every model for the "diagnostic" label set.
results.loc[results["experiment"].eq("diagnostic") & results["metric"].eq("Accuracy") & results["set"].eq("test")]

## Baseline MI vs NORM

In [None]:
# SCP statement table, read with its first column as the index.
# The location is resolved relative to the output folder.
scp_statements_csv = f'{path_out}/../data/ptbxl/scp_statements.csv'
agg_df = pd.read_csv(scp_statements_csv, index_col=0)

print(agg_df.shape)
agg_df.head()

In [None]:
# Statements whose diagnostic class is MI.
agg_df.loc[agg_df["diagnostic_class"].eq("MI")]

In [None]:
# Statements whose diagnostic class is NORM.
agg_df.loc[agg_df["diagnostic_class"].eq("NORM")]

### EXP 0

Prepare columns

In [None]:
# Rebuild the label binarizer for the 'all' label set so that every MI / NORM
# statement code can be mapped to its column index in `mlb.classes_`.
mlb = MultiLabelBinarizer()

# `scp_codes` is read as text; parse each cell back into a Python object.
Y: pd.DataFrame = pd.read_csv('./data/ptbxl/ptbxl_database.csv', index_col='ecg_id')
Y['scp_codes'] = Y['scp_codes'].apply(ast.literal_eval)

Y_clean: pd.DataFrame = compute_label_aggregations(Y, "./data/ptbxl/", 'all')

# Frequency of every code that occurs in the 'all_scp' column.
counts = pd.Series(np.concatenate(Y_clean['all_scp'].values)).value_counts()

# Build the set of known codes once instead of once per row.
known_codes = set(counts.index.values)
Y_clean['all_scp'] = Y_clean['all_scp'].apply(lambda codes: list(set(codes).intersection(known_codes)))
Y_clean['all_scp_len'] = Y_clean['all_scp'].apply(len)

# select: keep only the records that carry at least one label
Y = Y_clean[Y_clean['all_scp_len'] > 0]
labels = Y['all_scp'].values
y = mlb.fit(labels).transform(labels)

# For each diagnostic class: statement code -> column index in mlb.classes_.
# Key order (MI first, NORM second) is relied upon further down.
# A code missing from mlb.classes_ raises IndexError.
diagnosis_complete = {
    dia: {
        el: np.where(mlb.classes_ == el)[0][0]
        for el in agg_df[agg_df.diagnostic_class == dia].index
    }
    for dia in ("MI", "NORM")
}

Experiments

In [None]:
# Reduce experiment "exp0" to a binary MI-vs-NORM problem.
# The per-split loop variable is called `split`: naming it `set` would
# replace the builtin `set` for the rest of the notebook session.
exp = "exp0"

# find diagnostic index
# Column indices of every code in each diagnostic class. They are assumed to
# match the column order of the arrays stored for this experiment
# - TODO confirm.
diagnosis = {dia: list(codes.values()) for dia, codes in diagnosis_complete.items()}

# All selected columns, ascending; every array below is sliced to these.
dia_glob = sorted(col for cols in diagnosis.values() for col in cols)

# Re-express each class's columns as positions inside `dia_glob`.
# np.intp is NumPy's index dtype, so large positions cannot overflow.
diagnosis = {
    dia: np.array([dia_glob.index(col) for col in cols], dtype=np.intp)
    for dia, cols in diagnosis.items()
}

# prepare baseline
# Keep only the MI / NORM columns of the targets ("data") and of every model.
baseline = {
    name: {split: values[:, dia_glob] for split, values in per_split.items()}
    for name, per_split in res[exp].items()
}

# prepare target
# One indicator column per class: 1 when any code of that class is set.
y = baseline.pop("data")
for split in y:
    y[split] = np.transpose(np.array([
        np.any(y[split][:, cols], axis=1).astype(int)
        for cols in diagnosis.values()
    ]))

# find utils rows
# Rows that belong to exactly one of the two classes.
utils = {split: np.sum(target, axis=1) == 1 for split, target in y.items()}

# prepare prediction values, keeping only utils rows
# Predict 1 when the highest-scoring selected column is an MI column, else 0.
for per_split in baseline.values():
    for split in per_split:
        picks_mi = np.isin(np.argmax(per_split[split], axis=1), diagnosis["MI"])
        per_split[split] = np.where(picks_mi, 1, 0)[utils[split]]

# keep only utils rows in target & prepare it
# The final target is the MI indicator column (1 = MI, 0 = NORM).
mi_column = list(diagnosis).index("MI")
for split in y:
    y[split] = y[split][utils[split]][:, mi_column]

In [None]:
# Class balance per split: in `y`, 0 is counted as NORM and 1 as MI.
counts = {
    'Dataset': list(y),
    'NORM': [(values == 0).sum() for values in y.values()],
    'MI': [(values == 1).sum() for values in y.values()],
}

df = pd.DataFrame(counts).set_index('Dataset')

df

In [None]:
# Side-by-side (unstacked) bars: one group per split, one bar per class.
df.plot.bar(stacked=False, figsize=(10, 5), title='Class Distribution in the Datasets')
plt.show()

In [None]:
# Score the binary MI-vs-NORM baseline of every model on every split.
# The per-split variable is called `split`: naming it `set` would replace
# the builtin `set` for the rest of the notebook session.

# (metric name, scorer, extra keyword arguments); the list order is the row
# order inside `base_res` for each (model, split) pair.
baseline_metrics = [
    ("AUC", roc_auc_score, {"average": "weighted"}),
    ("Accuracy", accuracy_score, {}),
    ("Jaccard", jaccard_score, {"average": "weighted"}),
    ("F1", f1_score, {"average": "weighted"}),
    ("Precision", metrics.precision_score, {"average": "weighted"}),
    ("Recall", metrics.recall_score, {"average": "weighted"}),
]

# Every scorer, AUC included, receives the same prediction array that is
# stored in `baseline`.
base_res = pd.DataFrame([
    {
        "model": model,
        "metric": name,
        "set": split,
        "value": scorer(y[split], y_pred, **kwargs),
    }
    for model, per_split in baseline.items()
    for split, y_pred in per_split.items()
    for name, scorer, kwargs in baseline_metrics
])
base_res

In [None]:
# Names of the metrics present in the baseline table.
base_res["metric"].unique()

In [None]:
# Test-set recall per model, listed in descending model-name order.
base_res.loc[
    base_res["metric"].eq("Recall") & base_res["set"].eq("test"),
    ["model", "value"],
].sort_values(by="model", ascending=False)

### Other experiments

In [None]:
# Print, for every experiment, the number of rows in its test-split targets.
# NOTE(review): `set` shadows the builtin of the same name from here on;
# the name is kept because later cells may read it.
set = "test"
# "data" is the key under which `res` stores the target arrays.
model = "data"

for exp in res:
    n_rows = res[exp][model][set].shape[0]
    print(f'labels for {exp} - {n_rows}')