# Results Analysis

## Useful links

- [Multi Label Model Evaluation](https://www.kaggle.com/code/kmkarakaya/multi-label-model-evaluation)

## Code preparation

### Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import roc_auc_score, multilabel_confusion_matrix, classification_report, accuracy_score, jaccard_score, f1_score
import os
import warnings; warnings.filterwarnings('ignore')
from IPython.display import display
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Use the first installed matplotlib style whose name contains 'whitegrid'
# (the [0] lookup raises IndexError when no such style is available).
plt.style.use([s for s in plt.style.available if 'whitegrid' in s][0])
# Default figure size and resolution for the plots in this notebook.
plt.rcParams['figure.figsize'] = [16, 9]
plt.rcParams['figure.dpi'] = 100

### Utility functions

In [None]:
def list_folders(path):
    """Return the names of the sub-directories directly inside ``path``.

    Args:
        path: Directory to scan.

    Returns:
        A sorted list of folder names (names only, not full paths). The list
        is empty when ``path`` does not exist or is not a directory, so the
        result can always be iterated safely.
    """
    # Returning an error *string* here would make callers that loop over the
    # result iterate over its characters; report the problem and return [].
    if not os.path.isdir(path):
        print(f"The specified path does not exist or is not a directory: {path}")
        return []

    # os.listdir() yields entries in arbitrary order; sort for reproducibility.
    return sorted(
        item for item in os.listdir(path)
        if os.path.isdir(os.path.join(path, item))
    )

def experiments(name:str):
    """Translate an experiment folder id (e.g. ``'exp1.1'``) into its label group.

    Returns ``None`` when ``name`` is not one of the known ids.
    """
    known = (
        ('exp0', 'all'),
        ('exp1', 'diagnostic'),
        ('exp1.1', 'subdiagnostic'),
        ('exp1.1.1', 'superdiagnostic'),
        ('exp2', 'form'),
        ('exp3', 'rhythm'),
    )
    # First matching id wins; fall back to None when nothing matches.
    return next((label for exp_id, label in known if exp_id == name), None)

### Variables

In [None]:
# Experiment folder id -> name of the label group used in that experiment.
# NOTE: this rebinds `experiments`, replacing the helper function of the same
# name defined in the utility cell.
experiments = dict([
    ('exp0', 'all'),
    ('exp1', 'diagnostic'),
    ('exp1.1', 'subdiagnostic'),
    ('exp1.1.1', 'superdiagnostic'),
    ('exp2', 'form'),
    ('exp3', 'rhythm'),
])

# Per-split slots for the ground-truth arrays; filled when an experiment is loaded.
datas = dict.fromkeys(("test", "train", "val"))

# Per-split slots for one model's prediction arrays.
predictions = dict.fromkeys(("test", "train", "val"))

# Folder that holds one sub-folder per experiment.
path = "./output/"

# Cut-off applied to prediction scores to turn them into binary labels.
threshold = 0.5

## Analysis

In [None]:
# Sanity-check every prediction file: print its shape, whether it holds exactly
# two distinct values, and whether all values lie inside [0, 1].
# allow_pickle=True lets np.load unpickle object arrays, so only trusted files
# should be read this way.
for exp in list_folders(path):
    exp_dir = f'{path}/{exp}'
    print("_"*100)
    print(exp)

    # Ground-truth arrays of each split for this experiment.
    for dt in datas:
        datas[dt] = np.load(f'{exp_dir}/data/y_{dt}.npy', allow_pickle=True)

    for model in list_folders(f'{exp_dir}/models'):
        print(f'\n\t{model}')
        for pred in predictions:
            scores = np.load(f'{exp_dir}/models/{model}/y_{pred}_pred.npy', allow_pickle=True)
            predictions[pred] = scores
            lowest = np.min(scores)
            highest = np.max(scores)
            print(f'\t\t{pred}')
            print(f'\t\t\t- Shape: {scores.shape}')
            print(f'\t\t\t- Binary values: {len(np.unique(scores))==2}')
            print(f'\t\t\t- Min value: {lowest}')
            print(f'\t\t\t- Max value: {highest}')
            print(f'\t\t\t- Range 0-1: {(lowest >= 0) and (highest <= 1)}')

    print("\n\n")

## Metrics

In [None]:
# One row per (experiment, model, split, metric).
results = []

# res[exp]["data"]  -> ground-truth arrays per split
# res[exp][<model>] -> prediction arrays per split
res = {}

# (metric name, scorer, score on thresholded predictions?, extra keyword args).
# AUC is fed the raw prediction scores; every other metric is fed
# `predictions > threshold`.
metric_table = (
    ("AUC", roc_auc_score, False, {"average": "weighted"}),
    ("Accuracy", accuracy_score, True, {}),
    ("Jaccard", jaccard_score, True, {"average": "weighted"}),
    ("F1", f1_score, True, {"average": "weighted"}),
    ("Precision", metrics.precision_score, True, {"average": "weighted"}),
    ("Recall", metrics.recall_score, True, {"average": "weighted"}),
)

for exp in sorted(list_folders(path)):
    exp_dir = f'{path}/{exp}'
    res[exp] = {}

    for dt in datas:
        datas[dt] = np.load(f'{exp_dir}/data/y_{dt}.npy', allow_pickle=True)
    # Shallow copy: `datas` itself is overwritten on the next experiment.
    res[exp]["data"] = datas.copy()

    for model in list_folders(f'{exp_dir}/models'):
        for pred in predictions:
            predictions[pred] = np.load(f'{exp_dir}/models/{model}/y_{pred}_pred.npy', allow_pickle=True)
            # Re-stored after every split; the copy made on the last split
            # holds all of this model's arrays.
            res[exp][model] = predictions.copy()

            for metric_name, scorer, on_binary, extra in metric_table:
                if on_binary:
                    estimate = predictions[pred] > threshold
                else:
                    estimate = predictions[pred]
                results.append({
                    "experiment": experiments[exp],
                    "model": model,
                    "metric": metric_name,
                    "set": pred,
                    "value": scorer(datas[pred], estimate, **extra),
                })

results = pd.DataFrame(results)
results

In [None]:
# Test-split AUC rows for the 'all' experiment.
results[(results["experiment"]=="all") & (results["metric"]=="AUC") & (results["set"]=="test")]

In [None]:
# Test-split Accuracy rows for the 'diagnostic' experiment.
results[(results["experiment"]=="diagnostic") & (results["metric"]=="Accuracy") & (results["set"]=="test")]

## Baseline MI vs NORM

### EXP 0

In [None]:
# Load the statement table (located relative to the output folder), then show
# its dimensions and first rows.
agg_df = pd.read_csv(f'{path}../data/ptbxl/scp_statements.csv')
print(agg_df.shape)
agg_df.head()

In [None]:
# Rows of the statement table whose diagnostic class is "MI".
agg_df[agg_df.diagnostic_class == "MI"]

In [None]:
exp = "exp0"
model = "data"
# Per-column totals of the ground-truth arrays, first within each split and
# then summed over all splits. The comprehension avoids rebinding the builtin
# name `set` at module level as a loop variable.
y = np.array(
    [np.sum(split_labels, axis=0) for split_labels in res[exp][model].values()],
    dtype=int,
)
y = np.sum(y, axis=0)
y

In [None]:
def elementi_diversi(array):
    """Return True when every element of ``array`` is distinct.

    On the first repeated element a message naming it is printed and False
    is returned immediately.
    """
    # Elements encountered so far, in order of appearance.
    seen = []

    for item in array:
        if item not in seen:
            seen.append(item)
            continue
        # `item` was already encountered: report it and stop.
        print(f"Elemento duplicato: {item}")
        return False

    # The loop finished without finding a repeat.
    return True
    

# Check whether the values currently held in `y` are all distinct.
elementi_diversi(y)

In [None]:
# Build, for every split of exp0, a 0/1 indicator per diagnosis (MI, NORM)
# telling whether any column of that diagnostic class is set.
baseline = {}  # not used further in this cell
diagnosis = {
    "MI": None,
    "NORM": None
    }
exp = "exp0"
model = "data"

# find diagnostic index
dia_glob = []
for dia in diagnosis:
    # row positions in agg_df whose diagnostic class is `dia`
    diagnosis[dia] = agg_df.diagnostic_class == dia
    diagnosis[dia] = np.where(diagnosis[dia])[0]
    dia_glob.extend(diagnosis[dia])

# re-express every position relative to the sorted union of selected positions
dia_glob = sorted(dia_glob)
for dia in diagnosis:
    diagnosis[dia] = [dia_glob.index(idx) for idx in diagnosis[dia]]



y = {}
# `set` shadows the builtin and stays bound to the last split name afterwards.
for set in res[exp][model]:
    y[set] = {}
    for dia in diagnosis:
        # Columns are picked with the boolean class mask directly; the remapped
        # `diagnosis` positions computed above are not used here.
        # NOTE(review): assumes the label columns follow the row order of
        # agg_df — confirm against the code that produced y_*.npy.
        y[set][dia] = np.any((res[exp][model][set][:, agg_df.diagnostic_class == dia]), axis=1).astype(int)
        print(f'{set} - {dia} {np.sum(y[set][dia])}/{len(y[set][dia])}')

y

In [None]:
# Display one ground-truth array; `exp`, `model` and `set` are whatever the
# previously executed cell left bound (`set` being its last loop value).
res[exp][model][set]

In [None]:
# Restrict every array of exp0 to the MI / NORM columns and derive a two-column
# (MI, NORM) 0/1 target per split.
baseline = {}
diagnosis = {
    "MI": None,
    "NORM": None
    }
exp = "exp0"
model = "data"

# find diagnostic index
dia_glob = []
for dia in diagnosis:
    # row positions in agg_df whose diagnostic class is `dia`
    diagnosis[dia] = agg_df.diagnostic_class == dia
    diagnosis[dia] = np.where(diagnosis[dia])[0]
    dia_glob.extend(diagnosis[dia])

# re-express every position relative to the sorted union of selected positions,
# i.e. relative to the column-restricted arrays built below
dia_glob = sorted(dia_glob)
for dia in diagnosis:
    diagnosis[dia] = [dia_glob.index(idx) for idx in diagnosis[dia]]

# prepare baseline
# (iterates over every key of res[exp], including the "data" ground truth)
# NOTE(review): the boolean mask assumes the label columns follow the row order
# of agg_df — confirm.
for model in res[exp]:
    baseline[model] = {}
    for set in res[exp][model]:
        baseline[model][set] = res[exp][model][set][:, (agg_df.diagnostic_class.isin(diagnosis.keys()))]

# prepare target
# the ground truth is removed from `baseline`, leaving only model predictions
y = baseline.pop("data")
for set in y:
    tmp = []
    for dia in diagnosis:
        # 1 when any column of this diagnosis is set for the row
        tmp.append(np.any(y[set][:, diagnosis[dia]], axis=1).astype(int))
    # one row per sample, one column per diagnosis (in `diagnosis` order)
    y[set] = np.transpose(np.array(tmp))

In [None]:


model = "data"
y = {}
# prepare target
# `diagnosis[dia]` holds column positions relative to the MI/NORM column
# selection (they were remapped through the sorted `dia_glob`), not positions in
# the full label matrix. The matrix is therefore restricted to those columns
# first; indexing the full matrix with the remapped positions would pick
# unrelated leading columns.
for set in res[exp][model]:
    restricted = res[exp][model][set][:, (agg_df.diagnostic_class.isin(diagnosis.keys()))]
    per_diagnosis = [
        np.any(restricted[:, diagnosis[name]], axis=1).astype(int)
        for name in diagnosis
    ]
    # one row per sample, one column per diagnosis (in `diagnosis` order)
    y[set] = np.transpose(np.array(per_diagnosis))

# first count of elements
counts = {'Dataset': [], 'NORM': [], 'MI': []}

for dataset, values in y.items():
    # column order follows `diagnosis`: 0 -> MI, 1 -> NORM
    mi_count = np.sum(values[:, 0])
    norm_count = np.sum(values[:, 1])
    counts['Dataset'].append(dataset)
    counts['NORM'].append(norm_count)
    counts['MI'].append(mi_count)

df2 = pd.DataFrame(counts)

df2.set_index('Dataset', inplace=True)
df2

In [None]:
# Display the per-split NORM / MI counts table again.
df2

In [None]:
# Turn the exp0 outputs into a binary MI-vs-NORM problem: one 0/1 prediction
# per model and split, and one 0/1 target per split (1 = MI).
baseline = {}
diagnosis = {
    "MI": None,
    "NORM": None
    }
exp = "exp0"

# find diagnostic index
dia_glob = []
for dia in diagnosis:
    # row positions in agg_df whose diagnostic class is `dia`
    diagnosis[dia] = agg_df.diagnostic_class == dia
    diagnosis[dia] = np.where(diagnosis[dia])[0]
    dia_glob.extend(diagnosis[dia])

# re-express every position relative to the sorted union of selected positions,
# i.e. relative to the column-restricted arrays built below
dia_glob = sorted(dia_glob)
for dia in diagnosis:
    diagnosis[dia] = [dia_glob.index(idx) for idx in diagnosis[dia]]

# prepare baseline
# (iterates over every key of res[exp], including the "data" ground truth)
# NOTE(review): the boolean mask assumes the label columns follow the row order
# of agg_df — confirm.
for model in res[exp]:
    baseline[model] = {}
    for set in res[exp][model]:
        baseline[model][set] = res[exp][model][set][:, (agg_df.diagnostic_class.isin(diagnosis.keys()))]

# prepare target
# the ground truth is removed from `baseline`, leaving only model predictions
y = baseline.pop("data")
for set in y:
    tmp = []
    for dia in diagnosis:
        # 1 when any column of this diagnosis is set for the row
        tmp.append(np.any(y[set][:, diagnosis[dia]], axis=1).astype(int))
    # one row per sample; column 0 = MI, column 1 = NORM (order of `diagnosis`)
    y[set] = np.transpose(np.array(tmp))


# find utils rows
# keep only rows where exactly one of MI / NORM is positive
utils = {}
for set in y:
    utils[set] = np.sum(y[set], axis=1) == 1

# prepare prediction values
# prediction is 1 when the highest-scoring selected column is an MI column, else 0
for model in baseline:
    for set in baseline[model]:
        baseline[model][set] = np.where(np.isin(np.argmax(baseline[model][set], axis = 1), diagnosis["MI"]), 1, 0)

# keep only utils rows in predictions
for model in baseline:
    for set in baseline[model]:
        baseline[model][set] = baseline[model][set][utils[set]]

# keep only utils rows in target & prepare it
for set in y:
    y[set] = y[set][utils[set]]
    # column 0 is the MI indicator, so the target is 1 for MI and 0 for NORM
    y[set] = y[set][:, 0]

In [None]:
# Number of rows predicted as 1 (MI) out of all kept rows, per split.
# NOTE(review): presumes `baseline` has a model called "naive" — confirm that
# such a model folder exists for exp0.
model = "naive"
for set in baseline[model]:
    print(f'{set} - {np.sum(baseline[model][set])} / {len(baseline[model][set])}')

In [None]:
# Number of target rows equal to 1 out of all kept rows, per split.
for set in y:
    print(f'{set} - {np.sum(y[set])} / {len(y[set])}')

In [None]:
# One row per (model, split, metric) for the binary MI-vs-NORM baseline.
base_res = []

# (metric name, scorer, extra keyword args), in the order rows are emitted.
# NOTE: every scorer, AUC included, receives the hard 0/1 values stored in
# `baseline`, not continuous scores.
binary_metrics = (
    ("AUC", roc_auc_score, {"average": "weighted"}),
    ("Accuracy", accuracy_score, {}),
    ("Jaccard", jaccard_score, {"average": "weighted"}),
    ("F1", f1_score, {"average": "weighted"}),
    ("Precision", metrics.precision_score, {"average": "weighted"}),
    ("Recall", metrics.recall_score, {"average": "weighted"}),
)

for model in baseline:
    for set in baseline[model]:
        for metric_name, scorer, extra in binary_metrics:
            base_res.append({
                "model": model,
                "metric": metric_name,
                "set": set,
                "value": scorer(y[set], baseline[model][set], **extra),
            })

base_res = pd.DataFrame(base_res)
base_res

In [None]:
# Per-split class sizes of the binary target: 0 is counted as NORM, 1 as MI.
counts = {'Dataset': [], 'NORM': [], 'MI': []}

for dataset, values in y.items():
    norm_count = np.sum(values == 0)
    mi_count = np.sum(values == 1)
    for column, entry in (('Dataset', dataset), ('NORM', norm_count), ('MI', mi_count)):
        counts[column].append(entry)

# One row per split, indexed by the split name.
df = pd.DataFrame(counts).set_index('Dataset')

df

In [None]:
# Sum the counts table `df2` (the NORM / MI counts built in an earlier cell).
np.sum(df2)

In [None]:
# Grouped (non-stacked) bar chart of the NORM / MI counts per split.
df.plot(kind='bar', stacked=False, figsize=(10, 5), title='Class Distribution in the Datasets')

### Columns

In [None]:
# Same MI-vs-NORM reduction as the previous section, but with hard-coded column
# indices per statement instead of indices derived from agg_df.
baseline = {}
# diagnostic class -> {statement name: column index in the exp0 arrays}
# NOTE(review): 'INJLA' and 'PMI' are both mapped to 23. Column 23 is then
# selected twice and no separate column is selected for 'PMI' — this looks like
# a typo; confirm the intended index for 'PMI'.
diagnosis_complete = {
    'MI' :{
        'IMI': 18,
        'ASMI': 9,
        'ILMI': 17,
        'AMI': 7,
        'ALMI': 6,
        'INJAS': 20,
        'LMI': 38,
        'INJAL': 19,
        'IPLMI': 25,
        'IPMI': 26,
        'INJIN': 22,
        'INJLA': 23,
        'PMI': 23,
        'INJIL': 21
    }, 'NORM': {
        'NORM': 46
    }}
exp = "exp0"

# find diagnostic index
diagnosis = {}
dia_glob = []
for dia in diagnosis_complete:
    diagnosis[dia] = list(diagnosis_complete[dia].values())
    dia_glob.extend(diagnosis[dia])

# re-express every index relative to the sorted list of selected columns;
# list.index returns the first match, so duplicated values share one position
dia_glob = sorted(dia_glob)
for dia in diagnosis:
    diagnosis[dia] = np.array([dia_glob.index(idx) for idx in diagnosis[dia]], dtype=np.int8)

# prepare baseline
# (iterates over every key of res[exp], including the "data" ground truth)
for model in res[exp]:
    baseline[model] = {}
    for set in res[exp][model]:
        baseline[model][set] = res[exp][model][set][:, dia_glob]

# prepare target
# the ground truth is removed from `baseline`, leaving only model predictions
y = baseline.pop("data")
for set in y:
    tmp = []
    for dia in diagnosis:
        # 1 when any column of this diagnosis is set for the row
        tmp.append(np.any(y[set][:, diagnosis[dia]], axis=1).astype(int))
    # one row per sample; column 0 = MI, column 1 = NORM (order of `diagnosis`)
    y[set] = np.transpose(np.array(tmp))


# find utils rows
# keep only rows where exactly one of MI / NORM is positive
utils = {}
for set in y:
    utils[set] = np.sum(y[set], axis=1) == 1

# prepare prediction values
# prediction is 1 when the highest-scoring selected column is an MI column, else 0
for model in baseline:
    for set in baseline[model]:
        baseline[model][set] = np.where(np.isin(np.argmax(baseline[model][set], axis = 1), diagnosis["MI"]), 1, 0)

# keep only utils rows in predictions
for model in baseline:
    for set in baseline[model]:
        baseline[model][set] = baseline[model][set][utils[set]]

# keep only utils rows in target & prepare it
for set in y:
    y[set] = y[set][utils[set]]
    # column 0 is the MI indicator, so the target is 1 for MI and 0 for NORM
    y[set] = y[set][:, 0]

In [None]:
# Per-split class sizes of the binary target: 0 is counted as NORM, 1 as MI.
counts = {'Dataset': [], 'NORM': [], 'MI': []}

for dataset, values in y.items():
    norm_count = np.sum(values == 0)
    mi_count = np.sum(values == 1)
    for column, entry in (('Dataset', dataset), ('NORM', norm_count), ('MI', mi_count)):
        counts[column].append(entry)

# One row per split, indexed by the split name.
df = pd.DataFrame(counts).set_index('Dataset')

df

In [None]:
# Grouped (non-stacked) bar chart of the NORM / MI counts per split.
df.plot(kind='bar', stacked=False, figsize=(10, 5), title='Class Distribution in the Datasets')

In [None]:
# One row per (model, split, metric) for the binary MI-vs-NORM baseline.
base_res = []

# (metric name, scorer, extra keyword args), in the order rows are emitted.
# NOTE: every scorer, AUC included, receives the hard 0/1 values stored in
# `baseline`, not continuous scores.
binary_metrics = (
    ("AUC", roc_auc_score, {"average": "weighted"}),
    ("Accuracy", accuracy_score, {}),
    ("Jaccard", jaccard_score, {"average": "weighted"}),
    ("F1", f1_score, {"average": "weighted"}),
    ("Precision", metrics.precision_score, {"average": "weighted"}),
    ("Recall", metrics.recall_score, {"average": "weighted"}),
)

for model in baseline:
    for set in baseline[model]:
        for metric_name, scorer, extra in binary_metrics:
            base_res.append({
                "model": model,
                "metric": metric_name,
                "set": set,
                "value": scorer(y[set], baseline[model][set], **extra),
            })

base_res = pd.DataFrame(base_res)
base_res

In [None]:
# Test-split AUC rows of the baseline results.
base_res[(base_res["metric"]=="AUC") & (base_res["set"]=="test")]

In [None]:
# For every loaded experiment, print the first dimension of its ground-truth
# test array.
# NOTE(review): shape[0] is the size of the first axis; if the number of labels
# was intended, the second axis (shape[1]) is presumably the one to print —
# confirm.
set =  "test"
model = "data"

for exp in res:
    print(f'labels for {exp} - {res[exp][model][set].shape[0]}')