## **Preparations**

In [1]:
import os
import sys
import os.path as op
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from functools import partial

import torch

sys.path.append("..")
from mtecg.classifier import ECGClassifier
from mtecg.evaluation import evaluate_from_dataframe
from mtecg.utils import load_ecg_dataframe


# Single seed shared by the NumPy and PyTorch generators below.
# Note: Python's built-in `random` module is not seeded in this cell.
SEED = 42

# NumPy global generator, PyTorch default generator, then the current CUDA device.
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Turn off the cuDNN auto-tuner and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

c:\Anaconda3\envs\ecg\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Anaconda3\envs\ecg\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


In [2]:
# Device every classifier is loaded onto, and the flag forwarded to
# ECGClassifier(round_probabilities=...).
device = "cuda"
round_probabilities = False

# Single-task checkpoints (one model per task; "-clinical" variants live in their own folders).
singletask_scar_model_path = "../trained_models/single-task-scar/resnet34d_384_LVEF50"
# The rest of the notebook reads `singletask_scar_clinical_model_path` (no underscore
# between "single" and "task"), so that is the name that must be defined here.
singletask_scar_clinical_model_path = "../trained_models/single-task-scar-clinical/resnet34d_384_LVEF50_birnn_dim512"
# Backward-compatible alias for the previous spelling of the same path.
single_task_scar_clinical_model_path = singletask_scar_clinical_model_path
singletask_lvef_model_path = "../trained_models/single-task-lvef/resnet34d_384_LVEF50"
singletask_lvef_clinical_model_path = "../trained_models/single-task-lvef-clinical/resnet34d_384_LVEF50_birnn_dim512"

# Multi-task checkpoints.
multitask_old_format_model_path = "../trained_models/multi-task-old-format/resnet34d_384_LVEF50"
multitask_transferred_model_path = "../trained_models/multi-task-transferred/resnet34d_384_LVEF50"
multitask_model_path = "../trained_models/multi-task/resnet34d_384_LVEF50"
multitask_clinical_model_path = "../trained_models/multi-task-clinical/resnet34d_384_LVEF50_birnn_dim512/"

In [92]:
# Every classifier shares the same device and probability-rounding flag, so bind
# those two keyword arguments once and vary only the checkpoint path, model class
# and (for single-task models) the task.
_build_classifier = partial(ECGClassifier, device=device, round_probabilities=round_probabilities)

# Multi-task checkpoints: old-format and transferred variants.
multitask_old_format_classifier = _build_classifier(multitask_old_format_model_path, model_class="multi-task")
multitask_transferred_classifier = _build_classifier(multitask_transferred_model_path, model_class="multi-task")

# Single-task scar models, without and with clinical inputs.
singletask_scar_classifier = _build_classifier(
    singletask_scar_model_path, model_class="single-task", task="scar"
)
singletask_scar_clinical_classifier = _build_classifier(
    singletask_scar_clinical_model_path, model_class="single-task-clinical", task="scar"
)

# Single-task LVEF models, without and with clinical inputs.
singletask_lvef_classifier = _build_classifier(
    singletask_lvef_model_path, model_class="single-task", task="lvef"
)
singletask_lvef_clinical_classifier = _build_classifier(
    singletask_lvef_clinical_model_path, model_class="single-task-clinical", task="lvef"
)

# Current multi-task models, without and with clinical inputs.
multitask_classifier = _build_classifier(multitask_model_path, model_class="multi-task")
multitask_clinical_classifier = _build_classifier(
    multitask_clinical_model_path, model_class="multi-task-clinical"
)

In [3]:
# Earlier per-test-set inputs, kept for reference:
# old_csv_path = "../datasets/old_test_with_qrs_duration.csv"
# new_csv_path = "../datasets/new_test_with_qrs_duration.csv"
# control_csv_path = "../datasets/siriraj_data/ECG_Normal/ECG_normal_n2097_220906_modified.csv"
# control_image_dir = "../datasets/siriraj_data/ECG_normal_images"

# Both test sets are read from the same CSV file; only the image directory differs.
old_csv_path = new_csv_path = "../datasets/all_ECG_cleared_duplicate_may23_final.csv"
old_image_dir = "../datasets/siriraj_data/ECG_MRI_images_new/"
new_image_dir = "../datasets/siriraj_data/ECG_MRI_test_images_new/"

In [94]:
# Old test set: load with split assignment, then keep only the rows tagged "old_test".
# The filtered frame must not be reloaded with do_split=False afterwards: that would
# replace it with the unfiltered frame and every "old-test" evaluation would then run
# on all rows instead of the held-out split.
old_test_df = load_ecg_dataframe(
    old_csv_path, old_image_dir, imputer_dir=multitask_clinical_model_path, do_split=True
)
old_test_df = old_test_df[old_test_df["split"] == "old_test"].reset_index(drop=True)

# Old test set with lvef_threshold=40 (sensitivity analysis), filtered the same way.
sensitivity_old_test_df = load_ecg_dataframe(
    old_csv_path,
    old_image_dir,
    imputer_dir=multitask_clinical_model_path,
    do_split=True,
    lvef_threshold=40,
)
sensitivity_old_test_df = sensitivity_old_test_df[sensitivity_old_test_df["split"] == "old_test"].reset_index(drop=True)

# New test set. No need to impute, and no split filtering is applied.
new_test_df = load_ecg_dataframe(
    new_csv_path,
    new_image_dir,
    # imputer_dir=multitask_clinical_model_path,
    do_split=False,
)

# New test set with lvef_threshold=40. No need to impute.
sensitivity_new_test_df = load_ecg_dataframe(
    new_csv_path,
    new_image_dir,
    # imputer_dir=multitask_clinical_model_path,
    do_split=False,
    lvef_threshold=40,
)

In [95]:
# For convenience.

# Registry of test sets used by the evaluation helpers:
# test-set name -> {"data": dataframe to evaluate, "save_suffix": suffix used in output CSV names}.
EVAL_DATA_MAP = {
    test_set_name: {"data": test_dataframe, "save_suffix": save_suffix}
    for test_set_name, test_dataframe, save_suffix in (
        ("old-test", old_test_df, "old_test"),
        ("old-test-sensitivity", sensitivity_old_test_df, "old_test_sensitivity"),
        ("new-test", new_test_df, "new_test"),
        ("new-test-sensitivity", sensitivity_new_test_df, "new_test_sensitivity"),
        # ("control-test", control_test_df, "control_test"),
    )
}

# Save suffixes in registry order; captured once here, at definition time.
TEST_SET_SAVE_SUFFIX_LIST = [entry["save_suffix"] for entry in EVAL_DATA_MAP.values()]


def evaluate_and_save(
    classifier: ECGClassifier,
    save_dir: str,
    average: str = "weighted",
    prediction_csv_name_pattern: str = "prediction_{save_suffix}.csv",
    metric_csv_name_pattern: str = "metrics_{save_suffix}.csv",
):
    """
    Evaluate `classifier` on every test set in the global EVAL_DATA_MAP and write the results as CSVs.

    Args:
        classifier: Classifier passed to `evaluate_from_dataframe`.
        save_dir: Directory that receives the CSV files (created if missing).
        average: Averaging mode forwarded to `evaluate_from_dataframe` for every test set.
        prediction_csv_name_pattern: File-name pattern for per-sample predictions;
            `{save_suffix}` is replaced with the test set's save suffix.
        metric_csv_name_pattern: File-name pattern for the metric table, same placeholder.

    Test sets whose name contains "control" are evaluated with `is_control_population=True`.
    EVAL_DATA_MAP is read at call time, so reassigning it changes what this function evaluates.
    """
    os.makedirs(save_dir, exist_ok=True)

    for test_set_name, param_dict in tqdm(EVAL_DATA_MAP.items()):
        dataframe, save_suffix = param_dict["data"], param_dict["save_suffix"]

        # Only control sets set the flag explicitly; other sets keep the callee's default.
        extra_kwargs = {"is_control_population": True} if "control" in test_set_name else {}
        # `average` is forwarded on both paths so the parameter is never silently ignored.
        result_dataframe, metric_dataframe = evaluate_from_dataframe(
            dataframe,
            classifier,
            average=average,
            **extra_kwargs,
        )

        result_save_path = op.join(save_dir, prediction_csv_name_pattern.format(save_suffix=save_suffix))
        metric_save_path = op.join(save_dir, metric_csv_name_pattern.format(save_suffix=save_suffix))

        result_dataframe.to_csv(result_save_path, index=False)
        metric_dataframe.to_csv(metric_save_path, index=True)

## **Evaluation**

### **Baseline**

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_auc_score

def get_baseline_metrics(
    save_dir: str = None,
    label_column_name: str = "scar_cad",
    metric_csv_name_pattern: str = "{save_suffix}.csv",
    average: str = "weighted",
    ):
    """
    Compute metrics of an "always predict 0" baseline on every non-control test set in EVAL_DATA_MAP.

    Args:
        save_dir: If given, the directory is created and one metric CSV per test set is written there.
        label_column_name: Column of each test dataframe holding the binary ground-truth label.
        metric_csv_name_pattern: File-name pattern; `{save_suffix}` is replaced with the set's save suffix.
        average: Averaging mode passed to `f1_score`.

    Returns:
        Dict mapping test-set name to a dict with keys "Accuracy", "Sensitivity", "Specificity",
        "F1", "AUC", "FPR" and "FNR". "Sensitivity", "AUC" and "FNR" are left as None.
    """
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    baseline_metric_dict = {}
    for test_set_name, param_dict in EVAL_DATA_MAP.items():
        if "control" in test_set_name:
            continue
        true_labels = param_dict["data"][label_column_name]
        baseline_predictions = np.zeros(len(true_labels))

        # Pin the label set so the matrix is always 2x2; without it, a split that contains
        # only negatives yields a 1x1 matrix and the four-way unpacking below fails.
        tn, fp, fn, tp = confusion_matrix(true_labels, baseline_predictions, labels=[0, 1]).ravel()
        specificity = tn / (tn+fp)
        fpr = fp / (fp+tn)

        baseline_metric_dict[test_set_name] = {
            "Accuracy": accuracy_score(true_labels, baseline_predictions),
            "Sensitivity": None,
            "Specificity": specificity,
            "F1": f1_score(true_labels, baseline_predictions, average=average),
            "AUC": None,
            "FPR": fpr,
            "FNR": None,
        }

        if save_dir:
            metric_save_path = op.join(
                save_dir, metric_csv_name_pattern.format(save_suffix=param_dict["save_suffix"])
            )
            # Transpose so each metric becomes a row of the CSV.
            pd.DataFrame(baseline_metric_dict[test_set_name], index=[0]).T.to_csv(metric_save_path, index=True)

    return baseline_metric_dict

In [15]:
# All-zero baseline scored against the scar label column.
scar_baseline_metric_dict = get_baseline_metrics(
    label_column_name="scar_cad",
    save_dir="../resources/statistics/scar_baseline_metrics",
)

# All-zero baseline scored against the LVEF label column.
lvef_baseline_metric_dict = get_baseline_metrics(
    label_column_name="lvef",
    save_dir="../resources/statistics/lvef_baseline_metrics",
)

### **single-task**

In [None]:
# Evaluate both single-task models on every test set in EVAL_DATA_MAP;
# prediction and metric CSVs are written into each model's own directory.
evaluate_and_save(singletask_scar_classifier, save_dir=singletask_scar_model_path)
evaluate_and_save(singletask_lvef_classifier, save_dir=singletask_lvef_model_path)

### **single-task-clinical**

In [None]:
# Same evaluation for the single-task clinical models; CSVs go to each model's directory.
evaluate_and_save(singletask_scar_clinical_classifier, save_dir=singletask_scar_clinical_model_path)
evaluate_and_save(singletask_lvef_clinical_classifier, save_dir=singletask_lvef_clinical_model_path)

### **multi-task**

In [None]:
# Writes prediction_*.csv / metrics_*.csv into the multi-task model directory.
# The prevalence-specific cell later reads prediction_new_test.csv from that directory.
evaluate_and_save(multitask_classifier, save_dir=multitask_model_path)

**multi-task-old-format**

In [None]:
# Evaluate the old-format multi-task checkpoint; CSVs go to its own directory.
evaluate_and_save(multitask_old_format_classifier, save_dir=multitask_old_format_model_path)

**multi-task-transferred**

In [None]:
# Evaluate the transferred multi-task checkpoint; CSVs go to its own directory.
evaluate_and_save(multitask_transferred_classifier, save_dir=multitask_transferred_model_path)

### **multi-task-clinical**

In [None]:
# Overwrite the EVAL_DATA_MAP to only evaluate the test sets with available clinical data.
# This rebinds the global that evaluate_and_save reads at call time. The four entries listed
# here are the same as in the initial definition (whose control-test entry is commented out).
# TEST_SET_SAVE_SUFFIX_LIST is not recomputed by this cell.
EVAL_DATA_MAP = {
    "old-test": {"data": old_test_df, "save_suffix": "old_test",},
    "old-test-sensitivity": {"data": sensitivity_old_test_df, "save_suffix": "old_test_sensitivity",},
    "new-test": {"data": new_test_df, "save_suffix": "new_test",},
    "new-test-sensitivity": {"data": sensitivity_new_test_df, "save_suffix": "new_test_sensitivity",},
}

# Evaluate the multi-task clinical model; CSVs go to its own directory.
evaluate_and_save(multitask_clinical_classifier, save_dir=multitask_clinical_model_path)

### **[scar] Prevalence-specific Evaluation**

In [105]:
import random

def sample(
    dataframe,
    num_samples: int = 500,
    prevalence: float = 7.9,
    random_state: int = None,
    label_column_name: str = "scar_label",
):
    """
    Draw a subsample of `dataframe` whose positive rate matches a target prevalence.

    Args:
        dataframe: Frame with a binary label column (1 = positive, 0 = negative).
        num_samples: Total number of rows to return.
        prevalence: Target share of positive rows, in percent (default 7.9).
        random_state: Seed used for both draws. When None, a seed is drawn from
            Python's `random` module, so results follow that module's state.
        label_column_name: Name of the binary label column.

    Returns:
        A new dataframe with a fresh 0..n-1 index: the sampled positive rows first,
        followed by the sampled negative rows.

    Raises:
        ValueError: If `prevalence` is outside [0, 100]. pandas also raises ValueError
            when more rows are requested from a class than it contains.
    """
    if not 0 <= prevalence <= 100:
        raise ValueError(f"prevalence must be a percentage in [0, 100], got {prevalence}")

    # The positive count is truncated toward zero; negatives fill the remainder.
    num_positive_samples = int(num_samples * prevalence / 100)
    num_negative_samples = num_samples - num_positive_samples

    if random_state is None:
        random_state = random.randint(0, 1000)

    labels = dataframe[label_column_name]
    positive_sample_dataframe = dataframe[labels == 1].sample(n=num_positive_samples, random_state=random_state)
    negative_sample_dataframe = dataframe[labels == 0].sample(n=num_negative_samples, random_state=random_state)
    return pd.concat([positive_sample_dataframe, negative_sample_dataframe]).reset_index(drop=True)

In [None]:
import scipy.stats as st
from mtecg.evaluation import calculate_metrics

num_sample_per_iteration = 500
# num_iteration_list = [20, 50, 200]
num_iteration_list = [1000]


def _summarize_metric(values, confidence: float = 0.95):
    """Return a one-row frame with mean, std and a t-interval for the mean of `values`."""
    mean_value = np.mean(values)
    # The confidence level is passed positionally: SciPy renamed this keyword from
    # `alpha` to `confidence`, and the positional form works with either name.
    lower_bound, upper_bound = st.t.interval(
        confidence,
        df=len(values) - 1,
        loc=mean_value,
        scale=st.sem(values),
    )
    return pd.DataFrame(
        {
            "mean": [mean_value],
            "std": [np.std(values)],
            "lower_bound_ci": [lower_bound],
            "upper_bound_ci": [upper_bound],
        }
    ).round(4)


multitask_prediction_df = pd.read_csv(op.join(multitask_model_path, "prediction_new_test.csv"))

# sample() draws its seed from Python's `random` module; seed it here so that
# re-running this cell reproduces the same resamples.
random.seed(SEED)

for num_iteration in tqdm(num_iteration_list):
    scar_auc_list = []
    scar_f1_list = []
    for i in tqdm(range(num_iteration)):
        sampled_test_df = sample(multitask_prediction_df, num_samples = num_sample_per_iteration)
        metric_df = calculate_metrics(sampled_test_df, tasks=["scar"])
        # _, metric_df = evaluate_from_dataframe(sampled_test_df, multitask_classifier)
        # _, metric_df = evaluate_from_dataframe(sampled_clinical_test_df, multitask_clinical_classifier)
        # .iloc[0] takes the first entry by position regardless of how the index is labelled.
        scar_auc_list.append(metric_df.T["AUC"].iloc[0])
        scar_f1_list.append(metric_df.T["F1"].iloc[0])

    # 95% confidence interval for the mean scar AUC / F1 across the resamples.
    auc_summary_df = _summarize_metric(scar_auc_list)
    f1_summary_df = _summarize_metric(scar_f1_list)

    auc_summary_df.to_csv(op.join(multitask_model_path, f"prevalence_specific_auc_{num_sample_per_iteration}_{num_iteration}.csv"), index=False)
    f1_summary_df.to_csv(op.join(multitask_model_path, f"prevalence_specific_f1_{num_sample_per_iteration}_{num_iteration}.csv"), index=False)

### **XGB**

In [45]:
# A function to get XGBoost predictions.
from mtecg.evaluation import calculate_metrics_per_task
import mtecg.constants as constants
import pandas as pd
from typing import List

def evaluate_xgb_from_dataframe(
    dataframe: pd.DataFrame,
    model,
    feature_column_names: List[str],
    label_column_name: str = "scar_cad",
    is_control_population=False,
    task="scar",
    average="weighted",
    ):
    """
    Run a fitted classifier on tabular features and compute metrics for one task.

    Args:
        dataframe: Frame holding the feature columns, the label column and a "file_name" column.
        model: Object exposing `predict_proba` and `predict`.
        feature_column_names: Columns fed to the model.
        label_column_name: Column holding the ground-truth label.
        is_control_population: Forwarded to `calculate_metrics_per_task`.
        task: Task name used as prefix of the output columns.
        average: Forwarded to `calculate_metrics_per_task`.

    Returns:
        (prediction_dataframe, metrics_dataframe). The prediction frame has columns
        "<task>_label", "<task>_prediction", "<task>_probability" and "filename".
    """
    features = dataframe[feature_column_names]
    # Column 1 of predict_proba is used as the probability for this task.
    class_one_probabilities = model.predict_proba(features)[:, 1]
    hard_predictions = model.predict(features)

    output_columns = {
        f"{task}_label": dataframe[label_column_name].values,
        f"{task}_prediction": hard_predictions,
        f"{task}_probability": class_one_probabilities,
        "filename": dataframe["file_name"].values,
    }
    prediction_dataframe = pd.DataFrame(output_columns)

    metrics_dataframe = calculate_metrics_per_task(
        prediction_dataframe, task, is_control_population=is_control_population, average=average
    )
    return prediction_dataframe, metrics_dataframe

def evaluate_xgb_and_save(
    xgb_model,
    save_dir: str,
    task: str = "scar",
    average: str = "weighted",
    feature_column_names: List[str] = constants.numerical_feature_column_names + constants.categorical_feature_column_names,
    label_column_name: str = "scar_cad",
    prediction_csv_name_pattern: str = "prediction_{save_suffix}.csv",
    metric_csv_name_pattern: str = "metrics_{save_suffix}.csv",
):
    """
    Evaluate a tabular classifier on every test set in the global EVAL_DATA_MAP and save CSVs.

    Args:
        xgb_model: Fitted model passed to `evaluate_xgb_from_dataframe`.
        save_dir: Directory that receives the prediction and metric CSVs.
        task: Task name used as prefix of the prediction columns.
        average: Averaging mode forwarded to the metric computation.
        feature_column_names: Columns fed to the model.
        label_column_name: Ground-truth column the predictions are scored against.
        prediction_csv_name_pattern: File-name pattern; `{save_suffix}` is the set's save suffix.
        metric_csv_name_pattern: File-name pattern for the metric table, same placeholder.

    Test sets whose name contains "control" are evaluated with `is_control_population=True`.
    """
    for test_set_name, param_dict in tqdm(EVAL_DATA_MAP.items()):
        dataframe, save_suffix = param_dict["data"], param_dict["save_suffix"]

        # `label_column_name` must be forwarded: otherwise the callee falls back to its
        # "scar_cad" default and e.g. the LVEF model is scored against scar labels.
        result_dataframe, metric_dataframe = evaluate_xgb_from_dataframe(
            dataframe,
            xgb_model,
            feature_column_names=feature_column_names,
            label_column_name=label_column_name,
            is_control_population="control" in test_set_name,
            task=task,
            average=average,
        )

        result_save_path = op.join(save_dir, prediction_csv_name_pattern.format(save_suffix=save_suffix))
        metric_save_path = op.join(save_dir, metric_csv_name_pattern.format(save_suffix=save_suffix))

        result_dataframe.to_csv(result_save_path, index=False)
        metric_dataframe.to_csv(metric_save_path, index=True)

In [46]:
import joblib

# Layout: <xgb_model_dir>/<task>_model/model.joblib
xgb_model_dir = "../scripts/mtecg/xgb"
scar_xgb_model_dir, lvef_xgb_model_dir = (
    op.join(xgb_model_dir, sub_dir_name) for sub_dir_name in ("scar_model", "lvef_model")
)

# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
scar_xgb_classifier, lvef_xgb_classifier = (
    joblib.load(op.join(task_model_dir, "model.joblib"))
    for task_model_dir in (scar_xgb_model_dir, lvef_xgb_model_dir)
)

In [47]:
# Evaluate the scar XGBoost model on every test set in EVAL_DATA_MAP;
# CSVs are written next to the model file in scar_xgb_model_dir.
evaluate_xgb_and_save(
    scar_xgb_classifier,
    task="scar",
    label_column_name="scar_cad",
    save_dir=scar_xgb_model_dir,
    )

# Same for the LVEF XGBoost model, with the "lvef" column passed as the label column.
evaluate_xgb_and_save(
    lvef_xgb_classifier,
    task="lvef",
    label_column_name="lvef",
    save_dir=lvef_xgb_model_dir,
)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

## **Save Prediction Probabilities on Each Test Set as a single file**

In [86]:
import pandas as pd
from typing import Dict, List

# A function to read the predictions from the save csv file in each model folder.
# The probability columns of each task are then concatenated into a single dataframe.
# The probability columns are in the format f"{task}_probability".

def get_probabilities(
    model_name_to_dir_map: Dict[str, str],
    test_set_suffix_list: List[str],
    probability_column_name_pattern: str = "{task}_probability",
    prediction_csv_name_pattern: str = "prediction_{test_set_suffix}.csv",
    task: str = "scar",
) -> Dict[str, pd.DataFrame]:
    """
    Collect one task's predicted probabilities from several models' saved prediction CSVs.

    Args:
        model_name_to_dir_map: Model name -> directory containing that model's prediction CSVs.
        test_set_suffix_list: Test-set suffixes to collect; suffixes containing "control" are skipped.
        probability_column_name_pattern: Pattern of the probability column; `{task}` is replaced by `task`.
        prediction_csv_name_pattern: Pattern of the CSV file name; `{test_set_suffix}` is replaced.
        task: Task whose label and probability columns are read.

    Returns:
        Dict mapping each test-set suffix to a dataframe with a "true_label" column (taken from
        the first model's CSV) followed by one probability column per model, named after the model.
        Columns are combined by row index; nothing checks that all CSVs list the same rows
        in the same order.
    """
    probability_column_name = probability_column_name_pattern.format(task=task)
    label_column_name = f"{task}_label"

    test_set_to_probability_dataframe_dict = {}
    for test_set_suffix in test_set_suffix_list:
        if "control" in test_set_suffix:
            continue

        # The file name depends only on the test set, so build it once per test set.
        filename = prediction_csv_name_pattern.format(test_set_suffix=test_set_suffix)

        model_name_to_probabilities_dict = {}
        for model_name, model_dir in model_name_to_dir_map.items():
            prediction_dataframe = pd.read_csv(op.join(model_dir, filename))
            if "true_label" not in model_name_to_probabilities_dict:
                model_name_to_probabilities_dict["true_label"] = prediction_dataframe[label_column_name]
            model_name_to_probabilities_dict[model_name] = prediction_dataframe[probability_column_name]

        test_set_to_probability_dataframe_dict[test_set_suffix] = pd.DataFrame(model_name_to_probabilities_dict)
    return test_set_to_probability_dataframe_dict

In [88]:
# Model directories whose saved prediction CSVs are combined below.
singletask_scar_model_path = "../trained_models/single-task-scar/resnet34d_384_LVEF50"
# The cells that follow read `singletask_scar_clinical_model_path` (no underscore between
# "single" and "task"), so that is the name that must be defined here.
singletask_scar_clinical_model_path = "../trained_models/single-task-scar-clinical/resnet34d_384_LVEF50_birnn_dim512"
# Backward-compatible alias for the previous spelling of the same path.
single_task_scar_clinical_model_path = singletask_scar_clinical_model_path
singletask_lvef_model_path = "../trained_models/single-task-lvef/resnet34d_384_LVEF50"
singletask_lvef_clinical_model_path = "../trained_models/single-task-lvef-clinical/resnet34d_384_LVEF50_birnn_dim512"

multitask_old_format_model_path = "../trained_models/multi-task-old-format/resnet34d_384_LVEF50"
multitask_transferred_model_path = "../trained_models/multi-task-transferred/resnet34d_384_LVEF50"
multitask_model_path = "../trained_models/multi-task/resnet34d_384_LVEF50"
multitask_clinical_model_path = "../trained_models/multi-task-clinical/resnet34d_384_LVEF50_birnn_dim512/"

# NOTE(review): the XGBoost evaluation cell writes its CSVs under "../scripts/mtecg/xgb";
# confirm the prediction files are also present in these directories.
xgboost_scar_model_path = "../trained_models/xgboost-clinical/scar_model"
xgboost_lvef_model_path = "../trained_models/xgboost-clinical/lvef_model"

# Output directory for the combined per-test-set probability CSVs; created if missing.
probability_save_dir = "../resources/prediction_probabilities"
os.makedirs(probability_save_dir, exist_ok=True)

In [89]:
# For each task, gather every model's probability column per test set and write one CSV
# per (task, test set) into probability_save_dir.
tasks = ["scar", "lvef"]
for task in tasks:
    # Multi-task models use the same directory for both tasks; single-task and XGBoost
    # models have one directory per task, selected here by the current task.
    model_name_to_dir_map = {
        "multi-task-old-format": multitask_old_format_model_path,
        "multi-task-transferred": multitask_transferred_model_path,
        "single-task": singletask_scar_model_path if task == "scar" else singletask_lvef_model_path,
        "multi-task": multitask_model_path,
        "single-task-clinical": singletask_scar_clinical_model_path if task == "scar" else singletask_lvef_clinical_model_path,
        "multi-task-clinical": multitask_clinical_model_path,
        "xgboost-clinical": xgboost_scar_model_path if task == "scar" else xgboost_lvef_model_path,
    }

    # Uses the suffix list captured when EVAL_DATA_MAP was first defined.
    test_set_to_probability_dataframe_dict = get_probabilities(
        model_name_to_dir_map=model_name_to_dir_map,
        test_set_suffix_list=TEST_SET_SAVE_SUFFIX_LIST,
        task=task,
    )

    # Output file name: <task>_probabilities_<test_set_suffix>.csv
    for test_set_suffix, probability_dataframe in test_set_to_probability_dataframe_dict.items():
        probability_save_path = op.join(probability_save_dir, f"{task}_probabilities_{test_set_suffix}.csv")
        probability_dataframe.to_csv(probability_save_path, index=False)

## BBB

In [None]:
import os.path as op
import pandas as pd
from mtecg.evaluation import calculate_metrics

def recompute_metrics_with_bbb(
    model_dir: str,
    test_set_name: str,
    csv_with_qrs_path: str,
    image_dir: str,
    imputer_dir: str,
    qrs_duration_threshold: float = 120,
    ):
    """
    Recompute a model's saved-prediction metrics separately for rows with and without BBB.

    A row is treated as BBB when its `qrs_duration` is strictly greater than
    `qrs_duration_threshold`. Rows without a QRS duration (no match in the QRS CSV, or a
    missing value) compare as False and therefore fall into the "without BBB" group.

    Args:
        model_dir: Directory containing `prediction_<test_set_name>.csv`.
        test_set_name: Suffix of the prediction file to read.
        csv_with_qrs_path: CSV passed to `load_ecg_dataframe`; must provide `qrs_duration`.
        image_dir: Image directory passed to `load_ecg_dataframe`.
        imputer_dir: Imputer directory passed to `load_ecg_dataframe`.
        qrs_duration_threshold: Cut-off applied to `qrs_duration` (default 120).

    Returns:
        (metrics_df_with_bbb, metrics_df_without_bbb), each as returned by `calculate_metrics`.
    """
    prediction_result_path = op.join(model_dir, f"prediction_{test_set_name}.csv")
    # Rename without mutating in place so both frames share the "file_name" join key.
    prediction_df = pd.read_csv(prediction_result_path).rename(columns={"filename": "file_name"})

    # Load the ECG dataframe that carries the QRS duration.
    test_df_with_qrs = load_ecg_dataframe(
        csv_with_qrs_path,
        image_dir,
        imputer_dir=imputer_dir,
        do_split=False
    ).rename(columns={"filename": "file_name"})

    # Bring in only the QRS duration: merging the whole frame would make pandas add
    # _x/_y suffixes to any column name the two frames share.
    prediction_df_with_qrs = prediction_df.merge(
        test_df_with_qrs[["file_name", "qrs_duration"]], on="file_name", how="left"
    )

    # Vectorised comparison; missing durations evaluate to False.
    prediction_df_with_qrs["is_bbb"] = prediction_df_with_qrs["qrs_duration"] > qrs_duration_threshold

    is_bbb_mask = prediction_df_with_qrs["is_bbb"]
    prediction_df_with_bbb = prediction_df_with_qrs[is_bbb_mask].reset_index(drop=True)
    prediction_df_without_bbb = prediction_df_with_qrs[~is_bbb_mask].reset_index(drop=True)

    # Infer the tasks from the prediction columns. A column counts toward "scar" first and
    # toward "lvef" only if it does not mention "scar"; "scar" is always listed first.
    detected_tasks = {
        "scar" if "scar" in column else "lvef"
        for column in prediction_df.columns
        if "scar" in column or "lvef" in column
    }
    task_list = [task for task in ("scar", "lvef") if task in detected_tasks]

    # Calculate metrics for both groups. Both results are returned in full; indexing the
    # BBB result with an empty column list here would hand back a frame with no columns.
    metrics_df_with_bbb = calculate_metrics(prediction_df_with_bbb, tasks=task_list)
    metrics_df_without_bbb = calculate_metrics(prediction_df_without_bbb, tasks=task_list)

    return metrics_df_with_bbb, metrics_df_without_bbb

# # Usage example:
# model_dir = multitask_model_path
# test_set_name = "new_test"
# csv_with_qrs_path = f"../datasets/{test_set_name}_with_qrs_duration.csv"
# # image_dir = old_image_dir
# image_dir = new_image_dir
# imputer_dir = multitask_clinical_model_path

# metrics_df_with_bbb, metrics_df_without_bbb = recompute_metrics_with_bbb(model_dir, test_set_name, csv_with_qrs_path, image_dir, imputer_dir)

test_set_name_list = ["old_test", "old_test_sensitivity", "new_test", "new_test_sensitivity"]
model_dir_list = [
    singletask_scar_model_path,
    singletask_scar_clinical_model_path,
    singletask_lvef_model_path,
    singletask_lvef_clinical_model_path,
    multitask_old_format_model_path,
    multitask_transferred_model_path,
    multitask_model_path,
    multitask_clinical_model_path,
]

# The same imputer directory is used for every model / test-set combination.
imputer_dir = multitask_clinical_model_path

# Recompute BBB-stratified metrics for every model on every test set and store them
# next to that model's other result files.
for model_dir in model_dir_list:
    for test_set_name in test_set_name_list:
        print(f"model_dir: {model_dir}, test_set_name: {test_set_name}")

        # The sensitivity variants reuse the QRS CSV and image directory of their base set.
        test_set_type = "old_test" if "old_test" in test_set_name else "new_test"
        is_old_test_set = test_set_type == "old_test"

        csv_with_qrs_path = f"../datasets/{test_set_type}_with_qrs_duration.csv"
        image_dir = old_image_dir if is_old_test_set else new_image_dir

        metrics_df_with_bbb, metrics_df_without_bbb = recompute_metrics_with_bbb(
            model_dir, test_set_name, csv_with_qrs_path, image_dir, imputer_dir
        )
        metrics_df_with_bbb.to_csv(f"{model_dir}/metrics_{test_set_name}_with_bbb.csv", index=True)
        metrics_df_without_bbb.to_csv(f"{model_dir}/metrics_{test_set_name}_without_bbb.csv", index=True)