# Master Notebook

In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/drive.
drive.mount('/content/drive')

In [None]:
import os
# Make the thesis folder on the mounted Drive the working directory, so the
# relative paths used throughout this notebook resolve inside it.
os.chdir("/content/drive/MyDrive/Thesis Repository")

In [None]:
# Install extra packages into the notebook runtime. This bare `pip` line is
# not Python syntax; NOTE(review): it relies on the notebook's automagic
# handling, `%pip install ...` is the explicit form.
pip install scikit-multilearn hf_xet

In [None]:
# RUN THESE IMPORTS FIRST
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import DebertaV2Tokenizer, AutoModel, RobertaTokenizer
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm
from sklearn.metrics import f1_score, classification_report
import numpy as np
import shap
#from captum.attr import IntegratedGradients
from transformers import AutoTokenizer, Trainer, TrainingArguments
import torch.nn.functional as F
import hf_xet
import itertools
#import optuna
import sys
import importlib # !pip install importlib
sys.path.append('.')


### Custom built modules ###
import importlib

import data_loader_STL
importlib.reload(data_loader_STL)
from data_loader_STL import prepare_data_STL_fine, prepare_data_STL_hierarchical, prepare_data_STL_coarse

import single_task
importlib.reload(single_task)
from single_task import TransformerClassifier, MultiLabelDataset, train_single_task_model, train_hierarchical_classifier

import multi_task
importlib.reload(multi_task)
from multi_task import MultiTaskTransformer, train_mtl_flat, train_mtl_hierarchical, apply_hierarchical_constraints_mtl, hierarchical_loss_mtl, AdapterMultiTaskTransformer

import data_loader_MTL
importlib.reload(data_loader_MTL)
from data_loader_MTL import prepare_data_MTL_fine_flat, prepare_data_MTL_hierarchical, prepare_data_MTL_coarse, MultiTaskDataset, prepare_data_MTL_mixed

import evaluation_utils as eval_util
importlib.reload(eval_util)
from evaluation_utils import evaluate_flat, evaluate_hierarchy, evaluate_mtl_all_tasks, evaluate_mtl_task, evaluate_per_class_flat, evaluate_per_domain_flat, predict_proba, evaluate_threshold_sweep, evaluate_mtl_hierarchical_task, evaluate_mtl_hierarchical_all_tasks, evaluate_flat_custom, compute_fine_vs_coarse_metrics, get_coarse_label_list


# Ablation - mixed dataset function

In [None]:
def prepare_data_MTL_mixed(
    task,
    train_domains,
    test_domains,
    train_languages,
    model_name,
    max_len,
    batch_size,
    granularity_s1="coarse",
    granularity_s2="fine"
):
    """Prepare two-task MTL data where each subtask picks its own granularity.

    For each subtask the matching loader (``prepare_data_MTL_fine_flat`` for
    ``"fine"``, ``prepare_data_MTL_coarse`` for ``"coarse"``) is called with
    the same arguments, and only that subtask's slots of the loader's
    21-element result are kept.

    Note: this notebook-level definition shadows the function of the same
    name imported from ``data_loader_MTL`` earlier in the notebook.

    Args:
        task: Forwarded unchanged as the loaders' first positional argument.
        train_domains: Forwarded as ``train_domains``.
        test_domains: Forwarded as ``test_domains``.
        train_languages: Forwarded as ``train_languages``.
        model_name: Forwarded as ``model_name``.
        max_len: Forwarded as ``max_len``.
        batch_size: Forwarded as ``batch_size``.
        granularity_s1: ``"fine"`` or ``"coarse"`` for subtask 1.
        granularity_s2: ``"fine"`` or ``"coarse"`` for subtask 2.

    Returns:
        A 21-tuple in the same layout the loaders use: seven subtask-1 items
        (train/val/test frames, train/val/test targets, label binarizer),
        the same seven for subtask 2, three subtask-1 data loaders, three
        subtask-2 data loaders, and ``num_classes_dict`` with keys
        ``"task1"`` / ``"task2"`` (number of classes per binarizer).

    Raises:
        ValueError: If either granularity is not ``"fine"`` or ``"coarse"``.
            (Previously such a value skipped both branches and only failed
            later with an unbound-variable error.)
    """
    allowed = ("fine", "coarse")
    # Validate both settings before any loader is called.
    for arg_name, value in (
        ("granularity_s1", granularity_s1),
        ("granularity_s2", granularity_s2),
    ):
        if value not in allowed:
            raise ValueError(
                f"{arg_name} must be one of {allowed}, got {value!r}"
            )

    def _load(granularity):
        # Pick the loader for this granularity and call it with the shared
        # arguments; both loaders take the same signature.
        if granularity == "fine":
            loader_fn = prepare_data_MTL_fine_flat
        else:
            loader_fn = prepare_data_MTL_coarse
        return loader_fn(
            task,
            train_domains=train_domains,
            test_domains=test_domains,
            train_languages=train_languages,
            model_name=model_name,
            max_len=max_len,
            batch_size=batch_size
        )

    # NOTE(review): the loader is invoked once per subtask even when both
    # granularities match. Kept that way because the loaders' side effects
    # (e.g. RNG use) are not visible from here.

    # --- Task 1: keep only the subtask-1 slots ---
    (
        df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
        _, _, _, _, _, _, _,
        train_loader_s1, val_loader_s1, test_loader_s1,
        _, _, _,
        _
    ) = _load(granularity_s1)

    # --- Task 2: keep only the subtask-2 slots ---
    (
        _, _, _, _, _, _, _,
        df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
        _, _, _,
        train_loader_s2, val_loader_s2, test_loader_s2,
        _
    ) = _load(granularity_s2)

    num_classes_dict = {
        "task1": len(mlb_s1.classes_),
        "task2": len(mlb_s2.classes_)
    }

    return (
        df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
        df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
        train_loader_s1, val_loader_s1, test_loader_s1,
        train_loader_s2, val_loader_s2, test_loader_s2,
        num_classes_dict
    )


# Ablation - Granularity - MTL

In [None]:
import pandas as pd
import itertools
import torch
import os
from transformers import AutoTokenizer

# === Configuration ===
# Hyperparameters shared by every run of the granularity ablation.
MODEL_NAME = "distilbert-base-uncased"
MAX_LEN = 512
BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 3e-5
TRAIN_LANGUAGES = ["ALL"]

granularity_options = ['fine','coarse']  # not referenced again in this cell
seeds = [71, 72, 73, 74, 75] # other seed values noted by the author: 38, 39, 40, 54, 55, 42, 43, 44
# (subtask-1 granularity, subtask-2 granularity) pairs to run.
granularity_configs = [('coarse', 'coarse')]


train_domains_all = [["UA"], ["CC"], ["UA", "CC"]]
test_domains = ["UA", "CC"]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Not passed to anything in this cell; the data loaders receive MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

os.makedirs("ablation_results_final_seeds", exist_ok=True)

# training loop: one CSV per granularity pair, one row per
# (train_domain, model variant, seed) combination.
for gran_s1, gran_s2 in granularity_configs:
    config_id = f"ef-{gran_s1}_nc-{gran_s2}"
    results = []

    for train_domain in train_domains_all:
        for task_type in ["multi_task", "multi_task_adapter"]:
            for seed in seeds:
                # Only torch's RNG is seeded here.
                torch.manual_seed(seed)

                # Checkpoint name; currently unused because the save below
                # is commented out.
                model_path = f"{task_type}_{'-'.join(train_domain)}_to_{'-'.join(test_domains)}_ef-{gran_s1}_nc-{gran_s2}_seed{seed}.pt"

                # --- Data Prep ---
                # *_s1 feeds the "entity_framing" head and *_s2 the
                # "narrative_classification" head (see the dicts below).
                # NOTE(review): the model-variant name is what gets passed as
                # the loaders' `task` argument — confirm that is intended.
                (
                    df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
                    df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
                    train_loader_s1, val_loader_s1, test_loader_s1,
                    train_loader_s2, val_loader_s2, test_loader_s2,
                    num_classes_dict
                ) = prepare_data_MTL_mixed(
                    task=task_type,
                    train_domains=train_domain,
                    test_domains=test_domains,
                    train_languages=TRAIN_LANGUAGES,
                    model_name=MODEL_NAME,
                    max_len=MAX_LEN,
                    batch_size=BATCH_SIZE,
                    granularity_s1=gran_s1,
                    granularity_s2=gran_s2
                )

                # Output width per head = number of label columns.
                task_classes = {
                    "narrative_classification": y_train_s2.shape[1],
                    "entity_framing": y_train_s1.shape[1]
                }

                # --- Model ---
                if task_type == "multi_task":
                    model = MultiTaskTransformer(MODEL_NAME, task_classes).to(device)
                else:
                    model = AdapterMultiTaskTransformer(
                        model_name=MODEL_NAME,
                        num_classes_dict=task_classes,
                        adapter_dim=128
                    ).to(device)

                optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
                criterion = torch.nn.BCEWithLogitsLoss()

                # --- Train ---
                train_mtl_flat(
                    model=model,
                    loaders={
                        "narrative_classification": train_loader_s2,
                        "entity_framing": train_loader_s1
                    },
                    val_data={
                        "narrative_classification": (val_loader_s2, df_val_s2, y_val_s2, mlb_s2),
                        "entity_framing": (val_loader_s1, df_val_s1, y_val_s1, mlb_s1)
                    },
                    mlbs={
                        "narrative_classification": mlb_s2,
                        "entity_framing": mlb_s1
                    },
                    optimizer=optimizer,
                    criterion=criterion,
                    device=device,
                    epochs=EPOCHS,
                    train_domain=train_domain,
                    test_domain=test_domains
                )

                # torch.save(model.state_dict(), model_path)

                #  Evaluate
                # NOTE(review): domain_list is the *training* domain list,
                # yet both "UA" and "CC" are read from the result below —
                # confirm evaluate_mtl_all_tasks always returns both.
                eval_results = evaluate_mtl_all_tasks(
                    model=model,
                    task_loaders={
                        "narrative_classification": test_loader_s2,
                        "entity_framing": test_loader_s1
                    },
                    task_dfs={
                        "narrative_classification": df_test_s2,
                        "entity_framing": df_test_s1
                    },
                    task_targets={
                        "narrative_classification": y_test_s2,
                        "entity_framing": y_test_s1
                    },
                    task_mlbs={
                        "narrative_classification": mlb_s2,
                        "entity_framing": mlb_s1
                    },
                    domain_list=train_domain,
                    device=device
                )

                ef = eval_results["entity_framing"]
                nc = eval_results["narrative_classification"]

                results.append({
                    "task_type": task_type,
                    "seed": seed,
                    "train_domain": "-".join(train_domain),
                    "ef_granularity": gran_s1,
                    "nc_granularity": gran_s2,

                    # EF metrics
                    "ef_micro_ua": ef["UA"]["micro"],
                    "ef_macro_ua": ef["UA"]["macro"],
                    "ef_exact_ua": ef["UA"]["exact"],
                    "ef_micro_cc": ef["CC"]["micro"],
                    "ef_macro_cc": ef["CC"]["macro"],
                    "ef_exact_cc": ef["CC"]["exact"],

                    # NC metrics
                    "nc_micro_ua": nc["UA"]["micro"],
                    "nc_macro_ua": nc["UA"]["macro"],
                    "nc_exact_ua": nc["UA"]["exact"],
                    "nc_micro_cc": nc["CC"]["micro"],
                    "nc_macro_cc": nc["CC"]["macro"],
                    "nc_exact_cc": nc["CC"]["exact"]
                })


    df_out = pd.DataFrame(results)
    out_path = f"ablation_results/ablation_{config_id}.csv"
    # The folder created at the top of this cell is a different one
    # ("ablation_results_final_seeds"), so make sure the CSV's own directory
    # exists; otherwise the write fails after all the training has finished.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    df_out.to_csv(out_path, index=False)
    print(f" Saved: {out_path}")


# Ablation - Domain/Task - MTL

In [None]:
import pandas as pd
import torch
import itertools
import os
from transformers import AutoTokenizer

# === Configuration ===
# Hyperparameters shared by every run of the cross-domain (domain/task) ablation.
MODEL_NAME = "distilbert-base-uncased"
MAX_LEN = 512
BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 3e-5
TRAIN_LANGUAGES = ["ALL"]
TEST_LANGUAGES = ["ALL"]  # not referenced again in this cell
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Not passed to anything in this cell; the data loaders receive MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
os.makedirs("ablation_results_cross_domain_more_seeds", exist_ok=True)

# === Domain Configurations for Task Splits ===
# Each pair is (entity-framing domain, narrative-classification domain).
domain_configs = [
    ("UA", "CC"),  # EF on UA, NC on CC
    ("CC", "UA")   # EF on CC, NC on UA
]
seeds = [71, 72, 73, 74, 75] # other seed values noted by the author: 42, 43, 44, 31, 32

# One CSV per domain pair, one row per (model variant, seed).
for ef_domain, nc_domain in domain_configs:
    config_id = f"EF-{ef_domain}_NC-{nc_domain}"
    results = []

    for task_type in ["multi_task", "multi_task_adapter"]:
        for seed in seeds:
            # Only torch's RNG is seeded here.
            torch.manual_seed(seed)


            # Checkpoint name; not used anywhere else in this cell.
            model_path = f"{task_type}_EF-{ef_domain}_NC-{nc_domain}_seed{seed}.pt"

            # Prepare data: *_s1 feeds the "entity_framing" head and *_s2 the
            # "narrative_classification" head (see the dicts below).
            # NOTE(review): a single combined train_domains list is passed, and
            # prepare_data_MTL_mixed as defined in this notebook forwards the same
            # list to both subtasks' loaders. Confirm the loaders really restrict
            # EF to ef_domain and NC to nc_domain; otherwise both domain_configs
            # train on the same domains and differ only in list order.
            (
                df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
                df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
                train_loader_s1, val_loader_s1, test_loader_s1,
                train_loader_s2, val_loader_s2, test_loader_s2,
                num_classes_dict
            ) = prepare_data_MTL_mixed(
                task=task_type,
                train_domains=[ef_domain, nc_domain],
                test_domains=["UA", "CC"],
                train_languages=TRAIN_LANGUAGES,
                model_name=MODEL_NAME,
                max_len=MAX_LEN,
                batch_size=BATCH_SIZE,
                granularity_s1="fine",
                granularity_s2="fine"
            )

            # Output width per head = number of label columns.
            task_classes = {
                "entity_framing": y_train_s1.shape[1],
                "narrative_classification": y_train_s2.shape[1]
            }

            # --- Model: plain shared-encoder MTL or the adapter variant
            if task_type == "multi_task":
                model = MultiTaskTransformer(MODEL_NAME, task_classes).to(device)
            else:
                model = AdapterMultiTaskTransformer(
                    model_name=MODEL_NAME,
                    num_classes_dict=task_classes,
                    adapter_dim=128
                ).to(device)

            optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
            criterion = torch.nn.BCEWithLogitsLoss()

            # --- Train
            train_mtl_flat(
                model=model,
                loaders={
                    "narrative_classification": train_loader_s2,
                    "entity_framing": train_loader_s1
                },
                val_data={
                    "narrative_classification": (val_loader_s2, df_val_s2, y_val_s2, mlb_s2),
                    "entity_framing": (val_loader_s1, df_val_s1, y_val_s1, mlb_s1)
                },
                mlbs={
                    "narrative_classification": mlb_s2,
                    "entity_framing": mlb_s1
                },
                optimizer=optimizer,
                criterion=criterion,
                device=device,
                epochs=EPOCHS,
                train_domain=[ef_domain, nc_domain],
                test_domain=["UA", "CC"]
            )

            # --- Evaluate: results are indexed by task name, then by domain
            # ("UA"/"CC"), then by metric ("micro"/"macro"/"exact") below.
            eval_results = evaluate_mtl_all_tasks(
                model=model,
                task_loaders={
                    "narrative_classification": test_loader_s2,
                    "entity_framing": test_loader_s1
                },
                task_dfs={
                    "narrative_classification": df_test_s2,
                    "entity_framing": df_test_s1
                },
                task_targets={
                    "narrative_classification": y_test_s2,
                    "entity_framing": y_test_s1
                },
                task_mlbs={
                    "narrative_classification": mlb_s2,
                    "entity_framing": mlb_s1
                },
                domain_list=[ef_domain, nc_domain],
                device=device
            )

            ef = eval_results["entity_framing"]
            nc = eval_results["narrative_classification"]

            # One flat row per run: EF metrics first, then NC metrics.
            results.append({
                "task_type": task_type,
                "seed": seed,
                "ef_train_domain": ef_domain,
                "nc_train_domain": nc_domain,

                "ef_micro_ua": ef["UA"]["micro"],
                "ef_macro_ua": ef["UA"]["macro"],
                "ef_exact_ua": ef["UA"]["exact"],
                "ef_micro_cc": ef["CC"]["micro"],
                "ef_macro_cc": ef["CC"]["macro"],
                "ef_exact_cc": ef["CC"]["exact"],

                "nc_micro_ua": nc["UA"]["micro"],
                "nc_macro_ua": nc["UA"]["macro"],
                "nc_exact_ua": nc["UA"]["exact"],
                "nc_micro_cc": nc["CC"]["micro"],
                "nc_macro_cc": nc["CC"]["macro"],
                "nc_exact_cc": nc["CC"]["exact"]
            })


    # Persist this domain pair's rows into the folder created at the top of the cell.
    df_out = pd.DataFrame(results)
    out_path = f"ablation_results_cross_domain_more_seeds/ablation_{config_id}.csv"
    df_out.to_csv(out_path, index=False)
    print(f" Saved: {out_path}")


# STL - Training Loop

In [None]:
from torch.utils.data import DataLoader
import pandas as pd
import itertools
import torch
import os
import numpy as np
from transformers import AutoTokenizer

# === Config ===
# Every combination of the lists below is trained and evaluated once.
MODELS = ["distilbert-base-uncased"]
TASKS = ["entity_framing"]
TAXONOMY_DEPTHS = ["fine"]
SEEDS = [71]
TRAIN_DOMAINS = [["UA"]]
TEST_DOMAIN = ["UA", "CC"]
TRAIN_LANGUAGES = ["ALL"]
MAX_LEN = 512
BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 3e-5
THRESHOLD = 0.35  # decision threshold handed to evaluate_flat

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): this folder is created here, but the SHAP plots below are
# written to "shap_plots" — confirm which one is intended.
os.makedirs("shap_plots_final", exist_ok=True)
results = []

for model_name, task, taxonomy, train_domain, seed in itertools.product(MODELS, TASKS, TAXONOMY_DEPTHS, TRAIN_DOMAINS, SEEDS):
    domain_str = "-".join(train_domain)
    print(f"\n--- Running STL: {task} | {taxonomy} | {model_name} | Seed={seed} | Train on {domain_str} ---")
    # Only torch's RNG is seeded here.
    torch.manual_seed(seed)

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Data prep: any taxonomy other than "fine" uses the coarse loader.
    if taxonomy == "fine":
        df_train, df_val, df_test, y_train, y_val, y_test, mlb, TEXT_COL, LABEL_COL = prepare_data_STL_fine(
            TASK=task,
            train_domains=train_domain,
            test_domains=TEST_DOMAIN,
            train_languages=TRAIN_LANGUAGES
        )
    else:
        df_train, df_val, df_test, y_train, y_val, y_test, mlb, TEXT_COL, LABEL_COL = prepare_data_STL_coarse(
            TASK=task,
            train_domains=train_domain,
            test_domains=TEST_DOMAIN,
            train_languages=TRAIN_LANGUAGES
        )

    #  Dataset Setup
    train_dataset = MultiLabelDataset(df_train[TEXT_COL].tolist(), y_train, tokenizer, MAX_LEN)
    val_dataset = MultiLabelDataset(df_val[TEXT_COL].tolist(), y_val, tokenizer, MAX_LEN)
    test_dataset = MultiLabelDataset(df_test[TEXT_COL].tolist(), y_test, tokenizer, MAX_LEN)

    # Only the training loader shuffles.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    # Model Init: one output per label column.
    # NOTE(review): the model is not moved to `device` here — presumably
    # train_single_task_model does that with the `device` it receives; confirm.
    num_classes = y_train.shape[1]
    model = TransformerClassifier(model_name, num_classes)

    model_path = (
        f"model_{task}_{taxonomy}_trained_on_{domain_str}"
        f"_{model_name.replace('/', '-')}_seed{seed}.pt"
    )

    # === Train ===
    model = train_single_task_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        y_val=y_val,
        MODEL_PATH=model_path,
        LEARNING_RATE=LEARNING_RATE,
        EPOCHS=EPOCHS,
        device=device,
        predict_proba=predict_proba,
        evaluate_threshold_sweep=evaluate_threshold_sweep
    )

    # evaluate
    eval_result = evaluate_flat(
        model=model,
        loader=test_loader,
        df_source=df_test,
        mlb=mlb,
        device=device,
        label="TEST",
        threshold=THRESHOLD
    )

    # Extract Per-Domain Metrics: one result row per evaluation domain.
    for domain in ["UA", "CC"]:
        results.append({
            "model": model_name,
            "task": task,
            "taxonomy": taxonomy,
            "train_domain": domain_str,
            "seed": seed,
            "eval_domain": domain,
            "micro": eval_result["per_domain"][domain]["micro"],
            "macro": eval_result["per_domain"][domain]["macro"],
            "exact": eval_result["per_domain"][domain]["exact"]
        })

    # SHAP: explain the first few test texts. This call uses the single-task
    # explain_shap signature (model, tokenizer, texts, max_explain).
    texts_ef = df_test["Translated_Text"].tolist()
    shap_values_ef = explain_shap(model, tokenizer, texts_ef, max_explain=5)
    save_shap_waterfall_plots(shap_values_ef, "shap_plots", task, seed, task, mlb)


df_out = pd.DataFrame(results)
out_path = "ablation_results_stl_augmented_new/stl_all_results_augmented_more_seeds.csv"
# Nothing above creates this folder; create it so the write cannot fail
# after all the training has already finished.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
df_out.to_csv(out_path, index=False)
print(out_path)


# MTL/MTL-PAL

In [None]:
import pandas as pd
import torch
import itertools
import os
from transformers import AutoTokenizer

# === Config ===
# Hyperparameters shared by every baseline MTL / adapter-MTL run in this cell.
MODEL_NAME = "distilbert-base-uncased"
MAX_LEN = 512
BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 3e-5
TRAIN_LANGUAGES = ["ALL"]
TEST_DOMAINS = ["UA", "CC"]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Used by the SHAP calls at the end of each run.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
os.makedirs("new_augmented_seeds", exist_ok=True)

# Domain configs
train_domain_configs = [["UA"]]
seeds = [71] # other seed values noted by the author: 72, 73, 74, 75, 31, 32, 42, 43, 44

# One CSV per training-domain config, one row per (model variant, seed).
for train_domains in train_domain_configs:
    domain_str = "-".join(train_domains)
    results = []

    # Only the plain multi-task variant is listed; any other value would take
    # the adapter branch below.
    for task_type in ["multi_task"]:
        for seed in seeds:
            # Only torch's RNG is seeded here.
            torch.manual_seed(seed)

            print(f"\n--- {task_type.upper()} | Train on: {domain_str} | Seed={seed} ---")

            # Checkpoint name; not used anywhere else in this cell.
            model_path = f"{task_type}_distilbert_trained_on_{domain_str}_seed{seed}.pt"

            # === Prepare Data ===
            # *_s1 feeds the "entity_framing" head and *_s2 the
            # "narrative_classification" head (see the dicts below).
            (
                df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
                df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
                train_loader_s1, val_loader_s1, test_loader_s1,
                train_loader_s2, val_loader_s2, test_loader_s2,
                num_classes_dict
            ) = prepare_data_MTL_mixed(
                task=task_type,
                train_domains=train_domains,
                test_domains=TEST_DOMAINS,
                train_languages=TRAIN_LANGUAGES,
                model_name=MODEL_NAME,
                max_len=MAX_LEN,
                batch_size=BATCH_SIZE,
                granularity_s1="fine",
                granularity_s2="fine"
            )

            # Output width per head = number of label columns.
            task_classes = {
                "entity_framing": y_train_s1.shape[1],
                "narrative_classification": y_train_s2.shape[1]
            }

            # Initialise Model
            if task_type == "multi_task":
                model = MultiTaskTransformer(MODEL_NAME, task_classes).to(device)
            else:
                model = AdapterMultiTaskTransformer(
                    model_name=MODEL_NAME,
                    num_classes_dict=task_classes,
                    adapter_dim=128
                ).to(device)

            optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
            criterion = torch.nn.BCEWithLogitsLoss()

            # Train
            train_mtl_flat(
                model=model,
                loaders={
                    "narrative_classification": train_loader_s2,
                    "entity_framing": train_loader_s1
                },
                val_data={
                    "narrative_classification": (val_loader_s2, df_val_s2, y_val_s2, mlb_s2),
                    "entity_framing": (val_loader_s1, df_val_s1, y_val_s1, mlb_s1)
                },
                mlbs={
                    "narrative_classification": mlb_s2,
                    "entity_framing": mlb_s1
                },
                optimizer=optimizer,
                criterion=criterion,
                device=device,
                epochs=EPOCHS,
                train_domain=train_domains,
                test_domain=TEST_DOMAINS
            )

            # === Evaluate ===
            # NOTE(review): domain_list is the *training* domain list, yet both
            # "UA" and "CC" are read from the result below — confirm
            # evaluate_mtl_all_tasks always returns both.
            eval_results = evaluate_mtl_all_tasks(
                model=model,
                task_loaders={
                    "narrative_classification": test_loader_s2,
                    "entity_framing": test_loader_s1
                },
                task_dfs={
                    "narrative_classification": df_test_s2,
                    "entity_framing": df_test_s1
                },
                task_targets={
                    "narrative_classification": y_test_s2,
                    "entity_framing": y_test_s1
                },
                task_mlbs={
                    "narrative_classification": mlb_s2,
                    "entity_framing": mlb_s1
                },
                domain_list=train_domains,
                device=device
            )

            ef = eval_results["entity_framing"]
            nc = eval_results["narrative_classification"]

            # One flat row per run: EF metrics first, then NC metrics.
            results.append({
                "task_type": task_type,
                "model": "distilbert-base-uncased",
                "seed": seed,
                "train_domain": domain_str,

                "ef_micro_ua": ef["UA"]["micro"],
                "ef_macro_ua": ef["UA"]["macro"],
                "ef_exact_ua": ef["UA"]["exact"],
                "ef_micro_cc": ef["CC"]["micro"],
                "ef_macro_cc": ef["CC"]["macro"],
                "ef_exact_cc": ef["CC"]["exact"],

                "nc_micro_ua": nc["UA"]["micro"],
                "nc_macro_ua": nc["UA"]["macro"],
                "nc_exact_ua": nc["UA"]["exact"],
                "nc_micro_cc": nc["CC"]["micro"],
                "nc_macro_cc": nc["CC"]["macro"],
                "nc_exact_cc": nc["CC"]["exact"]
            })
            # SHAP: explain the first few test texts of each task. These calls
            # use the multi-task explain_shap signature (task name as the 4th
            # positional argument).
            texts_ef = df_test_s1["Translated_Text"].tolist()
            texts_nc = df_test_s2["Translated_Text"].tolist()

            shap_values_ef = explain_shap(model, tokenizer, texts_ef, "entity_framing", max_explain=5)
            shap_values_nc = explain_shap(model, tokenizer, texts_nc, "narrative_classification", max_explain=5)

            # Each task's plots are labelled with that task's own binarizer.
            save_shap_waterfall_plots(shap_values_nc, "shap_plots", "narrative_classification", seed, task_type, mlb_s2)
            save_shap_waterfall_plots(shap_values_ef, "shap_plots", "entity_framing", seed, task_type, mlb_s1)



    # Persist this config's rows into the folder created at the top of the cell.
    df_out = pd.DataFrame(results)
    out_path = f"new_augmented_seeds/baseline_mtl_more_seeds_augmented_70{domain_str}.csv"
    df_out.to_csv(out_path, index=False)
    print(f" Saved: {out_path}")


# SHAP functions

## MTL SHAP functions

In [None]:
import shap
import numpy as np
import torch
import os
import matplotlib.pyplot as plt

def truncate_texts(texts, tokenizer, max_len):
    """Round-trip each text through the tokenizer, capped at *max_len* tokens.

    Every text is encoded with truncation enabled and ``max_length=max_len``,
    then decoded again with special tokens skipped. Returns the decoded
    strings as a new list, in input order.
    """
    return [
        tokenizer.decode(
            tokenizer.encode(raw, truncation=True, max_length=max_len),
            skip_special_tokens=True,
        )
        for raw in texts
    ]

# Build SHAP Explainer for MTL
def get_shap_explainer(model, tokenizer, task_name=None, model_type="entity_framing"):
    """Wrap *model* in a SHAP permutation explainer that takes raw text.

    Reads the notebook globals ``MAX_LEN`` (tokenizer truncation length) and
    ``device`` (where the encoded batch is moved before the forward pass).

    Args:
        model: Torch module. Called as ``model(**encoded, task=task_name)``,
            or as ``model(**encoded)`` when ``task_name`` is None so that a
            model without a ``task`` argument can be explained as well.
        tokenizer: Used to encode the texts and also handed to
            ``shap.Explainer`` as the masker.
        task_name: Task head to explain, or None for a single-output model.
        model_type: Unused; kept so existing calls keep working.

    Returns:
        A ``shap.Explainer`` built with ``algorithm="permutation"`` whose
        prediction function returns the sigmoid of the model's logits as a
        numpy array.
    """
    def forward_func(inputs):
        model.eval()
        with torch.no_grad():
            if task_name is None:
                outputs = model(**inputs)
            else:
                outputs = model(**inputs, task=task_name)
            # Accept an object exposing .logits, a tuple/list whose first
            # item holds the logits, or the bare logits tensor.
            if hasattr(outputs, 'logits'):
                logits = outputs.logits
            elif isinstance(outputs, (tuple, list)):
                logits = outputs[0]
            else:
                logits = outputs
            return torch.sigmoid(logits).cpu().numpy()

    class Wrapper:
        def __call__(self, text):
            # Normalise whatever SHAP passes in to a plain list of strings.
            if isinstance(text, str):
                text = [text]
            elif isinstance(text, np.ndarray):
                text = text.tolist()
            elif isinstance(text, list) and text and isinstance(text[0], np.ndarray):
                text = [str(t) for t in text]

            encoded = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=MAX_LEN
            ).to(device)
            return forward_func(encoded)

    return shap.Explainer(Wrapper(), tokenizer, algorithm="permutation")

# run analysis
def explain_shap(model, tokenizer, texts, task_name=None, model_type="entity_framing", max_explain=5, visualize=False):
    """Compute SHAP values for the first *max_explain* texts.

    The selected texts are first passed through ``truncate_texts`` with the
    notebook global ``MAX_LEN`` and then explained with the explainer from
    ``get_shap_explainer`` (``max_evals=1500``, ``silent=True``).

    Args:
        model: Model to explain.
        tokenizer: Tokenizer matching *model*.
        texts: Sequence of input strings; only ``texts[:max_explain]`` is used.
        task_name: Task head to explain, or None to call the model without a
            ``task`` argument.
        model_type: Forwarded to ``get_shap_explainer``, which ignores it.
        max_explain: Number of leading texts to explain.
        visualize: When true, also render ``shap.plots.text`` for the result.

    Returns:
        The SHAP explanation object returned by the explainer.
    """
    texts = truncate_texts(texts[:max_explain], tokenizer, MAX_LEN)
    explainer = get_shap_explainer(model, tokenizer, task_name, model_type)
    shap_values = explainer(texts, max_evals=1500, silent=True)
    if visualize:
        shap.plots.text(shap_values)
    return shap_values


# save to PNG

def _filename_safe_label(label):
    """Return *label* as text that can be embedded in a single file name."""
    text = str(label).replace(" ", "_")
    # A path separator inside the label would point the file into a
    # sub-directory that does not exist, so the plot could never be written.
    for sep in (os.sep, os.altsep):
        if sep:
            text = text.replace(sep, "-")
    return text


def save_shap_waterfall_plots(shap_values, output_dir, task_name, seed, model_type, mlb, top_k=3):
    """Save one SHAP waterfall PNG per (sample, top label) into *output_dir*.

    For every sample the ``top_k`` outputs with the largest mean absolute
    SHAP value are selected and each is plotted on its own. This is
    best-effort: a failure for one label or one sample is printed and
    skipped so the remaining plots are still produced.

    Args:
        shap_values: Iterable of per-sample explanations; each item is read
            through ``.values`` (indexed as ``[:, label]``), ``.base_values``,
            ``.data`` and ``.feature_names``.
        output_dir: Target directory, created if missing.
        task_name: Used as the first part of each file name.
        seed: Embedded in each file name.
        model_type: Embedded in each file name.
        mlb: Object whose ``classes_[j]`` gives the name of output ``j``.
        top_k: Number of outputs plotted per sample; values <= 0 plot none.

    Returns:
        None. Files are named
        ``{task_name}_{model_type}_seed{seed}_sample{i}_label_{label}.png``.
    """
    os.makedirs(output_dir, exist_ok=True)

    for i, sv in enumerate(shap_values):
        try:
            mean_abs = np.abs(sv.values).mean(axis=0)
            # `[-0:]` would select every output, so handle top_k <= 0 explicitly.
            top_outputs = np.argsort(mean_abs)[-top_k:] if top_k > 0 else []

            for j in top_outputs:
                fig = None
                try:
                    # A 0-d array has `__len__` but cannot be indexed, so test
                    # the dimensionality instead of the attribute.
                    base = sv.base_values
                    if np.ndim(base) > 0:
                        base = base[j]

                    # Create SHAP Explanation for one label
                    single_sv = shap.Explanation(
                        values=sv.values[:, j],
                        base_values=base,
                        data=sv.data,
                        feature_names=sv.feature_names
                    )

                    # Get label name from mlb
                    label_name = _filename_safe_label(mlb.classes_[j])

                    # Generate plot and save. Use the returned object's
                    # `.figure`, falling back to the current pyplot figure.
                    handle = shap.plots.waterfall(single_sv, show=False)
                    fig = getattr(handle, "figure", None)
                    if fig is None:
                        fig = plt.gcf()
                    fname = os.path.join(
                        output_dir, f"{task_name}_{model_type}_seed{seed}_sample{i}_label_{label_name}.png"
                    )
                    fig.savefig(fname, bbox_inches="tight")

                except Exception as e:
                    print(f"  [!] Skipped sample {i}, label {j} → {e}")
                finally:
                    # Close the figure even when saving failed, so skipped
                    # plots do not accumulate open figures.
                    if fig is not None:
                        plt.close(fig)

        except Exception as e:
            print(f"[!] Error processing sample {i} → {e}")


## STL SHAP functions

In [None]:
import shap
import numpy as np
import torch
import os
import matplotlib.pyplot as plt
from shap import Explanation


def truncate_texts(texts, tokenizer, max_len):
    """Return copies of *texts* limited to what fits in *max_len* tokens.

    Each text is encoded (truncation on, ``max_length=max_len``) and the
    resulting token ids are decoded back to a string without special tokens.
    """
    def _clip(raw_text):
        token_ids = tokenizer.encode(raw_text, truncation=True, max_length=max_len)
        return tokenizer.decode(token_ids, skip_special_tokens=True)

    clipped = []
    for raw_text in texts:
        clipped.append(_clip(raw_text))
    return clipped

# Build SHAP Explainer for STL
def get_shap_explainer(model, tokenizer):
    """Build a SHAP permutation explainer around a single-task *model*.

    The explainer's prediction callable tokenizes a batch of texts (truncated
    and padded, ``max_length`` taken from the notebook global ``MAX_LEN``),
    moves it to the notebook global ``device``, runs the model without
    gradients and returns the sigmoid of its logits as a numpy array.
    *tokenizer* is also passed to ``shap.Explainer`` as the masker.
    """
    def _extract_logits(raw_output):
        # Object with .logits, tuple/list with logits first, or bare tensor.
        if hasattr(raw_output, 'logits'):
            return raw_output.logits
        if isinstance(raw_output, (tuple, list)):
            return raw_output[0]
        return raw_output

    def _as_text_list(batch):
        # Normalise whatever SHAP passes in to something the tokenizer accepts.
        if isinstance(batch, str):
            return [batch]
        if isinstance(batch, np.ndarray):
            return batch.tolist()
        if isinstance(batch, list) and isinstance(batch[0], np.ndarray):
            return [str(item) for item in batch]
        return batch

    class _TextPredictor:
        def __call__(self, text):
            batch = tokenizer(
                _as_text_list(text),
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=MAX_LEN
            ).to(device)
            model.eval()
            with torch.no_grad():
                probabilities = torch.sigmoid(_extract_logits(model(**batch)))
                return probabilities.cpu().numpy()

    predictor = _TextPredictor()
    return shap.Explainer(predictor, tokenizer, algorithm="permutation")

# run analysis
def explain_shap(model, tokenizer, texts, max_explain=5, visualize=False):
    """Compute SHAP values for the first *max_explain* texts of a single-task model.

    The selected texts are truncated via ``truncate_texts`` (using the
    notebook global ``MAX_LEN``) and explained with ``max_evals=1500`` and
    ``silent=True``. When *visualize* is true the result is also rendered
    with ``shap.plots.text``. Returns the SHAP explanation object.
    """
    selected = texts[:max_explain]
    clipped = truncate_texts(selected, tokenizer, MAX_LEN)
    explanation = get_shap_explainer(model, tokenizer)(clipped, max_evals=1500, silent=True)
    if visualize:
        shap.plots.text(explanation)
    return explanation

# save to PNG
def save_shap_waterfall_plots(shap_values, output_dir, task_name, seed, model_type, mlb, top_k=3):
    """Write a waterfall PNG for the ``top_k`` strongest outputs of each sample.

    For every sample the outputs are ranked by the mean absolute SHAP value
    over axis 0 of ``sv.values``; the ``top_k`` highest-ranked outputs each get
    one plot named
    ``{task_name}_{model_type}_seed{seed}_sample{i}_label_{label}.png``.
    Failures are reported with ``print`` and skipped (best effort), they never
    abort the remaining samples/labels.

    Args:
        shap_values: Iterable of per-sample explanations exposing ``values``,
            ``base_values``, ``data`` and ``feature_names``.
        output_dir: Directory for the PNG files; created if missing.
        task_name: Used as filename prefix.
        seed: Used in the filename.
        model_type: Used in the filename.
        mlb: Object whose ``classes_[j]`` gives the label name of output ``j``.
        top_k: Number of outputs to plot per sample; values <= 0 plot nothing.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)

    for i, sv in enumerate(shap_values):
        try:
            mean_abs = np.abs(sv.values).mean(axis=0)
            ranked = np.argsort(mean_abs)
            # `ranked[-0:]` would be the whole array, so handle top_k <= 0 explicitly.
            top_outputs = ranked[-top_k:] if top_k > 0 else ranked[:0]
        except Exception as e:
            print(f"[!] Error processing sample {i} → {e}")
            continue

        for j in top_outputs:
            fig = None
            try:
                # base_values may be a per-output sequence or a single scalar
                # (including a 0-d array, which cannot be indexed).
                base = np.asarray(sv.base_values)
                single_sv = Explanation(
                    values=sv.values[:, j],
                    base_values=base[j] if base.ndim > 0 else base.item(),
                    data=sv.data,
                    feature_names=sv.feature_names
                )

                # Path separators inside a label would turn the filename into a
                # (non-existent) sub-directory path, so neutralise them as well.
                label_name = (
                    mlb.classes_[j]
                    .replace(" ", "_")
                    .replace("/", "_")
                    .replace("\\", "_")
                )

                plot_obj = shap.plots.waterfall(single_sv, show=False)
                # Accept a Figure, an Axes (via .figure) or None as return value.
                fig = (
                    plot_obj
                    if isinstance(plot_obj, plt.Figure)
                    else getattr(plot_obj, "figure", None) or plt.gcf()
                )
                fname = os.path.join(
                    output_dir, f"{task_name}_{model_type}_seed{seed}_sample{i}_label_{label_name}.png"
                )
                fig.savefig(fname, bbox_inches="tight")

            except Exception as e:
                print(f"  [!] Skipped sample {i}, label {j} → {e}")

            finally:
                # Close the figure even when saving failed, otherwise figures
                # pile up across samples.
                if fig is not None:
                    plt.close(fig)


## Hyperparameters

In [None]:
import sys
sys.path.append(".")  # Ensure current directory is in path

from merged_optuna_script import objective_stl, objective_mtl, objective_mtl_adapter
import optuna
import pandas as pd

# === Fast Experiment Sweep ===
# Each entry is one Optuna study: `setup` selects the objective function,
# `task` is only used by the single-task objective, `encoder` is the model name.
EXPERIMENTS = [
    {"setup": "stl", "task": "entity_framing", "encoder": "roberta-base"},
    {"setup": "stl", "task": "narrative_classification", "encoder": "roberta-base"},
    {"setup": "mtl", "task": None, "encoder": "roberta-base"},
    {"setup": "stl", "task": "entity_framing", "encoder": "distilbert-base-uncased"},
    {"setup": "stl", "task": "narrative_classification", "encoder": "distilbert-base-uncased"},
    {"setup": "mtl", "task": None, "encoder": "distilbert-base-uncased"},
    {"setup": "mtl_adapter", "task": None, "encoder": "roberta-base"},
    {"setup": "mtl_adapter", "task": None, "encoder": "distilbert-base-uncased"},
]

# Trials per study and output location, defined once so the log line below
# always names the file that was actually written.
N_TRIALS = 3
RESULTS_CSV = "optuna_quick_sweep_results_adapter.csv"

all_results = []

for config in EXPERIMENTS:
    setup = config["setup"]
    task = config["task"]
    encoder = config["encoder"]

    print(f"\n Starting Optuna Study → Setup: {setup.upper()} | Task: {task or 'MTL'} | Encoder: {encoder}")

    study = optuna.create_study(direction="maximize")

    # The lambdas close over `task`/`encoder`; they are consumed by
    # `study.optimize` within this iteration, before the names are rebound.
    if setup == "stl":
        study.optimize(lambda trial: objective_stl(trial, task_type=task, model_name=encoder), n_trials=N_TRIALS)
    elif setup == "mtl":
        study.optimize(lambda trial: objective_mtl(trial, model_name=encoder), n_trials=N_TRIALS)
    elif setup == "mtl_adapter":
        study.optimize(lambda trial: objective_mtl_adapter(trial, model_name=encoder), n_trials=N_TRIALS)
    else:
        raise ValueError(f"Unknown setup: {setup}")

    best_params = study.best_trial.params
    best_score = study.best_trial.value

    print(f"\n Best hyperparameters for {setup.upper()} | {task or 'MTL'} | {encoder}:")
    for k, v in best_params.items():
        print(f"  {k}: {v}")
    print(f"  score: {best_score:.4f}")

    # One row per study; the tuned hyperparameters become additional columns.
    all_results.append({
        "setup": setup,
        "task": task or "mtl",
        "encoder": encoder,
        "score": best_score,
        **best_params
    })

# === Save results ===
df = pd.DataFrame(all_results)
df.to_csv(RESULTS_CSV, index=False)
print(f"\n Saved results to {RESULTS_CSV}")


# Old Experiments [obsolete]

## Control Panel

In [None]:
# Choose a task for the pipeline below:
# "narrative_classification", "entity_framing", "multi_task" or "multi_task_adapter".
# Any value other than the two multi-task names is handled as a single-task run by the cells below.
TASK = "entity_framing" # alternatives: "narrative_classification", "multi_task", "multi_task_adapter"

# Select domains for training and testing: ["UA"], ["CC"] or ["UA", "CC"].
TRAIN_DOMAIN = ["UA","CC"]
TEST_DOMAIN = ["UA", "CC"] # The test data comes from a separate dataset.
# The test data is always the same regardless of the domain we choose to train on. This is for consistency.

# Select languages for training and testing: "ALL", "EN", "HI", "BG", "RU", "PT".
# NOTE(review): the dataset-assembly cells below only forward TRAIN_LANGUAGES (multi-task path);
# TEST_LANGUAGES is not referenced there — confirm whether it is used elsewhere.
TRAIN_LANGUAGES = ["ALL"]
TEST_LANGUAGES = ["ALL"]

# Taxonomy depth
TAXONOMY_DEPTH = "COARSE" # "COARSE" or "FINE"

# Classifier complexity
CLASSIFIER_COMPLEXITY = "FLAT" # "FLAT" or "HIERARCHICAL"

# Training hyperparameters
MODEL_NAME = "distilbert-base-uncased" # alternatives: "roberta-base", "FacebookAI/roberta-base"
MAX_LEN = 512
BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 3e-5
# Checkpoint filename for the single-task model. It encodes only the task and the
# train/test domains, so runs that differ in other settings reuse the same file.
MODEL_PATH = f"{TASK}_{'-'.join(TRAIN_DOMAIN)}_to_{'-'.join(TEST_DOMAIN)}.pt" # used to save the model later

# Model-specific tokenizers kept for reference; AutoTokenizer resolves the class from MODEL_NAME.
#tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
#tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


# Debug mode -- reduced samples
DEBUG_MODE = False

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## UTILS Assemble Dataset

In [None]:
# Assemble the datasets/loaders selected in the Control Panel.
#
# Single-task runs define: df_train/df_val/df_test, y_train/y_val/y_test, mlb,
# TEXT_COL, LABEL_COL, the three DataLoaders and num_classes (plus
# child_to_parent / label_to_index for FINE + HIERARCHICAL).
# Multi-task runs define the *_s1 / *_s2 counterparts and num_classes_dict
# (plus child_to_parent_map / label_to_index_map for FINE + HIERARCHICAL).

# Validate the switches first. Without this an unknown value falls through
# every branch below and later cells either hit a NameError or silently reuse
# variables left over from a previous run of the notebook.
if TAXONOMY_DEPTH not in ('FINE', 'COARSE'):
    raise ValueError(f"Unknown TAXONOMY_DEPTH: {TAXONOMY_DEPTH!r} (expected 'FINE' or 'COARSE')")
# CLASSIFIER_COMPLEXITY is only consulted by this cell for the FINE taxonomy.
if TAXONOMY_DEPTH == 'FINE' and CLASSIFIER_COMPLEXITY not in ('FLAT', 'HIERARCHICAL'):
    raise ValueError(
        f"Unknown CLASSIFIER_COMPLEXITY: {CLASSIFIER_COMPLEXITY!r} (expected 'FLAT' or 'HIERARCHICAL')"
    )

if TASK not in ("multi_task", "multi_task_adapter"):
    # ---- Single-task data ----
    if TAXONOMY_DEPTH == 'FINE':
        if CLASSIFIER_COMPLEXITY == 'FLAT':
            df_train, df_val, df_test, y_train, y_val, y_test, mlb, TEXT_COL, LABEL_COL = prepare_data_STL_fine(
                TASK,
                TRAIN_DOMAIN,
                TEST_DOMAIN,
            )
        else:  # 'HIERARCHICAL'
            # The hierarchical loader additionally returns the label hierarchy mappings.
            df_train, df_val, df_test, y_train, y_val, y_test, mlb, TEXT_COL, LABEL_COL, child_to_parent, label_to_index = prepare_data_STL_hierarchical(
                TASK,
                TRAIN_DOMAIN,
                TEST_DOMAIN,
            )

    else:  # 'COARSE'
        df_train, df_val, df_test, y_train, y_val, y_test, mlb, TEXT_COL, LABEL_COL = prepare_data_STL_coarse(
                TASK,
                TRAIN_DOMAIN,
                TEST_DOMAIN,
            )

    train_dataset = MultiLabelDataset(df_train[TEXT_COL].tolist(), y_train, tokenizer, MAX_LEN)
    val_dataset = MultiLabelDataset(df_val[TEXT_COL].tolist(), y_val, tokenizer, MAX_LEN)
    test_dataset = MultiLabelDataset(df_test[TEXT_COL].tolist(), y_test, tokenizer, MAX_LEN)
    # Only the training loader is shuffled; validation/test keep dataframe order.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    num_classes = len(mlb.classes_)


else:
    # ---- Multi-task data ("multi_task" or "multi_task_adapter") ----
    # The MTL loaders build their own DataLoaders from model_name/max_len/batch_size.

    if TAXONOMY_DEPTH == 'FINE':

        if CLASSIFIER_COMPLEXITY == 'FLAT':
            (
                df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
                df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
                train_loader_s1, val_loader_s1, test_loader_s1,
                train_loader_s2, val_loader_s2, test_loader_s2,
                num_classes_dict
            ) = prepare_data_MTL_fine_flat(
                TASK,
                train_domains=TRAIN_DOMAIN,
                test_domains=TEST_DOMAIN,
                train_languages=TRAIN_LANGUAGES,
                model_name=MODEL_NAME,
                max_len=MAX_LEN,
                batch_size=BATCH_SIZE
            )

        else:  # 'HIERARCHICAL'
            (
                df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
                df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
                train_loader_s1, val_loader_s1, test_loader_s1,
                train_loader_s2, val_loader_s2, test_loader_s2,
                num_classes_dict,
                child_to_parent_map,
                label_to_index_map
            ) = prepare_data_MTL_hierarchical(
                TASK,
                train_domains=TRAIN_DOMAIN,
                test_domains=TEST_DOMAIN,
                train_languages=TRAIN_LANGUAGES,
                model_name=MODEL_NAME,
                max_len=MAX_LEN,
                batch_size=BATCH_SIZE
            )

    else:  # 'COARSE'
        (
            df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
            df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
            train_loader_s1, val_loader_s1, test_loader_s1,
            train_loader_s2, val_loader_s2, test_loader_s2,
            num_classes_dict
        ) = prepare_data_MTL_coarse(
            TASK,
            train_domains=TRAIN_DOMAIN,
            test_domains=TEST_DOMAIN,
            train_languages=TRAIN_LANGUAGES,
            model_name=MODEL_NAME,
            max_len=MAX_LEN,
            batch_size=BATCH_SIZE
        )




In [None]:
# The four granularity combinations for prepare_data_MTL_mixed; the last two
# positional arguments of each call are the per-subtask granularities.
# NOTE(review): `...` is Python's literal Ellipsis object, so every line below is
# a real call that passes Ellipsis as its first argument. This looks like a
# placeholder for the task/domain/model arguments — confirm this cell is a
# sketch and is not meant to be executed as-is.

# Fine-Fine
prepare_data_MTL_mixed(..., "fine", "fine")

# Fine-Coarse
prepare_data_MTL_mixed(..., "fine", "coarse")

# Coarse-Fine
prepare_data_MTL_mixed(..., "coarse", "fine")

# Coarse-Coarse
prepare_data_MTL_mixed(..., "coarse", "coarse")


In [None]:
# Build the multi-task data with a different label granularity per subtask
# (here: coarse for s1, fine for s2). The training cell maps the *_s1 variables
# to "entity_framing" and the *_s2 variables to "narrative_classification".
# This cell has no TASK/TAXONOMY_DEPTH guard: running it rebinds the same
# *_s1 / *_s2 / num_classes_dict names that the dataset-assembly cell defines.
(
    # subtask 1: dataframes, label matrices and label binarizer
    df_train_s1, df_val_s1, df_test_s1, y_train_s1, y_val_s1, y_test_s1, mlb_s1,
    # subtask 2: dataframes, label matrices and label binarizer
    df_train_s2, df_val_s2, df_test_s2, y_train_s2, y_val_s2, y_test_s2, mlb_s2,
    # DataLoaders per subtask
    train_loader_s1, val_loader_s1, test_loader_s1,
    train_loader_s2, val_loader_s2, test_loader_s2,
    num_classes_dict
) = prepare_data_MTL_mixed(
    TASK,
    train_domains=TRAIN_DOMAIN,
    test_domains=TEST_DOMAIN,
    train_languages=TRAIN_LANGUAGES,
    model_name=MODEL_NAME,
    max_len=MAX_LEN,
    batch_size=BATCH_SIZE,
    granularity_s1="coarse",
    granularity_s2="fine"
)

## Training Loop

In [None]:
# Train the model selected in the Control Panel and leave the result in
# `trained_model` (best checkpoint(s) re-loaded from disk after training).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fail fast on unsupported switch combinations. Previously these skipped the
# training call entirely and then loaded whatever checkpoint files happened to
# be on disk from an earlier run (or left `trained_model` undefined/stale).
if CLASSIFIER_COMPLEXITY not in ('FLAT', 'HIERARCHICAL'):
    raise ValueError(
        f"Unknown CLASSIFIER_COMPLEXITY: {CLASSIFIER_COMPLEXITY!r} (expected 'FLAT' or 'HIERARCHICAL')"
    )
if TASK == "multi_task_adapter" and CLASSIFIER_COMPLEXITY != 'FLAT':
    raise ValueError("TASK 'multi_task_adapter' is only wired up for CLASSIFIER_COMPLEXITY == 'FLAT'")

if TASK not in ("multi_task", "multi_task_adapter"):
    print("\n>>> Running Single-Task (no adapter) Model <<<")
    model = TransformerClassifier(MODEL_NAME, num_classes).to(device)

    if CLASSIFIER_COMPLEXITY == 'FLAT':
        trained_model = train_single_task_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            y_val=y_val,
            MODEL_PATH=MODEL_PATH,
            LEARNING_RATE=LEARNING_RATE,
            EPOCHS=EPOCHS,
            device=device,
            predict_proba=eval_util.predict_proba,
            evaluate_threshold_sweep=eval_util.evaluate_threshold_sweep
        )

    else:  # 'HIERARCHICAL'
        # NOTE(review): unlike train_single_task_model, no `device` is passed
        # here — confirm train_hierarchical_classifier selects its own device.
        trained_model = train_hierarchical_classifier(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            y_val=y_val,
            MODEL_PATH=MODEL_PATH,
            child_to_parent=child_to_parent,
            label_to_index=label_to_index,
            predict_proba=eval_util.predict_proba,
            evaluate_threshold_sweep=eval_util.evaluate_threshold_sweep,
            LEARNING_RATE=LEARNING_RATE,
            EPOCHS=EPOCHS
        )

    # Restore the checkpoint written to MODEL_PATH during training.
    # map_location keeps the load working when the checkpoint was written from
    # a different device than the one available now.
    trained_model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    trained_model.to(device)

else:
    use_adapter = TASK == "multi_task_adapter"
    if use_adapter:
        print("\n>>> Running Multi-Task Adapter Model <<<")
    else:
        print("\n>>> Running Multi-Task (no adapter) Model <<<")

    # Output size per task head, taken from the width of each label matrix.
    task_classes = {
        "narrative_classification": y_train_s2.shape[1],
        "entity_framing": y_train_s1.shape[1]
    }

    if use_adapter:
        model = AdapterMultiTaskTransformer(
            model_name=MODEL_NAME,
            num_classes_dict=task_classes,
            adapter_dim=128
        ).to(device)
    else:
        model = MultiTaskTransformer(MODEL_NAME, task_classes).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()

    # Shared by the flat and hierarchical trainers.
    mtl_train_loaders = {
        "narrative_classification": train_loader_s2,
        "entity_framing": train_loader_s1
    }
    mtl_val_data = {
        "narrative_classification": (val_loader_s2, df_val_s2, y_val_s2, mlb_s2),
        "entity_framing": (val_loader_s1, df_val_s1, y_val_s1, mlb_s1)
    }

    if CLASSIFIER_COMPLEXITY == 'FLAT':
        train_mtl_flat(
            model=model,
            loaders=mtl_train_loaders,
            val_data=mtl_val_data,
            mlbs={
                "narrative_classification": mlb_s2,
                "entity_framing": mlb_s1
            },
            optimizer=optimizer,
            criterion=criterion,
            device=device,
            epochs=EPOCHS,
            train_domain=TRAIN_DOMAIN,
            test_domain=TEST_DOMAIN
        )

    else:  # 'HIERARCHICAL' (non-adapter model only, enforced above)
        train_mtl_hierarchical(
            model=model,
            loaders=mtl_train_loaders,
            val_data=mtl_val_data,
            child_to_parent_map=child_to_parent_map,
            label_to_index_map=label_to_index_map,
            optimizer=optimizer,
            criterion=criterion,
            device=device,
            epochs=EPOCHS,
            train_domain=TRAIN_DOMAIN,
            test_domain=TEST_DOMAIN
        )

    # Re-load best saved model per task.
    # NOTE(review): the second checkpoint is loaded on top of the first; with
    # strict=False every key present in both files ends up with the
    # narrative_classification values — confirm that is the intended result.
    # NOTE(review): both multi-task variants read the same file names, so the
    # files on disk are whichever variant trained last.
    mtl_ckpt_suffix = f"_MTL_{'-'.join(TRAIN_DOMAIN)}_to_{'-'.join(TEST_DOMAIN)}.pt"
    for ckpt_task in ("entity_framing", "narrative_classification"):
        model.load_state_dict(
            torch.load(f"{ckpt_task}{mtl_ckpt_suffix}", map_location=device),
            strict=False
        )
    trained_model = model


## Evaluation

In [None]:
# Evaluate `trained_model` with the evaluator matching TASK and
# CLASSIFIER_COMPLEXITY. Results are kept in:
#   results_domain / results_class   (single-task, flat)
#   results_hierarchical             (single-task, hierarchical)
#   results_mtl                      (multi-task, flat)
#   results_mtl_hierarchical         (multi-task, hierarchical)

# An unknown value used to print the "Evaluating ..." banner and then skip
# every evaluator without any message.
if CLASSIFIER_COMPLEXITY not in ('FLAT', 'HIERARCHICAL'):
    raise ValueError(
        f"Unknown CLASSIFIER_COMPLEXITY: {CLASSIFIER_COMPLEXITY!r} (expected 'FLAT' or 'HIERARCHICAL')"
    )

# ==========================
# EVALUATION (Single Task)
# ==========================
if TASK not in ("multi_task", "multi_task_adapter"):
    print(f"\nEvaluating Single-Task Model ({TASK})")

    if CLASSIFIER_COMPLEXITY == 'FLAT':
        # Dataframes are re-indexed from 0 for each call (fresh copy per evaluator).
        results_domain = eval_util.evaluate_per_domain_flat(
            trained_model,
            val_loader, df_val.reset_index(drop=True),
            test_loader, df_test.reset_index(drop=True),
            mlb,
            device=device
        )

        results_class = eval_util.evaluate_per_class_flat(
            trained_model,
            test_loader,
            df_test.reset_index(drop=True),
            mlb,
            device=device,
            label="TEST"
        )

    else:  # 'HIERARCHICAL'
        results_hierarchical = eval_util.evaluate_and_compare_hierarchical(
            model=trained_model,
            val_loader=val_loader,
            val_df=df_val.reset_index(drop=True),
            val_targets=y_val,
            test_loader=test_loader,
            test_df=df_test.reset_index(drop=True),
            test_targets=y_test,
            mlb=mlb,
            device=device,
            child_to_parent=child_to_parent,
            label_to_index=label_to_index
        )

# ==========================
# EVALUATION (Multi-Task)
# ==========================
else:
    print(f"\nEvaluating Multi-Task Model ({TASK})")

    # Per-task test artefacts, shared by the flat and hierarchical evaluators.
    task_loaders = {
        "narrative_classification": test_loader_s2,
        "entity_framing": test_loader_s1,
    }

    task_dfs = {
        "narrative_classification": df_test_s2,
        "entity_framing": df_test_s1,
    }

    task_targets = {
        "narrative_classification": y_test_s2,
        "entity_framing": y_test_s1,
    }

    task_mlbs = {
        "narrative_classification": mlb_s2,
        "entity_framing": mlb_s1,
    }

    if CLASSIFIER_COMPLEXITY == 'FLAT':
        # NOTE(review): TRAIN_DOMAIN (not TEST_DOMAIN) is passed as domain_list
        # although the loaders are the test loaders — confirm this is intended.
        results_mtl = eval_util.evaluate_mtl_all_tasks(
            model=trained_model,
            task_loaders=task_loaders,
            task_dfs=task_dfs,
            task_targets=task_targets,
            task_mlbs=task_mlbs,
            domain_list=TRAIN_DOMAIN,
            device=device,
            load_from_disk=False
        )

    else:  # 'HIERARCHICAL'
        # Keep the return value, like the other three evaluation paths do.
        results_mtl_hierarchical = eval_util.evaluate_mtl_hierarchical_all_tasks(
            model=trained_model,
            test_loaders=task_loaders,
            df_tests=task_dfs,
            y_tests=task_targets,
            mlbs=task_mlbs,
            child_to_parent_map=child_to_parent_map,
            label_to_index_map=label_to_index_map,
            device=device
        )
