In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import load_dataset, Dataset
import torch
import numpy as np
import pandas as pd


In [None]:
# Mount Google Drive at /content/drive. Later cells read the fine-tuned
# checkpoint from this mount and write the comparison CSV back to it.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
from google.colab import files
import pandas as pd

# Interactive step: opens Colab's upload widget and blocks until it returns.
uploaded = files.upload()

# NOTE(review): the CSV below is read by this hard-coded name, not from
# `uploaded`. If the uploaded file ends up saved under a different name, this
# reads whatever already sits at that path (or fails) - confirm the names match.
test_file  = "ecthr_b_test_preprocessed_WA.csv"

test_df  = pd.read_csv(test_file)

# Quick sanity check on the number of rows and columns that were loaded.
print("Test shape:", test_df.shape)



Saving ecthr_b_test_preprocessed_WA.csv to ecthr_b_test_preprocessed_WA.csv
Test shape: (1000, 2)


In [None]:
# Work on a copy so the frame loaded from the CSV stays untouched; the derived
# multi-hot column is added to the copy only.
test_df_copy  = test_df.copy()

In [None]:
# Re-check the shape of the original frame (same print as in the loading cell).
print("Test shape:", test_df.shape)


Test shape: (1000, 2)


In [None]:
import numpy as np
import ast

NUM_LABELS = 10  # length of every multi-hot vector; valid label ids are 0..NUM_LABELS-1


def labels_to_multihot_safe(label_list_str):
    """Convert one row's labels into a multi-hot float vector of length NUM_LABELS.

    Accepts either a string or an already-parsed sequence:

    * strings may be numpy-style ("[1 3]", "[ 1  3]"), list-style ("[1, 3]"),
      empty ("[]") or a bare value ("3"); elements may be separated by any mix
      of commas and whitespace;
    * lists, tuples and numpy arrays are iterated directly.

    Bad input is reported with print() instead of being raised: out-of-range
    ids and elements that cannot be converted with int() are skipped, and a
    string that cannot be parsed yields an all-zero vector.

    Args:
        label_list_str: label string or sequence of label ids for one example.

    Returns:
        np.ndarray of shape (NUM_LABELS,), dtype float, with 1.0 at every
        valid label id and 0.0 elsewhere.
    """
    vec = np.zeros(NUM_LABELS, dtype=float)
    try:
        if isinstance(label_list_str, str):
            # Normalise the separators before parsing. Blindly replacing each
            # space with a comma breaks on "[1, 3]" (-> "[1,,3]") and on padded
            # numpy output such as "[ 1  3]", which used to become all zeros.
            inner = label_list_str.strip()
            if inner.startswith(('[', '(')) and inner.endswith((']', ')')):
                inner = inner[1:-1]
            tokens = inner.replace(',', ' ').split()
            label_list = ast.literal_eval('[' + ','.join(tokens) + ']')
        else:
            label_list = label_list_str

        if isinstance(label_list, (list, tuple, np.ndarray)):
            for label in label_list:
                try:
                    label_int = int(label)
                except (TypeError, ValueError):
                    # TypeError covers None / nested containers, which would
                    # otherwise abort the whole DataFrame.apply call.
                    print(f"Warning: invalid label {label}")
                    continue
                if 0 <= label_int < NUM_LABELS:
                    vec[label_int] = 1.0
                else:
                    print(f"Warning: label {label_int} out of range")
        else:
            print(f"Warning: label_list is not a list after evaluation -> {label_list}")
    except (SyntaxError, TypeError, ValueError) as e:
        print(f"Error evaluating label string: {label_list_str} - {e}")

    return vec

# Encode every row's label entry as a multi-hot vector in a new column.
test_df_copy['labels_multihot'] = test_df_copy['labels'].apply(labels_to_multihot_safe)

# Spot-check the first five rows: raw label entry next to its encoding.
for row_idx in range(5):
    original_entry = test_df_copy['labels'].iloc[row_idx]
    encoded_entry = test_df_copy['labels_multihot'].iloc[row_idx]
    print(f"Row {row_idx} original labels: {original_entry}")
    print(f"Row {row_idx} multi-hot: {encoded_entry}")


Row 0 original labels: [6]
Row 0 multi-hot: [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Row 1 original labels: [4]
Row 1 multi-hot: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
Row 2 original labels: [3]
Row 2 multi-hot: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
Row 3 original labels: [3]
Row 3 multi-hot: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
Row 4 original labels: [1 3]
Row 4 multi-hot: [0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]


In [None]:

from datasets import Dataset
from transformers import AutoTokenizer

# Keep only the raw text and the multi-hot targets for evaluation.
eval_frame = test_df_copy[["text", "labels_multihot"]]
test_dataset = Dataset.from_pandas(eval_frame)

# The tokenizer comes from the same Drive directory the fine-tuned model is
# loaded from further down.
model_path_ft = "/content/drive/MyDrive/ecthr_b_finetuned_model"
tokenizer = AutoTokenizer.from_pretrained(model_path_ft)

def tokenize(batch):
    """Tokenize one batch of examples for the sequence classifier.

    Args:
        batch: mapping with a "text" entry holding the raw documents, as
            passed in by ``Dataset.map(tokenize, batched=True)``.

    Returns:
        The tokenizer output for the batch, with every sequence truncated or
        padded to exactly 512 tokens.
    """
    # padding="max_length" instead of padding=True: the latter pads only to the
    # longest text in *this* call, so row length would depend on how map()
    # happens to chunk the dataset and could differ between chunks. A fixed
    # length keeps all rows stackable into one tensor batch.
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)

# Tokenize in batches, then expose the multi-hot targets under the name "labels".
test_dataset = (
    test_dataset
    .map(tokenize, batched=True)
    .rename_column("labels_multihot", "labels")
)

# Hand back torch tensors for exactly these three columns.
tensor_columns = ["input_ids", "attention_mask", "labels"]
test_dataset.set_format(type="torch", columns=tensor_columns)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import torch

def compute_metrics(pred):
    """Score multi-label predictions against the reference labels.

    Args:
        pred: a ``(logits, labels)`` pair. The logits are passed through a
            sigmoid and thresholded at 0.5; the labels are cast to int.

    Returns:
        dict with the keys "accuracy", "precision_micro", "recall_micro",
        "f1_micro" and "f1_macro". Precision, recall and F1 are computed with
        ``zero_division=0``.
    """
    raw_scores, gold = pred

    # Independent per-label probabilities, then a hard 0/1 decision per label.
    probabilities = torch.sigmoid(torch.tensor(raw_scores)).numpy()
    y_pred = (probabilities >= 0.5).astype(int)
    y_true = gold.astype(int)

    metrics = {"accuracy": accuracy_score(y_true, y_pred)}
    metrics["precision_micro"] = precision_score(y_true, y_pred, average="micro", zero_division=0)
    metrics["recall_micro"] = recall_score(y_true, y_pred, average="micro", zero_division=0)
    metrics["f1_micro"] = f1_score(y_true, y_pred, average="micro", zero_division=0)
    metrics["f1_macro"] = f1_score(y_true, y_pred, average="macro", zero_division=0)
    return metrics

In [None]:

# Fine-tuned Model & Evaluate

from transformers import AutoModelForSequenceClassification, Trainer

# Load the fine-tuned checkpoint from Drive and wrap it in a Trainer that is
# used here for prediction only.
ft_model = AutoModelForSequenceClassification.from_pretrained(
    model_path_ft,
    num_labels=NUM_LABELS,
)
ft_trainer = Trainer(model=ft_model)

# Run the test set through the model, then score logits against the labels.
ft_preds = ft_trainer.predict(test_dataset)
ft_logits, ft_gold = ft_preds.predictions, ft_preds.label_ids
ft_results = compute_metrics((ft_logits, ft_gold))

print("Fine-tuned results:", ft_results)


Fine-tuned results: {'accuracy': 0.521, 'precision_micro': 0.7863829787234042, 'recall_micro': 0.6439024390243903, 'f1_micro': 0.7080459770114943, 'f1_macro': 0.635585521824499}


In [None]:
#  pretrained model directly from HuggingFace
# Baseline = stock checkpoint with an untrained classification head. That head
# is initialised randomly (see the "newly initialized" warning this cell
# prints), so seed the RNGs before building the model; without a seed the
# baseline metrics change from run to run.
from transformers import set_seed

BASELINE_SEED = 42
set_seed(BASELINE_SEED)

# NOTE(review): test_dataset was tokenized with the tokenizer stored alongside
# the fine-tuned checkpoint. Confirm it shares bert-base-uncased's vocabulary,
# otherwise the baseline is fed mismatched input ids.
base_model_name = "bert-base-uncased"
base_model = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=NUM_LABELS)
base_trainer = Trainer(model=base_model)

# Same prediction + scoring path as the fine-tuned model, for a like-for-like comparison.
base_preds = base_trainer.predict(test_dataset)
base_results = compute_metrics((base_preds.predictions, base_preds.label_ids))

print("Baseline results:", base_results)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Baseline results: {'accuracy': 0.003, 'precision_micro': 0.2448566610455312, 'recall_micro': 0.5059233449477352, 'f1_micro': 0.33, 'f1_macro': 0.12401490266200606}


In [None]:
# =========================
# 7. Compare & Save Results
# =========================
# One row per model, one column per metric returned by compute_metrics.
comparison = pd.DataFrame([ft_results, base_results], index=["Fine-tuned", "Baseline"])
print(comparison)

# Save to Drive. Drive is already mounted by the time this cell runs (the
# tokenizer and fine-tuned model were loaded from /content/drive earlier), so
# the second drive.mount() call that used to sit here was dropped: it only
# printed the "Drive already mounted" notice.
comparison.to_csv("/content/drive/MyDrive/ecthr_b_baseline_vs_ft_augnito.csv")
print("Comparison saved to Drive!")


            accuracy  precision_micro  recall_micro  f1_micro  f1_macro
Fine-tuned     0.521         0.786383      0.643902  0.708046  0.635586
Baseline       0.003         0.244857      0.505923  0.330000  0.124015
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Comparison saved to Drive!
