<a href="https://colab.research.google.com/github/hassan09070/semeval/blob/main/Task1_semevel_bert_multilingual.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Installing libraries
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


In [1]:
#import data
import pandas as pd
from datasets import Dataset

#task config
subtask = "subtask_1"#don't change
task = "task1"#don't change
langs = ["eng","zho"] #chang the language you want to test
domains = ["restaurant","laptop"] #change what domain you want to test
train_dfs = []

for lang in langs:
    for domain in domains:
        train_url = f"https://raw.githubusercontent.com/DimABSA/DimABSA2026/refs/heads/main/task-dataset/track_a/{subtask}/{lang}/{lang}_{domain}_train_alltasks.jsonl"

        # Read directly into DataFrames
        train_df = pd.read_json(train_url, lines=True)

        # Store for later concatenation
        train_dfs.append(train_df)

# Combine all into single DataFrames
df = pd.concat(train_dfs, ignore_index=True)

rows = []
for _, row in df.iterrows():
    text = row["Text"]
    for quad in row["Quadruplet"]:
        aspect = quad["Aspect"]
        va = quad["VA"]
        valence, arousal = map(float, va.split("#"))
        rows.append({
            "Text": text,
            "Aspect": aspect,
            "VA": va,
            "Valence": valence,
            "Arousal": arousal
        })

raw_datasets = Dataset.from_pandas(pd.DataFrame(rows))


In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

checkpoint = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(batch):
    tokenized = tokenizer(
        list(batch["Aspect"]),
        list(batch["Text"]),
        truncation=True
    )
    valence = [float(v.split("#")[0]) for v in batch["VA"]]
    arousal = [float(v.split("#")[1]) for v in batch["VA"]]
    tokenized["labels"] = list(zip(valence, arousal))
    return tokenized

# Batched map works now
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# Data collator for dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

print(tokenized_datasets[0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Map:   0%|          | 0/24457 [00:00<?, ? examples/s]

{'Text': "ca n ' t wait wait for my next visit .", 'Aspect': 'NULL', 'VA': '6.75#6.38', 'Valence': 6.75, 'Arousal': 6.38, 'input_ids': [101, 151, 100673, 11369, 102, 11135, 182, 112, 188, 83279, 83279, 10142, 15127, 13451, 27541, 119, 102], 'token_type_ids': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': [6.75, 6.38]}


In [3]:
# Drop raw columns we don't need for training
tokenized_datasets = tokenized_datasets.remove_columns(
    ["Text", "Aspect", "VA", "Valence", "Arousal"]
)

# Convert dataset to PyTorch tensors
tokenized_datasets.set_format("torch")

# Check final columns
print(tokenized_datasets.column_names)

['input_ids', 'token_type_ids', 'attention_mask', 'labels']


In [4]:
# Split into train/validation/test
# First: train + temp (where temp will be split further into val + test)
dataset_splits = tokenized_datasets.train_test_split(test_size=0.2, seed=42)

train_dataset = dataset_splits["train"]
temp_dataset = dataset_splits["test"]

# Now split temp into validation and test (50/50 → 10% val, 10% test overall)
temp_splits = temp_dataset.train_test_split(test_size=0.5, seed=42)

eval_dataset = temp_splits["train"]   # validation set
test_dataset = temp_splits["test"]    # final test set

# Make DataLoaders
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    train_dataset, shuffle=True, batch_size=8, collate_fn=data_collator
)
eval_dataloader = DataLoader(
    eval_dataset, batch_size=8, collate_fn=data_collator
)
test_dataloader = DataLoader(
    test_dataset, batch_size=8, collate_fn=data_collator
)

In [5]:
for batch in train_dataloader:
    break
{k: v.shape for k, v in batch.items()}

{'input_ids': torch.Size([8, 25]),
 'token_type_ids': torch.Size([8, 25]),
 'attention_mask': torch.Size([8, 25]),
 'labels': torch.Size([8, 2])}

In [6]:
from transformers import AutoConfig, AutoModelForSequenceClassification

# Tell HF to treat this as regression with 2 outputs (Valence, Arousal)
config = AutoConfig.from_pretrained(checkpoint, num_labels=2, problem_type="regression")
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, config=config)


model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Pass the batch through the model to get outputs (predictions + loss)
outputs = model(**batch)

# Print:
#  - outputs.loss → the current Mean Squared Error (MSE) loss
#  - outputs.logits.shape → the shape of the predictions tensor (batch_size × 2)
print(outputs.loss, outputs.logits.shape)

tensor(39.3039, grad_fn=<MseLossBackward0>) torch.Size([8, 2])


In [8]:
from torch.optim import AdamW

# Define optimizer that updates the model's parameters
optimizer = AdamW(model.parameters(), lr=1e-5)
# ⚠️ The only thing you might tune later:
# Learning rate (lr) → try 5e-5, 3e-5, or 1e-5 to see which gives better results.
# Weight decay → if overfitting, you can add e.g. weight_decay=0.01.

In [9]:
from transformers import get_scheduler

# Train for more epochs since dataset is small
num_epochs = 5   # you can try 5, 8, or even 10

# Total number of training steps
num_training_steps = num_epochs * len(train_dataloader)

# Warmup = 10% of training steps
num_warmup_steps = int(0.1 * num_training_steps)

# Define learning rate scheduler
lr_scheduler = get_scheduler(
    "linear",                # linear decay schedule
    optimizer=optimizer,
    num_warmup_steps=num_warmup_steps,   # gradual warmup
    num_training_steps=num_training_steps,
)

print(f"Total steps: {num_training_steps}, Warmup steps: {num_warmup_steps}")


Total steps: 12230, Warmup steps: 1223


In [10]:
import torch

# Step 1: Choose device
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Step 2: Move the model to the chosen device
model.to(device)
device


device(type='cuda')

In [11]:
from tqdm.auto import tqdm

# Create a progress bar for visual feedback
progress_bar = tqdm(range(num_training_steps))

model.train()  # Set model to training mode
for epoch in range(num_epochs):
    for batch in train_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}  # Move inputs & labels to GPU/CPU
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()  # Compute

        optimizer.step()       # Update model weights using gradients
        lr_scheduler.step()    # Update learning rate according to schedule
        optimizer.zero_grad()  # Reset gradients for next step
        progress_bar.update(1) # Advance progress bar




  0%|          | 0/12230 [00:00<?, ?it/s]

In [12]:
import torch
import numpy as np

model.eval()  # evaluation mode

valence_preds_all = []
arousal_preds_all = []
valence_refs_all = []
arousal_refs_all = []

for batch in eval_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**batch)

    predictions = outputs.logits  # [batch_size, 2]
    references = batch["labels"]   # [batch_size, 2]

    # Separate valence and arousal
    valence_preds_all.append(predictions[:, 0].cpu().numpy())
    arousal_preds_all.append(predictions[:, 1].cpu().numpy())
    valence_refs_all.append(references[:, 0].cpu().numpy())
    arousal_refs_all.append(references[:, 1].cpu().numpy())

# Concatenate all batches
valence_preds_all = np.concatenate(valence_preds_all)
arousal_preds_all = np.concatenate(arousal_preds_all)
valence_refs_all = np.concatenate(valence_refs_all)
arousal_refs_all = np.concatenate(arousal_refs_all)

# Compute MSE and MAE for each dimension
valence_mse = ((valence_preds_all - valence_refs_all) ** 2).mean()
arousal_mse = ((arousal_preds_all - arousal_refs_all) ** 2).mean()

valence_mae = np.abs(valence_preds_all - valence_refs_all).mean()
arousal_mae = np.abs(arousal_preds_all - arousal_refs_all).mean()

# ✅ Add RMSE for each dimension
valence_rmse = np.sqrt(valence_mse)
arousal_rmse = np.sqrt(arousal_mse)

print(f"Valence → MSE: {valence_mse:.4f}, MAE: {valence_mae:.4f}, RMSE: {valence_rmse:.4f}")
print(f"Arousal → MSE: {arousal_mse:.4f}, MAE: {arousal_mae:.4f}, RMSE: {arousal_rmse:.4f}")


Valence → MSE: 0.4823, MAE: 0.4568, RMSE: 0.6945
Arousal → MSE: 0.3725, MAE: 0.4486, RMSE: 0.6104


In [13]:
# Normalized DimASR RMSE
D_max = 8**2 + 8**2  # 128

# Clip predictions to [1,9] and round to 2 decimals
valence_preds_clipped = np.clip(np.round(valence_preds_all, 2), 1, 9)
arousal_preds_clipped = np.clip(np.round(arousal_preds_all, 2), 1, 9)

# Compute squared distances in VA space
squared_distances = (valence_preds_clipped - valence_refs_all) ** 2 + \
                    (arousal_preds_clipped - arousal_refs_all) ** 2

# Normalized RMSE
dimasr_rmse = np.sqrt((squared_distances / D_max).mean())

print(f"DimASR Normalized RMSE: {dimasr_rmse:.4f}")


DimASR Normalized RMSE: 0.0817


In [15]:
import torch
import numpy as np
import math
from scipy.stats import pearsonr

# -------------------------
# 1️⃣ Collect predictions
# -------------------------
def get_predictions(model, dataloader, device):
    """
    Collects predictions and gold labels from a model trained for Valence–Arousal regression.
    Works with Hugging Face AutoModelForSequenceClassification.
    """
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            # Extract labels before moving tensors
            labels = batch["labels"].cpu().numpy()

            # Move the rest to device (input_ids, attention_mask, etc.)
            inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}

            # Get model outputs
            outputs = model(**inputs)
            preds = outputs.logits.cpu().numpy()  # shape [batch_size, 2]

            all_preds.append(preds)
            all_labels.append(labels)

    preds = np.vstack(all_preds)
    labels = np.vstack(all_labels)

    # Split into valence and arousal
    pred_v, pred_a = preds[:, 0], preds[:, 1]
    gold_v, gold_a = labels[:, 0], labels[:, 1]

    return pred_v, pred_a, gold_v, gold_a



# -------------------------
# 2️⃣ Evaluate predictions
# -------------------------
def evaluate_va_predictions(pred_v, pred_a, gold_v, gold_a):
    """
    Computes PCC (correlation) and RMSE for Valence–Arousal regression.
    """
    # Check range validity
    if not (np.all((1 <= pred_v) & (pred_v <= 9)) and np.all((1 <= pred_a) & (pred_a <= 9))):
        print("⚠️ Warning: Some predicted values are outside the expected [1,9] range.")

    # Pearson correlation
    pcc_v = pearsonr(pred_v, gold_v)[0]
    pcc_a = pearsonr(pred_a, gold_a)[0]

    # Combined VA RMSE
    gold_va = gold_v + gold_a
    pred_va = pred_v + pred_a
    rmse_va = math.sqrt(np.mean((gold_va - pred_va) ** 2))

    # Individual RMSEs (optional but useful)
    rmse_v = math.sqrt(np.mean((gold_v - pred_v) ** 2))
    rmse_a = math.sqrt(np.mean((gold_a - pred_a) ** 2))

    return {
        "PCC_V": float(pcc_v),
        "PCC_A": float(pcc_a),
        "RMSE_V": float(rmse_v),
        "RMSE_A": float(rmse_a),
        "RMSE_VA": float(rmse_va),
    }


# -------------------------
# 3️⃣ Run evaluation
# -------------------------
pred_v, pred_a, gold_v, gold_a = get_predictions(model, eval_dataloader, device)
eval_results = evaluate_va_predictions(pred_v, pred_a, gold_v, gold_a)

print(f"{checkpoint} dev_eval: {eval_results}")


bert-base-multilingual-cased dev_eval: {'PCC_V': 0.8743869662284851, 'PCC_A': 0.8016546368598938, 'RMSE_V': 0.6945070037214014, 'RMSE_A': 0.6103572997777225, 'RMSE_VA': 1.0866932057628929}


In [16]:
#code to save model in local
save_path = "./bert-base-multilingual-cased-finetuned-task1-semeval"

model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

('./bert-base-multilingual-cased-finetuned-task1-semeval/tokenizer_config.json',
 './bert-base-multilingual-cased-finetuned-task1-semeval/special_tokens_map.json',
 './bert-base-multilingual-cased-finetuned-task1-semeval/vocab.txt',
 './bert-base-multilingual-cased-finetuned-task1-semeval/added_tokens.json',
 './bert-base-multilingual-cased-finetuned-task1-semeval/tokenizer.json')

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Paths
finetuned_path = "./bert-base-uncased-finetuned-task1-semeval"
fresh_checkpoint = "bert-base-uncased"

# Load fine-tuned model + tokenizer
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_path)
finetuned_tokenizer = AutoTokenizer.from_pretrained(finetuned_path)

# Load fresh base BERT (not fine-tuned)
fresh_model = AutoModelForSequenceClassification.from_pretrained(
    fresh_checkpoint,
    num_labels=2,            # same as your fine-tuned
    problem_type="regression"
)
fresh_tokenizer = AutoTokenizer.from_pretrained(fresh_checkpoint)

# Example input
text = "for now i ' m okay with upping the experience & device to 3 out of 5 stars "
aspect = "device"

# Tokenize the same way you trained
inputs = finetuned_tokenizer(aspect, text, return_tensors="pt", truncation=True)

# Predictions
with torch.no_grad():
    finetuned_preds = finetuned_model(**inputs).logits.squeeze().tolist()
    fresh_preds = fresh_model(**inputs).logits.squeeze().tolist()

print("Input Text:", text)
print("Aspect:", aspect)
print("Fine-tuned predictions → Valence, Arousal:", finetuned_preds)
print("Fresh pretrained predictions → Valence, Arousal:", fresh_preds)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Input Text: for now i ' m okay with upping the experience & device to 3 out of 5 stars 
Aspect: device
Fine-tuned predictions → Valence, Arousal: [5.674418926239014, 5.606290340423584]
Fresh pretrained predictions → Valence, Arousal: [0.11778605729341507, -0.6561615467071533]


In [18]:
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [19]:
from huggingface_hub import HfApi

api = HfApi()
repo_id = "hassanshahzad2003/bert-base-multilingual-cased-finetuned-task1-semeval"

# Create repo if not exists
api.create_repo(repo_id=repo_id, repo_type="model", private=False)

# Upload all files in the folder
api.upload_folder(
    folder_path="/content/bert-base-multilingual-cased-finetuned-task1-semeval",
    repo_id=repo_id
)


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...semeval/model.safetensors:   0%|          |  556kB /  711MB            

CommitInfo(commit_url='https://huggingface.co/hassanshahzad2003/bert-base-multilingual-cased-finetuned-task1-semeval/commit/3b15287ee0a87176e07adf8b0a140779b0861e9c', commit_message='Upload folder using huggingface_hub', commit_description='', oid='3b15287ee0a87176e07adf8b0a140779b0861e9c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/hassanshahzad2003/bert-base-multilingual-cased-finetuned-task1-semeval', endpoint='https://huggingface.co', repo_type='model', repo_id='hassanshahzad2003/bert-base-multilingual-cased-finetuned-task1-semeval'), pr_revision=None, pr_num=None)

In [None]:
# from transformers import AutoTokenizer, AutoModelForSequenceClassification

# # Replace with your repo name
# model_name = "hassanshahzad2003/bert-base-uncased-finetuned-task1-semeval"

# # Load tokenizer and model from Hugging Face Hub
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSequenceClassification.from_pretrained(model_name)

# # Example usage
# text = "This laptop has amazing battery life!"
# inputs = tokenizer(text, return_tensors="pt")

# with torch.no_grad():
#     outputs = model(**inputs)

# print(outputs.logits)


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/664 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tensor([[7.9065, 8.0404]])
