### References
- Multi-Label Classification Model From Scratch: Step-by-Step Tutorial (https://huggingface.co/blog/Valerii-Knowledgator/multi-label-classification)
- https://github.com/NielsRogge/Transformers-Tutorials/blob/master/BERT/Fine_tuning_BERT_(and_friends)_for_multi_label_text_classification.ipynb
- https://github.com/huggingface/notebooks/blob/main/examples/text_classification.ipynb

# Libraries

In [1]:
%pip install -q -U datasets transformers accelerate sentencepiece

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.[0m[31m
[0m

In [2]:
import os
import random
import numpy as np
import torch
import transformers
from pprint import pprint
from datetime import datetime
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments
from sklearn.metrics import f1_score

# Config

In [51]:
seed = 42
lang = 'sun'
project_name = f'RoBERTa-Base-SE2025T11A-{lang}-v{datetime.now().strftime("%Y%m%d%H%M%S")}'
print("Project name:", project_name)

# hf_model_id = 'bhadresh-savani/roberta-base-emotion'
# hf_model_id = 'bhadresh-savani/bert-base-uncased-emotion'
# hf_model_id = 'w11wo/sundanese-bert-base-emotion-classifier'
hf_model_id = 'w11wo/sundanese-roberta-base-emotion-classifier'
# hf_model_id = 'w11wo/sundanese-roberta-base'
# hf_model_id = 'alxxtexxr/XLM-RoBERTa-Base-Sundanese-Emotion-Classifier-v20241222170134'
hf_tokenizer_id = hf_model_id
hf_data_id = 'alxxtexxr/SemEval2025-Task11-Dataset'
# hf_data_config = 'track_a_sun_70_15_15_stratify_v2'
hf_data_config = 'track_a_sun_go_emotions_70_15_15_stratify_v2'
# hf_data_config = 'track_a_sun_go_emotions_70_15_15_balanced'

num_epochs = 2

Project name: RoBERTa-Base-SE2025T11A-sun-v20250108145152


In [31]:
def set_seed(seed):
    # Set random seed for NumPy
    np.random.seed(seed)

    # Set random seed for Torch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if using multi-GPU
    torch.backends.cudnn.deterministic = True  # Ensures deterministic results
    torch.backends.cudnn.benchmark = False  # Avoids non-deterministic algorithms

    # Set random seed for Transformers
    transformers.set_seed(seed)

    # Optionally set random seed for sklearn and Python's own random module
    random.seed(seed)

    # Set random seed for os
    os.environ['PYTHONHASHSEED'] = str(seed)

    print(f"Random seed set to: {seed}")

set_seed(seed)

Random seed set to: 42


# Data

## Load Data

In [32]:
datasets = load_dataset(hf_data_id, hf_data_config)

cols = list(datasets['train'].features)
emotion_cols = [col for col in cols if col not in ['Unnamed: 0', 'text', 'emotion']]
splits = [*datasets.keys()]

print("Data columns:", cols)
print("Emotions columns:", emotion_cols)

Data columns: ['text', 'emotion', 'marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa']
Emotions columns: ['marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa']


In [33]:
# Reorder emotion columns
emotion_cols = ['senang', 'marah', 'sedih', 'takut', 'jijik', 'terkejut', 'biasa']

In [34]:
class2id = {class_:id for id, class_ in enumerate(emotion_cols)}
id2class = {id:class_ for class_, id in class2id.items()}

print("Class to ID:")
pprint(class2id, width=1)
print()
print("ID to Class:")
pprint(id2class, width=1)

Class to ID:
{'biasa': 6,
 'jijik': 4,
 'marah': 1,
 'sedih': 2,
 'senang': 0,
 'takut': 3,
 'terkejut': 5}

ID to Class:
{0: 'senang',
 1: 'marah',
 2: 'sedih',
 3: 'takut',
 4: 'jijik',
 5: 'terkejut',
 6: 'biasa'}


## Preprocess Data

In [35]:
tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_id)

tokenizer_config.json:   0%|          | 0.00/318 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/786k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/445k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.33M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [36]:
def one_hot_encode_emotion(emotion, emotion_cols):
    emotions = emotion.replace(" ", "").split(",")
    one_hot_emotion = [1.0 if emotion_col in emotions else 0.0 for emotion_col in emotion_cols] # Ensure that the label is float, not int
    return one_hot_emotion

def preprocess_function(data):
   text = data['text']
   emotion = data['emotion']
   labels = one_hot_encode_emotion(emotion, emotion_cols)
   data = tokenizer(text, truncation=True)
   data['labels'] = labels
   return data

tokenized_datasets = {split: datasets[split].map(preprocess_function) for split in splits}

Map:   0%|          | 0/1765 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/365 [00:00<?, ? examples/s]

Map:   0%|          | 0/365 [00:00<?, ? examples/s]

In [37]:
# Sanity check
data = tokenized_datasets['train'][5]

print("Text:", data['text'])
print("Emotion(s):", data['emotion'])
print("Labels:", data['labels'], '-->', emotion_cols)

Text: Sigana anjeun lalaki séjén anu henteu resep ka legion ;D
Emotion(s): marah, jijik
Labels: [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0] --> ['senang', 'marah', 'sedih', 'takut', 'jijik', 'terkejut', 'biasa']


In [38]:
data_collator = DataCollatorWithPadding(tokenizer)

# Model

In [49]:
# ################################ TRANSFER LEARNING ################################
model = AutoModelForSequenceClassification.from_pretrained(
    hf_model_id, 
    # num_labels=len(emotion_cols),
    # id2label=id2class, 
    # label2id=class2id,
    problem_type = "multi_label_classification",
)
print(model.classifier)

# Freeze the rest of the layers for transfer learning
# for param in model.parameters():
#     param.requires_grad = False

# model.classifier.out_proj = torch.nn.Linear(in_features=768, out_features=len(emotion_cols), bias=True)
# print(model.classifier)

RobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=4, bias=True)
)


In [50]:
out_proj = model.classifier.out_proj
out_proj_weight = model.classifier.out_proj.weight.data
out_proj_bias = model.classifier.out_proj.bias.data
print("out_proj weight shape:", out_proj_weight.shape)
print("out_proj bias shape:", out_proj_bias.shape)
print()

# Extend out_proj layer weight and bias
out_proj_weight_extended = torch.cat([out_proj_weight, torch.randn(len(emotion_cols) - out_proj_weight.shape[0], out_proj_weight.shape[1])], dim=0)
out_proj_bias_extended = torch.cat([out_proj_bias, torch.randn(len(emotion_cols) - out_proj_bias.shape[0])], dim=0)
print("out_proj weight shape (extended):", out_proj_weight_extended.shape)
print("out_proj bias shape (extended):", out_proj_bias_extended.shape)
print()

# Create extended out_proj layer
out_proj_extended = torch.nn.Linear(in_features=out_proj_weight.shape[1], out_features=len(emotion_cols), bias=True)
out_proj_extended.weight.data = out_proj_weight_extended
out_proj_extended.bias.data = out_proj_bias_extended

# Replace the old out_proj with the new extended out_proj layer
model.classifier.out_proj = out_proj_extended
print(model.classifier)

out_proj weight shape: torch.Size([4, 768])
out_proj bias shape: torch.Size([4])

out_proj weight shape (extended): torch.Size([7, 768])
out_proj bias shape (extended): torch.Size([7])

RobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=7, bias=True)
)


# Finetuning

In [52]:
def sigmoid(x):
   return 1/(1 + np.exp(-x))

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    probs = sigmoid(predictions)
    y_pred = (probs > 0.5).astype(int)
    y_true = labels.astype(int)

    # Compute F1 score for each type of averaging method
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0.0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0.0)
    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0.0)
    f1_samples = f1_score(y_true, y_pred, average='samples', zero_division=0.0)
    f1_labels = f1_score(y_true, y_pred, average=None, zero_division=0.0)
    f1_labels_dict = {f'f1_label_{emotion_cols[i]}': f1_labels[i] for i in range(len(f1_labels))}

    return {
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
        'f1_weighted': f1_weighted,
        'f1_samples': f1_samples,
        **f1_labels_dict,
    }

In [53]:
train_args = TrainingArguments(
    # Training config
    per_device_train_batch_size=2,
    num_train_epochs=num_epochs,
    learning_rate=2e-5,
    weight_decay=0.01,

    # Logging config for training
    logging_strategy='steps',
    logging_steps=100,

    # Evaluation config during training
    per_device_eval_batch_size=2,
    eval_strategy='steps',
    eval_steps=100,

    # Model saving config
    output_dir=project_name,
    save_strategy='epoch',
    # load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['val'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [54]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malimtegar[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,F1 Macro,F1 Micro,F1 Weighted,F1 Samples,F1 Label Senang,F1 Label Marah,F1 Label Sedih,F1 Label Takut,F1 Label Jijik,F1 Label Terkejut,F1 Label Biasa
100,2.099,1.778667,0.263083,0.313187,0.292031,0.295982,0.641148,0.177419,0.311594,0.237885,0.035088,0.14433,0.294118
200,1.6307,1.275318,0.219057,0.316964,0.240273,0.306941,0.612903,0.174757,0.076433,0.0,0.308943,0.027027,0.333333
300,1.2831,1.193426,0.269029,0.366337,0.314767,0.366027,0.737864,0.276923,0.029412,0.0,0.43787,0.303571,0.097561
400,1.2412,0.743131,0.41359,0.465162,0.438464,0.476256,0.748815,0.0,0.54902,0.351145,0.406504,0.375,0.464646
500,1.0289,0.874781,0.240924,0.377644,0.300458,0.246941,0.738916,0.059701,0.292683,0.066667,0.131148,0.397351,0.0
600,0.9521,0.85813,0.340616,0.461126,0.392024,0.410959,0.728395,0.0,0.595506,0.212121,0.496644,0.351648,0.0
700,0.9482,0.778113,0.333187,0.459538,0.384319,0.339635,0.789238,0.0,0.461538,0.26087,0.246154,0.431655,0.142857
800,0.9216,1.051215,0.286564,0.416949,0.346714,0.306393,0.78453,0.031746,0.571429,0.135593,0.101695,0.32967,0.051282
900,0.9789,1.108072,0.397288,0.50358,0.445594,0.457078,0.828571,0.406015,0.588235,0.309859,0.490385,0.106667,0.051282
1000,0.7743,0.667876,0.464343,0.55615,0.500568,0.501279,0.818605,0.092308,0.656,0.422535,0.53913,0.410714,0.311111


TrainOutput(global_step=1766, training_loss=0.9478327447668747, metrics={'train_runtime': 237.019, 'train_samples_per_second': 14.893, 'train_steps_per_second': 7.451, 'total_flos': 40226963970600.0, 'train_loss': 0.9478327447668747, 'epoch': 2.0})

In [55]:
trainer.push_to_hub()

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

events.out.tfevents.1736347923.42a6ebb2ccf7.1954.0:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/alxxtexxr/RoBERTa-Base-SE2025T11A-sun-v20250108145152/commit/a88e7321aa0b3c1c6be07a43525e35ae4e66557d', commit_message='End of training', commit_description='', oid='a88e7321aa0b3c1c6be07a43525e35ae4e66557d', pr_url=None, repo_url=RepoUrl('https://huggingface.co/alxxtexxr/RoBERTa-Base-SE2025T11A-sun-v20250108145152', endpoint='https://huggingface.co', repo_type='model', repo_id='alxxtexxr/RoBERTa-Base-SE2025T11A-sun-v20250108145152'), pr_revision=None, pr_num=None)

# Evaluation

In [57]:
eval = trainer.evaluate(eval_dataset=tokenized_datasets['test'])

print("Evaluation to copy:")
f1_keys = [eval_key for eval_key in eval.keys() if 'f1' in eval_key]
for i, k in enumerate(f1_keys): print(eval[k], end="\t" if i + 1 < len(f1_keys) else "")
print()
print()
print("Evaluation full results:")
pprint(eval)

Evaluation to copy:
0.5394891892211522	0.5961538461538461	0.5553246087918977	0.5346118721461187	0.8349514563106796	0.12121212121212122	0.6055045871559633	0.7294117647058823	0.4791666666666667	0.42990654205607476	0.576271186440678

Evaluation full results:
{'epoch': 2.0,
 'eval_f1_label_biasa': 0.576271186440678,
 'eval_f1_label_jijik': 0.4791666666666667,
 'eval_f1_label_marah': 0.12121212121212122,
 'eval_f1_label_sedih': 0.6055045871559633,
 'eval_f1_label_senang': 0.8349514563106796,
 'eval_f1_label_takut': 0.7294117647058823,
 'eval_f1_label_terkejut': 0.42990654205607476,
 'eval_f1_macro': 0.5394891892211522,
 'eval_f1_micro': 0.5961538461538461,
 'eval_f1_samples': 0.5346118721461187,
 'eval_f1_weighted': 0.5553246087918977,
 'eval_loss': 0.5251038074493408,
 'eval_runtime': 2.1727,
 'eval_samples_per_second': 167.993,
 'eval_steps_per_second': 84.227}


# Inference

In [None]:
data = datasets['val'][1]
text = data['text']
emotion_true = data['emotion']

inputs = tokenizer(text, return_tensors='pt').to(model.device)

outputs = trainer.model(**inputs)
logits = outputs.logits
probs = sigmoid(logits.squeeze().detach().cpu().numpy()) # apply sigmoid + threshold
labels_pred = (probs > 0.5).astype(int)
emotion_pred = [id2class[idx] for idx, label in enumerate(labels_pred) if label == 1.0] # turn predicted id's into actual label names

print("Text:", text)
print("True emotion(s):", emotion_true)
print("Predicted emotion(s):", ", ".join(emotion_pred))

Text: Maksud kuring, kaos away maranéhanana éfektif janten seragam home urang.
True emotion(s): biasa
Predicted emotion(s): marah
