### References
- Multi-Label Classification Model From Scratch: Step-by-Step Tutorial (https://huggingface.co/blog/Valerii-Knowledgator/multi-label-classification)
- https://github.com/NielsRogge/Transformers-Tutorials/blob/master/BERT/Fine_tuning_BERT_(and_friends)_for_multi_label_text_classification.ipynb
- https://github.com/huggingface/notebooks/blob/main/examples/text_classification.ipynb

# Libraries

In [1]:
%pip install -q -U datasets transformers accelerate sentencepiece

[0m

In [2]:
import os
import random
import numpy as np
import torch
import transformers
from pprint import pprint
from datetime import datetime
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments
from sklearn.metrics import f1_score

# Config

In [3]:
seed = 42
lang = 'sun'

# hf_model_id = 'bhadresh-savani/roberta-base-emotion'
# hf_model_id = 'bhadresh-savani/bert-base-uncased-emotion'
# hf_model_id = 'w11wo/sundanese-bert-base-emotion-classifier'
hf_model_id = 'w11wo/sundanese-roberta-base-emotion-classifier'
# hf_model_id = 'w11wo/sundanese-roberta-base'
# hf_model_id = 'alxxtexxr/XLM-RoBERTa-Base-Sundanese-Emotion-Classifier-v20241222170134'
hf_tokenizer_id = hf_model_id
hf_data_id = 'alxxtexxr/SemEval2025-Task11-Dataset'
# hf_data_config = 'track_a_sun_go_emotions_70_15_15_v2' # BEST
hf_data_config = 'track_a_sun_go_emotions_70_15_15_back_translated'

# num_epochs = 2
num_epochs_list = [10, 8, 5, 3, 2]
save_steps = 2 # Save model for every save_steps epoch

In [4]:
def set_seed(seed):
    # Set random seed for NumPy
    np.random.seed(seed)

    # Set random seed for Torch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if using multi-GPU
    torch.backends.cudnn.deterministic = True  # Ensures deterministic results
    torch.backends.cudnn.benchmark = False  # Avoids non-deterministic algorithms

    # Set random seed for Transformers
    transformers.set_seed(seed)

    # Optionally set random seed for sklearn and Python's own random module
    random.seed(seed)

    # Set random seed for os
    os.environ['PYTHONHASHSEED'] = str(seed)

    print(f"Random seed set to: {seed}")

set_seed(seed)

Random seed set to: 42


# Data

## Load Data

In [5]:
datasets = load_dataset(hf_data_id, hf_data_config)
print(datasets)
print()

cols = list(datasets['train'].features)
emotion_cols = [col for col in cols if col not in ['Unnamed: 0', 'text', 'emotion', 'stratify', 'aug_go_emotions']]
splits = [*datasets.keys()]

print("Data columns:", cols)
print("Emotions columns:", emotion_cols)

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


DatasetDict({
    train: Dataset({
        features: ['text', 'emotion', 'marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa'],
        num_rows: 1764
    })
    val: Dataset({
        features: ['text', 'emotion', 'marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa'],
        num_rows: 365
    })
    test: Dataset({
        features: ['text', 'emotion', 'marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa'],
        num_rows: 365
    })
})

Data columns: ['text', 'emotion', 'marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa']
Emotions columns: ['marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa']


In [6]:
class2id = {class_:id for id, class_ in enumerate(emotion_cols)}
id2class = {id:class_ for class_, id in class2id.items()}

print("Class to ID:")
pprint(class2id, width=1)
print()
print("ID to Class:")
pprint(id2class, width=1)

Class to ID:
{'biasa': 6,
 'jijik': 1,
 'marah': 0,
 'sedih': 4,
 'senang': 3,
 'takut': 2,
 'terkejut': 5}

ID to Class:
{0: 'marah',
 1: 'jijik',
 2: 'takut',
 3: 'senang',
 4: 'sedih',
 5: 'terkejut',
 6: 'biasa'}


## Preprocess Data

In [7]:
tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_id)

In [8]:
def one_hot_encode_emotion(emotion, emotion_cols):
    emotions = emotion.replace(" ", "").split(",")
    one_hot_emotion = [1.0 if emotion_col in emotions else 0.0 for emotion_col in emotion_cols] # Ensure that the label is float, not int
    return one_hot_emotion

def preprocess_function(data):
   text = data['text']
   emotion = data['emotion']
   labels = one_hot_encode_emotion(emotion, emotion_cols)
   data = tokenizer(text, truncation=True)
   data['labels'] = labels
   return data

tokenized_datasets = {split: datasets[split].map(preprocess_function) for split in splits}

Map:   0%|          | 0/365 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [9]:
# Sanity check
data = tokenized_datasets['train'][5]

print("Text:", data['text'])
print("Emotion(s):", data['emotion'])
print("Labels:", data['labels'], '-->', emotion_cols)

Text: Sigana anjeun lalaki sejen anu henteu resep ka legion ;D
Emotion(s): marah, jijik
Labels: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0] --> ['marah', 'jijik', 'takut', 'senang', 'sedih', 'terkejut', 'biasa']


In [10]:
data_collator = DataCollatorWithPadding(tokenizer)

# Finetuning

In [11]:
def sigmoid(x):
   return 1/(1 + np.exp(-x))

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    probs = sigmoid(predictions)
    y_pred = (probs > 0.5).astype(int)
    y_true = labels.astype(int)

    # Compute F1 score for each type of averaging method
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0.0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0.0)
    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0.0)
    f1_samples = f1_score(y_true, y_pred, average='samples', zero_division=0.0)
    f1_labels = f1_score(y_true, y_pred, average=None, zero_division=0.0)
    f1_labels_dict = {f'f1_label_{emotion_cols[i]}': f1_labels[i] for i in range(len(f1_labels))}

    return {
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
        'f1_weighted': f1_weighted,
        'f1_samples': f1_samples,
        **f1_labels_dict,
    }

In [12]:
for num_epochs in num_epochs_list:
    project_name = f'RoBERTa-Base-SE2025T11A-{lang}-v{datetime.now().strftime("%Y%m%d%H%M%S")}'

    print("=" * 64)
    print(f"NUM_EPOCHS: {num_epochs}")
    print(f"PROJECT_NAME: {project_name}")
    print("=" * 64)

    # MODEL
    # model = AutoModelForSequenceClassification.from_pretrained(
    #     hf_model_id, 
    #     num_labels=len(emotion_cols),
    #     id2label=id2class, 
    #     label2id=class2id,
    #     problem_type = "multi_label_classification",
    # )

    # ================================ 
    # TRANSFER LEARNING 
    # ================================
    model = AutoModelForSequenceClassification.from_pretrained(
        hf_model_id, 
        # num_labels=len(emotion_cols),
        # id2label=id2class, 
        # label2id=class2id,
        problem_type = "multi_label_classification",
    )
    # print(model.classifier)

    model.classifier.out_proj = torch.nn.Linear(in_features=768, out_features=len(emotion_cols), bias=True)
    # print(model.classifier)

    model.config._name_or_path = 'alxxtexxr/' + project_name
    model.config.num_labels = len(emotion_cols)
    model.config.id2label = id2class
    model.config.label2id = class2id

    # Freeze the rest of the layers for transfer learning
    # for param in model.parameters():
    #     param.requires_grad = False

    # TRAINING
    train_args = TrainingArguments(
        # Training config
        per_device_train_batch_size=2,
        num_train_epochs=num_epochs,
        learning_rate=2e-5,
        weight_decay=0.01,

        # Logging config for training
        logging_strategy='steps',
        logging_steps=100,

        # Evaluation config during training
        per_device_eval_batch_size=2,
        # eval_strategy='steps',
        # eval_steps=100,
        eval_strategy='epoch',
        eval_steps=1,

        # Model saving config
        output_dir=project_name,
        save_strategy='epoch',
        save_steps=save_steps,
        load_best_model_at_end=True,
    )

    trainer = Trainer(
        model=model,
        args=train_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['val'],
        processing_class=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    trainer.push_to_hub()

    # EVALUATION
    eval = trainer.evaluate(eval_dataset=tokenized_datasets['test'])

    print("Evaluation to copy:")
    f1_keys = [eval_key for eval_key in eval.keys() if 'f1' in eval_key]
    for i, k in enumerate(f1_keys): print(eval[k], end="\t" if i + 1 < len(f1_keys) else "")
    print()
    print()
    print("Evaluation full results:")
    pprint(eval)

NUM_EPOCHS: 10
PROJECT_NAME: RoBERTa-Base-SE2025T11A-sun-v20250113130658


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malimtegar[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Micro,F1 Weighted,F1 Samples,F1 Label Marah,F1 Label Jijik,F1 Label Takut,F1 Label Senang,F1 Label Sedih,F1 Label Terkejut,F1 Label Biasa
1,0.3673,0.301842,0.527032,0.598106,0.57336,0.532877,0.55814,0.435897,0.54321,0.782609,0.707965,0.561404,0.1
2,0.2785,0.304163,0.592439,0.624096,0.620611,0.607763,0.549296,0.553571,0.6,0.78453,0.708661,0.580645,0.37037
3,0.1849,0.357601,0.592433,0.617925,0.616135,0.61653,0.487395,0.495726,0.54717,0.811111,0.757143,0.533333,0.515152
4,0.1581,0.377999,0.619828,0.645238,0.638383,0.644292,0.522523,0.540541,0.554217,0.835979,0.727273,0.540984,0.617284
5,0.0942,0.429534,0.611287,0.640371,0.636219,0.642283,0.569231,0.55814,0.571429,0.812183,0.755906,0.545455,0.466667
6,0.059,0.471385,0.597307,0.627063,0.622197,0.637443,0.528,0.564885,0.52381,0.827225,0.723926,0.520548,0.492754
7,0.0334,0.482311,0.620386,0.642534,0.640863,0.649498,0.556391,0.569231,0.57732,0.827225,0.733813,0.528,0.550725
8,0.035,0.511574,0.629008,0.6466,0.648051,0.649954,0.525547,0.5625,0.623656,0.819149,0.75,0.558824,0.56338
9,0.0233,0.535451,0.617682,0.640969,0.639851,0.652146,0.557143,0.545455,0.595745,0.839378,0.705882,0.542857,0.537313
10,0.0191,0.538506,0.621294,0.642303,0.641989,0.652146,0.550725,0.55814,0.595745,0.831579,0.724832,0.537313,0.550725


Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1736773620.5f2e6c9c6f3a.5626.0:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

Evaluation to copy:
0.5130354257141918	0.5826558265582655	0.5550287188258658	0.5236529680365296	0.5483870967741935	0.33766233766233766	0.7045454545454546	0.7403314917127072	0.6666666666666666	0.5423728813559322	0.05128205128205128

Evaluation full results:
{'epoch': 10.0,
 'eval_f1_label_biasa': 0.05128205128205128,
 'eval_f1_label_jijik': 0.33766233766233766,
 'eval_f1_label_marah': 0.5483870967741935,
 'eval_f1_label_sedih': 0.6666666666666666,
 'eval_f1_label_senang': 0.7403314917127072,
 'eval_f1_label_takut': 0.7045454545454546,
 'eval_f1_label_terkejut': 0.5423728813559322,
 'eval_f1_macro': 0.5130354257141918,
 'eval_f1_micro': 0.5826558265582655,
 'eval_f1_samples': 0.5236529680365296,
 'eval_f1_weighted': 0.5550287188258658,
 'eval_loss': 0.31920620799064636,
 'eval_runtime': 1.8843,
 'eval_samples_per_second': 193.706,
 'eval_steps_per_second': 97.118}
NUM_EPOCHS: 8
PROJECT_NAME: RoBERTa-Base-SE2025T11A-sun-v20250113132115


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Micro,F1 Weighted,F1 Samples,F1 Label Marah,F1 Label Jijik,F1 Label Takut,F1 Label Senang,F1 Label Sedih,F1 Label Terkejut,F1 Label Biasa
1,0.3577,0.298225,0.533174,0.606711,0.578882,0.550411,0.582677,0.455696,0.556962,0.787234,0.699187,0.550459,0.1
2,0.2657,0.308457,0.599263,0.627879,0.62319,0.607763,0.557143,0.631579,0.577778,0.757062,0.703125,0.576,0.392157
3,0.1679,0.358454,0.592006,0.611374,0.611717,0.609132,0.504202,0.532258,0.560748,0.774566,0.740741,0.517241,0.514286
4,0.1435,0.367489,0.642257,0.661955,0.653517,0.668219,0.591304,0.622951,0.531646,0.793814,0.721088,0.559322,0.675676
5,0.086,0.429535,0.60829,0.640462,0.633923,0.640913,0.544118,0.561983,0.580645,0.817308,0.692913,0.596774,0.464286
6,0.0415,0.45847,0.626805,0.648107,0.645782,0.654703,0.564516,0.59375,0.602151,0.814815,0.735484,0.531469,0.545455
7,0.0339,0.469836,0.631952,0.650388,0.648589,0.662922,0.588235,0.576,0.58,0.821053,0.709677,0.542636,0.606061
8,0.0322,0.474611,0.627005,0.647125,0.644718,0.654977,0.571429,0.580645,0.583333,0.808511,0.738255,0.535433,0.571429


Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1736774476.5f2e6c9c6f3a.5626.2:   0%|          | 0.00/27.9k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

Evaluation to copy:
0.5060836536205995	0.5877659574468085	0.5525652867247695	0.539269406392694	0.5833333333333334	0.3076923076923077	0.6896551724137931	0.7619047619047619	0.7	0.5	0.0

Evaluation full results:
{'epoch': 8.0,
 'eval_f1_label_biasa': 0.0,
 'eval_f1_label_jijik': 0.3076923076923077,
 'eval_f1_label_marah': 0.5833333333333334,
 'eval_f1_label_sedih': 0.7,
 'eval_f1_label_senang': 0.7619047619047619,
 'eval_f1_label_takut': 0.6896551724137931,
 'eval_f1_label_terkejut': 0.5,
 'eval_f1_macro': 0.5060836536205995,
 'eval_f1_micro': 0.5877659574468085,
 'eval_f1_samples': 0.539269406392694,
 'eval_f1_weighted': 0.5525652867247695,
 'eval_loss': 0.3110816478729248,
 'eval_runtime': 1.8537,
 'eval_samples_per_second': 196.908,
 'eval_steps_per_second': 98.724}
NUM_EPOCHS: 5
PROJECT_NAME: RoBERTa-Base-SE2025T11A-sun-v20250113133244


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Micro,F1 Weighted,F1 Samples,F1 Label Marah,F1 Label Jijik,F1 Label Takut,F1 Label Senang,F1 Label Sedih,F1 Label Terkejut,F1 Label Biasa
1,0.3637,0.29713,0.533579,0.606398,0.579559,0.53653,0.586207,0.461538,0.554217,0.795699,0.709091,0.528302,0.1
2,0.2775,0.307094,0.594427,0.622991,0.618506,0.605205,0.549618,0.568627,0.586957,0.75,0.727273,0.578512,0.4
3,0.1586,0.344259,0.604208,0.629321,0.625737,0.628037,0.512,0.578125,0.589474,0.789189,0.731343,0.545455,0.483871
4,0.1378,0.363546,0.616994,0.637749,0.63362,0.645479,0.545455,0.578512,0.574713,0.78534,0.684564,0.578947,0.571429
5,0.0955,0.375345,0.634318,0.652874,0.651609,0.658447,0.551181,0.595041,0.606742,0.791444,0.702703,0.625954,0.567164


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

events.out.tfevents.1736775164.5f2e6c9c6f3a.5626.4:   0%|          | 0.00/19.7k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

Evaluation to copy:
0.484746011581034	0.5647382920110193	0.5298585340920339	0.5025570776255708	0.5178571428571429	0.2972972972972973	0.6966292134831461	0.7513227513227513	0.6605504587155964	0.46956521739130436	0.0

Evaluation full results:
{'epoch': 5.0,
 'eval_f1_label_biasa': 0.0,
 'eval_f1_label_jijik': 0.2972972972972973,
 'eval_f1_label_marah': 0.5178571428571429,
 'eval_f1_label_sedih': 0.6605504587155964,
 'eval_f1_label_senang': 0.7513227513227513,
 'eval_f1_label_takut': 0.6966292134831461,
 'eval_f1_label_terkejut': 0.46956521739130436,
 'eval_f1_macro': 0.484746011581034,
 'eval_f1_micro': 0.5647382920110193,
 'eval_f1_samples': 0.5025570776255708,
 'eval_f1_weighted': 0.5298585340920339,
 'eval_loss': 0.3209816813468933,
 'eval_runtime': 2.1437,
 'eval_samples_per_second': 170.266,
 'eval_steps_per_second': 85.366}
NUM_EPOCHS: 3
PROJECT_NAME: RoBERTa-Base-SE2025T11A-sun-v20250113134023


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Micro,F1 Weighted,F1 Samples,F1 Label Marah,F1 Label Jijik,F1 Label Takut,F1 Label Senang,F1 Label Sedih,F1 Label Terkejut,F1 Label Biasa
1,0.3523,0.29809,0.503795,0.58728,0.554333,0.532877,0.516667,0.314286,0.55,0.818653,0.678571,0.548387,0.1
2,0.2656,0.29935,0.597056,0.624079,0.618988,0.611142,0.566667,0.60177,0.623656,0.766667,0.661538,0.544,0.415094
3,0.1829,0.29941,0.622019,0.642686,0.639522,0.638174,0.556522,0.623853,0.645161,0.775956,0.690141,0.5625,0.5


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

events.out.tfevents.1736775624.5f2e6c9c6f3a.5626.6:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

Evaluation to copy:
0.4935079128869173	0.5802968960863698	0.540544935440443	0.5255707762557077	0.5565217391304348	0.22857142857142856	0.6896551724137931	0.7448979591836735	0.6909090909090909	0.544	0.0

Evaluation full results:
{'epoch': 3.0,
 'eval_f1_label_biasa': 0.0,
 'eval_f1_label_jijik': 0.22857142857142856,
 'eval_f1_label_marah': 0.5565217391304348,
 'eval_f1_label_sedih': 0.6909090909090909,
 'eval_f1_label_senang': 0.7448979591836735,
 'eval_f1_label_takut': 0.6896551724137931,
 'eval_f1_label_terkejut': 0.544,
 'eval_f1_macro': 0.4935079128869173,
 'eval_f1_micro': 0.5802968960863698,
 'eval_f1_samples': 0.5255707762557077,
 'eval_f1_weighted': 0.540544935440443,
 'eval_loss': 0.31171366572380066,
 'eval_runtime': 1.8866,
 'eval_samples_per_second': 193.47,
 'eval_steps_per_second': 97.0}
NUM_EPOCHS: 2
PROJECT_NAME: RoBERTa-Base-SE2025T11A-sun-v20250113134514


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Micro,F1 Weighted,F1 Samples,F1 Label Marah,F1 Label Jijik,F1 Label Takut,F1 Label Senang,F1 Label Sedih,F1 Label Terkejut,F1 Label Biasa
1,0.3577,0.292677,0.50401,0.593838,0.555572,0.524201,0.553571,0.268657,0.545455,0.822917,0.701754,0.535714,0.1
2,0.2456,0.284916,0.626229,0.659148,0.649469,0.643105,0.571429,0.632653,0.622222,0.820513,0.713178,0.559322,0.464286


Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1736775914.5f2e6c9c6f3a.5626.8:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

Evaluation to copy:
0.6232560071995624	0.644918444165621	0.6377842823215462	0.628675799086758	0.6346153846153846	0.4946236559139785	0.6947368421052632	0.7448979591836735	0.6829268292682927	0.59375	0.5172413793103449

Evaluation full results:
{'epoch': 2.0,
 'eval_f1_label_biasa': 0.5172413793103449,
 'eval_f1_label_jijik': 0.4946236559139785,
 'eval_f1_label_marah': 0.6346153846153846,
 'eval_f1_label_sedih': 0.6829268292682927,
 'eval_f1_label_senang': 0.7448979591836735,
 'eval_f1_label_takut': 0.6947368421052632,
 'eval_f1_label_terkejut': 0.59375,
 'eval_f1_macro': 0.6232560071995624,
 'eval_f1_micro': 0.644918444165621,
 'eval_f1_samples': 0.628675799086758,
 'eval_f1_weighted': 0.6377842823215462,
 'eval_loss': 0.30483126640319824,
 'eval_runtime': 1.9022,
 'eval_samples_per_second': 191.885,
 'eval_steps_per_second': 96.205}


# Evaluation

In [13]:
# eval = trainer.evaluate(eval_dataset=tokenized_datasets['test'])

# print("Evaluation to copy:")
# f1_keys = [eval_key for eval_key in eval.keys() if 'f1' in eval_key]
# for i, k in enumerate(f1_keys): print(eval[k], end="\t" if i + 1 < len(f1_keys) else "")
# print()
# print()
# print("Evaluation full results:")
# pprint(eval)

# Inference

In [14]:
data = datasets['val'][1]
text = data['text']
emotion_true = data['emotion']

inputs = tokenizer(text, return_tensors='pt').to(model.device)

outputs = trainer.model(**inputs)
logits = outputs.logits
probs = sigmoid(logits.squeeze().detach().cpu().numpy()) # apply sigmoid + threshold
labels_pred = (probs > 0.5).astype(int)
emotion_pred = [id2class[idx] for idx, label in enumerate(labels_pred) if label == 1.0] # turn predicted id's into actual label names

print("Text:", text)
print("True emotion(s):", emotion_true)
print("Predicted emotion(s):", ", ".join(emotion_pred))

Text: Abdi hanjakal pisan
True emotion(s): sedih
Predicted emotion(s): sedih
