<a href="https://colab.research.google.com/github/alxxtexxr/SemEval_Task11/blob/main/SemEval2025_Task11_Track_A_T5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### References
- Fine-tune T5 for classification and multiple choice: [t5_fine_tuning.ipynb](https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb)
- Fine-tune T5 for sentiment span extraction: [t5_qa_training_pytorch_span_extraction.ipynb](https://colab.research.google.com/github/enzoampil/t5-intro/blob/master/t5_qa_training_pytorch_span_extraction.ipynb)

# Libraries

In [1]:
# Installs pytorch_lightning (imported as `pl` in the next cell); no version is pinned,
# so pip resolves whatever release is current. `-q` keeps pip's output quiet.
!pip install -q pytorch_lightning

In [2]:
import os
import random
import argparse
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import pytorch_lightning as pl
import textwrap
from pprint import pprint
from google.colab import drive
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score, confusion_matrix
from tqdm.auto import tqdm

# Config

In [3]:
# Single source of truth for the run: every path and hyper-parameter used further down
# is read from `args` (attribute access) or `args_dict` (plain mapping).
args_dict = {
    # --- reproducibility / data selection ---
    'seed': 42,
    'lang': 'eng',  # 'eng' | 'deu' | 'ptbr' | 'rus' | 'sun'

    # --- locations on the mounted Drive ---
    'data_dir': '/content/drive/Shareddrives/Projects/SemEval2025_Task11/public_data',
    'preprocessed_data_dir': '/content/drive/Shareddrives/Projects/SemEval2025_Task11/preprocessed_data/',
    # NOTE: the language suffix here is a literal 'eng'; it does not follow `lang`.
    'output_dir': '/content/drive/Shareddrives/Projects/SemEval2025_Task11/outputs/t5-emotion-detection-eng/',

    # --- checkpoint: English ---
    'model_name_or_path': 'google-t5/t5-large',
    'tokenizer_name_or_path': 'google-t5/t5-large',

    # --- checkpoint: non-English ---
    # 'model_name_or_path': 'google/mt5-small',
    # 'tokenizer_name_or_path': 'google/mt5-small',

    # --- checkpoint: Indonesian/Sundanese ---
    # 'model_name_or_path': 'indonlp/cendol-mt5-small-inst',
    # 'tokenizer_name_or_path': 'indonlp/cendol-mt5-small-inst',

    # --- tokenisation ---
    'max_seq_length': 512,  # every source text is padded/truncated to this many tokens

    # --- optimisation ---
    'learning_rate': 3e-4,
    'weight_decay': 0.0,
    'adam_epsilon': 1e-8,
    'warmup_steps': 0,
    'train_batch_size': 8,
    'eval_batch_size': 8,
    'num_train_epochs': 2,
    'gradient_accumulation_steps': 16,  # optimizer steps once every 16 batches
    'n_gpu': 1,  # only enters the total-step count for the LR schedule
    'early_stop_callback': False,  # only referenced by a commented-out Trainer option
    'fp_16': False,  # True switches the Trainer `precision` to 16 in the train_params cell
    'opt_level': 'O1',  # apex AMP level (https://nvidia.github.io/apex/amp.html#opt-levels-and-properties); only referenced by a commented-out Trainer option
    'max_grad_norm': 1.0,  # gradient-norm clipping threshold used in training_step
}
args = argparse.Namespace(**args_dict)
pprint(args_dict, sort_dicts=False)

{'seed': 42,
 'lang': 'eng',
 'data_dir': '/content/drive/Shareddrives/Projects/SemEval2025_Task11/public_data',
 'preprocessed_data_dir': '/content/drive/Shareddrives/Projects/SemEval2025_Task11/preprocessed_data/',
 'output_dir': '/content/drive/Shareddrives/Projects/SemEval2025_Task11/outputs/t5-emotion-detection-eng/',
 'model_name_or_path': 'google-t5/t5-large',
 'tokenizer_name_or_path': 'google-t5/t5-large',
 'max_seq_length': 512,
 'learning_rate': 0.0003,
 'weight_decay': 0.0,
 'adam_epsilon': 1e-08,
 'warmup_steps': 0,
 'train_batch_size': 8,
 'eval_batch_size': 8,
 'num_train_epochs': 2,
 'gradient_accumulation_steps': 16,
 'n_gpu': 1,
 'early_stop_callback': False,
 'fp_16': False,
 'opt_level': 'O1',
 'max_grad_norm': 1.0}


In [4]:
# Set random seed
def set_seed(seed):
  """Seed Python's `random`, NumPy and PyTorch (and all CUDA devices when CUDA is available)."""
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)
  if not torch.cuda.is_available():
    return
  torch.cuda.manual_seed_all(seed)

set_seed(args.seed)

In [5]:
# Mount Google Drive at /content/drive. Every path in the config cell (data_dir,
# preprocessed_data_dir, output_dir) lives under this mount point, so this cell must
# run before any data is read or written.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Data

## Upload Data

In [6]:
# # Upload public_data.zip
# from google.colab import files
# uploaded_files = files.upload()

In [7]:
# !unzip public_data.zip

## Preprocess Data

In [8]:
# Raw Track A training split for the configured language
train_df = pd.read_csv(
    os.path.join(args.data_dir, f'train/track_a/{args.lang}.csv')
)
print(f"Training DF length: {len(train_df)}", end="\n\n")
train_df

Training DF length: 2768



Unnamed: 0,id,text,Anger,Fear,Joy,Sadness,Surprise
0,eng_train_track_a_00001,But not very happy.,0,0,1,1,0
1,eng_train_track_a_00002,Well she's not gon na last the whole song like...,0,0,1,0,0
2,eng_train_track_a_00003,She sat at her Papa's recliner sofa only to mo...,0,0,0,0,0
3,eng_train_track_a_00004,"Yes, the Oklahoma city bombing.",1,1,0,1,1
4,eng_train_track_a_00005,They were dancing to Bolero.,0,0,1,0,0
...,...,...,...,...,...,...,...
2763,eng_train_track_a_02764,"""Yeah, but did you just find that?",0,1,0,0,1
2764,eng_train_track_a_02765,I did as little as possible with my right hand...,0,0,0,0,0
2765,eng_train_track_a_02766,"Okay that sucks, right?",1,0,0,1,0
2766,eng_train_track_a_02767,"The spark leaped through his body into mine, a...",0,1,0,0,1


In [9]:
# Per-language mapping from the dataset's one-hot column names to the label words the
# model is trained to generate.
emotion_col_map = {
    'eng': { 'Anger': 'anger', 'Fear': 'fear', 'Joy': 'joy', 'Sadness': 'sad', 'Surprise': 'surprise' },
    'deu': { 'Anger': 'wut', 'Disgust': 'ekel', 'Fear': 'angst', 'Joy': 'freude', 'Sadness': 'trauer', 'Surprise': 'überraschung' },
    'sun': { 'Anger': 'marah', 'Disgust': 'jijik', 'Fear': 'takut', 'Joy': 'senang', 'Sadness': 'sedih', 'Surprise': 'terkejut' },
}

# Label word used for rows where no emotion column is set.
empty_emotion_map = {
    'eng': 'neutral',
    'deu': 'neutral',
    'sun': 'biasa',
}

# The config cell lists more languages ('ptbr', 'rus') than are mapped here; fail with a
# readable message instead of a bare KeyError further down.
if args.lang not in emotion_col_map or args.lang not in empty_emotion_map:
    raise ValueError(
        f"Unsupported lang '{args.lang}': add it to emotion_col_map and empty_emotion_map "
        f"(currently mapped: {sorted(emotion_col_map)})"
    )

emotion_cols = list(emotion_col_map[args.lang].values())

# Rename emotion columns
train_df = train_df.rename(columns=emotion_col_map[args.lang])

# Create 'emotion' column by combining the positive emotions
train_df['emotion'] = train_df.apply(lambda row: ', '.join([col for col in emotion_cols if row[col] == 1]), axis=1)
train_df['emotion'] = train_df['emotion'].replace('', empty_emotion_map[args.lang]) # Fill empty emotion

# Counted once and reused for both the report and the singleton detection below
emotion_counts = train_df['emotion'].value_counts()
print(emotion_counts)
print()

# Create 'stratify' column for stratified split
train_df['stratify'] = train_df['emotion']

# Identify classes with only one member
single_class = emotion_counts[emotion_counts == 1].index

# Assign a dummy value for the 'stratify' column for these classes
train_df.loc[train_df['emotion'].isin(single_class), 'stratify'] = 'dummy'

# A lone singleton label would leave the 'dummy' stratum with a single row, and a
# stratified train_test_split rejects any stratum with fewer than two rows. In that case
# fold the rarest multi-row label into 'dummy' too (value_counts sorts descending, so
# the last entry is the rarest).
if len(single_class) == 1:
    multi_row_counts = emotion_counts[emotion_counts > 1]
    if not multi_row_counts.empty:
        train_df.loc[train_df['emotion'] == multi_row_counts.index[-1], 'stratify'] = 'dummy'

# Split training DF into training and validation DFs
train_df_, val_df = train_test_split(train_df[['text', 'emotion'] + emotion_cols],
                                     test_size=0.2,
                                     stratify=train_df['stratify'],
                                     random_state=args.seed)
assert len(train_df_) + len(val_df) == len(train_df), "split lost or duplicated rows"
print("Training DF length (splitted):", len(train_df_))
print("Validation DF length:", len(val_df))

# The dev file carries no gold labels here: keep only the text and an empty 'emotion' column
test_df = pd.read_csv(os.path.join(args.data_dir, f'dev/track_a/{args.lang}_a.csv'))
test_df['emotion'] = None
test_df = test_df[['text', 'emotion']]
print("Testing DF length:", len(test_df))

emotion
joy                                429
fear, sad                          412
fear                               411
fear, surprise                     324
neutral                            239
sad                                133
fear, sad, surprise                124
surprise                           114
joy, surprise                      108
anger, fear, sad                    77
anger, fear                         66
anger                               54
anger, fear, sad, surprise          51
fear, joy                           49
anger, fear, surprise               42
fear, joy, surprise                 37
joy, sad                            25
anger, sad                          20
anger, surprise                     13
sad, surprise                       11
fear, joy, sad                      10
fear, joy, sad, surprise             5
joy, sad, surprise                   4
anger, sad, surprise                 3
anger, joy                           3
anger, fear, joy,

In [10]:
# Save preprocessed data as <preprocessed_data_dir>/<split>/<lang>.csv
train_dir = os.path.join(args.preprocessed_data_dir, 'train')
val_dir = os.path.join(args.preprocessed_data_dir, 'val')
test_dir = os.path.join(args.preprocessed_data_dir, 'test')

# os.makedirs instead of `!mkdir -p $dir`: no shell is involved, so the path is never
# word-split or re-interpreted, and an already-existing directory is not an error.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

# The DataFrame index is written as the first (unnamed) CSV column.
train_df_.to_csv(os.path.join(train_dir, f'{args.lang}.csv'))
val_df.to_csv(os.path.join(val_dir, f'{args.lang}.csv'))
test_df.to_csv(os.path.join(test_dir, f'{args.lang}.csv'))

## Create Dataset

In [11]:
# Load the tokenizer from the dedicated tokenizer setting, the same one T5FineTuner reads,
# so both stay in sync if the model and tokenizer names ever diverge in the config.
# The model itself is loaded later, inside T5FineTuner.__init__.
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name_or_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [12]:
# Token count of every distinct label string in the full training frame.
# (Single-word labels come out as 2 in the recorded output, so the counts appear to
# include the end-of-sequence token that `encode` appends.)
emotion_token_lengths = {}
for label_text in train_df['emotion'].unique():
    emotion_token_lengths[label_text] = len(tokenizer.encode(label_text))

print("Emotion token lengths:")
pprint(emotion_token_lengths, width=1)
print()

# Longest label: used as the padded target length and as the generation length limit
target_max_len = max(emotion_token_lengths.values())
print("Target max. length:", target_max_len)

Emotion token lengths:
{'anger': 2,
 'anger, fear': 4,
 'anger, fear, joy, sad': 8,
 'anger, fear, joy, sad, surprise': 10,
 'anger, fear, sad': 6,
 'anger, fear, sad, surprise': 8,
 'anger, fear, surprise': 6,
 'anger, joy': 4,
 'anger, joy, surprise': 6,
 'anger, sad': 4,
 'anger, sad, surprise': 6,
 'anger, surprise': 4,
 'fear': 2,
 'fear, joy': 4,
 'fear, joy, sad': 6,
 'fear, joy, sad, surprise': 8,
 'fear, joy, surprise': 6,
 'fear, sad': 4,
 'fear, sad, surprise': 6,
 'fear, surprise': 4,
 'joy': 2,
 'joy, sad': 4,
 'joy, sad, surprise': 6,
 'joy, surprise': 4,
 'neutral': 2,
 'sad': 2,
 'sad, surprise': 4,
 'surprise': 2}

Target max. length: 10


In [13]:
class EmotionDataset(Dataset):
  """Seq2seq emotion dataset read from `<data_dir>/<data_split>/<lang>.csv`.

  Each row is tokenised once at construction time. An item is a dict with:
    - "source_ids" / "source_mask": input text, padded/truncated to `max_len`
    - "target_ids" / "target_mask": label string, padded/truncated to `target_max_len`
    - "one_hot_target": the row's values of `one_hot_class_columns` as a plain list

  Args:
    tokenizer: callable tokenizer returning "input_ids" and "attention_mask" tensors
      when called with `return_tensors="pt"`; must also provide `encode(text)`
      when `target_max_len` is not given.
    lang: language code; selects the CSV file name.
    one_hot_class_columns: list of one-hot label column names.
    data_dir: directory holding one sub-directory per split.
    data_split: split sub-directory name, e.g. 'train' or 'val'.
    data_column: name of the input-text column.
    class_column: name of the label-string column.
    max_len: padded length of the tokenised input.
    target_max_len: padded length of the tokenised label. When None, the longest
      tokenised label found in this split is used, so the class no longer relies
      on a notebook-level global of the same name.
  """

  def __init__(self, tokenizer,
               lang, one_hot_class_columns,
               data_dir, data_split,
               data_column='text', class_column='emotion', max_len=512,
               target_max_len=None):
    self.data_column = data_column
    self.one_hot_class_columns = one_hot_class_columns
    self.class_column = class_column

    self.data = pd.read_csv(os.path.join(data_dir, data_split, f'{lang}.csv'))

    self.max_len = max_len
    self.target_max_len = target_max_len
    self.tokenizer = tokenizer
    self.inputs = []
    self.targets = []
    self.one_hot_targets = []

    self._build()

  def __len__(self):
    return len(self.inputs)

  def __getitem__(self, index):
    encoded_input = self.inputs[index]
    encoded_target = self.targets[index]

    # Each row was tokenised as a batch of one: drop only that leading batch axis.
    return {
        "source_ids": encoded_input["input_ids"].squeeze(0),
        "source_mask": encoded_input["attention_mask"].squeeze(0),
        "target_ids": encoded_target["input_ids"].squeeze(0),
        "target_mask": encoded_target["attention_mask"].squeeze(0),
        "one_hot_target": self.one_hot_targets[index],
    }

  def _build(self):
    """Tokenise every row and fill `inputs`, `targets` and `one_hot_targets`."""
    # Pull whole columns once instead of a `.loc` lookup per row.
    texts = self.data[self.data_column].tolist()
    labels = self.data[self.class_column].tolist()
    one_hot_rows = self.data[self.one_hot_class_columns].values.tolist()

    if self.target_max_len is None:
      # No explicit length given: size the targets to the longest label of this split.
      self.target_max_len = max((len(self.tokenizer.encode(label)) for label in labels), default=1)

    for text, label, one_hot_row in zip(texts, labels, one_hot_rows):
      # tokenize inputs
      tokenized_inputs = self.tokenizer(
          [text], max_length=self.max_len, return_tensors="pt", padding='max_length', truncation=True,
      )
      # tokenize targets
      tokenized_targets = self.tokenizer(
          [label], max_length=self.target_max_len, return_tensors="pt", padding='max_length', truncation=True,
      )

      self.inputs.append(tokenized_inputs)
      self.targets.append(tokenized_targets)
      self.one_hot_targets.append(one_hot_row)

In [14]:
def get_dataset(tokenizer, type_path, args):
    """Build the `EmotionDataset` of one split (`type_path`, e.g. 'train' or 'val').

    Language, data directory and maximum input length come from `args`; the one-hot
    label columns come from the notebook-level `emotion_cols`.
    """
    dataset_kwargs = {
        'tokenizer': tokenizer,
        'lang': args.lang,
        'one_hot_class_columns': emotion_cols,
        'data_dir': args.preprocessed_data_dir,
        'data_split': type_path,
        'max_len': args.max_seq_length,
    }
    return EmotionDataset(**dataset_kwargs)

In [15]:
train_set = get_dataset(tokenizer, 'train', args)

# Eyeball the first ten examples: decoded source, decoded target, one-hot label row
for sample in (train_set[position] for position in range(10)):
    print(tokenizer.decode(sample['source_ids']))
    print(tokenizer.decode(sample['target_ids']))
    print(sample['one_hot_target'])
    print()

I now have 12 of those canker sore suckers in my mouth along with a fever since friday.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

In [16]:
# Check how a batch's one-hot labels come out of the default collate: the per-sample
# lists arrive as a list of per-label tensors, so stacking and transposing gives back
# one row per sample (see the recorded output: 2 rows of 5 labels).
train_loader = DataLoader(train_set, batch_size=2)
batch = next(iter(train_loader))
one_hot_batch = torch.stack(batch['one_hot_target']).T
print(one_hot_batch.tolist())

[[0, 1, 0, 1, 0], [0, 1, 0, 1, 1]]


# Training

In [17]:
class T5FineTuner(pl.LightningModule):
  """LightningModule that fine-tunes a seq2seq checkpoint to generate emotion labels.

  Optimisation is manual: `training_step` runs backward on every batch and steps the
  optimizer and the LR scheduler once every `hparams.gradient_accumulation_steps`
  batches, clipping the gradient norm to `hparams.max_grad_norm` first.

  Args:
    hparams: namespace/mapping with at least model_name_or_path,
      tokenizer_name_or_path, learning_rate, weight_decay, adam_epsilon,
      warmup_steps, train_batch_size, eval_batch_size, num_train_epochs,
      gradient_accumulation_steps, n_gpu and max_grad_norm, plus whatever
      `get_dataset` reads from it.
  """

  def __init__(self, hparams):
    super().__init__()
    # Stepping, accumulation and clipping are all done by hand in training_step.
    self.automatic_optimization = False

    self.save_hyperparameters(hparams)

    self.model = AutoModelForSeq2SeqLM.from_pretrained(hparams.model_name_or_path)
    self.tokenizer = AutoTokenizer.from_pretrained(hparams.tokenizer_name_or_path)

    # Per-epoch buffers of detached losses; emptied at the end of every epoch.
    self.training_step_outputs = []
    self.validation_step_outputs = []

  def is_logger(self):
    """True on the rank-zero process (the only one that should write logs/files)."""
    return self.trainer.global_rank <= 0

  def on_train_start(self):
    # The recorded model summary lists every module in eval mode (0 in train mode),
    # i.e. the pretrained model is handed over with dropout disabled. Switch to train
    # mode explicitly so fitting runs with dropout active.
    # NOTE(review): whether the Trainer does this on its own depends on the installed
    # Lightning version; the explicit call is harmless either way.
    self.model.train()

  def on_train_end(self):
    # Hand the model back in eval mode so later generate() calls run without dropout.
    self.model.eval()

  def forward(
      self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None, labels=None
  ):
    """Delegate to the wrapped seq2seq model."""
    return self.model(
        input_ids,
        attention_mask=attention_mask,
        decoder_input_ids=decoder_input_ids,
        decoder_attention_mask=decoder_attention_mask,
        labels=labels,
    )

  def _step(self, batch):
    """Return the model loss for one batch (pad positions of the target are ignored)."""
    # Copy first: the batch keeps its real pad ids, only the labels get the -100 marker.
    labels = batch["target_ids"].clone()
    labels[labels == self.tokenizer.pad_token_id] = -100

    outputs = self(
        input_ids=batch["source_ids"],
        attention_mask=batch["source_mask"],
        labels=labels,
        decoder_attention_mask=batch['target_mask']
    )

    # With labels supplied, the first element of the model output is the loss.
    return outputs[0]

  def training_step(self, batch, batch_idx):
    """Backward on one batch; step optimizer and scheduler every N-th batch."""
    opt = self.optimizers()

    # scale losses by 1/N (for N batches of gradient accumulation)
    N = self.hparams.gradient_accumulation_steps
    batch_loss = self._step(batch)
    loss = batch_loss / N
    self.manual_backward(loss)

    # accumulate gradients of N batches
    if (batch_idx + 1) % N == 0:
      # clip gradients
      self.clip_gradients(opt, gradient_clip_val=self.hparams.max_grad_norm, gradient_clip_algorithm="norm")

      opt.step()
      opt.zero_grad()
      self.lr_scheduler.step()

    # Report the unscaled batch loss; detach so the buffer does not keep graph references.
    reported_loss = batch_loss.detach()
    self.log("train_loss", reported_loss, prog_bar=True, batch_size=batch["source_ids"].size(0))
    self.training_step_outputs.append({"loss": reported_loss})
    return {"loss": loss.detach()}

  def on_train_epoch_end(self):
    """Log the mean training loss of the epoch and reset the buffer."""
    if not self.training_step_outputs:
      return
    avg_train_loss = torch.stack([x["loss"] for x in self.training_step_outputs]).mean()
    self.log("avg_train_loss", avg_train_loss, prog_bar=True)
    self.training_step_outputs.clear()

  def validation_step(self, batch, batch_idx):
    """Compute and buffer the validation loss of one batch."""
    loss = self._step(batch).detach()
    self.validation_step_outputs.append({"val_loss": loss})
    return {"val_loss": loss}

  def on_validation_epoch_end(self):
    """Log the mean validation loss as "val_loss" and reset the buffer."""
    if not self.validation_step_outputs:
      return
    avg_loss = torch.stack([x["val_loss"] for x in self.validation_step_outputs]).mean()
    # Logged through self.log so callbacks (trainer.callback_metrics, checkpoint monitors) can see it.
    self.log("val_loss", avg_loss, prog_bar=True)
    # Without this reset, sanity-check batches and earlier epochs would leak into later averages.
    self.validation_step_outputs.clear()

  def configure_optimizers(self):
    "Prepare optimizer and schedule (linear warmup and decay)"

    model = self.model
    # "layer_norm.weight" covers T5-style parameter names, which the BERT-style
    # "LayerNorm.weight" pattern does not match.
    no_decay = ["bias", "LayerNorm.weight", "layer_norm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": self.hparams.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)
    # Kept on the module because train_dataloader() builds the LR scheduler around it.
    self.opt = optimizer
    return [optimizer]

  def get_tqdm_dict(self):
    """Legacy progress-bar hook.

    NOTE(review): reads `self.trainer.avg_loss`, which nothing in this class sets;
    verify it exists in the installed Lightning version before relying on this.
    """
    tqdm_dict = {"loss": "{:.3f}".format(self.trainer.avg_loss), "lr": self.lr_scheduler.get_last_lr()[-1]}
    return tqdm_dict

  def train_dataloader(self):
    """Build the shuffled training loader and, as a side effect, the LR scheduler."""
    train_dataset = get_dataset(tokenizer=self.tokenizer, type_path="train", args=self.hparams)
    dataloader = DataLoader(train_dataset, batch_size=self.hparams.train_batch_size,
                            drop_last=True, shuffle=True, num_workers=2)
    # Total optimizer steps: batches per epoch // accumulation steps, times epochs.
    t_total = (
        (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))
        // self.hparams.gradient_accumulation_steps
        * float(self.hparams.num_train_epochs)
    )
    # Requires configure_optimizers() to have run already (it sets self.opt).
    scheduler = get_linear_schedule_with_warmup(
        self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total
    )
    self.lr_scheduler = scheduler
    return dataloader

  def val_dataloader(self):
    """Build the (unshuffled) validation loader."""
    val_dataset = get_dataset(tokenizer=self.tokenizer, type_path="val", args=self.hparams)
    return DataLoader(val_dataset, batch_size=self.hparams.eval_batch_size, num_workers=2)

In [18]:
logger = logging.getLogger(__name__)

class LoggingCallback(pl.Callback):
  """Writes the trainer's callback metrics to the log after validation and after
  testing; after testing they are additionally saved to `<output_dir>/test_results.txt`.
  """

  # Bookkeeping keys that are never reported as metrics
  _SKIPPED_KEYS = ("log", "progress_bar")

  @classmethod
  def _metric_lines(cls, metrics):
    """Yield one 'name = value' line per metric, ordered by metric name."""
    for name in sorted(metrics):
      if name in cls._SKIPPED_KEYS:
        continue
      yield "{} = {}\n".format(name, str(metrics[name]))

  def on_validation_end(self, trainer, pl_module):
    logger.info("***** Validation results *****")
    if not pl_module.is_logger():
      return
    # Log results
    for line in self._metric_lines(trainer.callback_metrics):
      logger.info(line)

  def on_test_end(self, trainer, pl_module):
    logger.info("***** Test results *****")
    if not pl_module.is_logger():
      return

    metrics = trainer.callback_metrics
    output_test_results_file = os.path.join(pl_module.hparams.output_dir, "test_results.txt")

    # Log and save results to file
    with open(output_test_results_file, "w") as writer:
      for line in self._metric_lines(metrics):
        logger.info(line)
        writer.write(line)

In [19]:
# Create the output directory without going through a shell: the path is passed
# verbatim, and an already-existing directory is fine.
os.makedirs(args.output_dir, exist_ok=True)

In [20]:
# Keeps the five checkpoints with the lowest "val_loss".
# NOTE(review): this object is not part of `train_params['callbacks']` below, so as
# written it takes no part in training; confirm whether that is intended.
checkpoint_callback = pl.callbacks.ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=5)

# Keyword arguments for pl.Trainer. Gradient accumulation and gradient clipping are
# deliberately absent: T5FineTuner.training_step performs both by hand.
train_params = {
    'max_epochs': args.num_train_epochs,
    'precision': 16 if args.fp_16 else 32,
    'callbacks': [LoggingCallback()],
    'accelerator': 'auto',
    'devices': 'auto',
    'strategy': 'auto',
}

In [21]:
# Build the LightningModule; its constructor loads both the seq2seq model and the
# tokenizer named in `args`.
model = T5FineTuner(args)



In [22]:
# Trainer configured from the `train_params` dict built two cells above.
trainer = pl.Trainer(**train_params)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [23]:
# No dataloaders are passed: the Trainer uses T5FineTuner.train_dataloader() and
# val_dataloader().
# NOTE: the recorded run of this cell (google-t5/t5-large, 737 M parameters) ended in a
# CUDA OutOfMemoryError on a 14.75 GiB GPU; see the output below.
trainer.fit(model)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                       | Params | Mode
------------------------------------------------------------
0 | model | T5ForConditionalGeneration | 737 M  | eval
------------------------------------------------------------
737 M     Trainable params
0         Non-trainable params
737 M     Total params
2,950.672 Total estimated model params size (MB)
0         Modules in train mode
1069      Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 1.06 MiB is free. Process 191701 has 14.74 GiB memory in use. Of the allocated memory 14.61 GiB is allocated by PyTorch, and 8.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Evaluation

In [None]:
val_set = get_dataset(tokenizer, 'val', args)
# No shuffling for evaluation: predictions then come out in the dataset's row order,
# independent of RNG state, so they can be lined up with the validation rows.
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)

In [None]:
def one_hot_encode_emotion(emotion, emotion_columns=None):
    """Turn a generated label string into a one-hot list.

    Args:
        emotion: comma-separated label words, e.g. 'fear, sad'. All spaces are
            ignored. Words that are not in `emotion_columns` (such as the
            'neutral' placeholder) simply set no position.
        emotion_columns: label words in output order. Defaults to the
            notebook-level `emotion_cols`.

    Returns:
        A list with one 0/1 entry per label in `emotion_columns`.
    """
    if emotion_columns is None:
        emotion_columns = emotion_cols
    # A set gives constant-time membership tests and ignores repeated words.
    predicted = set(emotion.replace(' ', '').split(','))
    return [1 if emotion_col in predicted else 0 for emotion_col in emotion_columns]

y_true = []
y_pred = []

# One-off inference setup: pick the device, move the model there once (instead of on
# every batch) and switch it to eval mode so dropout cannot perturb generation.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
generator = model.model.to(device)
generator.eval()

for batch in tqdm(val_loader, desc="Evaluating"):
    # Collated one-hot labels arrive as one tensor per label column: back to one row per sample
    one_hot_targets = torch.stack(batch['one_hot_target']).T.tolist()

    output_ids = generator.generate(input_ids=batch['source_ids'].to(device),
                                    attention_mask=batch['source_mask'].to(device),
                                    max_length=target_max_len)
    outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    one_outputs = [one_hot_encode_emotion(output) for output in outputs]

    assert len(one_outputs) == len(one_hot_targets)

    y_true += one_hot_targets
    y_pred += one_outputs

In [None]:
# F1 under micro and macro averaging plus the per-label scores (average=None).
# zero_division=0.0 is the value reported wherever the score would divide by zero.
# 'weighted' and 'samples' averaging can be added to the tuple in the same way.
f1_micro, f1_macro, f1_per_label = (
    f1_score(y_true, y_pred, average=averaging, zero_division=0.0)
    for averaging in ('micro', 'macro', None)
)

print(
    f'F1 Score (Micro-Average): {f1_micro}',
    f'F1 Score (Macro-Average): {f1_macro}',
    '',
    sep='\n',
)

# Output F1 per label
for label, label_f1 in zip(emotion_cols, f1_per_label):
    print(f"F1 Score for '{label}': {label_f1}")

# Report

## Models
| Model    	| Total Parameters 	| Total Estimated Parameters Size 	| GPU RAM Used 	|
|----------	|------------------	|---------------------------------	|--------------	|
| google-t5/t5-small 	| 60.5 M           	| ?                               	| 2.6 GB       	|
| google-t5/t5-base  	| 222 M            	| 891.614 MB                      	| 7.7 GB       	|
| google-t5/t5-large  	| 737 M            	| 2,950.672 MB                      	| ?       	|

## Evaluation
### English
| Model    	| F1 Score (Micro-Average) 	| F1 Score (Macro-Average) 	| F1 Score for 'anger' 	| F1 Score for 'disgust' 	| F1 Score for 'fear' 	| F1 Score for 'joy' 	| F1 Score for 'sad' 	| F1 Score for 'surprise' 	|
|----------	|--------------------------	|--------------------------	|----------------------	|------------------------	|---------------------	|--------------------	|--------------------	|-------------------------	|
| google-t5/t5-small 	| 0.6123093108890058       	| 0.4908088446307367       	| 0.0759493670886076   	| -                      	| 0.7564766839378239  	| 0.5110132158590308 	| 0.5625             	| 0.5481049562682215      	|
| google-t5/t5-base  	| 0.6885964912280702       	| 0.6155755742307234       	| 0.3695652173913043   	| -                      	| 0.8050847457627118  	| 0.6212121212121212 	| 0.6522781774580336 	| 0.6297376093294461      	|