
<h1 align="center"><font size="5"> Emotion Analysis with LLMs </font></h1>

<h2><center>Facebook-OPT, RoBERTa & ModernBERT</center></h2>

<h3><center> PEFT fine-tuning with LoRA </center></h3>

In [None]:
!pip install --upgrade -q torch torchao transformers
!pip install -q flash-attn
!pip install -q evaluate
!pip install -q plotly
!pip install -q imbalanced-learn
!pip install -q --upgrade peft
!pip install -q scikit-posthocs

In [None]:
# Mount Google Drive in the Colab runtime and switch into the project folder,
# so the relative paths used later in the notebook resolve against it.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive/MyDrive/NLP-llm-fine-tuning

Mounted at /gdrive
/gdrive/MyDrive/NLP-llm-fine-tuning


In [None]:
# Connect to Hugging Face: read the access token stored as the Colab secret
# named 'HF-LLM'.
from google.colab import userdata
HF_access = userdata.get('HF-LLM')

## 1. Setup

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from collections import Counter
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import DataLoader
from transformers import (Trainer, TrainingArguments)
from peft import get_peft_model
from utilities.utils import (build_tokenizer, lora_peft, seq_class_init, collate_func,
                            compute_classification_metrics,get_lora_model_for_seq_class,
                            predict, model_postprocessing, macnemar_comparison)

from utilities.emotions_dataset import EmotionsDataset

import gc

# One fixed seed for PyTorch and NumPy; the same value is reused below for
# resampling and for the train/validation/test splits.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## 2. Emotions Dataset


<h3 id="reviewing_data"> 2.1 About the dataset</h3>

**`text.csv.zip`:**
The dataset consists of tweets with emotional content, each paired with a single label. It contains more than 400,000 tweets in total. The emotions are classified into six categories: sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5).
The dataset can be sourced from:
https://www.kaggle.com/datasets/nelgiriyewithana/emotions/data

<h3> 2.2 Read dataset </h3>


In [None]:
# Load the zipped CSV, using its first column as the row index, and preview
# the first rows.
df = pd.read_csv('text.csv.zip', index_col=[0])
df.head()

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,4
1,ive enjoyed being able to slouch about relax a...,0
2,i gave up my internship with the dmrg and am f...,4
3,i dont know i feel so lost,0
4,i am a kindergarten teacher and i am thoroughl...,4


In [None]:
# Map label to emotion: the position in the list is the integer class id.
label_to_emotion = dict(enumerate(['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']))

In [None]:
# Add a human-readable emotion name for every integer label.
df['emotions'] = df['label'].map(label_to_emotion)

In [None]:
# Reverse lookup: emotion name -> integer label.
emotion_to_label = dict(zip(label_to_emotion.values(), label_to_emotion.keys()))
print(emotion_to_label)

{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}


In [None]:
# Keep only the text and its integer label for resampling and modelling.
df_emotions = df[['text', 'label']]

In [None]:
import plotly.express as px

# Bar chart of how many tweets fall into each emotion class.
# The plotted frame has the columns 'emotions' and 'count' (see x= / y=), so
# the axis-title mapping has to be keyed by exactly those column names.
emotion_counts = df.emotions.value_counts().reset_index()
fig = px.bar(emotion_counts, x='emotions', y='count',
             title='Emotions distributions', width=500, height=400, template='simple_white',
             color_discrete_sequence=['#088F8F'],
             labels={"emotions": "Emotions",
                     "count": "Frequency"})
fig.update_yaxes(showgrid=True)
fig.show()

## 3. Split Dataset

### 3.1 Balance Sampling

In [None]:
from utilities.balance_dataset import under_sampling_cat

In [None]:
# Requested sizes for labels 0 and 1 (57,000 rows each). Labels not listed
# are presumably left at their original size by the helper -- confirm in
# utilities.balance_dataset.
undersampling = {0: 57000, 1: 57000}

# The texts are passed as a single-column 2-D array.
features = df_emotions.text.values.reshape(-1, 1)
targets = df_emotions.label.values
X_res, y_res = under_sampling_cat(features, targets, seed, strategy=undersampling)

In [None]:
# Class sizes after resampling, keyed by emotion name and ordered from the
# most frequent to the least frequent class.
final_sample = {label_to_emotion[label]: size
                for label, size in Counter(y_res).most_common()}
print("Resample shape:")
print(final_sample)


Resample shape:
{'anger': 57317, 'sadness': 57000, 'joy': 57000, 'fear': 47712, 'love': 34554, 'surprise': 14972}


In [None]:
# Bar chart of the class sizes after resampling.
emotion_names = list(final_sample.keys())
emotion_sizes = list(final_sample.values())
fig_re = px.bar(
    x=emotion_names,
    y=emotion_sizes,
    title='Emotions distributions after resampling',
    width=600,
    height=400,
    template='simple_white',
    color_discrete_sequence=['#088F8F'],
    labels={"x": "Emotions", "y": "Frequency"},
)
fig_re.update_yaxes(showgrid=True)
fig_re.show()


### 3.2 Split Dataset

In [None]:
# Flattened copies of the resampled data.
# NOTE(review): X and y are not used by the split below, which works on
# X_res / y_res directly; the sampling cell later reads the text with
# X_test[i][0], so the split data appears to stay 2-D -- confirm before
# switching the split to X / y.
X = X_res.reshape(-1)
y = y_res
# Stratified split: 80% for training, 20% held out for validation and test.
X_train, X_val, y_train, y_val = train_test_split(X_res, y_res, test_size=0.20, stratify=y_res,
                                                  random_state= seed)

In [None]:
# Split the 20% hold-out again (stratified): 60% of it stays as the
# validation set and 40% becomes the test set, i.e. roughly 12% / 8% of the
# resampled data.
X_val, X_test, y_val, y_test  = train_test_split(X_val, y_val, test_size=0.40,
                                                 stratify=y_val, random_state= seed)

## 4. Prepare Training

In [None]:
from peft.utils import constants

In [None]:
# Hyper-parameters shared by every fine-tuning run in this notebook.
training_args = TrainingArguments(
    output_dir=None,  # output directory; assigned per model inside main()
    learning_rate=1e-4,
    num_train_epochs=2,
    use_cpu=device.type == 'cpu',
    # dataloader_num_workers=4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    group_by_length=True,
    logging_steps=5,
    weight_decay=0.05,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    report_to="none",
)

In [None]:
# PEFT's built-in table of default LoRA target modules; main() looks it up
# with the prefix of the checkpoint name (e.g. "roberta", "opt").
target_modules_map = constants.TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING

In [None]:
def main(trainer_args, **kwargs):
    """Build a ``Trainer`` that fine-tunes one checkpoint with LoRA adapters.

    Args:
        trainer_args: ``TrainingArguments`` instance. Its ``output_dir`` and
            ``logging_dir`` are overwritten in place with per-model paths.
        **kwargs: Run configuration. Keys read here: ``check_point``,
            ``output_dir``, ``max_length``, ``x_train``, ``y_train``,
            ``x_val``, ``y_val``, ``num_labels``, ``id2label``, ``label2id``,
            ``device`` and ``quantized``. Other keys (for example
            ``modules_to_save``) are accepted but not read by this function.

    Returns:
        The configured ``Trainer``. Training is not started here; the caller
        invokes ``.train()`` on the returned object.

    Raises:
        KeyError: If one of the keys listed above is missing, or if the
            checkpoint prefix has no entry in ``target_modules_map``.
    """
    # "facebook/opt-350m" -> "opt-350m"; used for paths and model dispatch.
    name = kwargs['check_point'].split("/")[-1].lower()

    # Each model writes checkpoints and logs into its own sub-directory.
    model_dir = f"{kwargs['output_dir']}/peft-{name}"
    trainer_args.output_dir = model_dir
    trainer_args.logging_dir = f"{model_dir}/logs"

    tokenizer = build_tokenizer(kwargs['check_point'], kwargs['max_length'])

    train_dataset = EmotionsDataset(kwargs['x_train'], kwargs['y_train'],
                                    tokenizer, kwargs['max_length'])
    val_dataset = EmotionsDataset(kwargs['x_val'], kwargs['y_val'],
                                  tokenizer, kwargs['max_length'])

    data_collator = collate_func(tokenizer=tokenizer)

    model = seq_class_init(kwargs['check_point'],
                           num_labels=kwargs['num_labels'],
                           id2label=kwargs['id2label'],
                           label2id=kwargs['label2id'],
                           device=kwargs['device'],
                           quantized=kwargs['quantized'])
    model = model_postprocessing(model, name)

    # ModernBERT layer names are listed explicitly; every other model family
    # is looked up in PEFT's default mapping by the checkpoint-name prefix.
    if 'modernbert' in name:
        target_modules = ["Wqkv", "Wo", "Wi"]
    else:
        target_modules = target_modules_map[name.split('-')[0]]

    peft_config = lora_peft(target_modules=target_modules)
    # The PEFT config field is `inference_mode`; any other spelling only adds
    # an attribute that PEFT never reads.
    peft_config.inference_mode = False

    peft_model = get_peft_model(model, peft_config)
    peft_model.config.use_cache = False

    llm_trainer = Trainer(
        model=peft_model,
        args=trainer_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        processing_class=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_classification_metrics,
    )

    print("")
    print("------------------------------Training-----------------------------")
    print("")

    print(f"PEFT {name.capitalize()} Model")
    peft_model.print_trainable_parameters()
    return llm_trainer


In [None]:
# Hugging Face checkpoints to fine-tune; later cells select them by index.
check_points = [
    "roberta-base",
    "facebook/opt-350m",
    "answerdotai/ModernBERT-base",
]

In [None]:
# Run configuration passed to main() as keyword arguments. It starts with the
# first checkpoint; the training cells overwrite 'check_point' and
# 'modules_to_save' before each subsequent run.
params = {
    'check_point': check_points[0],
    'quantized': False,
    'num_labels': 6,
    'x_train': X_train,
    'y_train': y_train,
    'x_val': X_val,
    'y_val': y_val,
    'max_length': 128,
    'id2label': label_to_emotion,
    'label2id': emotion_to_label,
    'device': device,
    'output_dir': './fine-tuned-lora',
    'modules_to_save': [
        'classifier.dense.bias',
        'classifier.dense.weight',
        'classifier.out_proj.bias',
        'classifier.out_proj.weight',
    ],
}

## 5. Start Training

### 5.1 Roberta

In [None]:
# Build the LoRA trainer for the checkpoint currently set in params
# (check_points[0], i.e. roberta-base) and run the fine-tuning.
train_roberta = main(training_args, **params)
train_roberta.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



------------------------------Training-----------------------------

PEFT Roberta-base Model
trainable params: 890,118 || all params: 125,540,364 || trainable%: 0.7090


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Matthews Correlation
1,0.1737,0.163268,0.940264,0.945474,0.940264,0.94106,0.927441
2,0.0716,0.140079,0.941867,0.947099,0.941867,0.942529,0.929445


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.56k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.38k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.79k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.60k [00:00<?, ?B/s]

TrainOutput(global_step=53712, training_loss=0.20948075339874223, metrics={'train_runtime': 3876.4366, 'train_samples_per_second': 110.846, 'train_steps_per_second': 13.856, 'total_flos': 2.855915201396045e+16, 'train_loss': 0.20948075339874223, 'epoch': 2.0})

In [None]:
# Drop the trainer reference and release cached GPU memory before training
# the next model.
del train_roberta
torch.cuda.empty_cache()

### 5.2 Opt

In [None]:
# Switch the shared configuration to the OPT checkpoint.
params['check_point'] = check_points[1]
# OPT's sequence-classification head is named `score`, not `classifier`: the
# only newly initialised weight reported when loading it is 'score.weight'.
params['modules_to_save'] = ['score.weight']
train_opt = main(training_args, **params)
train_opt.train()

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



------------------------------Training-----------------------------

PEFT Opt-350m Model
trainable params: 789,504 || all params: 331,988,992 || trainable%: 0.2378


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Matthews Correlation
1,0.2043,0.15853,0.944298,0.94967,0.944298,0.945197,0.93238
2,0.125,0.140864,0.944557,0.947479,0.944557,0.944273,0.93241


TrainOutput(global_step=53712, training_loss=0.19073274353256395, metrics={'train_runtime': 8854.9182, 'train_samples_per_second': 48.525, 'train_steps_per_second': 6.066, 'total_flos': 1.0037145534549197e+17, 'train_loss': 0.19073274353256395, 'epoch': 2.0})

In [None]:
del train_opt
torch.cuda.empty_cache()

### 5.3 ModernBert

In [None]:
# check_points[2] is the ModernBERT checkpoint; index 1 would train OPT again.
params['check_point'] = check_points[2]
params['modules_to_save'] = ['classifier.bias', 'classifier.weight']
train_mbert = main(training_args, **params)
train_mbert.train()

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



------------------------------Training-----------------------------

PEFT Modernbert-base Model
trainable params: 1,694,214 || all params: 151,303,692 || trainable%: 1.1197



TensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.



Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Matthews Correlation
1,0.1299,0.118515,0.945229,0.950781,0.945229,0.946141,0.93355
2,0.0617,0.113943,0.945901,0.950437,0.945901,0.946538,0.93428


TrainOutput(global_step=53712, training_loss=0.14810486959876915, metrics={'train_runtime': 9068.0348, 'train_samples_per_second': 47.385, 'train_steps_per_second': 5.923, 'total_flos': 3.716562276059674e+16, 'train_loss': 0.14810486959876915, 'epoch': 2.0})

In [None]:
# Release the ModernBERT trainer (train_opt was already deleted after the OPT
# run, so deleting it again would raise NameError) and free cached GPU memory.
del train_mbert
torch.cuda.empty_cache()

## 6. Inference

In [None]:
# Restrict transformers logging to errors for the inference cells.
from transformers import logging
logging.set_verbosity_error()

In [None]:
# Checkpoint directories written by the three training runs (step 53712 is
# the final global step of each run).
# NOTE(review): the inference cells spell these paths out again instead of
# using these variables.
path_to_roberta_adapter = "fine-tuned-lora/peft-roberta-base/checkpoint-53712/"
path_to_opt_adapter = "fine-tuned-lora/peft-opt-350m/checkpoint-53712/"
path_to_modernbert_adapter = "fine-tuned-lora/peft-modernbert-base/checkpoint-53712/"

In [None]:
def inference_dataset(model_name, max_length, text, labels, batch_size=8):
    """Build a non-shuffled ``DataLoader`` over ``text`` / ``labels``.

    Args:
        model_name: Checkpoint name passed to ``build_tokenizer``.
        max_length: Maximum sequence length, passed to both the tokenizer
            builder and ``EmotionsDataset``.
        text: Input texts to evaluate.
        labels: Labels belonging to ``text``.
        batch_size: Number of samples per batch. Defaults to 8.

    Returns:
        A ``DataLoader`` that yields collated batches in input order
        (``shuffle=False``), so predictions stay aligned with ``labels``.
    """
    tokenizer = build_tokenizer(model_name, max_length)
    test_dataset = EmotionsDataset(text, labels, tokenizer, max_length)

    return DataLoader(test_dataset,
                      shuffle=False,
                      collate_fn=collate_func(tokenizer),
                      batch_size=batch_size)

### 6.1 RoBERTa-LoRA

In [None]:
# Final RoBERTa checkpoint produced by the training run above.
peft_model_id = "fine-tuned-lora/peft-roberta-base/checkpoint-53712"

# Load the model with the saved LoRA adapter, together with the adapter config.
inference_model, config = get_lora_model_for_seq_class(peft_model_id, num_labels=params['num_labels'],
                                          label2id=params['label2id'], id2label=params['id2label'])

# The adapter config records which base checkpoint it was trained on; the
# tokenizer for the test loader is built from that name.
model_name = config.base_model_name_or_path
test_dataloader = inference_dataset(model_name, max_length=params['max_length'],
                                            text=X_test, labels=y_test)

# Test-set metrics table and the per-sample predicted labels.
rpeft_result, r_pred = predict(inference_model, test_dataloader, model_name, device=device)


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

100%|██████████| 2686/2686 [02:23<00:00, 18.74it/s]


In [None]:
# Display the test-set metrics of the RoBERTa adapter.
rpeft_result

Unnamed: 0,model_name,accuracy,precision,recall,f1,matthews_correlation
0,roberta-base,0.94438,0.949431,0.94438,0.945051,0.932478


### 6.2 Opt-350m-LoRA

In [None]:
# Final OPT-350m checkpoint produced by the training run above.
peft_model_id = "fine-tuned-lora/peft-opt-350m/checkpoint-53712"

# Load the model with the saved LoRA adapter, together with the adapter config.
inference_model, config = get_lora_model_for_seq_class(peft_model_id, num_labels=params['num_labels'],
                                          label2id=params['label2id'], id2label=params['id2label'])

# The adapter config records which base checkpoint it was trained on; the
# tokenizer for the test loader is built from that name.
model_name = config.base_model_name_or_path
test_dataloader = inference_dataset(model_name, max_length=params['max_length'],
                                            text=X_test, labels=y_test)

# Test-set metrics table and the per-sample predicted labels.
optpeft_result, opt_pred = predict(inference_model, test_dataloader, model_name, device=device)

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/662M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

100%|██████████| 2686/2686 [07:53<00:00,  5.68it/s]


In [None]:
# Display the test-set metrics of the OPT-350m adapter.
optpeft_result

Unnamed: 0,model_name,accuracy,precision,recall,f1,matthews_correlation
0,facebook/opt-350m,0.947079,0.949574,0.947079,0.946699,0.93545


### 6.3 ModernBERT-LoRA

In [None]:
# Final ModernBERT checkpoint produced by the training run above.
peft_model_id = "fine-tuned-lora/peft-modernbert-base/checkpoint-53712"

# Load the model with the saved LoRA adapter, together with the adapter config.
inference_model, config = get_lora_model_for_seq_class(peft_model_id, num_labels=params['num_labels'],
                                          label2id=params['label2id'], id2label=params['id2label'])

# The adapter config records which base checkpoint it was trained on; the
# tokenizer for the test loader is built from that name.
model_name = config.base_model_name_or_path
test_dataloader = inference_dataset(model_name, max_length=params['max_length'],
                                            text=X_test, labels=y_test)

# Test-set metrics table and the per-sample predicted labels.
mbertpeft_result, mbert_pred = predict(inference_model, test_dataloader, model_name, device=device)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

  0%|          | 0/2686 [00:00<?, ?it/s]W0401 17:50:48.192000 4646 torch/_inductor/utils.py:1137] [4/0] Not enough SMs to use max_autotune_gemm mode
100%|██████████| 2686/2686 [04:13<00:00, 10.58it/s]


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
# Display the test-set metrics of the ModernBERT adapter.
mbertpeft_result

Unnamed: 0,model_name,accuracy,precision,recall,f1,matthews_correlation
0,answerdotai/ModernBERT-base,0.947172,0.951624,0.947172,0.947861,0.935787


In [None]:
# The evaluation loader is bound to `test_dataloader` in the inference cells;
# drop it and release cached GPU memory.
del test_dataloader
torch.cuda.empty_cache()

## 7. Statistical Comparison

In [None]:
# Pairwise Kruskal-Wallis H-tests on the predicted label arrays of each pair
# of models.
# NOTE(review): the predictions are class ids made on the same test tweets
# (paired, nominal data); a paired test such as McNemar's -- a
# `macnemar_comparison` helper is already imported from utilities.utils --
# may be the better fit. Confirm the intended test.
compare_rob_opt = stats.kruskal(r_pred, opt_pred)
compare_rob_mbert = stats.kruskal(r_pred, mbert_pred)
compare_opt_mbert = stats.kruskal(opt_pred, mbert_pred)

In [None]:
# Report the p-value of each pairwise comparison, rounded to four decimals.
print(f'RoBERTa-LoRA vs OPT-350m-LoRA, pvalue: {float(compare_rob_opt.pvalue):.4f}')
print('-----------------' * 3)
print(f'RoBERTa-LoRA vs ModernBERT-LoRA, pvalue: {float(compare_rob_mbert.pvalue):.4f}')
print('-----------------' * 3)
print(f'ModernBERT-LoRA vs OPT-350m-LoRA, pvalue: {float(compare_opt_mbert.pvalue):.4f}')

RoBERTa-LoRA vs OPT-350m-LoRA, pvalue: 0.1539
---------------------------------------------------
RoBERTa-LoRA vs ModernBERT-LoRA, pvalue: 0.8775
---------------------------------------------------
ModernBERT-LoRA vs OPT-350m-LoRA, pvalue: 0.2053


- We fail to reject $H_0$ (no difference between the models' predictions): all p-values are above 0.05, so the performance of the models is not significantly different.
- No further post-hoc test is required.

## 8. Sample random prediction

In [None]:
# Draw five random test indices (repeats are possible) and print, for each
# one, the tweet, its true emotion and the emotion predicted by every model.
random_sample = np.random.randint(0, len(X_test), 5)
separator = "--------------------" * 3

for idx in random_sample:
    true_emotion = label_to_emotion[y_test[idx]]

    print(f"Tweet: {X_test[idx][0]}")
    print(f"Emotion: {true_emotion}")
    print("")
    print(f"Opt-350 predicted: {label_to_emotion[opt_pred[idx]]}")
    print(f"ModernBERT-base predicted: {label_to_emotion[mbert_pred[idx]]}")
    print(f"RoBERTa-base predicted: {label_to_emotion[r_pred[idx]]}")
    print(separator)


Tweet: i feel most fearful and doubtful
Emotion: fear

Opt-350 predicted: fear
ModernBERT-base predicted: fear
RoBERTa-base predicted: fear
------------------------------------------------------------
Tweet: i feel no progression in our relationship and i am not sure we are even aiming towards the same thing
Emotion: joy

Opt-350 predicted: joy
ModernBERT-base predicted: joy
RoBERTa-base predicted: joy
------------------------------------------------------------
Tweet: i feel like im more humorous than i used to be
Emotion: joy

Opt-350 predicted: joy
ModernBERT-base predicted: joy
RoBERTa-base predicted: joy
------------------------------------------------------------
Tweet: i realize that being expelled from college may be a lesser punishment than five years in jail but if you feel you are innocent it might be worth the rolling of the dice
Emotion: joy

Opt-350 predicted: joy
ModernBERT-base predicted: joy
RoBERTa-base predicted: joy
--------------------------------------------------

In [None]:
# Force a garbage-collection pass; the cell displays the value it returns.
gc.collect()

5902