### References
- Finetune Llama 3 for Sentiment Analysis (https://www.kaggle.com/code/lucamassaron/fine-tune-llama-3-for-sentiment-analysis)
- Finetune Llama 2 for Sentiment Analysis (https://www.kaggle.com/code/lucamassaron/fine-tune-llama-2-for-sentiment-analysis)

In [1]:
!nvidia-smi

Fri Oct 25 13:14:42 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   52C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Libraries

In [2]:
%pip install -q -U datasets transformers accelerate bitsandbytes

In [3]:
import os
import random
import numpy as np
import pandas as pd
import torch
import transformers
import bitsandbytes as bnb
from datasets import load_dataset, Dataset
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline
from sklearn.metrics import f1_score

# Config

In [4]:
seed = 42
lang = 'eng'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # Might not work on Kaggle
# model_id = 'meta-llama/Llama-3.2-1B-Instruct'
# model_id = 'meta-llama/Llama-3.2-3B-Instruct'
model_id = 'meta-llama/Llama-3.1-8B-Instruct'

Disabling two features in PyTorch related to memory efficiency and speed during operations on the Graphics Processing Unit (GPU) specifically for the scaled dot product attention (SDPA) function.

In [5]:
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

In [6]:
def set_seed(seed):
    # Set random seed for NumPy
    np.random.seed(seed)

    # Set random seed for Torch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if using multi-GPU
    torch.backends.cudnn.deterministic = True  # Ensures deterministic results
    torch.backends.cudnn.benchmark = False  # Avoids non-deterministic algorithms

    # Set random seed for Transformers
    transformers.set_seed(seed)

    # Optionally set random seed for sklearn and Python's own random module
    random.seed(seed)

    # Set random seed for os
    os.environ['PYTHONHASHSEED'] = str(seed)

    print(f"Random seed set to: {seed}")

set_seed(seed)

Random seed set to: 42


# Data

## Load Data

In [7]:
data_files = {
    'train': f'preprocessed_data/train/{lang}.csv', 
    'val': f'preprocessed_data/val/{lang}.csv',
    'test': f'preprocessed_data/test/{lang}.csv',
}
dataset = load_dataset('alxxtexxr/SemEval2025-Task11-Dataset', data_files=data_files)

splits = data_files.keys()
df = {split: pd.DataFrame(dataset[split]) for split in splits}

cols = list(df['train'].columns)
print("DF columns:", cols)

emotion_cols = [col for col in cols if col not in ['Unnamed: 0', 'text', 'emotion']]
# neutral_emotion = df['train'][df['train'][emotion_cols].sum(axis=1) == 0]['emotion'].iloc[0]
# emotions = emotion_cols + [neutral_emotion]
print("Emotions columns:", emotion_cols)
print()

print("Train DF size:", len(df['train']))
print("Validation DF size:", len(df['val']))
print("Testing DF size:", len(df['test']))

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


DF columns: ['Unnamed: 0', 'text', 'emotion', 'anger', 'fear', 'joy', 'sad', 'surprise']
Emotions columns: ['anger', 'fear', 'joy', 'sad', 'surprise']

Train DF size: 2214
Validation DF size: 554
Testing DF size: 116


## Create One-Hot Emotion Data

In [8]:
# df['train']['one_hot_emotion'] = df['train'].apply(lambda row: row[emotion_cols].tolist(), axis=1).tolist()
# df['val']['one_hot_emotion'] = df['val'].apply(lambda row: row[emotion_cols].tolist(), axis=1).tolist()

# df['val']['one_hot_emotion']

## Create Prompt Data

In [9]:
prompt_template = """### Instruction
Detect the emotion(s) in the given input text. 
The detected emotion(s) can be one or a combination of the following: anger, fear, joy, sad, surprise, or neutral

### Input
Text: {text}

### Output
Emotion(s): {emotion}"""

def create_prompt(row):
    emotion_list = row['emotion'].replace(" ", "").split(",")
    emotion = ", ".join([f"{e}" for e in row['emotion'].replace(" ", "").split(",")])#[1:]#+ "]"
    # emotion = '\n'.join([f"- {e}" for e in emotion_list])[2:]
    return prompt_template.format(text=row['text'], emotion=emotion).strip()

def create_test_prompt(row):
    return prompt_template.format(text=row['text'], emotion="").strip()

df['train']['prompt'] = df['train'].apply(create_prompt, axis=1)
df['val']['prompt'] = df['val'].apply(create_test_prompt, axis=1)
df['test']['prompt'] = df['test'].apply(create_test_prompt, axis=1)

print("Train prompts:\n")
for prompt in df['train']['prompt'].head(3):
    print(prompt)
    print("================================================================================================================================================================================================")
print()
print("Testing prompts:\n")
for prompt in df['test']['prompt'].head(3):
    print(prompt)
    print("================================================================================================================================================================================================")

Train prompts:

### Instruction
Detect the emotion(s) in the given input text. 
The detected emotion(s) can be one or a combination of the following: anger, fear, joy, sad, surprise, or neutral

### Input
Text: I now have 12 of those canker sore suckers in my mouth along with a fever since friday.

### Output
Emotion(s): fear, sad
### Instruction
Detect the emotion(s) in the given input text. 
The detected emotion(s) can be one or a combination of the following: anger, fear, joy, sad, surprise, or neutral

### Input
Text: It just... went away.

### Output
Emotion(s): fear, sad, surprise
### Instruction
Detect the emotion(s) in the given input text. 
The detected emotion(s) can be one or a combination of the following: anger, fear, joy, sad, surprise, or neutral

### Input
Text: I naively walked up and stuck my head in the driver's window hole.

### Output
Emotion(s): fear, surprise

Testing prompts:

### Instruction
Detect the emotion(s) in the given input text. 
The detected emotion(s

In [10]:
max_seq_lengths = {split: df[split]['prompt'].str.len().max() for split in splits}
max_seq_length = int(max(max_seq_lengths.values()))

print("Train max. prompt length:", max_seq_lengths['train'])
print("Validation max. prompt length:", max_seq_lengths['val'])
print("Testing max. prompt length:", max_seq_lengths['test'])
print()
print("Max. prompt length:", max_seq_length, f"({type(max_seq_length)})")

Train max. prompt length: 673
Validation max. prompt length: 586
Testing max. prompt length: 527

Max. prompt length: 673 (<class 'int'>)


## Create Hugging Face Datasets

In [11]:
datasets = {split: Dataset.from_pandas(df[split][['prompt']]) for split in ['train', 'val']}
datasets

{'train': Dataset({
     features: ['prompt'],
     num_rows: 2214
 }),
 'val': Dataset({
     features: ['prompt'],
     num_rows: 554
 })}

# Models

In [12]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype='float16',
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [13]:
tokenizer = AutoTokenizer.from_pretrained(model_id, max_seq_length=max_seq_length)
tokenizer.pad_token_id = tokenizer.eos_token_id

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

# Evaluation without Finetuning 

In [14]:
y_true = df['val'].apply(lambda row: row[emotion_cols].tolist(), axis=1).tolist()
print(f"True Y ({len(y_true)}):")
y_true[:10]

True Y (554):


[[0, 1, 0, 1, 0],
 [0, 1, 0, 0, 1],
 [0, 0, 0, 1, 1],
 [0, 0, 0, 1, 0],
 [1, 1, 0, 0, 1],
 [0, 1, 0, 0, 0],
 [0, 1, 0, 0, 0],
 [0, 1, 0, 0, 1],
 [0, 1, 0, 1, 1],
 [1, 1, 0, 0, 0]]

In [15]:
def one_hot_encode_emotion(emotion, emotion_cols):
    emotions = emotion.replace(" ", "").split(",")
    one_hot_emotion = [1 if emotion_col in emotions else 0 for emotion_col in emotion_cols]
    return one_hot_emotion

def predict(df_, model, tokenizer, max_new_tokens=32, batch_size=128):
    prompt = df_['prompt'].tolist()
    pipe = pipeline(
        task='text-generation',
        model=model, 
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        temperature=0.001,
        batch_size=batch_size,
    )
    outputs = pipe(prompt)
    pred_emotion_list = [output[0]['generated_text'].split("Emotion(s): ")[-1].split("\n")[0].lower() for output in outputs]
    y_pred = [one_hot_encode_emotion(pred_emotion_i, emotion_cols) for pred_emotion_i in pred_emotion_list]
    return y_pred

# max_new_tokens = df['train'].apply(lambda row: len(tokenizer.encode(row['emotion'])), axis=1).max()
# print("Max. emotion tokens:", max_new_tokens)

tokenizer.padding_side = 'left'

y_pred = predict(df['val'], model, tokenizer)
# y_pred_8bit = predict(df['val'], model_8bit, tokenizer)
# y_pred_4bit = predict(df['val'], model_4bit, tokenizer)
print(f"Predicted Y ({len(y_pred)}):")
y_pred[:10]

OutOfMemoryError: CUDA out of memory. Tried to allocate 320.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 249.06 MiB is free. Process 237319 has 14.50 GiB memory in use. Of the allocated memory 12.34 GiB is allocated by PyTorch, and 2.03 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
def evaluate(y_true, y_pred):
    # Compute F1 score for each type of averaging method
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0.0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0.0)
    # f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0.0)
    # f1_samples = f1_score(y_true, y_pred, average='samples', zero_division=0.0)
    f1_per_label = f1_score(y_true, y_pred, average=None, zero_division=0.0)


    print(f'F1 Score (Micro-Average): {f1_micro}')
    print(f'F1 Score (Macro-Average): {f1_macro}')
    print()
    for label, f1 in zip(emotion_cols, f1_per_label):
        print(f"F1 Score for '{label}': {f1}")

evaluate(y_true, y_pred)

F1 Score (Micro-Average): 0.42096642096642095
F1 Score (Macro-Average): 0.4202282200558063

F1 Score for 'anger': 0.6136363636363636
F1 Score for 'fear': 0.375
F1 Score for 'joy': 0.4630541871921182
F1 Score for 'sad': 0.5615384615384615
F1 Score for 'surprise': 0.08791208791208792
