# Fine Tune Llama 3.2 3b

In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

from data import go_emotions


## Load data

In [None]:
# Read the emotion label names, one per line; a line's position is used as its label id below.
# The encoding is pinned so the result does not depend on the platform default.
with open('data/go_emotions/emotions.txt', "r", encoding="utf-8") as file:
    lines = file.readlines()

# Remove surrounding whitespace, including the trailing newline characters
l_emotions = [line.strip() for line in lines]

# Lookup table: label id -> emotion name
d_go_emotions = dict(enumerate(l_emotions))

In [None]:
# Pull the train / test / validation splits out of go_emotions, indexed by split name.
df_train, df_test, df_val = (
    go_emotions[split_name] for split_name in ('train', 'test', 'val')
)

In [None]:
def _label_names(label_ids):
    """Translate one row's label ids into emotion names via d_go_emotions."""
    return [d_go_emotions[label_id] for label_id in label_ids]


# Add two columns to every split, in place:
#   l_emotions - list of emotion names for the row's label ids
#   emotions   - the string form of that list
for split_frame in (df_train, df_test, df_val):
    split_frame['l_emotions'] = split_frame['labels'].apply(_label_names)
    split_frame['emotions'] = split_frame['l_emotions'].apply(str)

In [None]:
# Define the prompt generation functions
def _build_instruction(labels=None):
    """Return the instruction sentence shared by the training and test prompts.

    Args:
        labels: Optional iterable of label names to list in the instruction.
            When omitted, the module-level ``l_emotions`` list is used.
    """
    names = l_emotions if labels is None else labels
    return (
        f"Classify the text into {', '.join(names)}, and return the answer "
        "as the corresponding emotion label."
    )


def generate_prompt(data_point, labels=None):
    """Build a supervised prompt: instruction, the text, and its gold label.

    Args:
        data_point: Row-like object exposing ``"text"`` and ``"emotions"`` keys.
        labels: Optional label names for the instruction (defaults to
            ``l_emotions``).

    Returns:
        The prompt string with surrounding whitespace stripped.
    """
    return (
        f"{_build_instruction(labels)}\n"
        f"text: {data_point['text']}\n"
        f"label: {data_point['emotions']}"
    ).strip()


def generate_test_prompt(data_point, labels=None):
    """Build an inference prompt: same instruction and text, label left blank.

    The instruction must be identical to the one used in ``generate_prompt``;
    otherwise the model is evaluated on a task it was not trained on.

    Args:
        data_point: Row-like object exposing a ``"text"`` key.
        labels: Optional label names for the instruction (defaults to
            ``l_emotions``).

    Returns:
        The prompt string ending in ``label:`` (trailing whitespace stripped).
    """
    return (
        f"{_build_instruction(labels)}\n"
        f"text: {data_point['text']}\n"
        "label: "
    ).strip()

# Attach the supervised prompt (instruction + text + label) to the
# training and validation frames.
for labelled_frame in (df_train, df_val):
    labelled_frame.loc[:, 'prompt'] = labelled_frame.apply(generate_prompt, axis=1)

# Ground-truth label strings of the test split
y_test = df_test.loc[:, 'emotions']

# Label-free prompts for the test split, stored in a single "prompt" column
test_prompts = df_test.apply(generate_test_prompt, axis=1)
X_test = pd.DataFrame(test_prompts, columns=["prompt"])

In [None]:
# Frequency of each distinct label string in the training split
df_train['emotions'].value_counts()

In [None]:
# Wrap the prompt column of the train and validation frames as `datasets.Dataset` objects
train_data, eval_data = (
    Dataset.from_pandas(frame[["prompt"]]) for frame in (df_train, df_val)
)

In [None]:
# Inspect one training prompt. The dataset was built from df_train[["prompt"]],
# so the column is named "prompt" (there is no "text" column).
train_data['prompt'][3]

In [None]:
# Location of the base checkpoint.
# NOTE(review): the notebook title mentions Llama 3.2 3b, but this path points
# at llama-3.1 8b-instruct; confirm which checkpoint is intended.
base_model_name = "/kaggle/input/llama-3.1/transformers/8b-instruct/1"

# Tokenizer: reuse the end-of-sequence token id as the padding token id.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id

# 4-bit NF4 quantization with float16 compute and double quantization disabled.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

# Load the quantized causal LM and let the library place it on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    torch_dtype="float16",
    device_map="auto",
)

# Config overrides applied after loading.
model.config.pretraining_tp = 1
model.config.use_cache = False

In [None]:
def predict(test, model, tokenizer, max_new_tokens=2):
    """Generate one predicted emotion name per prompt in ``test``.

    Args:
        test: DataFrame holding the prompts in a ``"prompt"`` column (a
            ``"text"`` column is accepted as a fallback).
        model: Causal language model handed to the text-generation pipeline.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Number of tokens to generate per prompt.
            NOTE(review): the training prompts put a stringified list such as
            "['name']" after "label:", so 2 tokens may be too short to contain
            a full label name - consider raising this.

    Returns:
        A list with one entry per row: the first matching name from
        ``l_emotions`` (longest names tried first), or ``"none"`` when no name
        occurs in the generated answer.
    """
    # X_test is built with a single "prompt" column; indexing "text" would raise KeyError.
    prompt_column = "prompt" if "prompt" in test.columns else "text"
    prompts = test[prompt_column].tolist()

    # Try longer names first so a name that is a substring of another one
    # (e.g. a label like "approval" inside "disapproval") cannot shadow it.
    # Empty names are dropped because "" is a substring of every answer.
    categories = sorted((c for c in l_emotions if c), key=len, reverse=True)

    # Build the pipeline once; constructing it per row is pure overhead.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=max_new_tokens,
                    temperature=0.1)

    y_pred = []
    for prompt in tqdm(prompts):
        result = pipe(prompt)
        # The generated text is split on the last "label:" marker and
        # only what follows it is treated as the answer.
        answer = result[0]['generated_text'].split("label:")[-1].strip().lower()

        # Determine the predicted category
        y_pred.append(
            next((c for c in categories if c.lower() in answer), "none")
        )

    return y_pred

# Run the model over the test prompts and collect one predicted label per row
y_pred = predict(test=X_test, model=model, tokenizer=tokenizer)

In [None]:
def evaluate(y_true, y_pred, labels=None):
    """Print accuracy, per-label accuracy, a classification report and a confusion matrix.

    Labels are compared as strings. The previous version mapped every value
    through a hard-coded list of four mental-health labels, so all emotion
    labels collapsed to -1 and were then reported under the last label.

    Args:
        y_true: Iterable of ground-truth label names.
        y_pred: Iterable of predicted label names, aligned with ``y_true``.
        labels: Optional ordered label names to report on. Defaults to the
            sorted set of names occurring in ``y_true`` or ``y_pred``.
            Values outside ``labels`` still count towards overall accuracy
            but get no per-label line.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    truth = [str(value) for value in y_true]
    guess = [str(value) for value in y_pred]
    if len(truth) != len(guess):
        raise ValueError(
            f"y_true and y_pred differ in length: {len(truth)} != {len(guess)}"
        )
    if not truth:
        raise ValueError("y_true and y_pred must not be empty")

    if labels is None:
        labels = sorted(set(truth) | set(guess))
    else:
        labels = [str(label) for label in labels]

    # Calculate accuracy
    accuracy = accuracy_score(y_true=truth, y_pred=guess)
    print(f'Accuracy: {accuracy:.3f}')

    # Per-label accuracy: among rows whose true label is `label`,
    # the share that was predicted as `label`.
    truth_arr = np.array(truth, dtype=object)
    guess_arr = np.array(guess, dtype=object)
    for label in labels:
        mask = truth_arr == label
        if not mask.any():
            continue  # label never occurs in y_true; nothing to score
        label_accuracy = float(np.mean(guess_arr[mask] == label))
        print(f'Accuracy for label {label}: {label_accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(
        y_true=truth, y_pred=guess, labels=labels, zero_division=0
    )
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix (rows: true label, columns: predicted label, in `labels` order)
    conf_matrix = confusion_matrix(y_true=truth, y_pred=guess, labels=labels)
    print('\nConfusion Matrix:')
    print(conf_matrix)

# The ground-truth labels of the test split are stored in y_test; y_true is never defined.
# NOTE(review): y_test holds stringified label lists (the 'emotions' column) while
# predict returns single emotion names - confirm both sides use the same format.
evaluate(y_test, y_pred)