# Fine-Tuning LLM 

### Sentiment analysis for classifying mental health status from text

## 0. Imports

In [None]:
import numpy as np
import pandas as pd
import unsloth
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer, SFTConfig
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch Gemma3ForConditionalGeneration.
🦥 Unsloth Zoo will now patch everything to make training faster!


## 1. Loading the Model

In [None]:
from unsloth import FastLanguageModel

# Load the pre-quantized Llama 3.1 8B Instruct checkpoint together with its tokenizer.
max_seq_length = 1024 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# NOTE(review): HF_TOKEN is not assigned anywhere in the visible notebook; presumably it
# was defined in a cell that has been removed. Confirm it exists before running this cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = HF_TOKEN, # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Set model configuration parameters similar to reference code.
# use_cache is switched off here for training and switched back on once training is done.
model.config.use_cache = False
model.config.pretraining_tp = 1

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.542 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


## 2. Setting up wandb to track training

In [None]:
import wandb

# NOTE(review): WANDB_TOKEN is not assigned anywhere in the visible notebook; presumably it
# was defined in a cell that has been removed. Confirm it exists before running this cell.
wb_token = WANDB_TOKEN

# Log in with the API key, then open the run that the trainer reports to
# (the training configuration sets report_to="wandb").
wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune llama-3.1-8b-it on Sentiment Analysis Dataset', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/makers/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdionyichia[0m ([33mdionyichia-nanyang-technological-university-singapore[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


## 3. Loading Dataset

In [None]:
# Read the data
df = pd.read_csv("./balanced_dataset.csv", index_col="Unnamed: 0")
df.head()

# Rebalance classes: down-sample every status to the size of the rarest one so that
# all statuses end up with the same number of statements.
min_class_size = df["status"].value_counts().min()

# Sample each group explicitly instead of going through DataFrameGroupBy.apply().
# apply() on a frame that still contains the grouping column is what triggers the
# warning echoed in this cell's output, and the sampled frames must keep the "status"
# column for the steps below. Iterating the groupby yields the full sub-frame per status
# (in sorted key order), and each group is sampled with the same fixed seed as before.
df = pd.concat(
    [
        group.sample(n=min_class_size, random_state=85)
        for _, group in df.groupby("status")
    ]
)
df = df.reset_index(drop=True)

print("\nBalanced class distribution:\n", df["status"].value_counts())

# Stratified 80/10/10 train/eval/test split.
# Separate the label column from the remaining columns first.
target = df["status"]
features = df.drop(columns="status")

# Step 1: keep 80% for training and hold out 20%, preserving the class ratios.
X_train, X_temp, y_train, y_temp = train_test_split(
    features, target, test_size=0.2, random_state=42, stratify=target
)

# Step 2: cut the held-out 20% in half (10% eval, 10% test), again stratified.
X_eval, X_test, y_eval, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Re-attach the labels to each feature split so prompts can be generated row by row.
# assign() works on a copy, and the index is reset so every split is numbered from 0.
train_df = X_train.assign(status=y_train).reset_index(drop=True)
eval_df = X_eval.assign(status=y_eval).reset_index(drop=True)
test_df = X_test.assign(status=y_test).reset_index(drop=True)

# Verify class distribution in each split
for header, frame in (
    ("\nTrain set class distribution:", train_df),
    ("\nEval set class distribution:", eval_df),
    ("\nTest set class distribution:", test_df),
):
    print(header)
    print(frame["status"].value_counts())


Balanced class distribution:
 status
Anxiety       3888
Depression    3888
Normal        3888
Name: count, dtype: int64

Train set class distribution:
status
Anxiety       3111
Normal        3110
Depression    3110
Name: count, dtype: int64

Eval set class distribution:
status
Depression    389
Normal        389
Anxiety       388
Name: count, dtype: int64

Test set class distribution:
status
Normal        389
Anxiety       389
Depression    389
Name: count, dtype: int64


  df = df.groupby("status", group_keys=False).apply(lambda x: x.sample(n=min_class_size, random_state=85))


## 4. Prepping data for training

In [6]:
# Define the prompt generation functions
def generate_prompt(data_point):
    """Build the supervised training prompt (instruction, text and gold label).

    ``data_point`` must support item access for ``"statement"`` and ``"status"``
    (a dict or a DataFrame row). Surrounding whitespace of the assembled prompt
    is stripped.
    """
    instruction = (
        "Classify the text into Normal, Depression, Anxiety, Bipolar, "
        "and return the answer as the corresponding mental health disorder label."
    )
    parts = [
        instruction,
        f"text: {data_point['statement']}",
        f"label: {data_point['status']}",
    ]
    return "\n".join(parts).strip()

def generate_test_prompt(data_point):
    """Build the inference prompt: same instruction and text, label left open.

    Only ``data_point["statement"]`` is read. Because the assembled prompt is
    stripped, it ends in ``label:`` with no trailing space.
    """
    instruction = (
        "Classify the text into Normal, Depression, Anxiety, Bipolar, "
        "and return the answer as the corresponding mental health disorder label."
    )
    parts = [
        instruction,
        f"text: {data_point['statement']}",
        "label: ",
    ]
    return "\n".join(parts).strip()

# Generate prompts for training and evaluation data (these include the gold label)
train_df.loc[:,'text'] = train_df.apply(generate_prompt, axis=1)
eval_df.loc[:,'text'] = eval_df.apply(generate_prompt, axis=1)

# For test data, save the true labels separately before generating prompts:
# test_df is replaced below by a frame that only holds the label-free prompt text.
y_true = test_df["status"].copy()
test_prompts = test_df.apply(generate_test_prompt, axis=1)
test_df = pd.DataFrame({"text": test_prompts})

# Convert to datasets; only the "text" column is kept for train and eval
train_data = Dataset.from_pandas(train_df[["text"]])
eval_data = Dataset.from_pandas(eval_df[["text"]])
test_data = Dataset.from_pandas(test_df)

# Report the size of each split as a sanity check
print("\nFinal dataset shapes:")
print(f"Train: {train_data.shape}")
print(f"Eval: {eval_data.shape}")
print(f"Test: {test_data.shape}")


Final dataset shapes:
Train: (9331, 1)
Eval: (1166, 1)
Test: (1167, 1)


In [7]:
# Show one generated training prompt to check the formatting by eye
train_data['text'][3]

'Classify the text into Normal, Depression, Anxiety, Bipolar, and return the answer as the corresponding mental health disorder label.\ntext: i apologized to her, but i could tell she was still upset.\nlabel: Normal'

In [8]:
def predict(test, model, tokenizer):
    """Generate a label for every prompt in ``test`` and map it to a category.

    Args:
        test: DataFrame with a ``"text"`` column holding prompts that end in ``label:``.
        model: Causal language model handed to the text-generation pipeline.
        tokenizer: Tokenizer that belongs to ``model``.

    Returns:
        A list with one entry per row of ``test``: the first category whose name
        occurs (case-insensitively) in the generated continuation, or ``"none"``
        when no category matches.
    """
    categories = ["Normal", "Depression", "Anxiety", "Bipolar"]

    # Build the pipeline once. It does not depend on the row, and re-creating it
    # inside the loop repeated the set-up work (and its log line) for every sample.
    # NOTE(review): temperature is passed without do_sample; confirm whether
    # sampling or greedy decoding is intended here.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=2,
                    temperature=0.1)

    y_pred = []
    for prompt in tqdm(test["text"], total=len(test)):
        result = pipe(prompt)
        # The generated text echoes the prompt; keep only what follows the last "label:".
        answer = result[0]['generated_text'].split("label:")[-1].strip().lower()

        # First matching category wins; fall back to "none".
        y_pred.append(
            next((category for category in categories if category.lower() in answer), "none")
        )

    return y_pred

# Baseline predictions from the model on the held-out test prompts
y_pred = predict(test_df, model, tokenizer)

  0%|          | 0/1167 [00:00<?, ?it/s]Device set to use cuda:0
  0%|          | 1/1167 [00:02<39:30,  2.03s/it]Device set to use cuda:0
  0%|          | 2/1167 [00:02<17:45,  1.09it/s]Device set to use cuda:0
  0%|          | 3/1167 [00:02<10:45,  1.80it/s]Device set to use cuda:0
  0%|          | 4/1167 [00:02<07:25,  2.61it/s]Device set to use cuda:0
  0%|          | 5/1167 [00:02<05:33,  3.49it/s]Device set to use cuda:0
  1%|          | 6/1167 [00:02<04:26,  4.36it/s]Device set to use cuda:0
  1%|          | 7/1167 [00:02<03:53,  4.97it/s]Device set to use cuda:0
  1%|          | 8/1167 [00:02<03:22,  5.72it/s]Device set to use cuda:0
  1%|          | 9/1167 [00:03<03:04,  6.26it/s]Device set to use cuda:0
  1%|          | 10/1167 [00:03<02:50,  6.80it/s]Device set to use cuda:0
  1%|          | 11/1167 [00:03<02:39,  7.23it/s]Device set to use cuda:0
  1%|          | 12/1167 [00:03<02:34,  7.48it/s]Device set to use cuda:0
  1%|          | 13/1167 [00:03<02:29,  7.74it/s]Device 

## 5. Evaluation of the Model Before Fine-Tuning

In [9]:
def evaluate(y_true, y_pred, labels=("Normal", "Depression", "Anxiety")):
    """Print accuracy, per-label accuracy, a classification report and a confusion matrix.

    Args:
        y_true: Iterable of gold label strings.
        y_pred: Iterable of predicted label strings, in the same order as ``y_true``.
        labels: Label names to score; a label's position is its integer id.
            Defaults to the three classes present in the dataset.

    Any value that is not in ``labels`` (for example ``"none"`` or ``"Bipolar"``
    from ``predict``) is mapped to ``-1``. Such predictions count as wrong in the
    accuracy figures and are left out of the report and the confusion matrix,
    which are restricted to the ids of ``labels``.
    """
    labels = list(labels)
    label_ids = list(range(len(labels)))
    mapping = {label: idx for idx, label in enumerate(labels)}

    y_true_mapped = np.array([mapping.get(value, -1) for value in y_true], dtype=int)
    y_pred_mapped = np.array([mapping.get(value, -1) for value in y_pred], dtype=int)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')

    # Accuracy restricted to the samples of each gold label
    for label_id in np.unique(y_true_mapped):
        mask = y_true_mapped == label_id
        label_accuracy = accuracy_score(y_true_mapped[mask], y_pred_mapped[mask])
        # Indexing labels with -1 would silently report unknown gold labels under
        # the last class name, so name them explicitly instead.
        label_name = labels[label_id] if label_id >= 0 else "<unknown>"
        print(f'Accuracy for label {label_name}: {label_accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(
        y_true=y_true_mapped,
        y_pred=y_pred_mapped,
        target_names=labels,
        labels=label_ids,
    )
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=label_ids)
    print('\nConfusion Matrix:')
    print(conf_matrix)

# Score the baseline predictions against the gold labels saved before prompt generation
evaluate(y_true, y_pred)

Accuracy: 0.730
Accuracy for label Normal: 0.746
Accuracy for label Depression: 0.853
Accuracy for label Anxiety: 0.591

Classification Report:
              precision    recall  f1-score   support

      Normal       0.76      0.75      0.75       389
  Depression       0.67      0.85      0.75       389
     Anxiety       0.81      0.59      0.68       389

   micro avg       0.73      0.73      0.73      1167
   macro avg       0.75      0.73      0.73      1167
weighted avg       0.75      0.73      0.73      1167


Confusion Matrix:
[[290  82  17]
 [ 14 332  38]
 [ 76  83 230]]


## 6. Setting training and config parameters

In [10]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    """Collect the leaf names of all bitsandbytes 4-bit linear layers in ``model``.

    Walks ``model.named_modules()``, keeps the last dotted component of every
    module that is a ``bnb.nn.Linear4bit``, drops ``lm_head`` if present and
    returns the distinct names as a list (order is not guaranteed).
    """
    target_cls = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, target_cls)
    }
    leaf_names.discard('lm_head')  # needed for 16 bit
    return list(leaf_names)
# Names of the 4-bit linear layers; used as the LoRA target modules
modules = find_all_linear_names(model)
modules

['k_proj', 'q_proj', 'down_proj', 'gate_proj', 'o_proj', 'up_proj', 'v_proj']

In [11]:
# Wrap the base model with LoRA adapters on the linear layers found above.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank; any value > 0 works (8, 16, 32, 64, 128 are the suggested ones)
    target_modules=modules,
    lora_alpha=16,
    lora_dropout=0,  # any value is supported, 0 is the optimized setting
    bias="none",  # any value is supported, "none" is the optimized setting
    # True or "unsloth"; the "unsloth" mode is advertised as using 30% less VRAM
    # and is meant for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,  # rank-stabilized LoRA is available but not used here
)

Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [12]:
# # 3. Optimize LoRA configuration - reduce rank and alpha
# peft_config = LoraConfig(
#     lora_alpha=8,  # Reduced from 16
#     lora_dropout=0,
#     r=16,  # Reduced from 64
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=modules,
# )

In [13]:
# Directory that receives checkpoints and, after training, the adapter and tokenizer.
output_dir="llama-3.1-fine-tuned-model"

# Training configuration: one epoch, batch size 1 with 16 accumulation steps
# (the training log reports a total batch size of 16), bf16 precision.
training_arguments = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,  # Increased from 8
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",  # Use 8-bit optimizer to save memory
    logging_steps=20,  # Reduced logging frequency
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    lr_scheduler_type="cosine",
    report_to="wandb",
    eval_strategy="steps",
    # Fractional value: the training log in this notebook shows a single
    # evaluation at step 292 of 583, i.e. halfway through the run.
    eval_steps=0.5,  # Less frequent evaluation
    dataset_text_field="text",
    # NOTE(review): shorter than the max_seq_length of 1024 used when loading the model.
    # Presumably longer prompts are truncated, which would cut off the trailing
    # "label: ..." part of those training examples - confirm.
    max_seq_length=256,  # Reduced sequence length
    packing=False,
    dataset_kwargs={
        "add_special_tokens": False,
        "append_concat_token": False,
    },
)

# Free Python garbage and cached CUDA memory before the trainer is created
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

# Set up trainer on the prompt datasets. The model already carries its LoRA
# adapters, so no separate peft_config is passed (kept below, commented out).
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    # peft_config=peft_config,
    processing_class=tokenizer,
    args=training_arguments,
)

Unsloth: Tokenizing ["text"] (num_proc=8):   0%|          | 0/9331 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=8):   0%|          | 0/1166 [00:00<?, ? examples/s]

## 7. Train the model

In [14]:
# Add manual garbage collection during training.
# Keep a reference to the original method so the wrapper below can delegate to it.
old_step = trainer.training_step

def training_step_with_gc(*args, **kwargs):
    """Run the original training step, cleaning up memory first when due.

    Garbage collection and a CUDA cache flush run whenever
    ``trainer.state.global_step`` is a multiple of 50 (including 0).
    """
    # NOTE(review): if training_step is invoked once per micro-batch, this clean-up
    # would repeat for every accumulation micro-step while global_step sits on a
    # multiple of 50 - confirm that this is acceptable.
    if trainer.state.global_step % 50 == 0:
        gc.collect()
        torch.cuda.empty_cache()
    return old_step(*args, **kwargs)

# Replace the trainer's step with the wrapper
trainer.training_step = training_step_with_gc

# Train, close the wandb run, re-enable the KV cache that was switched off for
# training, then save the trained model and the tokenizer to output_dir.
trainer_stats = trainer.train()
wandb.finish()
model.config.use_cache = True
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 9,331 | Num Epochs = 1 | Total steps = 583
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 16 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Step,Training Loss,Validation Loss
292,2.1172,2.042768


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,█████▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,█▃▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▂▂▁▁▁▁▁▁▁▁▂▁

0,1
eval/loss,2.04277
eval/runtime,42.8131
eval/samples_per_second,27.235
eval/steps_per_second,6.82
total_flos,5.316330584776704e+16
train/epoch,0.99968
train/global_step,583.0
train/grad_norm,0.40559
train/learning_rate,0.0
train/loss,2.0802


('llama-3.1-fine-tuned-model/tokenizer_config.json',
 'llama-3.1-fine-tuned-model/special_tokens_map.json',
 'llama-3.1-fine-tuned-model/tokenizer.json')

## 8. Evaluate trained model

In [15]:
# Repeat prediction and scoring on the same test prompts, now with the fine-tuned model
y_pred = predict(test_df, model, tokenizer)
evaluate(y_true, y_pred)

  0%|          | 0/1167 [00:00<?, ?it/s]Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadMo

Accuracy: 0.955
Accuracy for label Normal: 0.982
Accuracy for label Depression: 0.938
Accuracy for label Anxiety: 0.943

Classification Report:
              precision    recall  f1-score   support

      Normal       0.96      0.98      0.97       389
  Depression       0.94      0.94      0.94       389
     Anxiety       0.97      0.94      0.95       389

   micro avg       0.96      0.95      0.96      1167
   macro avg       0.96      0.95      0.96      1167
weighted avg       0.96      0.95      0.96      1167


Confusion Matrix:
[[382   6   1]
 [  9 365  12]
 [  5  17 367]]


## 9. Merge Adapter with Base Model

In [18]:
# Reload the fine-tuned adapter model from the directory written after training
# NOTE(review): neither load_in_4bit nor dtype is passed here, so the precision in
# which the weights are loaded (and therefore merged) depends on the library
# defaults - confirm that this is the intended precision for the merged model.
output_dir = "llama-3.1-fine-tuned-model"
model, tokenizer = FastLanguageModel.from_pretrained(
    output_dir,
    device_map="auto",
)

# Merge adapter weights with base model; merged_model is what later cells use
print("Merging adapter weights with base model...")
merged_model = model.merge_and_unload()
print("Merge completed!")

# Save the merged model and its tokenizer locally
merged_output_dir = "llama-3.1-merged-model"
print(f"Saving merged model to {merged_output_dir}...")
merged_model.save_pretrained(merged_output_dir)
tokenizer.save_pretrained(merged_output_dir)
print("Merged model saved locally!")

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.542 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Merging adapter weights with base model...




Merge completed!
Saving merged model to llama-3.1-merged-model...
Merged model saved locally!


## 10. Visualise Fine Tuned Output

In [None]:
# Smoke test: classify one hand-written statement with the merged model.
text = "I'm trapped in a storm of emotions that I can't control, and it feels like no one understands the chaos inside me"
# NOTE(review): this instruction lists three classes, while generate_prompt and
# generate_test_prompt (used for training and evaluation) also list "Bipolar".
# Confirm which wording is intended so inference matches the fine-tuning format.
prompt = f"""Classify the text into Normal, Depression, Anxiety, and return the answer as the corresponding mental health disorder label.
text: {text}
label: """.strip()

pipe = pipeline(
    "text-generation",
    model=merged_model,
    tokenizer=tokenizer,
    device_map="auto",
)

outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)
# The prompt is stripped, so it ends in "label:" without a trailing space. Split on
# that exact marker, as predict() does: splitting on "label: " only works when the
# model happens to emit a leading space, and otherwise prints the whole prompt.
print(outputs[0]["generated_text"].split("label:")[-1].strip())

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoFo

Depression


## 11. Push to HuggingFace repo

In [None]:
from huggingface_hub import login

# Log in to the Hugging Face Hub
# NOTE(review): the assignment below is a self-assignment. It has no effect when
# HF_TOKEN already exists and fails when it does not; presumably a literal token
# was replaced by this placeholder - confirm how the token is meant to be supplied.
HF_TOKEN = HF_TOKEN  # Replace with your token
login(token=HF_TOKEN)

# Target repository for the merged model; the model card upload reuses this id
model_id = "fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned"  # Replace with your desired repo name

# Push to hub - this might take a while depending on your connection speed
merged_model.push_to_hub(
    model_id,
    tokenizer=tokenizer,
    private=False,  # Set to True if you want a private repository
    commit_message="Upload merged fine-tuned Llama 3.1 8B model. This model is fine tuned for SC1015 DSAI, based on text input it classifies the mental health status of the user to one of 3 categories: Normal, Depression, Anxiety."
)

print(f"Merged model successfully uploaded to https://huggingface.co/{model_id}")

README.md:   0%|          | 0.00/608 [00:00<?, ?B/s]

  0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.65G [00:00<?, ?B/s]

Saved model to https://huggingface.co/fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned
Merged model successfully uploaded to https://huggingface.co/fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned


## 12. Create Model Card

In [21]:
# Build the model card (README.md) and upload it to the model repository.
#
# The template is a plain string, not an f-string, because the embedded example
# code contains its own braces ({text}). The repository id is filled in with
# str.replace() below; previously the literal "{model_id}" ended up in the README.
# The YAML front matter starts on the first line of the file (no leading blank line).
model_card_template = """\
---
language:
- en
tags:
- llama
- llama-3.1
- fine-tuned
- text-generation
library_name: transformers
pipeline_tag: text-generation
license: cc-by-nc-4.0
---

# Fine-tuned Llama 3.1 8B

This model is a fine-tuned version of the [Meta Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) model.

## Training Details

- **Base Model:** Meta-Llama-3.1-8B-Instruct
- **Fine-tuning Method:** QLoRA with Unsloth optimization
- **Parameters:**
  - LoRA rank: 16
  - LoRA alpha: 16
  - Batch size: 1 per device with 16 gradient accumulation steps (effective batch size 16)
  - Learning rate: 2e-4
  - Epochs: 1

## Intended Use

This model is fine-tuned on this [Kaggle Dataset on mental health sentiment analysis](https://www.kaggle.com/datasets/suchintikasarkar/sentiment-analysis-for-mental-health) to predict a user's mental health status from their input.
The model classifies the user into one of three categories: Normal, Depression, or Anxiety.

- This was developed as part of a larger project, creating a classification system to determine students' mental health status.
- This was developed for SC1015 by Chia Dion Yi.

## Example Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

model_name = "{model_id}"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

text = "I'm trapped in a storm of emotions that I can't control, and it feels like no one understands the chaos inside me"
prompt = f'''Classify the text into Normal, Depression, Anxiety, and return the answer as the corresponding mental health disorder label.
text: {text}
label: '''.strip()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)

outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)
print(outputs[0]["generated_text"].split("label:")[-1].strip())
```

## Limitations
- Nil

"""

# Insert the repository id into the usage example
model_card = model_card_template.replace("{model_id}", model_id)

# Save model card
with open("README.md", "w", encoding="utf-8") as f:
    f.write(model_card)

# Push the model card to the hub
from huggingface_hub import HfApi
api = HfApi()
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=model_id,
    repo_type="model",
)

print("Model card uploaded successfully!")

Model card uploaded successfully!
