In [1]:
# Install unsloth in two steps: first from the package index, then remove that
# build and reinstall it (with the "colab-new" extra) from the GitHub repository.
# NOTE(review): presumably the first install is kept so that unsloth's
# dependencies are pulled in before the GitHub build replaces it — confirm.
# NOTE(review): the GitHub install is not pinned to a tag or commit, so a re-run
# may resolve to a different revision than the one recorded in the output below.
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

Found existing installation: unsloth 2024.12.4
Uninstalling unsloth-2024.12.4:
  Successfully uninstalled unsloth-2024.12.4
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-w8_tybyc/unsloth_50b0c9ce64974e3e9b6eaea8abdd31e8
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-w8_tybyc/unsloth_50b0c9ce64974e3e9b6eaea8abdd31e8
  Resolved https://github.com/unslothai/unsloth.git to commit 85f1fa096afde5efe2fb8521d8ceec8d13a00715
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25ldone
[?25h  Created wheel for unsloth: filename=unsloth-2024.12.4-py3-none-any.whl

### Load the necessary packages

In [2]:

import torch
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
from datasets import Dataset
from unsloth import is_bfloat16_supported
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Warnings
# Suppress every Python warning for the rest of the session: the filter is
# installed with action "ignore" and no category/module restriction.
import warnings
warnings.filterwarnings("ignore")

# IPython magic selecting the inline matplotlib backend for notebook output.
# NOTE(review): no plotting call is visible in this notebook — confirm this is still needed.
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


### Load the base model from unsloth and prepare PEFT parameters

In [7]:


# Sequence length (in tokens) passed to the model loader here and to the
# trainer later in the notebook.
max_seq_length = 5020

# Load the pre-quantised 4-bit Llama 3.2 1B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,  # NOTE(review): presumably lets Unsloth pick the dtype — confirm
)

# Wrap the base model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
    random_state = 32,
    loftq_config = None,
)

# print_trainable_parameters() writes the summary itself and returns None;
# wrapping it in print() only added a stray "None" line to the cell output.
model.print_trainable_parameters()

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.1.
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.61 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.12.4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039
None


### Prepare training prompts

In [8]:
# Training prompt template with two positional slots: the first "{}" receives
# the input text and the second "{}" receives the reference response
# (see how formatting_prompt fills it).
data_prompt = """Analyze the provided text from a mental health perspective. Identify any indicators of emotional distress, coping mechanisms, or psychological well-being. Highlight any potential concerns or positive aspects related to mental health, and provide a brief explanation for each observation.

### Input:
{}

### Response:
{}"""

# End-of-sequence token string taken from the loaded tokenizer; it is appended
# to every formatted training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompt(examples):
    """Render a batch of records into training strings.

    Args:
        examples: Batch mapping with parallel "Context" and "Response"
            sequences (one entry per record).

    Returns:
        dict: ``{"text": [...]}`` with one string per record, built by filling
        ``data_prompt`` with the context and response and appending
        ``EOS_TOKEN``.
    """
    pairs = zip(examples["Context"], examples["Response"])
    rendered = [
        data_prompt.format(context, reply) + EOS_TOKEN
        for context, reply in pairs
    ]
    return {"text": rendered}


In [9]:
import pandas as pd

In [10]:
# Build the training dataset.
# Step 1: read the raw examples from the CSV file into a DataFrame.
filtered_data = pd.read_csv('./dataset/train_mental_health.csv')

# Step 2: wrap the DataFrame in a Dataset and run formatting_prompt over it in
# batches; formatting_prompt supplies the "text" field the trainer reads.
training_data = Dataset.from_pandas(filtered_data).map(formatting_prompt, batched=True)


Map: 100%|██████████| 2784/2784 [00:00<00:00, 147116.64 examples/s]


In [11]:
# Display the first record of the mapped dataset as a quick sanity check.
training_data[0]

{'Context': "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.\n   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.\n   How can I change my feeling of being worthless to everyone?",
 'Response': "If everyone thinks you're worthless, then maybe you need to find new people to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to understand why you're not worthless, then go back to the same crowd and be knocked down again.There are many inspirational messages you can find in social media. \xa0Maybe read some of the ones which state that no person is worthless, and that everyone has a good purpose to their life.Also, since our culture is so saturated with the belief that if someone doesn't feel good about themselves that this is someh

### Model Training 🤖

In [12]:
# Training process: configure the optimisation settings, build the supervised
# fine-tuning trainer, then run training.

# Probe bf16 support once; fp16 is used only when bf16 is unavailable.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="output",
    seed=0,
    # Schedule
    num_train_epochs=40,
    learning_rate=3e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    # Batching: 32 sequences per device x 8 accumulation steps per optimizer step
    per_device_train_batch_size=32,
    gradient_accumulation_steps=8,
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Mixed precision
    bf16=use_bf16,
    fp16=not use_bf16,
    # Log metrics after every optimizer step
    logging_steps=1,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=training_data,
    dataset_text_field="text",  # column produced by formatting_prompt
    max_seq_length=max_seq_length,
    packing=True,
    dataset_num_proc=2,
)

trainer.train()

Generating train split: 182 examples [00:00, 765.41 examples/s]
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 182 | Num Epochs = 40
O^O/ \_/ \    Batch size per device = 32 | Gradient Accumulation steps = 8
\        /    Total batch size = 256 | Total steps = 40
 "-____-"     Number of trainable parameters = 11,272,192
  2%|▎         | 1/40 [01:30<58:31, 90.04s/it]

{'loss': 2.3746, 'grad_norm': 1.6996452808380127, 'learning_rate': 2.9999999999999997e-05, 'epoch': 1.0}


  5%|▌         | 2/40 [02:55<55:26, 87.55s/it]

{'loss': 2.3746, 'grad_norm': 1.699662446975708, 'learning_rate': 5.9999999999999995e-05, 'epoch': 2.0}


  8%|▊         | 3/40 [04:21<53:22, 86.54s/it]

{'loss': 2.3591, 'grad_norm': 1.4604734182357788, 'learning_rate': 8.999999999999999e-05, 'epoch': 3.0}


 10%|█         | 4/40 [05:46<51:44, 86.22s/it]

{'loss': 2.3037, 'grad_norm': 0.9553579092025757, 'learning_rate': 0.00011999999999999999, 'epoch': 4.0}


 12%|█▎        | 5/40 [07:12<50:12, 86.08s/it]

{'loss': 2.2506, 'grad_norm': 0.6224781274795532, 'learning_rate': 0.00015, 'epoch': 5.0}


 15%|█▌        | 6/40 [08:37<48:31, 85.63s/it]

{'loss': 2.2348, 'grad_norm': 0.655556321144104, 'learning_rate': 0.00017999999999999998, 'epoch': 6.0}


 18%|█▊        | 7/40 [10:02<47:00, 85.47s/it]

{'loss': 2.2091, 'grad_norm': 0.5509653091430664, 'learning_rate': 0.00020999999999999998, 'epoch': 7.0}


 20%|██        | 8/40 [11:27<45:33, 85.41s/it]

{'loss': 2.1879, 'grad_norm': 0.49977850914001465, 'learning_rate': 0.00023999999999999998, 'epoch': 8.0}


 22%|██▎       | 9/40 [12:52<44:00, 85.17s/it]

{'loss': 2.162, 'grad_norm': 0.3901579678058624, 'learning_rate': 0.00027, 'epoch': 9.0}


 25%|██▌       | 10/40 [14:15<42:17, 84.59s/it]

{'loss': 2.1423, 'grad_norm': 0.38835442066192627, 'learning_rate': 0.0003, 'epoch': 10.0}


 28%|██▊       | 11/40 [15:37<40:25, 83.66s/it]

{'loss': 2.1248, 'grad_norm': 0.39274531602859497, 'learning_rate': 0.00029, 'epoch': 11.0}


 30%|███       | 12/40 [17:02<39:17, 84.20s/it]

{'loss': 2.1048, 'grad_norm': 0.3416568338871002, 'learning_rate': 0.00028, 'epoch': 12.0}


 32%|███▎      | 13/40 [18:28<38:04, 84.63s/it]

{'loss': 2.0884, 'grad_norm': 0.3246256411075592, 'learning_rate': 0.00027, 'epoch': 13.0}


 35%|███▌      | 14/40 [19:53<36:45, 84.82s/it]

{'loss': 2.0706, 'grad_norm': 0.2675072252750397, 'learning_rate': 0.00026, 'epoch': 14.0}


 38%|███▊      | 15/40 [21:19<35:26, 85.06s/it]

{'loss': 2.0547, 'grad_norm': 0.25296086072921753, 'learning_rate': 0.00025, 'epoch': 15.0}


 40%|████      | 16/40 [22:44<34:04, 85.20s/it]

{'loss': 2.0398, 'grad_norm': 0.2416089028120041, 'learning_rate': 0.00023999999999999998, 'epoch': 16.0}


 42%|████▎     | 17/40 [24:10<32:41, 85.27s/it]

{'loss': 2.0254, 'grad_norm': 0.2271411418914795, 'learning_rate': 0.00023, 'epoch': 17.0}


 45%|████▌     | 18/40 [25:35<31:15, 85.24s/it]

{'loss': 2.0117, 'grad_norm': 0.20881876349449158, 'learning_rate': 0.00021999999999999995, 'epoch': 18.0}


 48%|████▊     | 19/40 [27:00<29:49, 85.21s/it]

{'loss': 1.9989, 'grad_norm': 0.21596954762935638, 'learning_rate': 0.00020999999999999998, 'epoch': 19.0}


 50%|█████     | 20/40 [28:25<28:24, 85.22s/it]

{'loss': 1.9858, 'grad_norm': 0.1942194253206253, 'learning_rate': 0.00019999999999999998, 'epoch': 20.0}


 52%|█████▎    | 21/40 [29:51<27:00, 85.28s/it]

{'loss': 1.9731, 'grad_norm': 0.19775590300559998, 'learning_rate': 0.00018999999999999998, 'epoch': 21.0}


 55%|█████▌    | 22/40 [31:16<25:32, 85.16s/it]

{'loss': 1.9607, 'grad_norm': 0.19644634425640106, 'learning_rate': 0.00017999999999999998, 'epoch': 22.0}


 57%|█████▊    | 23/40 [32:40<24:04, 84.96s/it]

{'loss': 1.9487, 'grad_norm': 0.1972813606262207, 'learning_rate': 0.00016999999999999999, 'epoch': 23.0}


 60%|██████    | 24/40 [34:05<22:40, 85.06s/it]

{'loss': 1.9367, 'grad_norm': 0.18397745490074158, 'learning_rate': 0.00015999999999999999, 'epoch': 24.0}


 62%|██████▎   | 25/40 [35:30<21:14, 84.97s/it]

{'loss': 1.9252, 'grad_norm': 0.1545424461364746, 'learning_rate': 0.00015, 'epoch': 25.0}


 65%|██████▌   | 26/40 [36:55<19:50, 85.04s/it]

{'loss': 1.9145, 'grad_norm': 0.13455434143543243, 'learning_rate': 0.00014, 'epoch': 26.0}


 68%|██████▊   | 27/40 [38:20<18:25, 85.01s/it]

{'loss': 1.9046, 'grad_norm': 0.13596150279045105, 'learning_rate': 0.00013, 'epoch': 27.0}


 70%|███████   | 28/40 [39:46<17:00, 85.08s/it]

{'loss': 1.895, 'grad_norm': 0.13697557151317596, 'learning_rate': 0.00011999999999999999, 'epoch': 28.0}


 72%|███████▎  | 29/40 [41:10<15:35, 85.01s/it]

{'loss': 1.8857, 'grad_norm': 0.1315624713897705, 'learning_rate': 0.00010999999999999998, 'epoch': 29.0}


 75%|███████▌  | 30/40 [42:36<14:11, 85.11s/it]

{'loss': 1.8767, 'grad_norm': 0.1302127093076706, 'learning_rate': 9.999999999999999e-05, 'epoch': 30.0}


 78%|███████▊  | 31/40 [44:00<12:43, 84.83s/it]

{'loss': 1.8682, 'grad_norm': 0.13049891591072083, 'learning_rate': 8.999999999999999e-05, 'epoch': 31.0}


 80%|████████  | 32/40 [45:25<11:18, 84.76s/it]

{'loss': 1.8602, 'grad_norm': 0.12613478302955627, 'learning_rate': 7.999999999999999e-05, 'epoch': 32.0}


 82%|████████▎ | 33/40 [46:49<09:53, 84.82s/it]

{'loss': 1.8529, 'grad_norm': 0.12541911005973816, 'learning_rate': 7e-05, 'epoch': 33.0}


 85%|████████▌ | 34/40 [48:15<08:29, 84.91s/it]

{'loss': 1.8462, 'grad_norm': 0.12499398738145828, 'learning_rate': 5.9999999999999995e-05, 'epoch': 34.0}


 88%|████████▊ | 35/40 [49:39<07:03, 84.77s/it]

{'loss': 1.84, 'grad_norm': 0.12008650600910187, 'learning_rate': 4.9999999999999996e-05, 'epoch': 35.0}


 90%|█████████ | 36/40 [51:04<05:39, 84.89s/it]

{'loss': 1.8347, 'grad_norm': 0.11780329793691635, 'learning_rate': 3.9999999999999996e-05, 'epoch': 36.0}


 92%|█████████▎| 37/40 [52:29<04:14, 84.91s/it]

{'loss': 1.83, 'grad_norm': 0.11815663427114487, 'learning_rate': 2.9999999999999997e-05, 'epoch': 37.0}


 95%|█████████▌| 38/40 [53:54<02:49, 84.89s/it]

{'loss': 1.8262, 'grad_norm': 0.11887627094984055, 'learning_rate': 1.9999999999999998e-05, 'epoch': 38.0}


 98%|█████████▊| 39/40 [55:19<01:24, 84.82s/it]

{'loss': 1.8233, 'grad_norm': 0.11721403151750565, 'learning_rate': 9.999999999999999e-06, 'epoch': 39.0}


100%|██████████| 40/40 [56:43<00:00, 84.72s/it]

{'loss': 1.8213, 'grad_norm': 0.11374007910490036, 'learning_rate': 0.0, 'epoch': 40.0}


100%|██████████| 40/40 [56:47<00:00, 85.18s/it]

{'train_runtime': 3407.0003, 'train_samples_per_second': 2.137, 'train_steps_per_second': 0.012, 'train_loss': 2.0181788563728333, 'epoch': 40.0}





TrainOutput(global_step=40, training_loss=2.0181788563728333, metrics={'train_runtime': 3407.0003, 'train_samples_per_second': 2.137, 'train_steps_per_second': 0.012, 'total_flos': 2.158569454239744e+17, 'train_loss': 2.0181788563728333, 'epoch': 40.0})

In [13]:
# Persist the fine-tuned model and the tokenizer side by side in one local
# directory so they can be reloaded together later.
save_dir = "ft_model/1B_finetuned_llama3.2"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('ft_model/1B_finetuned_llama3.2/tokenizer_config.json',
 'ft_model/1B_finetuned_llama3.2/special_tokens_map.json',
 'ft_model/1B_finetuned_llama3.2/tokenizer.json')

### Load the fine-tuned model

In [14]:
# Reload the model and tokenizer from the local fine-tuned directory, using
# the same sequence length and 4-bit loading as for the base model.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="ft_model/1B_finetuned_llama3.2",
    max_seq_length=5020,
    load_in_4bit=True,
    dtype=None,
)

# Switch the loaded model to Unsloth's inference mode before generating.
model = FastLanguageModel.for_inference(model)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.1.
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.61 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


#### Inference

In [15]:
import gc
def clear_memory():
    """Free unreferenced Python objects, then release cached CUDA memory.

    The garbage collector runs first (and unconditionally) so that tensors
    kept alive only by reference cycles are destroyed before the CUDA caching
    allocator is asked to give its unused blocks back; emptying the cache
    before collecting would leave that memory cached until the next call.

    Returns:
        None
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

In [17]:
def generate_response(model, tokenizer, text, data_prompt):
    """Generate the model's reply to ``text`` using ``data_prompt`` as template.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        text: User text to analyse.
        data_prompt: Prompt template. It may use a named ``{input}``
            placeholder, or positional ``{}`` placeholders (input first, then
            an optional response slot, which is left empty).

    Returns:
        str: Only the newly generated text, with special tokens removed and
        surrounding whitespace stripped.
    """
    clear_memory()

    # Supply the text both positionally and by keyword so that either template
    # style works; str.format ignores arguments the template does not use.
    prompt = data_prompt.format(text, "", input=text)

    with torch.no_grad():
        inputs = tokenizer(
            [prompt],
            return_tensors="pt",
            padding=True,
        ).to(model.device)  # follow the model instead of hard-coding "cuda"

        outputs = model.generate(
            **inputs,
            max_new_tokens=5020,  # Keeping the original parameter
            use_cache=True,
            do_sample=True,
            num_return_sequences=1,
        )

    # generate() returns the prompt tokens followed by the continuation, so
    # slice by prompt length rather than splitting the decoded text on
    # "Response:" (which truncated replies that contained that substring).
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0, prompt_length:]
    cleaned_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # Drop the tensor references first so the cleanup below can reclaim them.
    del inputs, outputs, new_tokens
    clear_memory()
    return cleaned_response

In [18]:
# Inference prompt. It repeats the instruction and the "### Input" /
# "### Response" layout of the template used to build the training text, so the
# model is queried in the same format it was fine-tuned on; the response slot
# is left empty for the model to complete.
# NOTE: this rebinds `data_prompt` with a named `{input}` placeholder, whereas
# formatting_prompt fills its template positionally — re-run the training
# prompt cell before re-running the dataset formatting step.
data_prompt = """Analyze the provided text from a mental health perspective. Identify any indicators of emotional distress, coping mechanisms, or psychological well-being. Highlight any potential concerns or positive aspects related to mental health, and provide a brief explanation for each observation.

### Input:
{input}

### Response:
"""

In [19]:
# Example input for a qualitative check of the fine-tuned model.
text = "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here. I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it. How can I change my feeling of being worthless to everyone?"

# Generate a reply with the current inference prompt; sampling is enabled in
# generate_response, so the text varies between runs.
response = generate_response(model, tokenizer, text, data_prompt)
# Bare last expression so the notebook displays the generated reply.
response

"It sounds like there is more going on emotionally than you're aware of. A good therapist will help you explore what's going on for you and help you find ways to express your feelings. My guess is that you're afraid people won't accept you as you are, or that they will reject you. \xa0Are there times when you wonder if people would even care about what you're feeling? Often when people are depressed they feel alone or think no one cares. \xa0You seem to be aware that you're worth something and believe you're worthwhile. The good news is that it sounds like you have a therapist now (congratulations!) and that you know you need help. Now...to get what you need might require some changes to your life. Do you have a good support system? What are you doing that helps you not despair? A therapist can also help you to see what's at the root of this emotion. You may have some unhealthy habits or things you are telling yourself that are not helpful and may need to change. Getting support from o