# SarvUday v3
- Fine-tunes version 2 of the SurvUday model (survuday_v2): `/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v2`
- The base model was itself developed by fine-tuning Llama 3.2 (3B).
- Dataset Description: Trained on 60,000 data samples (v1 -> 30k, v2 -> 30k), including both mental-health-related and non-mental-health-related data. Around 85% of the data is related to mental health and 15% is not.
- Dataset -> `mental_health_corpus_03`
- Dataset Link -> `/content/drive/MyDrive/Projects/Data/mental_health_corpus_03.csv`

### Install

In [None]:
%%capture
# Colab bootstrap for the training session: mount Google Drive, set the
# Weights & Biases environment variables, install Unsloth, then import
# everything the training cells use.
import os
from google.colab import drive
drive.mount('/content/drive')

# WANDB_* environment variables for experiment logging (the trainer is
# configured with report_to="wandb").
os.environ["WANDB_PROJECT"] = "SarvUday_v3"
os.environ["WANDB_SILENT"] = "true"
# NOTE(review): the key is blank here, presumably removed before sharing.
# Supply it at runtime rather than pasting it into the notebook - TODO confirm
# how W&B behaves when this variable is set to an empty string.
os.environ["WANDB_API_KEY"] = ""

# Install Unsloth from PyPI, then replace it with the current GitHub build.
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install wandb

# Imports are done after the installs above so the freshly installed packages are used.
import random
import pandas as pd
from datasets import load_dataset , Dataset
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
from unsloth.chat_templates import get_chat_template

### Train

In [2]:
# Model-loading settings; max_seq_length is reused when the trainer is built.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth choose the dtype for the GPU - TODO confirm.
dtype = None
load_in_4bit = True

# Start from the SurvUday v2 model stored on Drive (not a stock Llama checkpoint).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v2",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Wrap the model with LoRA adapters on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down).
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# output_dir receives the trainer checkpoints; data_path is the v3 training CSV.
output_dir = "/content/drive/MyDrive/Projects/llama_3_2_3B_chat_v3"
data_path = '/content/drive/MyDrive/Projects/Data/mental_health_corpus_03.csv'
df = pd.read_csv(data_path)
# Preview: the 'conversations' column holds each dialogue as a stringified list of turns.
df.head()

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

Unnamed: 0,conversations
0,"[{'from': 'human', 'value': ""I feel so insecur..."
1,"[{'from': 'human', 'value': 'Who are you? '}, ..."
2,"[{'from': 'human', 'value': ""I'm really strugg..."
3,"[{'from': 'human', 'value': 'Tell me about spa..."
4,"[{'from': 'human', 'value': 'I just want some ..."


In [3]:
import ast


def _parse_conversation(raw):
    """Turn a stringified conversation into a Python object; pass anything else through."""
    if isinstance(raw, str):
        return ast.literal_eval(raw)
    return raw


# Rows without a conversation are dropped, then the CSV's string form of each
# conversation is parsed back into a list of turns.
df = df.dropna(subset=['conversations'])
df['conversations'] = df['conversations'].apply(_parse_conversation)

# Switch the tokenizer to the ChatML template, mapping the dataset's
# 'from'/'value' keys and 'human'/'gpt' speakers onto role/content.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
    map_eos_token=True,
)


def formatting_prompts_func(examples):
    """Render every conversation in a batch to one chat-template string.

    Returns a dict with a single "text" key holding the rendered strings, in
    the same order as examples["conversations"].
    """
    rendered = []
    for convo in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        )
    return {"text": rendered}


# Build the Hugging Face dataset and add the rendered "text" column.
dataset = Dataset.from_pandas(df).map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/29962 [00:00<?, ? examples/s]

In [4]:
# Sanity check: show one parsed conversation (a list of 'from'/'value' turns).
dataset[5]["conversations"]

[{'from': 'human',
  'value': "I'm feeling really unhappy lately, and he... he just doesn't understand. "},
 {'from': 'gpt',
  'value': 'I\'m sorry to hear that you\'re feeling unhappy. Can you tell me more about who "he" is and why he doesn\'t understand? '},
 {'from': 'human',
  'value': "He's my partner, and I've been dealing with anxiety for a while now. But every time I try to explain it to him, he dismisses it as me overreacting. "},
 {'from': 'gpt',
  'value': "It sounds like your partner's lack of understanding is adding to your distress. How does it make you feel when he dismisses your anxiety? "},
 {'from': 'human',
  'value': "It makes me feel frustrated and invalidated. I feel like he doesn't take my emotions seriously, and it only amplifies my anxiety. I wish he could understand what I'm going through. "},
 {'from': 'gpt',
  'value': "It's difficult when the people closest to us don't understand or validate our experiences. Have you tried explaining your anxiety to him in 

In [5]:
# Sanity check: show the same sample after the chat template was applied.
print(dataset[5]["text"])

<|im_start|>user
I'm feeling really unhappy lately, and he... he just doesn't understand. <|im_end|>
<|im_start|>assistant
I'm sorry to hear that you're feeling unhappy. Can you tell me more about who "he" is and why he doesn't understand? <|im_end|>
<|im_start|>user
He's my partner, and I've been dealing with anxiety for a while now. But every time I try to explain it to him, he dismisses it as me overreacting. <|im_end|>
<|im_start|>assistant
It sounds like your partner's lack of understanding is adding to your distress. How does it make you feel when he dismisses your anxiety? <|im_end|>
<|im_start|>user
It makes me feel frustrated and invalidated. I feel like he doesn't take my emotions seriously, and it only amplifies my anxiety. I wish he could understand what I'm going through. <|im_end|>
<|im_start|>assistant
It's difficult when the people closest to us don't understand or validate our experiences. Have you tried explaining your anxiety to him in a different way or using specif

In [6]:
# Alternative custom chat template (Jinja), assembled from its fragments.
# It is only defined here; the guarded call at the bottom never runs.
unsloth_template = "".join([
    "{{ bos_token }}",
    "{{ 'You are a helpful assistant to the user\n' }}",
    "{% for message in messages %}",
    "{% if message['role'] == 'user' %}",
    "{{ '>>> User: ' + message['content'] + '\n' }}",
    "{% elif message['role'] == 'assistant' %}",
    "{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}",
    "{% endif %}",
    "{% endfor %}",
    "{% if add_generation_prompt %}",
    "{{ '>>> Assistant: ' }}",
    "{% endif %}",
])
unsloth_eos_token = "eos_token"

# Disabled on purpose: the `if False` guard keeps the ChatML tokenizer
# configured earlier in place.
if False:
    tokenizer = get_chat_template(
        tokenizer,
        chat_template=(unsloth_template, unsloth_eos_token),
        mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
        map_eos_token=True,
    )

In [7]:
def _has_saved_checkpoint(path):
    """Return True when `path` is a directory holding at least one `checkpoint-<step>` folder."""
    if not os.path.isdir(path):
        return False
    prefix = "checkpoint-"
    return any(
        name.startswith(prefix)
        and name[len(prefix):].isdigit()
        and os.path.isdir(os.path.join(path, name))
        for name in os.listdir(path)
    )


# Resume only when output_dir really contains a saved checkpoint. Checking
# that the directory merely exists is not enough: an empty or partially
# created output_dir would make `resume_from_checkpoint=True` fail because
# the trainer finds no checkpoint to load. Evaluated before the trainer is
# built, as in the original cell.
exist_already = _has_saved_checkpoint(output_dir)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = True,
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 1,
        # max_steps = 60,
        learning_rate = 2e-4,
        # Exactly one of fp16/bf16 is enabled, depending on GPU support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = output_dir,
        # Checkpoint every 5 steps and keep only the 2 most recent, so an
        # interrupted Colab session can be resumed from Drive.
        save_strategy = "steps",
        save_steps = 5,
        # eval_steps= 100,
        save_total_limit = 2,
        report_to="wandb"
    ),
)

if exist_already:
    # Continue from the most recent checkpoint found in output_dir.
    trainer_stats = trainer.train(resume_from_checkpoint=True)
else:
    trainer_stats = trainer.train()

Generating train split: 0 examples [00:00, ? examples/s]

  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
  checkpoint_rng_state = torch.load(rng_file)


Step,Training Loss
661,0.6983
662,0.7027
663,0.7376
664,0.713
665,0.7112
666,0.7492
667,0.692
668,0.6747
669,0.7288
670,0.6788


### Inference

In [8]:
def generate_response(max_new_tokens=512, user_input=None):
    """Stream a single-turn reply from the fine-tuned model.

    Args:
        max_new_tokens: Upper bound on the number of tokens to generate.
        user_input: Message to send to the model. When None (the default),
            the message is read interactively with input(), as before.

    Returns:
        None. The generated text is printed by the TextStreamer while the
        model generates.
    """
    if user_input is None:
        # Interactive mode: ask for the message.
        user_input = input("Enter your message: ")

    # Single human turn, using the same 'from'/'value' keys as the training data.
    messages = [
        {"from": "human", "value": user_input},
    ]

    # Put the model into Unsloth's inference mode before generating.
    FastLanguageModel.for_inference(model)

    # Render the chat template (with the assistant prompt appended) straight
    # to a tensor of token ids and move it to the GPU.
    tokenized_input = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    # tokenized_input is already a tensor, so it is passed as input_ids directly.
    inputs = {"input_ids": tokenized_input}

    text_streamer = TextStreamer(tokenizer)
    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens, use_cache=True)

In [14]:
# Interactive smoke test: prompts for a message and streams the model's reply.
generate_response()

Enter your message: What can you do ?
<|im_start|>user
What can you do ?<|im_end|>
<|im_start|>assistant
I am a mental health AI assistant, and I can have conversations about mental health with you. I can assist you in dealing with your mental health problems. <|im_end|>


### Save Lora Adapters

In [15]:
# Save the model and the tokenizer side by side in one Drive folder,
# which the inference section later loads from.
lora_adapter_dir = "/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v3"
model.save_pretrained(lora_adapter_dir)
tokenizer.save_pretrained(lora_adapter_dir)

('/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v3/tokenizer_config.json',
 '/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v3/special_tokens_map.json',
 '/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v3/tokenizer.json')

### Save Q4_K_M

In [None]:
# Take the Hugging Face token from the HF_TOKEN environment variable so the
# secret never has to be pasted into (and saved with) the notebook. When the
# variable is unset this falls back to the same empty string as before.
Token = os.environ.get("HF_TOKEN", "")
Repo = "dkp2701/survuday_v3" # edit

# Convert the model to a q4_k_m GGUF file and upload it to the Hub repository.
model.push_to_hub_gguf(
    Repo,
    tokenizer,
    quantization_method = ["q4_k_m"],
    token = Token,
)

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 4.62 out of 12.67 RAM for saving.


100%|██████████| 28/28 [00:01<00:00, 15.77it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving dkp2701/survuday_v3/pytorch_model-00001-of-00002.bin...
Unsloth: Saving dkp2701/survuday_v3/pytorch_model-00002-of-00002.bin...
Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp will take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits will take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: [0] Installing llama.cpp. This will take 3 minutes...
Unsloth: [1] Converting model at dkp2701/survuday_v3 into f16 GGUF format.
The output location will be /content/dkp2701/survuday_v3/unsloth.F16.gguf
This will take 3 minutes...
INFO:hf-to-gguf:Loading model: survuday_v3
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:ro

unsloth.Q4_K_M.gguf:   0%|          | 0.00/2.02G [00:00<?, ?B/s]

Saved GGUF to https://huggingface.co/dkp2701/survuday_v3




Saved Ollama Modelfile to https://huggingface.co/dkp2701/survuday_v3


### Load the Saved Model for Inference

In [None]:
%%capture
# Inference-only bootstrap for a fresh Colab session: mount Drive, install
# Unsloth and import just what generation needs. The training-only setup is
# left commented out below.
# import os
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): a real-looking W&B API key used to be hardcoded in the
# commented-out line below. It has been redacted here; the key should be
# rotated and never committed, even inside a comment.
# os.environ["WANDB_PROJECT"] = "SarvUday_chatbot_v5"
# os.environ["WANDB_SILENT"] = "true"
# os.environ["WANDB_API_KEY"] = "<REDACTED - load from a secret store at runtime>"

# Install Unsloth from PyPI, then replace it with the current GitHub build.
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# !pip install wandb

# import random
# import pandas as pd
# from datasets import load_dataset , Dataset
from unsloth import FastLanguageModel
# import torch
# from trl import SFTTrainer
# from transformers import TrainingArguments, TextStreamer, DataCollatorForSeq2Seq
from transformers import  TextStreamer
from unsloth import is_bfloat16_supported
# from unsloth.chat_templates import get_chat_template

In [None]:
# Same loading settings as used for training.
max_seq_length, dtype, load_in_4bit = 2048, None, True

# Load the v3 model and tokenizer that the training section saved to Drive.
load_kwargs = dict(
    model_name="/content/drive/MyDrive/Projects/models/llama_3_2_3B_SurvUday_v3",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

In [None]:
def generate_response(max_new_tokens=512, user_input=None):
    """Stream a single-turn reply from the loaded model.

    Args:
        max_new_tokens: Upper bound on the number of tokens to generate.
        user_input: Message to send to the model. When None (the default),
            the message is read interactively with input(), as before.

    Returns:
        None. The generated text is printed by the TextStreamer while the
        model generates.
    """
    if user_input is None:
        # Interactive mode: ask for the message.
        user_input = input("Enter your message: ")

    # Single human turn expressed with 'from'/'value' keys.
    # NOTE(review): assumes the saved tokenizer's chat template accepts these
    # keys, as it did when the tokenizer was saved after training - confirm.
    messages = [
        {"from": "human", "value": user_input},
    ]

    # Put the model into Unsloth's inference mode before generating.
    FastLanguageModel.for_inference(model)

    # Render the chat template (with the assistant prompt appended) straight
    # to a tensor of token ids and move it to the GPU.
    tokenized_input = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    # tokenized_input is already a tensor, so it is passed as input_ids directly.
    inputs = {"input_ids": tokenized_input}

    text_streamer = TextStreamer(tokenizer)
    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens, use_cache=True)

In [None]:
# Interactive smoke test of the reloaded model: prompts for a message and streams the reply.
generate_response()