# **AI Mental Health Companion (LLM)**

In [1]:
# Step 1: Install libraries
!pip install transformers datasets peft accelerate bitsandbytes huggingface_hub --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Step 2: Import libraries
import pandas as pd
from datasets import Dataset
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model

In [3]:
# Step 3: Hugging Face authentication
from google.colab import userdata
HF_TOKEN = userdata.get("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("Hugging Face token not found")
print("Token loaded!")
login(HF_TOKEN)

Token loaded!


In [4]:
# Step 4: Load your CSV file
df = pd.read_csv("/content/drive/MyDrive/mental_health.csv")

In [5]:
# Step 5: Extract <HUMAN> and <ASSISTANT> pairs
df[['input_text', 'target_text']] = df['text'].str.extract(
    r'<HUMAN>:\s*(.*?)\s*<ASSISTANT>:\s*(.*)'
)
df = df.dropna()

# Combine into prompt + response for training
df["dialogue"] = "<s>User: " + df["input_text"] + "\nAssistant: " + df["target_text"] + "</s>"

dataset = Dataset.from_pandas(df[["dialogue"]])
print("Dataset size:", len(dataset))

Dataset size: 172


In [6]:
# Step 6: Load tokenizer & base model
model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=HF_TOKEN,
    load_in_8bit=True,
    device_map="auto")

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [7]:
# Step 7: Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM")

model = get_peft_model(model, lora_config)
print("LoRA applied...")

LoRA applied...


In [8]:
# Step 8: Tokenize dataset
def tokenize_fn(examples):
    return tokenizer(examples["dialogue"], truncation=True, padding="max_length", max_length=256)

tokenized_ds = dataset.map(tokenize_fn, batched=True, remove_columns=["dialogue"])

Map:   0%|          | 0/172 [00:00<?, ? examples/s]

In [9]:
# Step 9: Training setup
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    max_steps=50,
    learning_rate=2e-4,
    logging_steps=5,
    save_strategy="no",
    fp16=True,
    optim="paged_adamw_32bit",
    report_to="none")

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [10]:
# Step 10: Train with Trainer API
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds,
    tokenizer=tokenizer,
    data_collator=data_collator)

trainer.train()

  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
5,2.5276
10,2.6995
15,2.464
20,2.3551
25,2.0616
30,1.8687
35,1.9095
40,1.9486
45,1.991
50,1.957


TrainOutput(global_step=50, training_loss=2.178245506286621, metrics={'train_runtime': 76.6727, 'train_samples_per_second': 5.217, 'train_steps_per_second': 0.652, 'total_flos': 586455931944960.0, 'train_loss': 2.178245506286621, 'epoch': 2.2790697674418605})

In [18]:
# Step 11: Inference after LoRA fine-tuning
import re
system_prompt = "You are a warm, supportive mental health chatbot. Keep answers short, empathetic, and calming."
user_input = "I am feeling anxious about tomorrow."
prompt = f"{system_prompt}\nUser: {user_input}\nAssistant:"

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=80)

output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
bot_reply = output_text.replace(system_prompt, "").strip()

match = re.search(r'Assistant:\s*(.*?)(User:|$)', bot_reply, re.DOTALL)
if match:
    bot_reply = match.group(1).strip()

print("User:",user_input)
print("🤖 Bot:", bot_reply)


User: I am feeling anxious about tomorrow.
🤖 Bot: It's completely normal to feel anxious. Tomorrow can be overwhelming, but you've got this. What are some things you can do to feel more prepared and calm?
