## **Mental Health Support Chatbot (Fine-Tuned)**

### Installing Required Libraries 

In [1]:
!pip install -qU transformers datasets accelerate peft bitsandbytes torch
!pip install -q sentencepiece 
# Required for Mistral tokenizer

### Load and Prepare Dataset

In [2]:
import pandas as pd
from datasets import DatasetDict, Dataset

# Load the Kaggle CSV
df = pd.read_csv("C:/Users/Aoun Haider/Downloads/emotion-emotion_69k.csv")

# Display sample to verify
print(df.head())
print(f"\nDataset shape: {df.shape}")

   Unnamed: 0                                          Situation      emotion  \
0           0  I remember going to the fireworks with my best...  sentimental   
1           1  I remember going to the fireworks with my best...  sentimental   
2           2  I remember going to the fireworks with my best...  sentimental   
3           3  I remember going to the fireworks with my best...  sentimental   
4           4  I remember going to the fireworks with my best...  sentimental   

                                empathetic_dialogues  \
0  Customer :I remember going to see the firework...   
1  Customer :This was a best friend. I miss her.\...   
2              Customer :We no longer talk.\nAgent :   
3  Customer :Was this a friend you were in love w...   
4             Customer :Where has she gone?\nAgent :   

                                              labels Unnamed: 5 Unnamed: 6  
0  Was this a friend you were in love with, or ju...        NaN        NaN  
1                     

### Step 2: Robust Preprocessing Function

In [3]:
import re
def preprocess_row(row):
    """Process a single row of the Kaggle dataset"""
    # Extract components
    emotion = row['emotion']
    dialogue = row['empathetic_dialogues']
    
    # Clean and split dialogue
    dialogue_clean = dialogue.replace('""', '"').replace('\\t', ' ').replace('\\n', '\n').strip()
    
    # Extract user input and bot response
    user_input = ""
    bot_response = ""
    
    # Case 1: Standard format with Customer/Agent markers
    if "Customer :" in dialogue_clean and "Agent :" in dialogue_clean:
        try:
            parts = re.split(r'Customer :|Agent :', dialogue_clean)
            user_input = parts[1].strip() if len(parts) > 1 else ""
            bot_response = parts[2].strip() if len(parts) > 2 else ""
        except:
            pass
    
    # Case 2: Fallback if markers missing
    if not user_input or not bot_response:
        if '\n' in dialogue_clean:
            parts = dialogue_clean.split('\n')
            user_input = parts[0].replace("Customer:", "").replace("Customer :", "").strip()
            bot_response = parts[1].replace("Agent:", "").replace("Agent :", "").strip() if len(parts) > 1 else ""
        else:
            user_input = dialogue_clean
            bot_response = row['labels']  # Use labels column as fallback
    
    # Final cleaning
    user_input = re.sub(r'[^a-zA-Z0-9,.!?\'" ]', '', user_input)
    bot_response = re.sub(r'[^a-zA-Z0-9,.!?\'" ]', '', bot_response)
    
    return {
        "context": emotion,
        "user_input": user_input,
        "bot_response": bot_response
    }

# Apply preprocessing
processed_data = []
for idx, row in df.iterrows():
    processed_data.append(preprocess_row(row))
    
processed_df = pd.DataFrame(processed_data)

# Filter out empty responses
processed_df = processed_df[processed_df['bot_response'].str.len() > 5]

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,Situation,emotion,empathetic_dialogues,labels,Unnamed: 5,Unnamed: 6
0,0,I remember going to the fireworks with my best...,sentimental,Customer :I remember going to see the firework...,"Was this a friend you were in love with, or ju...",,
1,1,I remember going to the fireworks with my best...,sentimental,Customer :This was a best friend. I miss her.\...,Where has she gone?,,
2,2,I remember going to the fireworks with my best...,sentimental,Customer :We no longer talk.\nAgent :,Oh was this something that happened because of...,,
3,3,I remember going to the fireworks with my best...,sentimental,Customer :Was this a friend you were in love w...,This was a best friend. I miss her.,,
4,4,I remember going to the fireworks with my best...,sentimental,Customer :Where has she gone?\nAgent :,We no longer talk.,,


### Step 3: Create Training-Ready Format

In [5]:
# Create final text format
processed_df['text'] = processed_df.apply(
    lambda x: f"CONTEXT: {x['context']}\nUSER: {x['user_input']}\nBOT: {x['bot_response']}", 
    axis=1
)

# Split into train/validation
train_df = processed_df.sample(frac=0.8, random_state=42)
val_df = processed_df.drop(train_df.index)

# Convert to Hugging Face datasets
dataset = DatasetDict({
    "train": Dataset.from_pandas(train_df),
    "validation": Dataset.from_pandas(val_df)
})

# Verify
print("\nProcessed sample:")
print(dataset["train"]["text"][0])
print("\nDataset structure:", dataset)


Processed sample:
CONTEXT:  but what I didn't know was that he was working in the next room with the door open.  He approached and asked what I had been saying.  I knew I was caught.  I was so disgusted with myself.  
USER: ashamed
BOT: Customer Pretty awful, actually.  Especially considering that the coworker of whom I was speaking was in a room just next door with the door open.  Agent 

Dataset structure: DatasetDict({
    train: Dataset({
        features: ['context', 'user_input', 'bot_response', 'text', '__index_level_0__'],
        num_rows: 36
    })
    validation: Dataset({
        features: ['context', 'user_input', 'bot_response', 'text', '__index_level_0__'],
        num_rows: 9
    })
})


### Step 4: Tokenization

### Updated Tokenization for GPT-Neo-1.3B

In [6]:

from transformers import AutoTokenizer

# Use the smaller, CPU-friendly model
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set padding token (crucial for training)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize text with truncation and padding"""
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=256,
        padding="max_length",  # Pad all sequences to same length
        return_tensors="pt"    # Return PyTorch tensors
    )

# Re-tokenize datasets
tokenized_train = dataset["train"].map(tokenize_function, batched=True)
tokenized_val = dataset["validation"].map(tokenize_function, batched=True)

print("Tokenization complete for GPT-Neo-1.3B!")
print(f"Sample tokenized output: {tokenized_train[0]}")

Map:   0%|          | 0/36 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Tokenization complete for GPT-Neo-1.3B!
Sample tokenized output: {'context': " but what I didn't know was that he was working in the next room with the door open.  He approached and asked what I had been saying.  I knew I was caught.  I was so disgusted with myself.  ", 'user_input': 'ashamed', 'bot_response': 'Customer Pretty awful, actually.  Especially considering that the coworker of whom I was speaking was in a room just next door with the door open.  Agent ', 'text': "CONTEXT:  but what I didn't know was that he was working in the next room with the door open.  He approached and asked what I had been saying.  I knew I was caught.  I was so disgusted with myself.  \nUSER: ashamed\nBOT: Customer Pretty awful, actually.  Especially considering that the coworker of whom I was speaking was in a room just next door with the door open.  Agent ", '__index_level_0__': 53779, 'input_ids': [10943, 32541, 25, 220, 475, 644, 314, 1422, 470, 760, 373, 326, 339, 373, 1762, 287, 262, 1306, 2119,

### Step 5: Configure LoRA (Low-Rank Adaptation)

In [7]:
# Uninstall existing bitsandbytes
!pip uninstall -y bitsandbytes

# Install Windows-compatible version from unofficial build
!pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.41.1-py3-none-win_amd64.whl

# Install required dependencies
!pip install scipy

Found existing installation: bitsandbytes 0.46.0
Uninstalling bitsandbytes-0.46.0:
  Successfully uninstalled bitsandbytes-0.46.0
Collecting bitsandbytes==0.41.1


  ERROR: HTTP error 404 while getting https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.41.1-py3-none-win_amd64.whl
ERROR: Could not install requirement bitsandbytes==0.41.1 from https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.41.1-py3-none-win_amd64.whl because of HTTP error 404 Client Error: Not Found for url: https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.41.1-py3-none-win_amd64.whl for URL https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.41.1-py3-none-win_amd64.whl




In [8]:
!pip install hf_xet



In [9]:
!pip install huggingface_hub[hf_xet]



In [19]:
# %% [markdown]
### **Step 2: Load Model and Tokenizer**
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling

# Tiny model for fast training
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Critical!

model = AutoModelForCausalLM.from_pretrained(model_name)
print("Model loaded successfully!")

Model loaded successfully!


In [20]:
# %% [markdown]
### **Step 3: LoRA Setup**
from peft import LoraConfig, get_peft_model

# Minimal LoRA configuration
lora_config = LoraConfig(
    r=4,                # Tiny rank for speed
    target_modules=["c_attn"],  # Only attention weights
    lora_alpha=16,
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

# Output should show ~0.18% of parameters trainable

trainable params: 73,728 || all params: 81,986,304 || trainable%: 0.0899


## Training setup 

In [13]:
!pip install -q tf-keras

In [14]:
# %% [markdown]
### **Step 2: Verify Installation**
import keras
print(f"Keras version: {keras.__version__}")
# Should output 2.13.x or similar

Keras version: 3.9.2


In [12]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Fast training config
training_args = TrainingArguments(
    output_dir="./quick_empathy_train",
    per_device_train_batch_size=8,      # Larger batch since model is small
    num_train_epochs=2,                 # Fewer epochs
    learning_rate=1e-4,                 # Slightly higher rate
    logging_steps=10,                   # More frequent updates
    save_strategy="no",                 # Don't save checkpoints
    report_to="none"                    # No external logging
)

# Initialize trainer
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_train,
    data_collator=data_collator
)

ValueError: Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.

### Doing Training using Pytorch as Keras as some model issues while training ( it trains on older model etc ) 

In [17]:
# First uninstall problematic Keras versions
!pip uninstall -y keras keras-nightly keras-preprocessing tensorflow

# Install only what we need
!pip install -qU transformers datasets peft torch

Found existing installation: keras 3.9.2
Uninstalling keras-3.9.2:
  Successfully uninstalled keras-3.9.2
Found existing installation: tensorflow 2.19.0
Uninstalling tensorflow-2.19.0:
  Successfully uninstalled tensorflow-2.19.0


You can safely remove it manually.


In [24]:
# %% [markdown]
### **Fixed Step 4: Prepare Data Loader**
import torch
from torch.utils.data import DataLoader
from transformers import DataCollatorForLanguageModeling

# Create data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # For causal language modeling
)

# Create PyTorch Dataset
class EmpathyDataset(torch.utils.data.Dataset):
    def __init__(self, encoded_data):
        self.encodings = {
            "input_ids": encoded_data["input_ids"],
            "attention_mask": encoded_data["attention_mask"]
        }
    
    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor(self.encodings["input_ids"][idx]),
            "attention_mask": torch.tensor(self.encodings["attention_mask"][idx])
        }
    
    def __len__(self):
        return len(self.encodings["input_ids"])

# Create datasets
train_dataset = EmpathyDataset(tokenized_train)
val_dataset = EmpathyDataset(tokenized_val)

# Create data loader
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=data_collator
)

In [33]:
# %% [markdown]
### **Step 1: Verify Model Setup**
# Check if model parameters require gradients
for name, param in peft_model.named_parameters():
    print(f"{name}: requires_grad={param.requires_grad}")
    
# Should show True for LoRA parameters

base_model.model.transformer.wte.weight: requires_grad=False
base_model.model.transformer.wpe.weight: requires_grad=False
base_model.model.transformer.h.0.ln_1.weight: requires_grad=False
base_model.model.transformer.h.0.ln_1.bias: requires_grad=False
base_model.model.transformer.h.0.attn.c_attn.weight: requires_grad=False
base_model.model.transformer.h.0.attn.c_attn.bias: requires_grad=False
base_model.model.transformer.h.0.attn.c_proj.weight: requires_grad=False
base_model.model.transformer.h.0.attn.c_proj.bias: requires_grad=False
base_model.model.transformer.h.0.ln_2.weight: requires_grad=False
base_model.model.transformer.h.0.ln_2.bias: requires_grad=False
base_model.model.transformer.h.0.mlp.c_fc.weight: requires_grad=False
base_model.model.transformer.h.0.mlp.c_fc.bias: requires_grad=False
base_model.model.transformer.h.0.mlp.c_proj.weight: requires_grad=False
base_model.model.transformer.h.0.mlp.c_proj.bias: requires_grad=False
base_model.model.transformer.h.1.ln_1.weight: requ

In [36]:
from torch.optim import AdamW
from tqdm import tqdm
import torch

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move model to device
peft_model = peft_model.to(device)

# Optimizer
optimizer = AdamW(peft_model.parameters(), lr=1e-4)

# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    peft_model.train()
    total_loss = 0
    batch_count = 0
    
    # Progress bar
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    
    for batch in progress_bar:
        # Move batch to device
        batch = {k: v.to(device) for k, v in batch.items()}
        
        # Forward pass
        outputs = peft_model(**batch)
        loss = outputs.loss
        
        # Backward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        # Update progress
        total_loss += loss.item()
        batch_count += 1
        avg_loss = total_loss / batch_count
        progress_bar.set_postfix({"avg_loss": f"{avg_loss:.4f}"})
    
    print(f"Epoch {epoch+1} complete! Average loss: {total_loss/len(train_loader):.4f}")

# Save adapter
peft_model.save_pretrained("empathy_adapter")
print("Training complete! Adapter saved.")

Using device: cpu


Epoch 1/2:   0%|          | 0/5 [00:05<?, ?it/s]


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

## updating code as with 2 epochs model was not working correctly ( and changing epochs was givimng error also ) 

In [40]:
# %% [markdown]
### **Step 1: Install Essentials (No Keras)**
!pip uninstall -y keras keras-nightly keras-preprocessing tensorflow
!pip install -qU transformers datasets torch



In [41]:
# %% [markdown]
### **Step 2: Load Model & Tokenizer**
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Critical!

model = AutoModelForCausalLM.from_pretrained(model_name)
print("Model loaded successfully!")

Model loaded successfully!


In [44]:
# %% [markdown]
### **Fixed Step 3: Prepare Data Loader**
from torch.utils.data import DataLoader, Dataset
import torch

class ChatDataset(Dataset):
    def __init__(self, hf_dataset):
        """
        Creates a PyTorch Dataset from Hugging Face Dataset
        with input_ids and attention_mask
        """
        self.input_ids = hf_dataset["input_ids"]
        self.attention_mask = hf_dataset["attention_mask"]
        
    def __len__(self):
        return len(self.input_ids)
    
    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor(self.input_ids[idx]),
            "attention_mask": torch.tensor(self.attention_mask[idx])
        }

# Create PyTorch Dataset
train_dataset = ChatDataset(tokenized_train)

# Create DataLoader
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True
)
print(f"Created DataLoader with {len(train_loader)} batches")

Created DataLoader with 5 batches


## trying to achieve average loss < 4 %

In [45]:
# %% [markdown]
### **Step 4: Robust Training Loop**
from torch.optim import AdamW
from tqdm import tqdm

# Setup
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    batch_count = 0
    
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    
    for batch in progress_bar:
        # Move batch to device
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=input_ids  # Use input_ids as labels for causal LM
        )
        loss = outputs.loss
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Update progress
        total_loss += loss.item()
        batch_count += 1
        avg_loss = total_loss / batch_count
        progress_bar.set_postfix({"avg_loss": f"{avg_loss:.4f}"})
    
    print(f"Epoch {epoch+1} complete! Average loss: {total_loss/len(train_loader):.4f}")

# Save model
model.save_pretrained("empathy_chatbot")
tokenizer.save_pretrained("empathy_chatbot")
print("Training complete! Model saved.")

Using device: cpu


Epoch 1/5: 100%|██████████| 5/5 [01:03<00:00, 12.75s/it, avg_loss=4.6185]


Epoch 1 complete! Average loss: 4.6185


Epoch 2/5: 100%|██████████| 5/5 [01:02<00:00, 12.55s/it, avg_loss=1.1874]


Epoch 2 complete! Average loss: 1.1874


Epoch 3/5: 100%|██████████| 5/5 [01:01<00:00, 12.34s/it, avg_loss=1.0866]


Epoch 3 complete! Average loss: 1.0866


Epoch 4/5: 100%|██████████| 5/5 [01:03<00:00, 12.72s/it, avg_loss=0.9331]


Epoch 4 complete! Average loss: 0.9331


Epoch 5/5: 100%|██████████| 5/5 [01:03<00:00, 12.68s/it, avg_loss=0.8637]


Epoch 5 complete! Average loss: 0.8637
Training complete! Model saved.


## Testing Model on some example cases/ dialogues ( while first loading model ) 

In [47]:
# %% [markdown]
### **Step 1: Load Your Trained Model**
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load your custom-trained model
model_path = "empathy_chatbot"  # Path where you saved the model
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
print("Trained model loaded successfully!")

Trained model loaded successfully!


In [49]:
# Add at the top
import random

def rule_based_fallback(user_input):
    """Simple empathetic responses"""
    user_input = user_input.lower()
    responses = [
        "That sounds really difficult. Would you like to talk more about it?",
        "I hear you. That must be challenging.",
        "Thank you for sharing. I'm here to listen.",
        "I understand this is tough. Remember to be kind to yourself.",
        "That sounds stressful. Maybe try taking some deep breaths?"
    ]
    return random.choice(responses)

In [52]:
def enhance_response(user_input, raw_response):
    user_input_lower = user_input.lower()
    raw_response = raw_response.strip()
    
    # 1. Handle empty responses
    if not raw_response or len(raw_response.split()) < 3:
        return handle_empty_response(user_input_lower)
    
    # 2. Fix contradictory phrases
    if "lonely" in user_input_lower or "isolated" in user_input_lower:
        if "great experience" in raw_response:
            return "Loneliness can be painful. Would you like to talk more about what you're experiencing?"
    
    # 3. Situation-specific enhancements
    enhancement = ""
    if "overwhelm" in user_input_lower or "stress" in user_input_lower:
        enhancement = " When feeling overwhelmed, try breaking tasks into smaller steps."
    elif "tired" in user_input_lower or "exhaust" in user_input_lower:
        enhancement = " Remember to take regular breaks and stay hydrated while working."
    elif "sleep" in user_input_lower or "racing" in user_input_lower:
        enhancement = " A warm drink and deep breathing before bed might help calm your mind."
    
    # 4. Ensure proper sentence structure
    if not raw_response.endswith(('.', '!', '?')):
        raw_response += '.'
    
    return raw_response + enhancement

def handle_empty_response(user_input):
    """Provide empathetic responses when model returns empty"""
    if "tired" in user_input or "exhaust" in user_input:
        return "Coding fatigue is real! Try the 20-20-20 rule: every 20 minutes, look at something 20 feet away for 20 seconds."
    elif "work" in user_input or "deadline" in user_input:
        return "Work pressure can be draining. Remember to prioritize tasks and take short breaks."
    else:
        return "I hear you. Would you like to share more about how you're feeling?"

# Updated chat interface with better error handling
def mental_health_chat():
    print("\n" + "="*60)
    print("Refined Mental Health Support Chatbot")
    print("Type 'exit' to quit")
    print("="*60)
    
    while True:
        try:
            user_input = input("\nYou: ")
            if user_input.lower() in ["exit", "quit"]:
                print("Bot: Thank you for chatting. Your mental health matters!")
                break
                
            # Generate response
            prompt = f"USER: {user_input}\nBOT:"
            inputs = tokenizer(prompt, return_tensors="pt").to(device)
            outputs = model.generate(
                inputs.input_ids,
                max_new_tokens=80,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.3,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )
            raw_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Extract bot's response
            if "BOT:" in raw_response:
                raw_response = raw_response.split("BOT:")[-1].strip()
            else:
                raw_response = raw_response.split("USER:")[0].strip()
            
            # Enhance and print
            final_response = enhance_response(user_input, raw_response)
            print(f"Bot: {final_response}")
            
        except Exception as e:
            print(f"Bot: I encountered an error. Please try rephrasing: {str(e)}")

# Start the chat
mental_health_chat()


Refined Mental Health Support Chatbot
Type 'exit' to quit



You:  I've been feeling isolated lately


Bot: I hear you. Would you like to share more about how you're feeling?



You:   I feel tired after coding


Bot: Coding fatigue is real! Try the 20-20-20 rule: every 20 minutes, look at something 20 feet away for 20 seconds.



You:  I'm feeling overwhelmed with work deadlines


Bot: My last day was when the first lady and my wife were in our room, sitting next to each other. When feeling overwhelmed, try breaking tasks into smaller steps.



You:  I can't sleep because my mind won't stop racing


Bot: It's like the other day. A warm drink and deep breathing before bed might help calm your mind.



You:  ok thanks , quit


Bot: if the user is not a regular person, he has to go around his door and see what it says.



You:  quit


Bot: Thank you for chatting. Your mental health matters!
