# Mental Health Chatbot Trainer – Full Pipeline

This notebook trains a multi-model chatbot with emotional intelligence and conversational memory.

**Models Trained:**
- Emotion Classifier: `SamLowe/roberta-base-go_emotions`
- Response Generator: `T5`
- Q&A Assistant: `T5`

**Datasets Used:**
- `mental_health_faq_cleaned.csv`
- `transformed_mental_health_chatbot.csv`
- `Mental Health Chatbot Dataset - Friend mode and Professional mode Responses.csv`
- `transformed_mental_health_chatbot_dataset.csv`
- HuggingFace datasets:
  - `tolu07/Mental_Health_FAQ`
  - `Amod/mental_health_counseling_conversations`
  - `ruslanmv/ai-medical-chatbot`
  - `lavita/ChatDoctor-HealthCareMagic-100k`

---


## 1. Load, Clean, and Merge All Datasets

In [29]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, T5Tokenizer, T5ForConditionalGeneration, DataCollatorForSeq2Seq
import torch
import torch.nn.functional as F
from datasets import load_dataset, Dataset, concatenate_datasets
import gradio as gr
from evaluate import load as load_metric
from tqdm import tqdm
from tqdm.auto import tqdm
import torch
import tensorflow as tf
from accelerate import init_empty_weights
from sklearn.preprocessing import MultiLabelBinarizer



In [30]:
device = torch.device("cuda:0")
print("GPU Available:", torch.cuda.is_available())  # True if a GPU is accessible
print("Current Device Index:", torch.cuda.current_device())  # e.g., 0
print("Device Name:", torch.cuda.get_device_name(0))  # e.g., "NVIDIA GeForce RTX 3070"

# Example: set default tensor type to GPU-based FloatTensor (optional)
# This will make ALL newly created tensors go to GPU (Float32).
# torch.set_default_dtype(torch.float32)
# torch.set_default_tensor_type(torch.cuda.FloatTensor)

GPU Available: True
Current Device Index: 0
Device Name: NVIDIA GeForce RTX 3070 Laptop GPU


In [None]:
# === 📊 CONFIGURATION CELL ===
# Local Dataset Toggles (ds1 through ds4)
use_ds1 = True
use_ds2 = False
use_ds3 = False
use_ds4 = False

# HuggingFace Dataset Toggles (ds5 through ds8)
use_ds5 = False
use_ds6 = False
use_ds7 = False
use_ds8 = False

# Sampling fraction for faster development (1.0 = full data)
sample_fraction = 0.3

# Model Training Configuration
train_data_fraction = 0.3  # Train on 10% of the data
max_epochs = 3
batch_size = 16
enable_training = True
enable_emotion_labeling = True


✅ Configuration Loaded


In [32]:

# === LOAD ALL DATASETS UNCONDITIONALLY ===
# Local (ds1 - ds4)
ds1 = pd.read_csv('./data/mental_health_faq_cleaned.csv')
ds2 = pd.read_csv('./data/transformed_mental_health_chatbot.csv')
ds3 = pd.read_csv('./data/mental_health_chatbot_dataset_merged_modes.csv')
ds4 = pd.read_csv('./data/transformed_mental_health_chatbot_dataset.csv')

# Hugging Face (ds5 - ds8)
ds5 = load_dataset("tolu07/Mental_Health_FAQ")['train']
ds6 = load_dataset("Amod/mental_health_counseling_conversations")['train']
ds7 = load_dataset("ruslanmv/ai-medical-chatbot")['train']
ds8 = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")['train']


In [33]:
def extract_qa(dataset, question_key, answer_key, drop_cols=None, sample_frac=1.0):
    df = dataset['train'].to_pandas() if not isinstance(dataset, pd.DataFrame) else dataset
    if sample_frac < 1.0:
        df = df.sample(frac=sample_frac, random_state=42)
    df = df.rename(columns={question_key: "question", answer_key: "answer"})
    if drop_cols:
        df = df.drop(columns=drop_cols, errors='ignore')
    return df[["question", "answer"]]


In [34]:
# === Load and sample HuggingFace datasets ===
hf_dfs = []
if use_ds5:
    ds5 = load_dataset("tolu07/Mental_Health_FAQ")
    hf_dfs.append(extract_qa(ds5, question_key="Questions", answer_key="Answers", drop_cols=["Question_ID"], sample_frac=sample_fraction))
if use_ds6:
    ds6 = load_dataset("Amod/mental_health_counseling_conversations")
    hf_dfs.append(extract_qa(ds6, question_key="Context", answer_key="Response", sample_frac=sample_fraction))
if use_ds7:
    ds7 = load_dataset("ruslanmv/ai-medical-chatbot")
    hf_dfs.append(extract_qa(ds7, question_key="Patient", answer_key="Doctor", sample_frac=sample_fraction))
if use_ds8:
    ds8 = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")
    hf_dfs.append(extract_qa(ds8, question_key="input", answer_key="output", sample_frac=sample_fraction))

In [35]:

# === APPLY TOGGLES TO INCLUDE/EXCLUDE DATASETS ===
# Build lists of selected datasets
local_dfs = []
if use_ds1: local_dfs.append(ds1)
if use_ds2: local_dfs.append(ds2)
if use_ds3: local_dfs.append(ds3)
if use_ds4: local_dfs.append(ds4)

hf_dfs = []
if use_ds5: hf_dfs.append(extract_qa(ds5, "Questions", "Answers", drop_cols=["Question_ID"]))
if use_ds6: hf_dfs.append(extract_qa(ds6, "Context", "Response"))
if use_ds7: hf_dfs.append(extract_qa(ds7, "Patient", "Doctor"))
if use_ds8: hf_dfs.append(extract_qa(ds8, "input", "output"))


## 2. Train Emotion Classifier (`SamLowe/roberta-base-go_emotions`)

In [36]:
# Load model and tokenizer
model_name = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(model_name)
fine_tuned_model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [37]:
# Send model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
fine_tuned_model.to(device)
fine_tuned_model.eval()

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [38]:
# Load dataset and run inference for emotion prediction
full_df = pd.read_csv("./data/unified_mental_health_chatbot_dataset.csv")
questions = full_df['question'].astype(str).tolist()

In [39]:
# === ⚡ Parallel Tokenization (Optional Upgrade) ===
from concurrent.futures import ThreadPoolExecutor

def parallel_tokenize(batch_texts, tokenizer, batch_size=32):
    def process_batch(batch):
        return tokenizer(batch, padding=True, truncation=True, return_tensors="pt")
    
    with ThreadPoolExecutor() as executor:
        batches = [batch_texts[i:i + batch_size] for i in range(0, len(batch_texts), batch_size)]
        tokenized = list(executor.map(process_batch, batches))
    return tokenized


In [None]:
predicted_labels = []
batch_size = 16
with torch.no_grad():
    for i in tqdm(range(0, len(questions), batch_size)):
        batch = questions[i:i+batch_size]
        batch = [str(q) for q in batch if isinstance(q, str) or pd.notna(q)]  # clean/sanitize
        try:
            inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(device)
            outputs = fine_tuned_model(**inputs)
            probs = torch.sigmoid(outputs.logits)
            preds = (probs > 0.5).int().tolist()
            predicted_labels.extend([','.join(map(str, [i for i, val in enumerate(p) if val == 1])) for p in preds])
        except Exception as e:
            print(f"Error at batch {i}: {e}")
            predicted_labels.extend([""] * len(batch))  # pad with empty if failure

  2%|▏         | 500/22577 [04:34<5:44:31,  1.07it/s]

In [None]:
# Save emotion-labeled data
full_df['label'] = predicted_labels
full_df.to_csv("./data/emotion_labeled_dataset.csv", index=False)
print("Saved labeled dataset to emotion_labeled_dataset.csv")

NameError: name 'predicted_labels' is not defined

In [None]:
# Prepare multi-hot encoded dataset for fine-tuning
raw_df = pd.read_csv("./data/emotion_labeled_dataset.csv")
raw_df['label'] = raw_df['label'].apply(lambda x: list(map(int, str(x).split(','))) if pd.notna(x) else [])

In [None]:
mlb = MultiLabelBinarizer()
multi_hot = mlb.fit_transform(raw_df['label'])
label_cols = [f"label_{i}" for i in range(multi_hot.shape[1])]
raw_df[label_cols] = multi_hot
raw_df = raw_df.drop(columns=["label"])
raw_df = raw_df.rename(columns={"question": "text"})

In [None]:
# Tokenize and prepare Hugging Face dataset
dataset = Dataset.from_pandas(raw_df)
dataset = dataset.train_test_split(test_size=0.2)

def tokenize(batch):
    texts = [str(t) if pd.notna(t) else "" for t in batch["text"]]
    return tokenizer(texts, padding=True, truncation=True)

tokenized = dataset.map(tokenize, batched=True)

def merge_labels(example):
    example['labels'] = torch.tensor([example[f'label_{i}'] for i in range(len(label_cols))])
    return example

tokenized = tokenized.map(merge_labels)
tokenized.set_format("torch")

Map: 100%|██████████| 289443/289443 [01:48<00:00, 2658.51 examples/s]
Map: 100%|██████████| 72361/72361 [00:27<00:00, 2648.55 examples/s]
Map: 100%|██████████| 289443/289443 [01:48<00:00, 2661.18 examples/s]
Map: 100%|██████████| 72361/72361 [00:27<00:00, 2647.64 examples/s]


In [None]:
# Define custom Trainer for multi-label classification
class MultiLabelTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = nn.BCEWithLogitsLoss()
        loss = loss_fct(logits, labels.type_as(logits))
        return (loss, outputs) if return_outputs else loss

In [None]:
# Set up TrainingArguments
training_args = TrainingArguments(
    output_dir="./saved_models/roberta_emotion",
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    save_total_limit=1,
    fp16=True,
)

TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'

: 

In [None]:
# Train using custom MultiLabelTrainer
trainer = MultiLabelTrainer(
    model=fine_tuned_model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

NameError: name 'training_args' is not defined

In [None]:
trainer.train()

# Save fine-tuned model and tokenizer
fine_tuned_model.save_pretrained("./saved_models/roberta_emotion")
tokenizer.save_pretrained("./saved_models/roberta_emotion")
print("Fine-tuned multi-label model saved to ./saved_models/roberta_emotion")

## 3. Train T5 for Response Generation

In [None]:
t5_model_name = "t5-small"
tokenizer_t5 = T5Tokenizer.from_pretrained(t5_model_name)
model_t5 = T5ForConditionalGeneration.from_pretrained(t5_model_name)

In [None]:
def preprocess_function(examples):
    inputs = ["question: " + q for q in examples["question"]]
    model_inputs = tokenizer_t5(inputs, max_length=512, truncation=True, padding="max_length")

    with tokenizer_t5.as_target_tokenizer():
        labels = tokenizer_t5(examples["answer"], max_length=512, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [None]:
t5_dataset = Dataset.from_pandas(full_df[['question', 'answer']])
t5_dataset = t5_dataset.train_test_split(test_size=0.2)
tokenized_t5 = t5_dataset.map(preprocess_function, batched=True)

In [None]:
args_t5 = TrainingArguments(
    output_dir="./saved_models/t5_response_generator",
    evaluation_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    save_total_limit=1,
    predict_with_generate=True,
    fp16=True,
)

In [None]:
trainer_t5 = Trainer(
    model=model_t5,
    args=args_t5,
    train_dataset=tokenized_t5["train"],
    eval_dataset=tokenized_t5["test"],
    tokenizer=tokenizer_t5,
    data_collator=DataCollatorForSeq2Seq(tokenizer_t5, model=model_t5)
)

In [None]:

trainer_t5.train()
model_t5.save_pretrained("./saved_models/t5_response_generator")


## 4. Train T5 for Q&A Assistant

In [None]:
prof_df = full_df[full_df['answer'].str.contains("Bro|Yo|Hey|Dude|Ugh|memes|suck|spill", case=False) == False]

qa_dataset = Dataset.from_pandas(prof_df[['question', 'answer']])
qa_dataset = qa_dataset.train_test_split(test_size=0.2)

def preprocess_qa(examples):
    inputs = ["question: " + q for q in examples["question"]]
    model_inputs = tokenizer_t5(inputs, max_length=512, truncation=True, padding="max_length")

    with tokenizer_t5.as_target_tokenizer():
        labels = tokenizer_t5(examples["answer"], max_length=512, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [None]:
tokenized_qa = qa_dataset.map(preprocess_qa, batched=True)
model_t5_qa = T5ForConditionalGeneration.from_pretrained(t5_model_name)

args_qa = TrainingArguments(
    output_dir="./saved_models/t5_qa_assistant",
    evaluation_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    save_total_limit=1,
    predict_with_generate=True,
    fp16=True,
)

In [None]:
trainer_qa = Trainer(
    model=model_t5_qa,
    args=args_qa,
    train_dataset=tokenized_qa["train"],
    eval_dataset=tokenized_qa["test"],
    tokenizer=tokenizer_t5,
    data_collator=DataCollatorForSeq2Seq(tokenizer_t5, model=model_t5_qa)
)

In [None]:
trainer_qa.train()
model_t5_qa.save_pretrained("./saved_models/t5_qa_assistant")

## 5. Emotion-Aware T5 Response Generator & QA Assistant Training

In [None]:
from datasets import Dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, DataCollatorForSeq2Seq, TrainingArguments, Trainer
import pandas as pd

In [None]:
# Load emotion-enriched dataset
df = pd.read_csv("./data/t5_emotion_augmented_dataset.csv")
dataset = Dataset.from_pandas(df[["emotion_name", "question", "answer"]])
dataset = dataset.train_test_split(test_size=0.2)

In [None]:
# Tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("t5-small")

In [None]:
# Format input: emotion: [emotion] question: [question]
def preprocess(example):
    inputs = [f"emotion: {e} question: {q}" for e, q in zip(example["emotion_name"], example["question"])]
    targets = example["answer"]
    model_inputs = tokenizer(inputs, max_length=512, padding="max_length", truncation=True)

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=512, padding="max_length", truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [None]:
# Tokenize dataset
tokenized = dataset.map(preprocess, batched=True)

In [None]:
# ------------------------
# TRAINING: Response Generator
# ------------------------
model_response = T5ForConditionalGeneration.from_pretrained("t5-small")
args_response = TrainingArguments(
    output_dir="./saved_models/t5_response_emotion_aware",
    evaluation_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    save_total_limit=1,
    predict_with_generate=True,
    fp16=True,

)

In [None]:
trainer_response = Trainer(
    model=model_response,
    args=args_response,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model_response)
)

In [None]:
trainer_response.train()
model_response.save_pretrained("./saved_models/t5_response_emotion_aware")
print("✅ Trained and saved: t5_response_emotion_aware")

In [None]:
# ------------------------
# TRAINING: QA Assistant
# ------------------------
model_qa = T5ForConditionalGeneration.from_pretrained("t5-small")
args_qa = TrainingArguments(
    output_dir="./saved_models/t5_qa_emotion_aware",
    evaluation_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    save_total_limit=1,
    predict_with_generate=True
)

In [None]:
trainer_qa = Trainer(
    model=model_qa,
    args=args_qa,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model_qa)
)

In [None]:
# Train the model
trainer_qa.train()
model_qa.save_pretrained("./saved_models/t5_qa_emotion_aware")
print("✅ Trained and saved: t5_qa_emotion_aware")


## 6. Gradio Chatbot Interface (Emotion-Aware with Memory)

In [None]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration

# Load all models
tokenizer_roberta = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")
model_roberta = AutoModelForSequenceClassification.from_pretrained("./saved_models/roberta_emotion")
id2label = model_roberta.config.id2label

tokenizer_t5 = T5Tokenizer.from_pretrained("t5-small")
model_t5 = T5ForConditionalGeneration.from_pretrained("./saved_models/t5_response_emotion_aware")
model_qa = T5ForConditionalGeneration.from_pretrained("./saved_models/t5_qa_emotion_aware")


In [None]:
chat_history = []

def chatbot_response(user_input):
    global chat_history

    # Detect Emotion
    emo_inputs = tokenizer_roberta(user_input, return_tensors="pt")
    with torch.no_grad():
        emo_outputs = model_roberta(**emo_inputs)
    emotion = id2label[int(torch.argmax(emo_outputs.logits, dim=1))]

    # Support message
    support_msg = "I'm here for you." if emotion != "neutral" else "Let’s talk more about how you're feeling."

    # Emotion-aware prompt
    prompt = f"emotion: {emotion} question: {user_input}"
    input_ids = tokenizer_t5(prompt, return_tensors="pt").input_ids

    # Generate Responses
    with torch.no_grad():
        response_ids = model_t5.generate(input_ids, max_length=100)
        response_text = tokenizer_t5.decode(response_ids[0], skip_special_tokens=True)

        qa_ids = model_qa.generate(input_ids, max_length=100)
        qa_text = tokenizer_t5.decode(qa_ids[0], skip_special_tokens=True)

    # Chat memory
    combined_response = (
        f"Detected Emotion: {emotion}\n"
        f"Empathy: {support_msg}\n"
        f"Response: {response_text}\n"
        f"Answer: {qa_text}"
    )
    chat_history.append((f"You: {user_input}", f"{combined_response}"))
    return "\n\n".join([f"{q}\n{a}" for q, a in chat_history])

In [None]:
gr.ChatInterface(fn=chatbot_response, title="🧠 Emotion-Aware Mental Health Chatbot", description="Ask any question or share how you're feeling. The bot will respond with empathy and advice.").launch()


## 7. Evaluate Models (ROUGE, BERTScore, Perplexity)

In [None]:
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

In [None]:
# Sample evaluation on response generation
sample_batch = tokenized_t5["test"].select(range(50))
predictions = trainer_t5.predict(sample_batch)

In [None]:
decoded_preds = tokenizer_t5.batch_decode(predictions.predictions, skip_special_tokens=True)
decoded_labels = tokenizer_t5.batch_decode(sample_batch["labels"], skip_special_tokens=True)

In [None]:
# ROUGE Evaluation
rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)

# BERTScore Evaluation
bertscore_result = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")

In [None]:
# Perplexity Calculation
losses = []
for i in range(len(sample_batch)):
    input_ids = torch.tensor([sample_batch[i]["input_ids"]]).to(model_t5.device)
    labels = torch.tensor([sample_batch[i]["labels"]]).to(model_t5.device)
    with torch.no_grad():
        outputs = model_t5(input_ids=input_ids, labels=labels)
    loss = outputs.loss
    losses.append(loss.item())

In [None]:
# Calculate perplexity
import math
perplexity = math.exp(sum(losses)/len(losses))

rouge_result, bertscore_result, perplexity