In [69]:
from datasets import Dataset
from sklearn.metrics import f1_score
import numpy as np
import pandas as pd
from dataclasses import dataclass
from datasets import load_dataset, Dataset, concatenate_datasets
from pathlib import Path
from pydub import AudioSegment
from sklearn.preprocessing import MultiLabelBinarizer
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, T5ForConditionalGeneration, TrainingArguments,
                          Trainer, Seq2SeqTrainer, DataCollatorWithPadding)
from transformers import AutoTokenizer, T5ForConditionalGeneration
from transformers import AutoTokenizer, T5ForConditionalGeneration, AutoModelForSequenceClassification
from transformers import DataCollatorForSeq2Seq
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import Trainer
from transformers import TrainerCallback
from transformers import default_data_collator
from transformers import logging as hf_logging
import datetime
import evaluate
import gradio as gr
import numpy as np
import numpy as np, torch
import os
import os, pprint, glob
import os, random, json, time, itertools
import pandas as pd
import speech_recognition as sr
import tempfile
import torch
import torch, torch.nn.functional as F
import torch.nn.functional as F
import warnings, logging

# Mental‑Health Chatbot: Training & Deployment

This notebook walks through **building, training, and serving** a multi‑task mental‑health chatbot that:

1. Detects the user's emotions (multi‑label classification).
2. Generates empathetic free‑text responses.
3. Answers direct mental‑health questions accurately.

We combine three fine‑tuned Hugging Face models:

| Task | Base model | Output dir |
|------|------------|------------|
| Emotion classification | `SamLowe/roberta-base-go_emotions` | `./saved_models/emotion_classifier` |
| Response generation | `google/t5-small` (or any T5) | `./saved_models/t5_response_generator` |
| Question‑answering | `google/t5-small` (or any T5) | `./saved_models/t5_qa` |

Finally, we wire them together in a small **pipeline** and expose it through a minimal [Gradio](https://gradio.app) UI that *remembers* the conversation.

> **Tip** Training large models can take a while. Feel free to toggle individual datasets on/off or start with a tiny subset while you iterate.

## 0. Environment setup *(optional)*

In [70]:
# If running on Colab/Kaggle add any missing libraries:
# !pip install -q transformers datasets evaluate bert-score gradio sentencepiece

## 1. Imports & global configuration

In [71]:
hf_logging.set_verbosity_info()     # show INFO messages from Trainer





                          
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on: {device}")

# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if device == "cuda":
    torch.cuda.manual_seed_all(SEED)

# Ensure save directories exist
SAVE_ROOT = Path("./saved_models")
for sub in ["emotion_classifier", "t5_response_generator", "t5_qa", "final_combined"]:
    (SAVE_ROOT / sub).mkdir(parents=True, exist_ok=True)

Running on: cuda


In [72]:
# --- Data collator: ensures labels are float32 tensors -----------------
def float_label_collator(features):
    batch = default_data_collator(features)
    if "labels" in batch:
        batch["labels"] = batch["labels"].to(torch.float32)
    return batch

In [73]:
# --- Custom Trainer for multi-label BCE loss ---------------------------

class MultiLabelTrainer(Trainer):
    """Casts labels to float32 and reshapes to logits shape if needed."""
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels").float()
        outputs = model(**inputs)
        logits = outputs.logits
        if labels.shape != logits.shape:
            labels = labels.view_as(logits)
        loss = F.binary_cross_entropy_with_logits(logits, labels, reduction="mean")
        print("LOGITS", logits.shape, "LABELS", labels.shape)

        return (loss, outputs) if return_outputs else loss

In [74]:
# ─── Data collator to ensure BCEWithLogitsLoss gets float labels ───

def float_label_collator(features):
    batch = default_data_collator(features)
    if "labels" in batch:
        batch["labels"] = batch["labels"].to(torch.float32)
    print("collator labels dtype/shape:", batch["labels"].dtype, batch["labels"].shape)
    return batch

In [75]:
# ─── Callback: print step loss + epoch eval while keeping tqdm bar ───
class StepPrinter(TrainerCallback):
    def on_log(self, args, state, control, logs=None, **kwargs):
        if not logs or not state.is_local_process_zero:
            return
        if "loss" in logs:
            print(f"Step {state.global_step:>6} • loss {logs['loss']:.4f}")
        if "eval_loss" in logs:
            metric = logs.get("micro_f1") or logs.get("bertscore_f1") or logs.get("rougeL")
            metric_str = f" • metric {metric:.4f}" if metric is not None else ""
            print(f"Epoch {int(state.epoch)}/{int(args.num_train_epochs)}"
                  f" • eval_loss {logs['eval_loss']:.4f}{metric_str}")

In [76]:
# ──────────────────────────────────────────────────────────────
# Callback: print "Step xx • loss ..." and epoch eval metrics
class StepPrinter(TrainerCallback):
    def on_log(self, args, state, control, logs=None, **kwargs):
        if not logs or not state.is_local_process_zero:
            return
        if "loss" in logs:
            print(f"Step {state.global_step:>6} • loss {logs['loss']:.4f}")
        if "eval_loss" in logs:
            f1 = logs.get("micro_f1") or logs.get("bertscore_f1") or logs.get("rougeL")
            extra = f" • metric {f1:.4f}" if f1 is not None else ""
            print(f"Epoch {int(state.epoch)}/{int(args.num_train_epochs)}"
                  f" • eval_loss {logs['eval_loss']:.4f}{extra}")
# ──────────────────────────────────────────────────────────────

## 2. Dataset switches

In [77]:
# Toggle individual CSVs and provide their column mapping
# Format: name: (enabled, path, question_col, answer_col)
DATASETS = {
    "ds1": (False,  "./data/ds1_transformed_mental_health_chatbot_dataset.csv",  "question", "answer"),
    "ds2": (False,  "./data/ds2_transformed_mental_health_chatbot.csv",         "question", "answer"),
    "ds3": (False,  "./data/ds3_mental_health_faq_cleaned.csv",                 "Question", "Answer"),
    "ds4": (False,  "./data/ds4_mental_health_chatbot_dataset_merged_modes.csv","prompt",   "response"),
    "ds5": (False,  "./data/ds5_Mental_Health_FAQ.csv",                         "Question", "Answer"),
    "ds6": (False, "./data/ds6_mental_health_counseling.csv",                  "query",    "completion"),
}

## 3. Load & preprocess datasets

In [78]:
# Robust cleaner that auto‑maps columns to 'question' / 'answer'
def load_and_clean(path, q_col, a_col):

    df = pd.read_csv(path)

    # normalise headers
    df.columns = [c.lower().strip() for c in df.columns]
    q_col = q_col.lower().strip()
    a_col = a_col.lower().strip()

    # common renames
    rename_map = {
        "prompt": "question",
        "response": "answer",
        "questions": "question",
        "answers": "answer",
    }
    df = df.rename(columns=rename_map)

    # if provided cols exist, rename them
    if q_col in df.columns:
        df = df.rename(columns={q_col: "question"})
    if a_col in df.columns:
        df = df.rename(columns={a_col: "answer"})

        # Try to map context → question if needed
    if "question" not in df.columns and "context" in df.columns:
        df = df.rename(columns={"context": "question"})

    if not {"question", "answer"}.issubset(df.columns):
        raise ValueError(f"Could not find 'question'/'answer' in {path}. Available columns: {list(df.columns)}")



    df = df[["question", "answer"]].dropna()
    df["question"] = df["question"].astype(str).str.strip().str.replace(r"\s+", " ", regex=True)
    df["answer"]   = df["answer"].astype(str).str.strip().str.replace(r"\s+", " ", regex=True)
    df = df.drop_duplicates()

    return Dataset.from_pandas(df.reset_index(drop=True))

In [79]:
# ─── 1.  Put this once, after your imports ──────────────────────────

def float_label_collator(features):
    """
    Wrap the default HF collator but cast the `labels` tensor to float32
    so BCEWithLogitsLoss gets the right dtype.
    """
    batch = default_data_collator(features)
    if "labels" in batch:
        batch["labels"] = batch["labels"].to(torch.float32)
    return batch


In [80]:
# ─── Final version of MultiLabelTrainer ─────────────────────────────

class MultiLabelTrainer(Trainer):
    """
    BCEWithLogitsLoss that survives any label dtype/shape:
    * casts to float32
    * reshapes to logits.shape when needed
    """
    def compute_loss(
        self,
        model,
        inputs,
        return_outputs: bool = False,
        **kwargs,
    ):
        labels = inputs.pop("labels").float()        # cast dtype
        outputs = model(**inputs)
        logits = outputs.logits

        # Fix shape if the collator flattened the labels
        if labels.shape != logits.shape:
            labels = labels.view_as(logits)

        loss = F.binary_cross_entropy_with_logits(logits, labels, reduction="mean")
        return (loss, outputs) if return_outputs else loss




### 3.1 Emotion label setup

In [81]:
# Labels from the GoEmotions paper (27 emotions + neutral)
GO_EMOTION_LABELS = [
    'admiration','amusement','anger','annoyance','approval','caring','confusion',
    'curiosity','desire','disappointment','disapproval','disgust','embarrassment',
    'excitement','fear','gratitude','grief','joy','love','nervousness','optimism',
    'pride','realization','relief','remorse','sadness','surprise','neutral'
]
num_labels = len(GO_EMOTION_LABELS)

### 3.2 Binarize emotion annotations

In [82]:
# ── Re‑create train_ds / test_ds ───────────────────────────────────
datasets_list = []
for name, (enabled, path, q_col, a_col) in DATASETS.items():
    if not enabled:
        continue
    ds = load_and_clean(path=path, q_col=q_col, a_col=a_col)
    datasets_list.append(ds)

if not datasets_list:
    print("No datasets were enabled, using a fallback test dataset.")
    fallback_data = {
        "text": [
            "How are you?",
            "I feel really down today.",
            "I'm so happy with my progress!",
            "Why does nobody understand me?",
            "I'm feeling anxious about school.",
            "Life is good lately.",
            "Sometimes I just want to cry.",
            "Everything is falling apart.",
            "I’m grateful for my therapist.",
            "Can someone please just listen?"
        ]
    }
    ds = Dataset.from_dict(fallback_data)
    datasets_list.append(ds)


full_ds = concatenate_datasets(datasets_list) if len(datasets_list) > 1 else datasets_list[0]
full_ds = full_ds.shuffle(seed=SEED)

split = full_ds.train_test_split(test_size=0.1, seed=SEED)
train_ds, test_ds = split["train"], split["test"]

print(f"train: {len(train_ds):,} • test: {len(test_ds):,}")
# ───────────────────────────────────────────────────────────────────

No datasets were enabled, using a fallback test dataset.
train: 9 • test: 1


In [83]:
if not datasets_list:
    print("No datasets were enabled, using a fallback test dataset.")
    fallback_data = {
        "text": [
            "How are you?",
            "I feel really down today.",
            "I'm so happy with my progress!",
            "Why does nobody understand me?",
            "I'm feeling anxious about school.",
            "Life is good lately.",
            "Sometimes I just want to cry.",
            "Everything is falling apart.",
            "I’m grateful for my therapist.",
            "Can someone please just listen?"
        ]
    }
    ds = Dataset.from_dict(fallback_data)
    datasets_list.append(ds)


In [84]:
# For this demo we'll fake some annotations by mapping keywords -> emotions.
# Replace with your real emotion annotations if available.
KEYWORD2EMO = {
    "sad": "sadness", "angry": "anger", "happy": "joy",
    "thank": "gratitude", "sorry": "remorse", "love": "love",
    "fear": "fear", "nervous": "nervousness"
}

def annotate_emotions(example):
    emos = example.get("emotions", [])
    
    # If no emotions assigned, default to ["neutral"]
    if not emos:
        emos = ["neutral"]
    
    example["emotions"] = emos  # store for visibility
    example["labels"] = [1.0 if lbl in emos else 0.0 for lbl in GO_EMOTION_LABELS]
    return example

def get_input_text(example):
    return example.get("text") or example.get("question") or "[NO TEXT FOUND]"
emo_train = train_ds.map(annotate_emotions)
emo_test  = test_ds.map(annotate_emotions)

print("Sample:", get_input_text(emo_train[0]), "->", emo_train[0]["emotions"])


def get_input_text(example):
    return example.get("text") or example.get("question") or "[NO TEXT FOUND]"

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Sample: I'm feeling anxious about school. -> ['neutral']


In [85]:
def has_nonzero_labels(example):
    return sum(example["labels"]) > 0

# Apply to your actual datasets
emo_train = emo_train.filter(has_nonzero_labels)
emo_test = emo_test.filter(has_nonzero_labels)


Filter:   0%|          | 0/9 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1 [00:00<?, ? examples/s]

In [86]:
# Check for label issues
bad_labels = [ex for ex in emo_train if "labels" not in ex or sum(ex["labels"]) == 0]

print("Number of bad label examples:", len(bad_labels))

# Print the first bad one if any
if bad_labels:
    print("Example with bad label:", bad_labels[0])


Number of bad label examples: 0


In [87]:
emo_tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")

loading file vocab.json from cache at C:\Users\mward\.cache\huggingface\hub\models--SamLowe--roberta-base-go_emotions\snapshots\58b6c5b44a7a12093f782442969019c7e2982299\vocab.json
loading file merges.txt from cache at C:\Users\mward\.cache\huggingface\hub\models--SamLowe--roberta-base-go_emotions\snapshots\58b6c5b44a7a12093f782442969019c7e2982299\merges.txt
loading file tokenizer.json from cache at C:\Users\mward\.cache\huggingface\hub\models--SamLowe--roberta-base-go_emotions\snapshots\58b6c5b44a7a12093f782442969019c7e2982299\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\mward\.cache\huggingface\hub\models--SamLowe--roberta-base-go_emotions\snapshots\58b6c5b44a7a12093f782442969019c7e2982299\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--SamLowe--roberta-base-go_emotions\snapshots\58b6c5b44a7a12093f782442969019c7e2982299\tokenizer_config

In [88]:
# 🔄 Print + rename if necessary
print("BEFORE COLUMN RENAME:", emo_train.column_names)
if "question" in emo_train.column_names:
    emo_train = emo_train.rename_column("question", "text")
if "question" in emo_test.column_names:
    emo_test = emo_test.rename_column("question", "text")
print("AFTER COLUMN RENAME:", emo_train.column_names)

# ✅ Tokenizer using just 'text'
def emo_tokenize(batch):
    return emo_tokenizer(batch["text"], padding=True, truncation=True)

# ✅ Float conversion
def cast_to_float(example):
    example["labels"] = np.array(example["labels"], dtype=np.float32)
    return example

# ✅ Tokenize + cast
emo_train_tok = emo_train.map(emo_tokenize, batched=True).map(cast_to_float)
emo_test_tok  = emo_test.map(emo_tokenize, batched=True).map(cast_to_float)

# ✅ Set format for PyTorch
emo_train_tok.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
emo_test_tok.set_format("torch", columns=["input_ids", "attention_mask", "labels"])



BEFORE COLUMN RENAME: ['text', 'emotions', 'labels']
AFTER COLUMN RENAME: ['text', 'emotions', 'labels']


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

## 4. Train emotion classifier (RoBERTa)

In [89]:
print(emo_train.column_names)


['text', 'emotions', 'labels']


In [90]:


def emo_tokenize(batch):
    return emo_tokenizer(
        batch["text"],  # <- THIS is now the standard column name
        truncation=True,
        padding="max_length",
        max_length=128,
    )


def cast_to_float(example):
    example["labels"] = np.array(example["labels"], dtype=np.float32)
    return example

emo_model = AutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions", problem_type="multi_label_classification", num_labels=len(GO_EMOTION_LABELS)).to(device)


emo_train_tok = emo_train.map(emo_tokenize, batched=True).map(cast_to_float)
emo_test_tok  = emo_test.map(emo_tokenize, batched=True).map(cast_to_float)

emo_train_tok.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
emo_test_tok.set_format("torch", columns=["input_ids", "attention_mask", "labels"])


# Metrics
metric_f1 = evaluate.load("f1")

def compute_emo_metrics(pred):

    logits, labels = pred
    probs = torch.sigmoid(torch.tensor(logits))
    preds = (probs > 0.3).int().numpy()

    labels = np.array(labels)

    # Defensive check: filter out rows where labels.sum() == 0
    mask = labels.sum(axis=1) > 0
    if mask.sum() == 0:
        print("Warning: all evaluation labels are empty")
        return {"micro_f1": 0.0}

    try:
        f1 = f1_score(labels[mask], preds[mask], average="micro", zero_division=0)
    except ValueError as e:
        print("Metric error:", e)
        f1 = 0.0

    return {"micro_f1": f1}



emo_args = TrainingArguments(
    output_dir=str(SAVE_ROOT / "emotion_classifier"),

    # logging
    logging_strategy="steps", logging_steps=10, logging_dir="./logs", report_to="none",

    # core hyper‑params
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    num_train_epochs=3,

    # eval / ckpt
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="micro_f1",
    greater_is_better=True,
    seed=SEED,
)

trainer_emo = MultiLabelTrainer(
    model=emo_model,
    args=emo_args,
    train_dataset=emo_train_tok,
    eval_dataset=emo_test_tok,
    tokenizer=emo_tokenizer,
    data_collator=float_label_collator,
    compute_metrics=compute_emo_metrics,
    callbacks=[StepPrinter],
)


# Uncomment to train (may take a while)
trainer_emo.train()

# Save robustly
(SAVE_ROOT/"emotion_classifier").mkdir(exist_ok=True, parents=True)
emo_model.save_pretrained(SAVE_ROOT/"emotion_classifier")
emo_tokenizer.save_pretrained(SAVE_ROOT/"emotion_classifier")

trainer_emo.save_model()            # writes to output_dir
emo_tokenizer.save_pretrained(emo_args.output_dir)


# Cast multi‑label targets to float32 tensors





loading configuration file config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--SamLowe--roberta-base-go_emotions\snapshots\58b6c5b44a7a12093f782442969019c7e2982299\config.json
Model config RobertaConfig {
  "_name_or_path": "SamLowe/roberta-base-go_emotions",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "joy",
    "18": "love",
    "19": "nervousness",
    "20": "optimism"

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

PyTorch: setting up devices
The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: emotions, text. If emotions, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 9
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 3
  Number of trainable parameters = 124,667,164


Epoch,Training Loss,Validation Loss,Micro F1
1,No log,0.201476,0.0
2,No log,0.189389,0.0
3,No log,0.183481,0.0


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: emotions, text. If emotions, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1
  Batch size = 16


Epoch 1/3 • eval_loss 0.2015


Saving model checkpoint to saved_models\emotion_classifier\checkpoint-1
Configuration saved in saved_models\emotion_classifier\checkpoint-1\config.json
Model weights saved in saved_models\emotion_classifier\checkpoint-1\model.safetensors
tokenizer config file saved in saved_models\emotion_classifier\checkpoint-1\tokenizer_config.json
Special tokens file saved in saved_models\emotion_classifier\checkpoint-1\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: emotions, text. If emotions, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1
  Batch size = 16


Epoch 2/3 • eval_loss 0.1894


Saving model checkpoint to saved_models\emotion_classifier\checkpoint-2
Configuration saved in saved_models\emotion_classifier\checkpoint-2\config.json
Model weights saved in saved_models\emotion_classifier\checkpoint-2\model.safetensors
tokenizer config file saved in saved_models\emotion_classifier\checkpoint-2\tokenizer_config.json
Special tokens file saved in saved_models\emotion_classifier\checkpoint-2\special_tokens_map.json
Saving model checkpoint to saved_models\emotion_classifier\checkpoint-3
Configuration saved in saved_models\emotion_classifier\checkpoint-3\config.json
Model weights saved in saved_models\emotion_classifier\checkpoint-3\model.safetensors
tokenizer config file saved in saved_models\emotion_classifier\checkpoint-3\tokenizer_config.json
Special tokens file saved in saved_models\emotion_classifier\checkpoint-3\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and ha

Epoch 3/3 • eval_loss 0.1835


Saving model checkpoint to saved_models\emotion_classifier\checkpoint-3
Configuration saved in saved_models\emotion_classifier\checkpoint-3\config.json
Model weights saved in saved_models\emotion_classifier\checkpoint-3\model.safetensors
tokenizer config file saved in saved_models\emotion_classifier\checkpoint-3\tokenizer_config.json
Special tokens file saved in saved_models\emotion_classifier\checkpoint-3\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from saved_models\emotion_classifier\checkpoint-1 (score: 0.0).
Configuration saved in saved_models\emotion_classifier\config.json
Model weights saved in saved_models\emotion_classifier\model.safetensors
tokenizer config file saved in saved_models\emotion_classifier\tokenizer_config.json
Special tokens file saved in saved_models\emotion_classifier\special_tokens_map.json
Saving model checkpoint to saved_models\emotion_classifier
Configuration saved in saved

('saved_models\\emotion_classifier\\tokenizer_config.json',
 'saved_models\\emotion_classifier\\special_tokens_map.json',
 'saved_models\\emotion_classifier\\vocab.json',
 'saved_models\\emotion_classifier\\merges.txt',
 'saved_models\\emotion_classifier\\added_tokens.json',
 'saved_models\\emotion_classifier\\tokenizer.json')

In [91]:
sample = emo_train_tok[0]
print("Sample input_ids shape:", sample["input_ids"].shape)
print("Sample labels:", sample["labels"])
print("Label dtype:", type(sample["labels"]), "—", sample["labels"].dtype if hasattr(sample["labels"], 'dtype') else "no dtype")
print("Model expects num_labels:", emo_model.config.num_labels)

Sample input_ids shape: torch.Size([128])
Sample labels: tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])
Label dtype: <class 'torch.Tensor'> — torch.float32
Model expects num_labels: 28


In [92]:
resp_tokenizer = AutoTokenizer.from_pretrained("t5-small")
resp_model = T5ForConditionalGeneration.from_pretrained("t5-small")

loading file spiece.model from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\spiece.model
loading file tokenizer.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\tokenizer_config.json
loading file chat_template.jinja from cache at None
loading configuration file config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\config.json
Model config T5Config {
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_s

In [93]:

data_collator = DataCollatorForSeq2Seq(
    tokenizer=resp_tokenizer,
    model=resp_model,
    padding=True,  # enables dynamic padding
)


## 5. Train T5 for response generation

In [94]:
# Build input/target pairs: user text -> helpful response
# For now we use 'question' as input and 'answer' as target
def build_t5_pairs(example):
    question = example.get("question") or example.get("text") or ""
    answer = example.get("answer") or example.get("response") or ""
    example["input_text"] = "respond: " + question
    example["target_text"] = answer
    return example


resp_train = train_ds.map(build_t5_pairs)
resp_test  = test_ds.map(build_t5_pairs)

t5_resp_model_name = "t5-small"
tokenizer_t5 = AutoTokenizer.from_pretrained(t5_resp_model_name)

def t5_tokenize(batch):
    model_inputs = tokenizer_t5(batch["input_text"], max_length=128, truncation=True)
    with tokenizer_t5.as_target_tokenizer():
        labels = tokenizer_t5(batch["target_text"], max_length=128, truncation=True)
    model_inputs["labels"] = labels["input_ids"]

    return model_inputs

resp_train_tok = resp_train.map(t5_tokenize, batched=True, remove_columns=resp_train.column_names)
resp_test_tok  = resp_test.map(t5_tokenize, batched=True, remove_columns=resp_test.column_names)

resp_train_tok.set_format("torch")
resp_test_tok.set_format("torch")

resp_model = T5ForConditionalGeneration.from_pretrained(t5_resp_model_name).to(device)

rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

def compute_resp_metrics(eval_pred):
    preds, labels = eval_pred

    # Replace -100 in the labels as we can't decode them
    labels = np.where(labels != -100, labels, tokenizer_t5.pad_token_id)

    decoded_preds = tokenizer_t5.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer_t5.batch_decode(labels, skip_special_tokens=True)

    r = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    b = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    
    return {
        "rougeL": r["rougeL"],
        "bertscore_f1": np.mean(b["f1"])
    }



resp_args = Seq2SeqTrainingArguments(
    output_dir=str(SAVE_ROOT / "t5_response_generator"),

    # logging
    logging_strategy="steps", logging_steps=10, logging_dir="./logs", report_to="none",

    # core hyper‑params
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=3e-4,
    num_train_epochs=3,

    # eval / ckpt
    evaluation_strategy="epoch",
    save_strategy="epoch",
    predict_with_generate=True,   # now valid
    seed=SEED,
)

trainer_resp = Seq2SeqTrainer(
    model=resp_model,
    args=resp_args,
    train_dataset=resp_train_tok,
    eval_dataset=resp_test_tok,
    tokenizer=tokenizer_t5,
    data_collator=data_collator,  # ✅ keep this
    compute_metrics=compute_resp_metrics,
    callbacks=[StepPrinter],
)


# Uncomment to train
trainer_resp.train()

# Robust save
resp_model.save_pretrained(SAVE_ROOT/"t5_response_generator")
tokenizer_t5.save_pretrained(SAVE_ROOT/"t5_response_generator"),
callbacks=[StepPrinter]
trainer_resp.save_model()
tokenizer_t5.save_pretrained(resp_args.output_dir) 

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

loading file spiece.model from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\spiece.model
loading file tokenizer.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\tokenizer_config.json
loading file chat_template.jinja from cache at None


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

loading configuration file config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\config.json
Model config T5Config {
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 6,
  "num_heads": 8,
  "num_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size":

Epoch,Training Loss,Validation Loss,Rougel,Bertscore F1
1,No log,0.828214,0.0,0.0
2,No log,0.091553,0.0,0.0
3,No log,0.023079,0.0,0.0



***** Running Evaluation *****
  Num examples = 1
  Batch size = 8
INFO:absl:Using default tokenizer.
loading configuration file config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--roberta-large\snapshots\722cf37b1afa9454edce342e7895e588b6ff1d59\config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.49.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at C:\Users\

Epoch 1/3 • eval_loss 0.8282


Saving model checkpoint to saved_models\t5_response_generator\checkpoint-2
Configuration saved in saved_models\t5_response_generator\checkpoint-2\config.json
Configuration saved in saved_models\t5_response_generator\checkpoint-2\generation_config.json
Model weights saved in saved_models\t5_response_generator\checkpoint-2\model.safetensors
tokenizer config file saved in saved_models\t5_response_generator\checkpoint-2\tokenizer_config.json
Special tokens file saved in saved_models\t5_response_generator\checkpoint-2\special_tokens_map.json
Copy vocab file to saved_models\t5_response_generator\checkpoint-2\spiece.model

***** Running Evaluation *****
  Num examples = 1
  Batch size = 8
INFO:absl:Using default tokenizer.


Epoch 2/3 • eval_loss 0.0916


Saving model checkpoint to saved_models\t5_response_generator\checkpoint-4
Configuration saved in saved_models\t5_response_generator\checkpoint-4\config.json
Configuration saved in saved_models\t5_response_generator\checkpoint-4\generation_config.json
Model weights saved in saved_models\t5_response_generator\checkpoint-4\model.safetensors
tokenizer config file saved in saved_models\t5_response_generator\checkpoint-4\tokenizer_config.json
Special tokens file saved in saved_models\t5_response_generator\checkpoint-4\special_tokens_map.json
Copy vocab file to saved_models\t5_response_generator\checkpoint-4\spiece.model
Saving model checkpoint to saved_models\t5_response_generator\checkpoint-6
Configuration saved in saved_models\t5_response_generator\checkpoint-6\config.json
Configuration saved in saved_models\t5_response_generator\checkpoint-6\generation_config.json
Model weights saved in saved_models\t5_response_generator\checkpoint-6\model.safetensors
tokenizer config file saved in saved

Epoch 3/3 • eval_loss 0.0231


Saving model checkpoint to saved_models\t5_response_generator\checkpoint-6
Configuration saved in saved_models\t5_response_generator\checkpoint-6\config.json
Configuration saved in saved_models\t5_response_generator\checkpoint-6\generation_config.json
Model weights saved in saved_models\t5_response_generator\checkpoint-6\model.safetensors
tokenizer config file saved in saved_models\t5_response_generator\checkpoint-6\tokenizer_config.json
Special tokens file saved in saved_models\t5_response_generator\checkpoint-6\special_tokens_map.json
Copy vocab file to saved_models\t5_response_generator\checkpoint-6\spiece.model


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in saved_models\t5_response_generator\config.json
Configuration saved in saved_models\t5_response_generator\generation_config.json
Model weights saved in saved_models\t5_response_generator\model.safetensors
tokenizer config file saved in saved_models\t5_response_generato

('saved_models\\t5_response_generator\\tokenizer_config.json',
 'saved_models\\t5_response_generator\\special_tokens_map.json',
 'saved_models\\t5_response_generator\\spiece.model',
 'saved_models\\t5_response_generator\\added_tokens.json',
 'saved_models\\t5_response_generator\\tokenizer.json')

In [95]:
sample = resp_train_tok[0]
print("Input length:", len(sample["input_ids"]))
print("Label length:", len(sample["labels"]))
print("Label:", sample["labels"])
print("Type:", type(sample["labels"]))

Input length: 11
Label length: 1
Label: tensor([1])
Type: <class 'torch.Tensor'>


## 6. Train T5 for question‑answering

In [96]:

qa_model = T5ForConditionalGeneration.from_pretrained("saved_models/t5_qa").to("cuda" if torch.cuda.is_available() else "cpu")
tokenizer_t5 = AutoTokenizer.from_pretrained("saved_models/t5_qa")


loading configuration file saved_models/t5_qa\config.json
Model config T5Config {
  "_name_or_path": "t5-small",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 6,
  "num_heads": 8,
  "num_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en

In [97]:

qa_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer_t5,
    model=qa_model,
    padding=True
)


In [98]:
# Build QA pairs: "question: <text>" -> answer
def build_qa_pairs(example):
    question = example.get("question") or example.get("text") or ""
    answer = example.get("answer") or example.get("response") or ""
    example["input_text"] = "question: " + question
    example["target_text"] = answer
    return example


qa_train = train_ds.map(build_qa_pairs)
qa_test  = test_ds.map(build_qa_pairs)

def qa_tokenize(batch):
    model_inputs = tokenizer_t5(batch["input_text"], max_length=128, truncation=True)
    with tokenizer_t5.as_target_tokenizer():
        labels = tokenizer_t5(batch["target_text"], max_length=128, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

qa_train_tok = qa_train.map(qa_tokenize, batched=True, remove_columns=qa_train.column_names)
qa_test_tok  = qa_test.map(qa_tokenize, batched=True, remove_columns=qa_test.column_names)

qa_train_tok.set_format("torch")
qa_test_tok.set_format("torch")

qa_model = T5ForConditionalGeneration.from_pretrained(t5_resp_model_name).to(device)

qa_args = Seq2SeqTrainingArguments(
    output_dir=str(SAVE_ROOT / "t5_qa"),

    logging_strategy="steps", logging_steps=10, logging_dir="./logs", report_to="none",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=3e-4,
    num_train_epochs=3,

    evaluation_strategy="epoch",
    save_strategy="epoch",
    predict_with_generate=True,
    seed=SEED,
)

trainer_qa = Seq2SeqTrainer(
    model=qa_model,
    args=qa_args,
    train_dataset=qa_train_tok,
    eval_dataset=qa_test_tok,
    tokenizer=tokenizer_t5,
    data_collator=qa_collator,
    compute_metrics=compute_resp_metrics,
    callbacks=[StepPrinter],
)

# Uncomment to train
trainer_qa.train()

qa_model.save_pretrained(SAVE_ROOT/"t5_qa")
tokenizer_t5.save_pretrained(SAVE_ROOT/"t5_qa")
callbacks=[StepPrinter]
trainer_qa.save_model()
tokenizer_t5.save_pretrained(qa_args.output_dir)


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

loading configuration file config.json from cache at C:\Users\mward\.cache\huggingface\hub\models--t5-small\snapshots\df1b051c49625cf57a3d0d8d3863ed4d13564fe4\config.json
Model config T5Config {
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 6,
  "num_heads": 8,
  "num_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size":

Epoch,Training Loss,Validation Loss,Rougel,Bertscore F1
1,No log,0.024853,0.0,0.0
2,No log,0.000743,0.0,0.0
3,No log,0.000398,0.0,0.0



***** Running Evaluation *****
  Num examples = 1
  Batch size = 8
INFO:absl:Using default tokenizer.


Epoch 1/3 • eval_loss 0.0249


Saving model checkpoint to saved_models\t5_qa\checkpoint-2
Configuration saved in saved_models\t5_qa\checkpoint-2\config.json
Configuration saved in saved_models\t5_qa\checkpoint-2\generation_config.json
Model weights saved in saved_models\t5_qa\checkpoint-2\model.safetensors
tokenizer config file saved in saved_models\t5_qa\checkpoint-2\tokenizer_config.json
Special tokens file saved in saved_models\t5_qa\checkpoint-2\special_tokens_map.json
Copy vocab file to saved_models\t5_qa\checkpoint-2\spiece.model

***** Running Evaluation *****
  Num examples = 1
  Batch size = 8
INFO:absl:Using default tokenizer.


Epoch 2/3 • eval_loss 0.0007


Saving model checkpoint to saved_models\t5_qa\checkpoint-4
Configuration saved in saved_models\t5_qa\checkpoint-4\config.json
Configuration saved in saved_models\t5_qa\checkpoint-4\generation_config.json
Model weights saved in saved_models\t5_qa\checkpoint-4\model.safetensors
tokenizer config file saved in saved_models\t5_qa\checkpoint-4\tokenizer_config.json
Special tokens file saved in saved_models\t5_qa\checkpoint-4\special_tokens_map.json
Copy vocab file to saved_models\t5_qa\checkpoint-4\spiece.model
Saving model checkpoint to saved_models\t5_qa\checkpoint-6
Configuration saved in saved_models\t5_qa\checkpoint-6\config.json
Configuration saved in saved_models\t5_qa\checkpoint-6\generation_config.json
Model weights saved in saved_models\t5_qa\checkpoint-6\model.safetensors
tokenizer config file saved in saved_models\t5_qa\checkpoint-6\tokenizer_config.json
Special tokens file saved in saved_models\t5_qa\checkpoint-6\special_tokens_map.json
Copy vocab file to saved_models\t5_qa\chec

Epoch 3/3 • eval_loss 0.0004


Saving model checkpoint to saved_models\t5_qa\checkpoint-6
Configuration saved in saved_models\t5_qa\checkpoint-6\config.json
Configuration saved in saved_models\t5_qa\checkpoint-6\generation_config.json
Model weights saved in saved_models\t5_qa\checkpoint-6\model.safetensors
tokenizer config file saved in saved_models\t5_qa\checkpoint-6\tokenizer_config.json
Special tokens file saved in saved_models\t5_qa\checkpoint-6\special_tokens_map.json
Copy vocab file to saved_models\t5_qa\checkpoint-6\spiece.model


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in saved_models\t5_qa\config.json
Configuration saved in saved_models\t5_qa\generation_config.json
Model weights saved in saved_models\t5_qa\model.safetensors
tokenizer config file saved in saved_models\t5_qa\tokenizer_config.json
Special tokens file saved in saved_models\t5_qa\special_tokens_map.json
Saving model checkpoint to saved_models\t5_qa
Configuration saved in saved_model

('saved_models\\t5_qa\\tokenizer_config.json',
 'saved_models\\t5_qa\\special_tokens_map.json',
 'saved_models\\t5_qa\\spiece.model',
 'saved_models\\t5_qa\\added_tokens.json',
 'saved_models\\t5_qa\\tokenizer.json')

In [99]:

root = Path("./saved_models")

print("Top‑level content of saved_models:")
pprint.pprint(os.listdir(root))

print("\nAny checkpoint folders?")
ckpts = sorted(glob.glob(str(root / "t5_response_generator" / "checkpoint-*")))
pprint.pprint(ckpts)



Top‑level content of saved_models:
['emotion_classifier', 'final_combined', 't5_qa', 't5_response_generator']

Any checkpoint folders?
['saved_models\\t5_response_generator\\checkpoint-116',
 'saved_models\\t5_response_generator\\checkpoint-174',
 'saved_models\\t5_response_generator\\checkpoint-2',
 'saved_models\\t5_response_generator\\checkpoint-20',
 'saved_models\\t5_response_generator\\checkpoint-239',
 'saved_models\\t5_response_generator\\checkpoint-4',
 'saved_models\\t5_response_generator\\checkpoint-40',
 'saved_models\\t5_response_generator\\checkpoint-478',
 'saved_models\\t5_response_generator\\checkpoint-58',
 'saved_models\\t5_response_generator\\checkpoint-6',
 'saved_models\\t5_response_generator\\checkpoint-60',
 'saved_models\\t5_response_generator\\checkpoint-717']


## 7. Combined pipeline & Gradio UI

In [100]:
# Load (fine‑tuned) models back in – comment these if still in memory
emo_model = AutoModelForSequenceClassification.from_pretrained(SAVE_ROOT/"emotion_classifier").to(device)
emo_tokenizer = AutoTokenizer.from_pretrained(SAVE_ROOT/"emotion_classifier")

qa_model = T5ForConditionalGeneration.from_pretrained(SAVE_ROOT/"t5_qa").to(device)
qa_tokenizer = AutoTokenizer.from_pretrained(SAVE_ROOT/"t5_qa")

# ──────────────────────────────────────────────────────────────
class MentalHealthChatbotPipeline:
    def __init__(self, labels, device="cpu"):
        self.device = device
        self.labels = labels
        self.chat_history = []  # (speaker, text)

        self.emo_model = emo_model.eval()
        self.qa_model  = qa_model.eval()
        self.resp_model = resp_model.eval()

    @torch.no_grad()
    def __call__(self, text, max_length=128):
        self.chat_history.append(("user", text))

        # Emotion detection
        emo_inputs = emo_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
        probs = torch.sigmoid(self.emo_model(**emo_inputs).logits)[0]
        emotions = [lbl for lbl, p in zip(self.labels, probs) if p > 0.3]

        # Pick model
        model, tok = (self.qa_model, qa_tokenizer) if "?" in text else (self.resp_model, resp_tokenizer)
        ids = model.generate(**tok(text, return_tensors="pt").to(self.device),
                             max_length=max_length)
        reply = tok.decode(ids[0], skip_special_tokens=True)
        self.chat_history.append(("bot", reply))

        return {"Detected Emotions": emotions, "Response": reply, "History": self.chat_history}

chatbot = MentalHealthChatbotPipeline(labels=GO_EMOTION_LABELS, device=device)

loading configuration file saved_models\emotion_classifier\config.json
Model config RobertaConfig {
  "_name_or_path": "saved_models\\emotion_classifier",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "joy",
    "18": "love",
    "19": "nervousness",
    "20": "optimism",
    "21": "pride",
    "22": "realization",
    "23": "relief",
    "24": "remorse",
    "25": "sadness",
    "26": "surpri

## 8. Save final bundle metadata

In [101]:
metadata = {
    "emotion_model": str(SAVE_ROOT/"emotion_classifier"),
    "response_model": str(SAVE_ROOT/"t5_response_generator"),
    "qa_model": str(SAVE_ROOT/"t5_qa"),
    "labels": GO_EMOTION_LABELS
}
with open(SAVE_ROOT/"final_combined"/"metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)
print("Metadata saved to", SAVE_ROOT/"final_combined"/"metadata.json")

Metadata saved to saved_models\final_combined\metadata.json


In [102]:

# Load models and tokenizers
resp_model_path = "saved_models/t5_response_generator"
qa_model_path = "saved_models/t5_qa"
emo_model_path = "saved_models/emotion_classifier"

resp_tokenizer = AutoTokenizer.from_pretrained(resp_model_path)
resp_model = T5ForConditionalGeneration.from_pretrained(resp_model_path).to("cuda" if torch.cuda.is_available() else "cpu")

qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_path)
qa_model = T5ForConditionalGeneration.from_pretrained(qa_model_path).to(resp_model.device)

emo_tokenizer = AutoTokenizer.from_pretrained(emo_model_path)
emo_model = AutoModelForSequenceClassification.from_pretrained(emo_model_path).to(resp_model.device)
emo_model.eval()

# Emotion labels (based on go_emotions)
NUM_EMO_LABELS = emo_model.config.num_labels

DEFAULT_LABELS = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse',
    'sadness', 'surprise', 'neutral'
]

EMOTION_LABELS = DEFAULT_LABELS[:NUM_EMO_LABELS]
emotion_router_labels = set(EMOTION_LABELS) & {
    'confusion', 'caring', 'nervousness', 'grief', 'sadness', 'fear', 'remorse', 'love', 'anger'
}


# Format prompt
def format_input_prompt(user_input, language="English", history=None):
    if history:
        combined = "\n".join(history + [user_input])
        return f"You are a supportive mental health assistant. Respond in {language}. The conversation so far:\n{combined}"
    return f"You are a supportive mental health assistant. Respond in {language}. The user says: {user_input}"

# Emotion classifier
def detect_emotions(text):
    inputs = emo_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(emo_model.device)
    with torch.no_grad():
        logits = emo_model(**inputs).logits
        probs = torch.sigmoid(logits).cpu().numpy()[0]
    thresholded = [EMOTION_LABELS[i] for i, p in enumerate(probs) if p > 0.3]
    return thresholded if thresholded else ["neutral"]

# Voice input to text
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    audio = AudioSegment.from_file(audio_file)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        audio.export(tmp.name, format="wav")
        with sr.AudioFile(tmp.name) as source:
            audio_data = recognizer.record(source)
            try:
                return recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                return "[Unrecognized speech]"
            except sr.RequestError:
                return "[Speech recognition failed]"

# Generator with router
def generate_chatbot_response(user_text, audio_input, mode, language, use_history, history, route_by_emotion, persist):
    history = history or []
    user_input = user_text if mode == "text" else transcribe_audio(audio_input)
    emotions = detect_emotions(user_input)
    use_resp_model = any(e in emotion_router_labels for e in emotions) if route_by_emotion else False

    if use_resp_model:
        prompt = format_input_prompt(user_input, language, history if use_history else None)
        inputs = resp_tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(resp_model.device)
        model = resp_model
        tokenizer = resp_tokenizer
    else:
        prompt = "question: " + user_input
        inputs = qa_tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(qa_model.device)
        model = qa_model
        tokenizer = qa_tokenizer

    output_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=64,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=True
    )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    full_history = history + [f"User: {user_input}", f"Bot: {response}"]

    if persist:
        with open("chatlog.txt", "a", encoding="utf-8") as log:
            log.write(f"\n[{datetime.datetime.now()}]\n{full_history[-2]}\n{full_history[-1]}\nDetected emotions: {emotions}\n")

    return response, emotions, full_history

# Gradio interface
demo = gr.Interface(
    fn=generate_chatbot_response,
    inputs=[
        gr.Textbox(label="Type your message here (if using text mode)"),
        gr.Audio(type="filepath", label="Or speak here (if using voice mode)"),
        gr.Radio(["text", "voice"], value="text", label="Input Mode"),
        gr.Dropdown(choices=["English", "German", "Spanish", "French"], value="English", label="Response Language"),
        gr.Checkbox(label="Include chat history in response", value=True),
        gr.State(value=[]),
        gr.Checkbox(label="Route by detected emotion", value=True),
        gr.Checkbox(label="Save conversation to chatlog.txt", value=True)
    ],
    outputs=[
        gr.Textbox(label="Therapist Response"),
        gr.Textbox(label="Detected Emotions"),
        gr.State()
    ],
    title="Voice + Text Enabled Emotion-Aware Mental Health Chatbot",
    description="You can type or speak your message. Emotion-aware routing decides between Q&A and therapist-style support."
)

demo.launch()


loading file spiece.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading file chat_template.jinja
loading configuration file saved_models/t5_response_generator\config.json
Model config T5Config {
  "_name_or_path": "t5-small",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 6,
  "num_heads": 8,
  "num_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
