## Mounting Google Drive

In [1]:
# Attach Google Drive to the Colab VM so the project folder is reachable under /content/drive.
from google.colab import drive

# force_remount=True re-mounts even when Drive is already mounted.
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


## Installations

In [41]:
!pip install -q -r /content/drive/MyDrive/mental-health-assisstant/requirements.txt

# Imports

In [40]:
import threading
import json
import os
import shutil
from typing import List, Dict
import torch
from langchain import PromptTemplate, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain_together import Together
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline, BitsAndBytesConfig
from peft import PeftModel

# Sentiment Inference

In [26]:
# Emotion classifier: load the checkpoint from the Hugging Face Hub and wrap it
# in a text-classification pipeline that is reused for every user query.
sentiment_model_id = "i5-8300h/bert-base-emotion-07july"

model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_id)
tokenizer = AutoTokenizer.from_pretrained(sentiment_model_id)

emotion_classifier = pipeline(
    task="text-classification",
    tokenizer=tokenizer,
    model=model,
)

Device set to use cuda:0


In [27]:
# Smoke test for the emotion classifier: run one sample sentence through the
# pipeline and print the raw result (a list of {'label', 'score'} dicts).

user_query = "I am feeling very bad"
result = emotion_classifier(user_query)

print(result)

[{'label': 'overwhelmed', 'score': 0.6095592379570007}]


# Global Constants

In [28]:
# Project folder on the mounted Drive; RAG texts and LoRA checkpoints are resolved relative to it.
root_dir = "/content/drive/MyDrive/mental-health-assisstant"
# Sentence-embedding model shared by every Chroma store (used both when building and when querying).
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Creating Vector Databases persistent in Google Drive
### Topics that already have a vector database in the Drive folder are skipped, so this cell is safe to re-run

In [29]:
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

google_drive_root = os.path.join(root_dir, "RAG_texts")

# Chunking parameters shared by every topic.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50
)

# Lower-cased file extension -> (loader class, label for "Loaded" logs, label for "Failed" logs).
_LOADERS = {
    ".txt": (TextLoader, "text", "TXT"),
    ".pdf": (PyPDFLoader, "PDF", "PDF"),
}


def _count_existing_vectors(persist_path):
    """Return the number of vectors stored at ``persist_path``.

    Returns ``None`` when the store cannot be opened, so the caller can leave
    an unreadable store untouched instead of overwriting it.
    """
    try:
        store = Chroma(persist_directory=persist_path, embedding_function=embedding)
        return store._collection.count()
    except Exception as e:
        print(f"Could not inspect existing DB at {persist_path}: {e}")
        return None


def _load_topic_documents(topic_path):
    """Load every .txt / .pdf file directly inside ``topic_path``.

    Extensions are matched case-insensitively. A file that fails to load is
    reported and skipped so one bad file does not abort the whole topic.
    """
    docs = []
    # Sorted so the chunk order is the same on every run.
    for filename in sorted(os.listdir(topic_path)):
        extension = os.path.splitext(filename)[1].lower()
        if extension not in _LOADERS:
            continue

        loader_cls, loaded_label, failed_label = _LOADERS[extension]
        filepath = os.path.join(topic_path, filename)
        try:
            loaded = loader_cls(filepath).load()
        except Exception as e:
            print(f"Failed to load {failed_label} {filename}: {e}")
            continue

        docs.extend(loaded)
        print(f"Loaded {len(loaded)} {loaded_label} chunks from {filename}")
    return docs


for topic in os.listdir(google_drive_root):
    topic_path = os.path.join(google_drive_root, topic)
    if not os.path.isdir(topic_path):
        continue

    persist_path = os.path.join(topic_path, "chroma_db")
    if os.path.exists(persist_path):
        existing_vectors = _count_existing_vectors(persist_path)
        if existing_vectors != 0:
            # Populated (or unreadable) store: leave it alone.
            continue
        # An empty store is the leftover of a failed/aborted build; rebuild it
        # instead of skipping it forever.
        print(f"\nExisting DB for {topic} is empty; rebuilding.")

    print(f"\nProcessing topic: {topic}")

    docs = _load_topic_documents(topic_path)
    if not docs:
        print(f"No documents found in {topic_path}, skipping.")
        continue

    docs = splitter.split_documents(docs)

    os.makedirs(persist_path, exist_ok=True)

    db = Chroma.from_documents(
        documents=docs,
        embedding=embedding,
        persist_directory=persist_path
    )
    db.persist()
    print(f"DB content: {db._collection.count()} vectors saved.")

    print(f"Saved vector DB to: {persist_path}")


# Set-Up

## RAG Domain Topics

In [30]:
# Knowledge domains covered by the RAG corpus (one vector store per entry).
topics = [
    "substance-misuse",
    "anxiety-depression",
    "schizophrenia",
    "obcessive-compulsive-disorder",
    "post-traumatic-stress-disorder",
    "suicide-self-harm",
    "grief-loss",
    "anger",
    "relationships",
    "self-esteem",
]

## Vector Stores

In [31]:
# Open every persisted per-topic Chroma store found under RAG_texts.
texts_dir = os.path.join(root_dir, "RAG_texts")
chroma_stores = {}

for topic in os.listdir(texts_dir):
    persist_path = os.path.join(texts_dir, topic, "chroma_db")
    if not os.path.isdir(persist_path):
        # No persisted database for this entry.
        continue
    store = Chroma(
        embedding_function=embedding,
        persist_directory=persist_path,
    )
    chroma_stores[topic] = store

# Report how many vectors each opened store holds.
for topic, store in chroma_stores.items():
    print(f"{topic}: {store._collection.count()} vectors")

substance-misuse: 0 vectors
anxiety-depression: 0 vectors
schizophrenia: 0 vectors
obcessive-compulsive-disorder: 135 vectors
post-traumatic-stress-disorder: 35 vectors
suicide-self-harm: 24 vectors
grief-loss: 140 vectors
anger: 112 vectors
relationships: 28 vectors
self-esteem: 24 vectors


## LLMs

In [33]:
# Run this cell when a model requires a Hugging Face login (read token).
# notebook_login() renders an interactive widget in which the token is entered.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Flan-t5-large (Categories and Keywords)

In [34]:
# Flan-T5-large backs both the category-selection and the keyword-extraction chains.
model_name = "google/flan-t5-large"

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Generation is capped at 20 new tokens per call.
hf_pipeline = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=model,
    max_new_tokens=20,
)

# LangChain wrapper so the pipeline can be composed with prompt templates.
classification_llm = HuggingFacePipeline(pipeline=hf_pipeline)

Device set to use cuda:0


# Prompt Engineering

## Few Shot Prompt for Category Selection

In [35]:
# Few-shot prompt asking the model to pick the knowledge categories needed to
# answer a user message. The only template variable is {query}; the examples
# show the expected output as space-separated category names.
# NOTE(review): the category names here presumably need to match the RAG_texts
# folder names used as vector-store keys — confirm before renaming any of them.
category_prompt = PromptTemplate.from_template("""
I want you to determine which of the following categories of knowledge are required for a mental health agent to respond to the users message appropriately.
Pick categories only from the below list of possible categories:
[substance-misuse, anxiety-depression, schizophrenia, obcessive-compulsive-disorder, post-traumatic-stress-disorder, suicide-self-harm, grief-loss, anger, relationships, self-esteem]

Pick any number of relevant categories from the above list. Be generous with the selections. Select as many as required.

Examples:

User message: I am addicted to alcohol. I feel like I can never overcome this addiction.
Output: substance-misuse

User message: I am obcessive over perfectionism and it never lets me complete any task given to me.
Output: obcessive-compulsive-disorder

User message: I have a lost a loved one, and I fear I might harm myself because of it.
Output: grief-loss suicide-self-harm

User message: I see hallucination of my past self, and I cannot focus on the present. This is causing me to feel anxious.
Output: schizophrenia anxiety-depression

User message: {query}
Output:
""")

# Prompt piped into the classification LLM; invoke with {"query": ...}.
category_chain = category_prompt | classification_llm

## Zero Shot Prompt for Keyword Extraction

In [36]:
# Zero-shot prompt that asks the model for mental-health keywords in the user
# message. The only template variable is {query}.
keyword_prompt = PromptTemplate.from_template("""
Extract keywords related to mental health from the given user prompt.
user prompt: {query}
keywords:
""")

# Prompt piped into the same classification LLM; invoke with {"query": ...}.
keyword_chain = keyword_prompt | classification_llm

# PEFT Mistral-7B-Instruct-v0.2

## Loading Base Model

In [38]:
!pip install -U bitsandbytes



In [42]:
base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit NF4 quantization with double quantization; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    # `use_auth_token` is deprecated in transformers; `token=True` reuses the
    # token stored by the Hugging Face login.
    token=True,
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

## Loading the LoRA Adapter Weights into the Model

In [None]:
# Location of the fine-tuned LoRA adapter checkpoint inside the project folder.
lora_adapter_weights_path = os.path.join(root_dir, "lora_adapter_checkpoints/checkpoint-222")
# Wrap the base model with the adapter weights. `model` is rebound to the wrapped model,
# so running this cell twice passes an already-wrapped model in again —
# NOTE(review): reload the base model before re-running.
model = PeftModel.from_pretrained(model, lora_adapter_weights_path)

## Creating a Chain

In [None]:
# Text-generation pipeline around the current `model` / `tokenizer`.
# NOTE(review): `model` is passed as an already-instantiated object, so
# `torch_dtype` and `device_map` may have no effect here — confirm against the
# installed transformers version.
# NOTE(review): no max_new_tokens / return_full_text is set, so output length and
# whether the prompt is echoed back follow the library defaults — confirm this
# is intended.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto"
)

# LangChain wrapper so the pipeline can be composed with prompt templates.
response_llm = HuggingFacePipeline(pipeline=pipe)

## Structured System Prompt for Final Response

In [None]:
# Prompt for the final answer. Template variables: {context} (retrieved domain
# knowledge), {sentiment} (emotion label) and {user_query} (the user message).
# The "\n" escapes in the last template line are real newlines at runtime
# because the string is not a raw string.
final_prompt = PromptTemplate.from_template("""
INSTRUCTION:
You are a mental health assistant.
Provide an informative and compassionate reply.

CONTEXT:
Use the below domain specific knowledge to respond to the user query.
{context}

INFERENCE ON:
Emotion: {sentiment}\nUser: {user_query}\nBot:
""")

# Prompt piped into the response LLM; invoke with all three variables.
synthesis_chain = final_prompt | response_llm

## Final Response Testing

In [None]:
# The keys must match the template variables of final_prompt
# (context, sentiment, user_query); an empty context tests the model without RAG.
print(synthesis_chain.invoke({"context": "", "sentiment": "sad", "user_query": "I feel like crying"}))

# RAG Pipeline

In [None]:
def rag_pipeline(user_query: str, k: int = 10) -> str:
    """Build the retrieval context for ``user_query``.

    Steps:
      1. ``category_chain`` selects the knowledge categories for the query.
      2. ``keyword_chain`` extracts keywords from the query.
      3. For every selected category that has a vector store, the ``k`` most
         similar chunks are fetched (one thread per category) and re-ranked by
         the number of keywords each chunk contains.
      4. The chunks are concatenated under an upper-cased category heading, in
         the order the categories were selected.

    Categories returned by the model that have no entry in ``chroma_stores``
    are ignored, and categories that yield no chunks add nothing to the output.

    Args:
        user_query: Raw user message.
        k: Number of chunks to retrieve per category.

    Returns:
        The combined context string; empty when nothing was retrieved.
    """
    # The model may separate items with commas as well as whitespace.
    raw_categories = category_chain.invoke({"query": user_query}).replace(",", " ").split()
    # De-duplicate while keeping the model's order, and keep only known stores.
    categories = [
        topic
        for topic in dict.fromkeys(raw_categories)
        if chroma_stores.get(topic) is not None
    ]
    # print(categories)
    raw_keywords = keyword_chain.invoke({"query": user_query}).replace(",", " ").split()
    # Lower-cased once here so matching below is case-insensitive.
    keywords = list({kw.lower() for kw in raw_keywords})
    # print(keywords)

    retrieved_chunks = {}
    lock = threading.Lock()

    def count_keyword_matches(text):
        lowered = text.lower()
        return sum(1 for kw in keywords if kw in lowered)

    def retrieve_for_topic(topic):
        results = chroma_stores[topic].similarity_search(user_query, k=k)
        # sorted() is stable, so chunks with equal keyword counts keep their
        # similarity order.
        reranked = sorted(
            results,
            key=lambda chunk: count_keyword_matches(chunk.page_content),
            reverse=True
        )
        with lock:
            retrieved_chunks[topic] = reranked

    threads = [
        threading.Thread(target=retrieve_for_topic, args=(topic,))
        for topic in categories
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Assemble in category order so the result does not depend on which thread
    # finished first.
    sections = []
    for topic in categories:
        snippets = retrieved_chunks.get(topic)
        if not snippets:
            continue
        body = "".join(snippet.page_content + "\n\n" for snippet in snippets)
        sections.append(f"{topic.upper()}:\n{body}")
    combined_context = "".join(sections)
    # print(combined_context)

    return combined_context

# RAG Testing

In [None]:
# Uncomment the print statements inside rag_pipeline() to see the intermediate categories and keywords
query = "I am overly obsessive about things"
print("\nOutput:", rag_pipeline(query))

['obcessive-compulsive-disorder']
['obsessive']
OBCESSIVE-COMPULSIVE-DISORDER:
Obsessive- 
Compulsive 
Disorder: 
When Unwanted Thoughts or 
Repetitive Behaviors Take Over

or blasphemy. 
• Excessive concern with  
right/wrong or morality.
Other Obsessions 
• Concern with getting a physical 
illness or disease (not by 
contamination e.g., cancer)
• Superstitious ideas about lucky/
unlucky numbers, certain colors
*   Reprinted with permission by New Harbinger Publications, Inc. This is an adaptation of the OC 
Checklist which appears in S. Wilhelm and G. S. Steketee’s, “Cognitive Therapy for Obsessive-

in significant clutter in the home  
(also called hoarding)
• Putting things in order  
or arranging things until it  
"feels right" 
• Telling, asking, or confessing  
to get reassurance
• Avoiding situations that might 
trigger your obsessions
*  Reprinted with permission by New Harbinger Publications, Inc. This is an adaptation of the OC 
Checklist which appears in S. Wilhelm and G. S

# FINAL RESPONSE RESULT

In [None]:
USER_QUERY = "I am feeling nervous. I don't feel safe."

# Sentiment Analysis of User Query
# The classifier returns a list such as [{'label': ..., 'score': ...}]; the
# prompt's "Emotion:" slot needs only the label text, not the raw list.
sentiment = emotion_classifier(USER_QUERY)[0]["label"]

# Calling the Rag Pipeline
context = rag_pipeline(USER_QUERY)

# Fine-tuned LLM Response
response = synthesis_chain.invoke({"context": context, "sentiment": sentiment, "user_query": USER_QUERY})

# Display the final response
print(response)