In [16]:
import os

from langchain_groq import ChatGroq

# The Groq credential is read from the environment so that the secret never
# lives in the notebook source or its saved outputs. os.environ[...] raises
# KeyError when GROQ_API_KEY is unset, which fails fast before any request.
# NOTE(review): the key that used to be hardcoded here has been published with
# the notebook and should be revoked.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile",
)

# Smoke test: one round-trip to confirm the key and model name are accepted.
result = llm.invoke("Who is Donald Trump?")
print(result.content)

Donald Trump is the 45th President of the United States, serving from 2017 to 2021. He is a businessman, real estate developer, and television personality who was born on June 14, 1946, in Queens, New York.

Before entering politics, Trump made a name for himself in the business world, building a real estate empire and creating a brand that became synonymous with luxury and success. He developed numerous high-end properties, including hotels, casinos, and golf courses, and wrote several bestselling books on business and entrepreneurship.

Trump's entry into politics began in 2015, when he announced his candidacy for the Republican presidential nomination. Despite being a newcomer to politics, Trump's unconventional style and populist message resonated with many voters, and he won the Republican nomination in 2016.

In the general election, Trump faced off against Democratic nominee Hillary Clinton, and his campaign was marked by controversy and polarization. Despite being a significant

In [17]:
import pandas as pd

from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from collections import defaultdict



In [2]:
import os

def create_vector_db(file_path, persist_directory="./chroma_db", collection_name="qa_collection"):
    """Build (or refresh) a persistent Chroma collection from a Q&A CSV file.

    Each distinct ``Context`` value becomes one document whose ``page_content``
    is the question text; all ``Response`` values for that question are joined
    with a blank line and stored under the ``"answers"`` metadata key.

    Args:
        file_path: Path of a CSV file with ``Context`` and ``Response`` columns.
        persist_directory: Directory handed to Chroma; created if missing.
        collection_name: Name of the Chroma collection to write to.

    Returns:
        The ``Chroma`` vector store instance.

    Raises:
        ValueError: If the CSV lacks a ``Context`` or ``Response`` column.
    """
    import hashlib  # local import: only needed for the stable document ids below

    df = pd.read_csv(file_path)

    if "Context" not in df.columns or "Response" not in df.columns:
        raise ValueError("CSV file must contain 'Context' and 'Response' columns.")

    # Rows missing either field would otherwise be stored as the literal text "nan".
    df = df.dropna(subset=["Context", "Response"])

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(persist_directory, exist_ok=True)

    # Initialize embedding model.
    # NOTE(review): this is the BGE wrapper around a non-BGE MiniLM model; the
    # wrapper presumably adds a BGE query instruction at search time. Any change
    # must be mirrored wherever the persisted store is reopened - confirm.
    embeddings = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

    # Initialize ChromaDB collection
    vector_store = Chroma(
        collection_name=collection_name,
        persist_directory=persist_directory,
        embedding_function=embeddings
    )

    # Group every response under its (whitespace-trimmed) question.
    qa_dict = defaultdict(list)
    for question, answer in zip(df["Context"], df["Response"]):
        qa_dict[str(question).strip()].append(str(answer).strip())

    # One document per unique question; the answers travel as metadata.
    docs = [
        Document(page_content=question, metadata={"answers": "\n\n".join(answers)})
        for question, answers in qa_dict.items()
    ]

    # Content-derived ids: running this function again over the same CSV
    # addresses the same records instead of appending a second copy of every
    # question (Chroma would otherwise generate fresh random ids per call).
    ids = [hashlib.sha256(question.encode("utf-8")).hexdigest() for question in qa_dict]

    # Nothing to add for an empty CSV; skip the call rather than pass empty lists.
    if docs:
        vector_store.add_documents(docs, ids=ids)

    print(f"Stored {len(qa_dict)} unique questions in ChromaDB using LangChain!")

    return vector_store  # Return the vector store instance

# Example usage: build the store from the training CSV. The location can be
# overridden with the TRAIN_CSV_PATH environment variable so the notebook is
# not tied to one machine; the default keeps the original path.
file_path = os.environ.get("TRAIN_CSV_PATH", "/home/wenjinf/NLP_Proj/train.csv")
vector_db = create_vector_db(file_path)


  embeddings = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


Stored 995 unique questions in ChromaDB using LangChain!


In [18]:
def initialize_llm():
    """Create the Groq chat model used by the chatbot.

    The API key is read from the ``GROQ_API_KEY`` environment variable instead
    of being embedded in the notebook, so the secret is not saved or shared
    with the file.

    Returns:
        A ``ChatGroq`` instance configured with ``temperature=0`` and the
        ``llama-3.3-70b-versatile`` model.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY environment variable is not set.")

    return ChatGroq(
        temperature=0,
        groq_api_key=api_key,
        model_name="llama-3.3-70b-versatile",
    )

In [19]:
import random

def query_chromadb(user_query, top_k=3, vector_store=None):
    """Return up to ``top_k`` distinct stored questions similar to ``user_query``.

    Args:
        user_query: Free-text query passed to the store's ``similarity_search``.
        top_k: Maximum number of question/answer pairs to return.
        vector_store: Store to search. When omitted, the notebook-level
            ``vector_db`` is used, which is what existing callers rely on.

    Returns:
        A list of ``{"question": ..., "response": ...}`` dicts. ``response`` is
        one answer picked at random from the document's ``"answers"`` metadata
        (answers are separated by a blank line in that string).
    """
    if top_k <= 0:
        return []

    store = vector_db if vector_store is None else vector_store

    # Ask for twice as many hits as needed so duplicates can be dropped below.
    results = store.similarity_search(user_query, k=top_k * 2)

    seen_questions = set()
    unique_results = []

    for result in results:
        question = result.page_content
        if question in seen_questions:
            continue

        # Documents without stored answers cannot serve as examples; skip them
        # instead of raising KeyError.
        stored_answers = result.metadata.get("answers")
        if not stored_answers:
            continue

        seen_questions.add(question)
        response = random.choice(stored_answers.split("\n\n"))  # Randomly select an answer
        unique_results.append({"question": question, "response": response})

        if len(unique_results) == top_k:
            break

    return unique_results

# Example query: fetch up to three distinct stored questions similar to the
# user's text, each paired with one of its stored answers.
user_question = "How can I deal with stress?"
retrieved_answers = query_chromadb(user_question, top_k=3)

# Display the retrieved question/answer pairs, numbered from 1.
for idx, qa in enumerate(retrieved_answers, 1):
    print(f"{idx}. Q: {qa['question']}\n   A: {qa['response']}\n")


1. Q: I need help knowing how to deal with stress. What can I do?
   A: Something different works for each of us.There are the outward answers of self-pampering and making your home and work environment as pleasant as possible.A deeper level way to decrease stress is through exercise or alternative practices like yoga or tai chi.If the stress is more deeply rooted than temporarily feeling irritated for a few days, then give yourself some time to reflect and clarify what the meaning of the stress is to you.Self-understanding and appreciating your efforts to know yourself may decrease stress because you'll be more focused and attentive to who you are.  This will influence you overall to make good decisions for yourself and these will naturally be ones which decrease stress as much as possible.

2. Q: I need help dealing with stress. How can I handle it all and feel less stressed out?
   A: Part of handling stress is making sure that your perception of the stress is accurate. Sometimes st

In [20]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

def setup_qa_chain(vector_db, llm):
    """Build the few-shot advice function used by the chatbot.

    For every query, similar counseling exchanges are looked up in
    ``vector_db``, rendered as numbered examples, substituted into the prompt
    as ``{context}`` and sent to ``llm``.

    The prompt is filled and sent to the model directly. Routing it through a
    RetrievalQA "stuff" chain replaced ``{context}`` with the retrieved
    documents' ``page_content`` (the questions only) and ignored the extra
    ``context`` input, so the examples never reached the model.

    Args:
        vector_db: Store exposing ``similarity_search(query, k=...)`` whose
            documents keep their answers in ``metadata["answers"]`` separated
            by blank lines.
        llm: Model exposing ``invoke(prompt)``; the reply's ``content``
            attribute is used when present, otherwise the reply itself.

    Returns:
        A function ``qa_chain_invoke(user_query)`` returning a dict with the
        keys ``"query"`` and ``"result"``.
    """
    prompt_template = """You are a compassionate mental health chatbot. Here are some past similar counseling conversation:
    {context}

    From the above examples, provide advice to the following user question:
    User: {question}
    Chatbot: """

    def get_few_shot_examples(user_query, top_k=3):
        """Retrieve and format few-shot examples from the store given to setup_qa_chain."""
        # Over-fetch so that duplicate questions can be dropped.
        candidates = vector_db.similarity_search(user_query, k=top_k * 2)

        seen_questions = set()
        examples = []
        for doc in candidates:
            answers = doc.metadata.get("answers")
            if doc.page_content in seen_questions or not answers:
                continue
            seen_questions.add(doc.page_content)
            # One of the stored answers is chosen at random per example.
            examples.append((doc.page_content, random.choice(answers.split("\n\n"))))
            if len(examples) == top_k:
                break

        return "\n\n".join(
            f"Example {idx}:\nUser: {question}\nChatbot: {answer}"
            for idx, (question, answer) in enumerate(examples, 1)
        )

    def qa_chain_invoke(user_query):
        """Fetch few-shot examples, fill the prompt and ask the model."""
        few_shot_examples = get_few_shot_examples(user_query)
        prompt = prompt_template.format(context=few_shot_examples, question=user_query)
        reply = llm.invoke(prompt)
        # Chat models return a message object; plain LLMs return a string.
        return {"query": user_query, "result": getattr(reply, "content", reply)}

    return qa_chain_invoke  # Returns the function that dynamically injects few-shot examples


In [7]:
# Set up the QA chain. setup_qa_chain returns a function, so qa_chain is
# called directly with the query string below.
qa_chain = setup_qa_chain(vector_db, llm)

# Example query from the user
user_question = "How can I cope with anxiety?"

# Generate an answer; the few-shot examples are looked up inside the call.
response = qa_chain(user_question)

# Print the AI-generated response (the answer text is under the 'result' key)
print(response['result'])

I'm so glad you reached out for support. Coping with anxiety can be challenging, but there are many strategies that can help. Based on what has worked for others, I'd like to suggest a few things that might be helpful for you.

Firstly, it's essential to acknowledge that anxiety is a common experience, and it's not a sign of weakness. Many people have found it helpful to have a support system, whether it's a trusted friend, family member, or a mental health professional.

Some people have found comfort in having an emotional support animal, which can provide a sense of calm and companionship. If you have a pet, spending time with them might help alleviate some of your anxiety.

In terms of practical strategies, you might find it helpful to start small. For example, if you're feeling anxious about going to stores, you could start by going with a trusted friend or family member and gradually work up to going alone. You could also try practicing relaxation techniques, such as deep breathi

In [45]:
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings

def main(csv_path=None, db_path="./chroma_db", collection_name="qa_collection"):
    """Run the chatbot as an interactive console loop.

    Args:
        csv_path: CSV used to build the store when ``db_path`` does not exist
            yet. Defaults to the notebook-level ``file_path``.
        db_path: Directory of the persisted Chroma database.
        collection_name: Collection to create or reopen. It must match the
            name used when the store was built, otherwise a different
            collection is read.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored).
    """
    print("Initializing Chatbot...")

    llm = initialize_llm()

    # Build the store on first run, otherwise reopen the persisted one.
    if not os.path.exists(db_path):
        # create_vector_db requires the CSV path; calling it with no arguments
        # raised TypeError.
        vector_db = create_vector_db(
            file_path if csv_path is None else csv_path,
            persist_directory=db_path,
            collection_name=collection_name,
        )
    else:
        embeddings = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
        vector_db = Chroma(
            collection_name=collection_name,
            persist_directory=db_path,
            embedding_function=embeddings,
        )

    # Set up the QA chain and get the function that invokes responses
    qa_chain_invoke = setup_qa_chain(vector_db, llm)

    # Interactive chat loop
    while True:
        query = input("\nHuman: ").strip()
        if query.lower() == "exit":
            print("Chatbot: Take care of yourself. Goodbye!")
            break
        if not query:
            # Nothing was typed; prompt again instead of querying the model.
            continue

        response = qa_chain_invoke(query)
        print(f"Chatbot: {response['result']}")

# Start the console chat loop when this code runs as the top-level program.
if __name__ == "__main__":
    main()


Initializing Chatbot...
Chatbot: It's lovely to meet you. Is there something on your mind that you'd like to talk about, or are you just looking for someone to chat with? I'm here to listen and offer support if you need it. How's your day been so far?
Chatbot: I'm so sorry to hear about the loss of your grandma. It's completely normal to feel empty and depressed after losing a loved one. Grieving is a process, and it's okay to take your time to work through your emotions.

Firstly, please know that you're not alone in this feeling. It's essential to allow yourself to feel the emotions that come with grief, rather than trying to suppress or deny them. Acknowledge your feelings, and give yourself permission to grieve.

Here are some suggestions that may help you cope with your emotions:

1. **Talk to someone**: Reach out to a trusted friend, family member, or mental health professional who can listen to you without judgment. Sharing your feelings and memories of your grandma can be incre

In [55]:
import gradio as gr

# Initialize the QA chain. setup_qa_chain returns a function that takes the
# user's query; chatbot_response below calls it through this global name.
qa_chain_invoke = setup_qa_chain(vector_db, llm)

def chatbot_response(user_input, history=None):
    """Answer one chat message and return only the reply text.

    Args:
        user_input: The message typed by the user.
        history: Conversation history supplied by ``gr.ChatInterface``. It is
            accepted for signature compatibility and never modified: the chat
            interface maintains the history itself, and appending to a shared
            default list would leak turns between calls.

    Returns:
        The chatbot reply as a string, or a fixed prompt when the input is
        blank.
    """
    if not user_input.strip():
        return "Please provide a valid input"

    response = qa_chain_invoke(user_input)

    # The chain returns a dict with the answer under "result"; a plain string
    # is passed through unchanged.
    if isinstance(response, str):
        return response
    return response.get("result", "Sorry, I couldn't understand.")


# Page layout: title, disclaimer, the chat widget wired to chatbot_response,
# and a closing notice.
with gr.Blocks(theme="shivi/calm_seafoam") as app:
    gr.Markdown("# 🧠 Mental Health Chatbot 🤖")
    gr.Markdown("A compassionate chatbot designed to assist with mental well-being. Please note: For serious concerns, contact a professional.")

    # ChatInterface calls chatbot_response for every submitted message.
    chatbot = gr.ChatInterface(fn=chatbot_response, title="Mental Health Chatbot")

    gr.Markdown("This chatbot provides general support. For urgent issues, seek help from licensed professionals.")

# share=True additionally requests a public gradio.live URL (see the run
# output), so anyone with the link can reach the app while it is running.
app.launch(share=True)


Running on local URL:  http://127.0.0.1:7869


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Running on public URL: https://106a7a9b02c98ddff9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [21]:

import pickle
import re
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from scipy.sparse import hstack

def load_model(model_name):
    """Unpickle a saved model from the current working directory.

    The file name is derived from ``model_name``: spaces become underscores,
    the result is lower-cased and ``_model.pkl`` is appended (``"XGB"`` ->
    ``xgb_model.pkl``).

    Args:
        model_name: Human-readable model name.

    Returns:
        Whatever object was pickled in that file.

    Note:
        ``pickle.load`` can execute arbitrary code contained in the file; only
        load artifacts from a trusted source.
    """
    slug = "_".join(model_name.split(" ")).lower()
    model_filename = f"{slug}_model.pkl"

    with open(model_filename, "rb") as handle:
        loaded = pickle.load(handle)

    print(f"Loaded model: {model_filename}")
    return loaded

def load_label_mapping():
    """Read ``label_mapping.pkl`` and return the mapping with keys and values swapped.

    Returns:
        A dict in which every value of the pickled mapping points back to its
        key. If several keys shared one value, the last one wins.

    Note:
        ``pickle.load`` can execute arbitrary code contained in the file; only
        load artifacts from a trusted source.
    """
    with open("label_mapping.pkl", "rb") as handle:
        stored_mapping = pickle.load(handle)
    print("Loaded label mapping from label_mapping.pkl")

    inverted = {}
    for key, value in stored_mapping.items():
        inverted[value] = key
    return inverted

# Load the fitted TF-IDF vectorizer saved next to the notebook.
# NOTE: pickle.load can execute arbitrary code from the file; load only
# trusted artifacts.
with open("tfidf_vectorizer.pkl", "rb") as file:
    tfidf_vectorizer = pickle.load(file)

# load_model("XGB") reads xgb_model.pkl; load_label_mapping returns the pickled
# mapping inverted, so label_mapping is indexed by the model's predicted label.
xgb_model = load_model("XGB")
label_mapping = load_label_mapping()

# The tokenizers used in preprocess_text presumably need NLTK's 'punkt' data;
# uncomment the download on a machine that does not have it yet.
# nltk.download('punkt')
porter = PorterStemmer()

import re

from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack


def clean_text(input_text):
    """Normalise raw text to lower-case words separated by whitespace.

    The text is lower-cased, then a fixed sequence of regex removals is
    applied, and finally leading/trailing whitespace is stripped. Newlines are
    deleted without inserting a space, so words on adjacent lines are joined.

    NOTE(review): presumably this must stay identical to the cleaning used when
    the TF-IDF vectorizer was fitted - confirm before changing any step.

    Args:
        input_text: Raw user text.

    Returns:
        The cleaned string.
    """
    # Applied in order; URLs go first, so markdown links whose target is a URL
    # are already partly consumed when the link pattern runs.
    removal_patterns = (
        r"https?://\S+",      # web URLs
        r"\[.*?\]\(.*?\)",    # markdown links of the form [text](url)
        r'\[.*?\]',           # any remaining text in square brackets
        r"@\w+",              # user mentions starting with '@'
        r'\n',                # newlines
        r"[^\w\s]",           # punctuation and special characters
    )

    text = input_text.lower()
    for pattern in removal_patterns:
        text = re.sub(pattern, "", text)
    return text.strip()



# Define preprocessing function
def preprocess_text(input_text, vectorizer):
    """Turn one raw text into the single-row feature matrix fed to the model.

    The row is the TF-IDF vector of the cleaned, Porter-stemmed text followed
    by two extra columns: the character count and the sentence count of the
    raw (uncleaned) input.

    Args:
        input_text: Raw user text.
        vectorizer: Fitted object exposing ``transform(list_of_str)``.

    Returns:
        The sparse matrix produced by ``scipy.sparse.hstack`` over the TF-IDF
        row and the two numeric columns.
    """
    # Length statistics come from the raw text, before any cleaning.
    length_features = np.array([[len(input_text), len(nltk.sent_tokenize(input_text))]])

    # Clean -> tokenize -> stem, then re-join so the vectorizer sees one string.
    stems = [porter.stem(token) for token in word_tokenize(clean_text(input_text))]
    tfidf_features = vectorizer.transform([' '.join(stems)])

    return hstack([tfidf_features, length_features])

# Sample input used to exercise the classifier end to end.
user_text = "I feel like everyone dislikes me and excludes me, making me feel like a coward. Sometimes, it makes me want to kill myself."

# Build the single-row feature matrix (TF-IDF plus the two length columns).
processed_input = preprocess_text(user_text, tfidf_vectorizer)

# Make prediction using the loaded XGBoost model; [0] takes the label of the
# only row.
predicted_label = xgb_model.predict(processed_input)[0]
print(predicted_label)
# label_mapping is indexed by the predicted label and yields the category name.
predicted_class = label_mapping[predicted_label]

print(f"Predicted Category: {predicted_class}")



Loaded model: xgb_model.pkl
Loaded label mapping from label_mapping.pkl
6
Predicted Category: Suicidal


  Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
  machine. Consider using `save_model/load_model` instead. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.  Changing `tree_method` to `hist`.


In [24]:
# Initialize the QA chain and the conversation state used by chatbot_response.
import gradio as gr
qa_chain_invoke = setup_qa_chain(vector_db, llm)
# Both flags are module-level and are read/written by chatbot_response through
# `global`.
# NOTE(review): as module globals they are presumably shared by every browser
# session of the app - confirm before exposing it to several users.
first_interaction = True  # Track first round
user_mode = None  # Store user-selected mode

def chatbot_response(user_input, history=None):
    """Drive the three-step menu conversation and return the reply text.

    Steps, tracked in the module-level ``first_interaction`` and ``user_mode``:

    1. The first non-blank message is answered with the greeting and menu.
    2. While no mode is selected, ``1``/``2``/``3`` select a mode and ``exit``
       returns a goodbye message (case-insensitive, surrounding whitespace
       ignored for all four).
    3. With a mode selected, the message is answered by that mode, the mode is
       cleared, and the menu is appended to the answer.

    Modes: 1 lists similar counseling examples from ``query_chromadb``; 2 asks
    the LLM through ``qa_chain_invoke``; 3 classifies the text with the
    XGBoost model.

    Args:
        user_input: The message typed by the user.
        history: Conversation history supplied by ``gr.ChatInterface``; unused.

    Returns:
        The reply string shown in the chat.
    """
    global first_interaction, user_mode

    # Option list shared by the greeting and the follow-up prompt. The wording
    # is kept exactly as originally shown to users.
    # TODO(review): "Perdict" / "backened" look like typos in user-facing text.
    menu = (
        "Typing the numeric number to proceed or 'exit' to end the conversation.\n"
        "1. Find examples of counseling sessions to get help on your questions.\n"
        "2. Get real-time advice online to solve your questions.\n"
        "3. Perdict likelihood of your health issue backened by trained ML model"
    )

    choice = user_input.strip()
    if not choice:
        return "Please provide a valid input"

    if first_interaction:
        # First greeting and mode selection; the content of the first message
        # itself is not interpreted.
        first_interaction = False
        return "Hello my friend, how can I help you today? " + menu

    # Step 2: the user is choosing a mode.
    if user_mode is None:
        if choice in ("1", "2"):
            user_mode = int(choice)
            return "Glad to help! What is your question?"
        if choice == "3":
            user_mode = 3
            return "Glad to help! What is your mental health state?"
        # Compared on the stripped text, like the numeric options above, so
        # " exit " is recognised too.
        if choice.lower() == "exit":
            return "Chatbot: Take care of yourself. Goodbye!"
        return "Please select a valid option: 1, 2, 3 or exit"

    # Step 3: answer the actual question. The raw user_input (not the stripped
    # copy) is passed on, as before.
    if user_mode == 1:
        response_examples = query_chromadb(user_input, top_k=3)
        response_text = "\n\n".join(
            f"Example {idx}:\nUser: {ex['question']}\nChatbot: {ex['response']}"
            for idx, ex in enumerate(response_examples, 1)
        )
    elif user_mode == 2:
        response_text = qa_chain_invoke(user_input)
        if not isinstance(response_text, str):
            response_text = response_text.get("result", "Sorry, I couldn't understand.")
    elif user_mode == 3:
        processed_input = preprocess_text(user_input, tfidf_vectorizer)
        # Make prediction using the loaded XGBoost model
        predicted_label = xgb_model.predict(processed_input)[0]
        predicted_class = label_mapping[predicted_label]
        response_text = f"You probably have {predicted_class} mental health issue!"

    # Reset mode for next round (done after answering, so a failed call leaves
    # the selected mode in place).
    user_mode = None

    # Prompt for next round
    next_prompt = (
        "\n\n"
        "Hope my answer above helps! Do you have more questions?\n"
        + menu
    )

    return response_text + "\n\n" + next_prompt  # Response + next action prompt

# Custom CSS handed to gr.Blocks. The `height` and `width` declarations need a
# ';' between them; without it the browser sees one invalid declaration and
# drops both.
css=""".gradio-container .avatar-container {height: 40px; width: 40px !important;} #duplicate-button {margin: auto; color: white; background: #f1a139; border-radius: 100vh;}"""


# Page layout: title, short description and the chat widget wired to the
# menu-driven chatbot_response.
with gr.Blocks(theme="shivi/calm_seafoam", css=css) as app:
    gr.Markdown("# 🧠 Mental Health Chatbot 🤖")
    gr.Markdown("A compassionate chatbot designed to assist with mental well-being.")

    # ChatInterface calls chatbot_response for every submitted message.
    chatbot = gr.ChatInterface(fn=chatbot_response, title="Mental Health Chatbot")


# share=True additionally requests a public gradio.live URL (see the run
# output), so anyone with the link can reach the app while it is running.
app.launch(share=True)

Running on local URL:  http://127.0.0.1:7862


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Running on public URL: https://2e7ba64916f0eb1f92.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


