# Mental Health Chatbot

## Goal


## Imports

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import os
import wandb
import gradio as gr
import psycopg2
from dotenv import load_dotenv
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from rank_bm25 import BM25Okapi
from openai import OpenAI
load_dotenv()
NEON_PG_CONNECTION_URL = os.environ['NEON_PG_CONNECTION_URL']

## Data Pre-Processing & Insert to DB

In [15]:
def preprocess_data():
    ds1 = load_dataset("Amod/mental_health_counseling_conversations")
    ds2 = load_dataset("mpingale/mental-health-chat-dataset")

    # Rename columns "Context": "Question", "Response": "Answer" of ds1
    ds1 = ds1.rename_column("Context", "Question")
    ds1 = ds1.rename_column("Response", "Answer")
    ds2 = ds2.remove_columns(["questionID", "questionTitle", "questionLink", "topic", "therapistInfo", "therapistURL", "upvotes", "views", "text"])
    ds2 = ds2.rename_column("questionText", "Question")
    ds2 = ds2.rename_column("answerText", "Answer")

    # Convert to pandas DataFrame
    df1 = ds1['train'].to_pandas()
    df2 = ds2['train'].to_pandas()

    # Drop duplicates & NAs
    df1 = df1.drop_duplicates(subset=["Question", "Answer"]).dropna(subset=["Question", "Answer"])
    df2 = df2.drop_duplicates(subset=["Question", "Answer"]).dropna(subset=["Question", "Answer"])

    # Combine datasets
    combined_df = pd.concat([df1, df2])

    questions = combined_df['Question'].tolist()
    answers = combined_df['Answer'].tolist()

    return questions, answers

# Connect to the database
try:
    connection = psycopg2.connect(NEON_PG_CONNECTION_URL)
    connection.autocommit = True
    print("Connected to Neon Postgres!")
except Exception as e:
    print("Cannot connect to Neon Postgres:", e)

cursor = connection.cursor()

questions, answers = preprocess_data()
# Vectorization
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
question_vectors = model.encode(questions)
cursor.execute("""
    CREATE TABLE IF NOT EXISTS mental_health_qa (
        id SERIAL PRIMARY KEY,
        question TEXT,
        answer TEXT,
        vector FLOAT8[]
    )
""")

# Insert the data into the database
for i in range(len(questions)):
    cursor.execute("""
        INSERT INTO mental_health_qa (question, answer, vector)
        VALUES (%s, %s, %s)
    """, (questions[i], answers[i], question_vectors[i].tolist()))
connection.close()

Connected to Neon Postgres!




## Retrieval Model

In [10]:
# Function to query the database and augment responses
def query_and_augment(user_query):
    # Connect to the database
    connection = psycopg2.connect(NEON_PG_CONNECTION_URL)
    cursor = connection.cursor()

    cursor.execute("SELECT question, answer FROM mental_health_qa")
    records = cursor.fetchall()

    questions = [record[0] for record in records]
    answers = [record[1] for record in records]

    # Close the connection
    connection.close()

    # Implement BM25 to find the best match
    tokenized_questions = [q.split() for q in questions]
    bm25 = BM25Okapi(tokenized_questions)
    best_match_index = bm25.get_top_n(user_query.split(), questions, n=1)[0]
    best_answer = answers[questions.index(best_match_index)]

    # Generate augmented answer using the pipeline
    prompt = f"User: {user_query}\n\n\nLimit your knowledge to these related questions only:\n\nBot: {best_answer}\nBot (improved):"
    print(f"Prompt: {prompt}")
    client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": "Answer the user_query based on the best answer"},
            {"role": "assistant", "content": best_answer}
        ],
        model="gpt-4o",
    )
    return chat_completion.choices[0].message.content

# Define Gradio Interface
def chatbot_interface(user_query):
    return query_and_augment(user_query)

interface = gr.Interface(fn=chatbot_interface, inputs="text", outputs="text", title="Mental Health Chatbot")
interface.launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




Prompt: User: what's depression?


Limit your knowledge to these related questions only:

Bot: Probably.  I always tell the new parents I work with (and this is just as true for dads as it is for moms) that if you are feeling off, seek help.  Postpartum Support International (www.postpartum.net) is a great place to start finding resources and locating support.
Bot (improved):


In [None]:
# # Initialize the text generation pipeline
# generator = pipeline("text-generation", model="Llama-3-8B-instruct")

# # Function to query the database and augment responses
# def query_and_augment(user_query):
#     # Connect to the database
#     connection = psycopg2.connect(NEON_PG_CONNECTION_URL)
#     cursor = connection.cursor()

#     cursor.execute("SELECT question, answer FROM mental_health_chatbot")
#     records = cursor.fetchall()

#     questions = [record[0] for record in records]
#     answers = [record[1] for record in records]

#     # Close the connection
#     connection.close()

#     # Implement BM25 to find the best match
#     tokenized_questions = [q.split() for q in questions]
#     bm25 = BM25Okapi(tokenized_questions)
#     best_match_index = bm25.get_top_n(user_query.split(), questions, n=1)[0]
#     best_answer = answers[questions.index(best_match_index)]

#     # Generate augmented answer using the pipeline
#     prompt = f"User: {user_query}\nBot: {best_answer}\nBot (improved):"
#     augmented_answer = generator(prompt, max_new_tokens=100, return_full_text=False)[0]['generated_text']

#     return augmented_answer

# # Define Gradio Interface
# def chatbot_interface(user_query):
#     return query_and_augment(user_query)

# interface = gr.Interface(fn=chatbot_interface, inputs="text", outputs="text", title="Mental Health Chatbot")
# interface.launch()