# Mental Health Chatbot

## Goal


## RAG

### Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import os
import sys
print(sys.executable)
import wandb
import gradio as gr
import psycopg2
from dotenv import load_dotenv
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from rank_bm25 import BM25Okapi
from openai import OpenAI
load_dotenv()
NEON_PG_CONNECTION_URL = os.environ['NEON_PG_CONNECTION_URL']

### Data Pre-Processing & Insert to DB

In [None]:
def preprocess_data():
    ds1 = load_dataset("Amod/mental_health_counseling_conversations")
    ds2 = load_dataset("mpingale/mental-health-chat-dataset")

    # Rename columns "Context": "Question", "Response": "Answer" of ds1
    ds1 = ds1.rename_column("Context", "Question")
    ds1 = ds1.rename_column("Response", "Answer")
    ds2 = ds2.remove_columns(["questionID", "questionTitle", "questionLink", "topic", "therapistInfo", "therapistURL", "upvotes", "views", "text"])
    ds2 = ds2.rename_column("questionText", "Question")
    ds2 = ds2.rename_column("answerText", "Answer")

    # Convert to pandas DataFrame
    df1 = ds1['train'].to_pandas()
    df2 = ds2['train'].to_pandas()

    # Drop duplicates & NAs
    df1 = df1.drop_duplicates(subset=["Question", "Answer"]).dropna(subset=["Question", "Answer"])
    df2 = df2.drop_duplicates(subset=["Question", "Answer"]).dropna(subset=["Question", "Answer"])

    # Combine datasets
    combined_df = pd.concat([df1, df2])

    questions = combined_df['Question'].tolist()
    answers = combined_df['Answer'].tolist()

    return questions, answers

# Connect to the database
try:
    connection = psycopg2.connect(NEON_PG_CONNECTION_URL)
    connection.autocommit = True
    print("Connected to Neon Postgres!")
except Exception as e:
    print("Cannot connect to Neon Postgres:", e)

cursor = connection.cursor()

questions, answers = preprocess_data()
# Vectorization
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
question_vectors = model.encode(questions)
cursor.execute("""
    CREATE TABLE IF NOT EXISTS mental_health_qa (
        id SERIAL PRIMARY KEY,
        question TEXT,
        answer TEXT,
        vector FLOAT8[]
    )
""")

# Insert the data into the database
for i in range(len(questions)):
    cursor.execute("""
        INSERT INTO mental_health_qa (question, answer, vector)
        VALUES (%s, %s, %s)
    """, (questions[i], answers[i], question_vectors[i].tolist()))
connection.close()

### Retrieval Model

In [None]:
# Function to query the database and augment responses
def query_and_augment(user_query):
    # Connect to the database
    connection = psycopg2.connect(NEON_PG_CONNECTION_URL)
    cursor = connection.cursor()

    cursor.execute("SELECT question, answer FROM mental_health_qa")
    records = cursor.fetchall()

    questions = [record[0] for record in records]
    answers = [record[1] for record in records]

    # Close the connection
    connection.close()
    
    # Implement BM25 to find the best match
    tokenized_questions = [q.split() for q in questions]
    bm25 = BM25Okapi(tokenized_questions)
    best_match_index = bm25.get_top_n(user_query.split(), questions, n=1)[0]
    # print the top 5 matches
    print(bm25.get_top_n(user_query.split(), questions, n=5))
    best_answer = answers[questions.index(best_match_index)]

    # Generate augmented answer using the pipeline
    # prompt = f"User: {user_query}\n\n\nLimit your knowledge to these related questions only:\n\nBot: {best_answer}\nBot (improved):"
    prompt = f"User: {user_query}\n\n\nImprove the response from the database:\n\nBot: {best_answer}\nBot (improved):"
    print(f"Prompt: {prompt}")
    client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
    response = client.chat.completions.create(
        messages=[
            {"role": "user", "content": "Answer the user_query based on the best answer.\n\n" + prompt},
            {"role": "assistant", "content": best_answer}
        ],
        model="gpt-4o",
        temperature=0.1,
    )
    return response.choices[0].message.content

# answer = query_and_augment("My grandson's step-mother sends him to school with a pink Barbie backpack as a form of punishment.")
answer = query_and_augment("I'm feeling depressed")
print(answer)
# Define Gradio Interface
# def chatbot_interface(user_query):
#     return query_and_augment(user_query)

# interface = gr.Interface(fn=chatbot_interface, inputs="text", outputs="text", title="Mental Health Chatbot")
# interface.launch()

# Fine-tuning Approach

In [73]:
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorWithPadding
from transformers.integrations import WandbCallback
from transformers.trainer_callback import EarlyStoppingCallback
import wandb
import gradio as gr
import torch
import os

# Set the environment variable to adjust memory allocation limits
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

# os.environ["WANDB_NOTEBOOK_NAME"] = "main.ipynb"

# Initialize Weights and Biases
wandb.init(project="mental-health-chatbot")

# Check if CUDA is available (optional, more relevant for GPU setup)
device = torch.device("cuda" if torch.cuda.is_available() else "mps")

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2084592145015106, max=1.0…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167688433650054, max=1.0…

### Data Preparation

In [69]:
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

# Ensure pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the dataset and limit to 10,000 samples
dataset = load_dataset('Estwld/empathetic_dialogues_llm', split='train[:10000]')
print(dataset[0])

# Tokenize the dataset
def tokenize_function(examples):
    concatenated_texts = [
        f"Emotion: {emotion} Situation: {situation} " + " ".join([conv['content'] for conv in conversations])
        for emotion, situation, conversations in zip(examples['emotion'], examples['situation'], examples['conversations'])
    ]
    tokenized_inputs = tokenizer(concatenated_texts, padding="max_length", truncation=True, max_length=512)
    tokenized_inputs["labels"] = tokenized_inputs["input_ids"].copy()
    return tokenized_inputs

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Split the dataset into training, validation, and test sets
train_val_dataset = tokenized_datasets.train_test_split(test_size=0.1)
train_dataset = train_val_dataset['train']
val_dataset = train_val_dataset['test']

train_val_split = train_dataset.train_test_split(test_size=0.1)
train_dataset = train_val_split['train']
val_dataset = train_val_split['test']
test_dataset = val_dataset.train_test_split(test_size=0.5)['test']

{'conv_id': 'hit:0_conv:1', 'situation': 'I remember going to the fireworks with my best friend. There was a lot of people, but it only felt like us in the world.', 'emotion': 'sentimental', 'conversations': [{'content': 'I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world.', 'role': 'user'}, {'content': 'Was this a friend you were in love with, or just a best friend?', 'role': 'assistant'}, {'content': 'This was a best friend. I miss her.', 'role': 'user'}, {'content': 'Where has she gone?', 'role': 'assistant'}, {'content': 'We no longer talk.', 'role': 'user'}, {'content': 'Oh was this something that happened because of an argument?', 'role': 'assistant'}]}


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

### Model Setup

In [74]:
# Define data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    load_best_model_at_end=True,
    logging_dir="./logs",
    logging_steps=10,
    bf16=True,  # Mixed precision training
    gradient_accumulation_steps=16,  # Gradient accumulation
    report_to="wandb",
    run_name="phi-2-finetuning"
)

# Define metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    accuracy = load_metric("accuracy").compute(predictions=predictions, references=labels)
    bleu = load_metric("bleu").compute(predictions=predictions, references=labels)
    perplexity = load_metric("perplexity").compute(predictions=predictions, references=labels)
    f1 = load_metric("f1").compute(predictions=predictions, references=labels)
    return {"accuracy": accuracy, "bleu": bleu, "perplexity": perplexity, "f1": f1}

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# Define prediction function
def predict(input_text):
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

### Training & Evaluation

In [75]:
# Clear the cache
torch.mps.empty_cache()

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate(test_dataset)
print(eval_results)

  0%|          | 0/759 [00:00<?, ?it/s]

{'loss': 6.699, 'grad_norm': 7.526485919952393, 'learning_rate': 4.9341238471673256e-05, 'epoch': 0.04}
{'loss': 5.2732, 'grad_norm': 14.982749938964844, 'learning_rate': 4.868247694334651e-05, 'epoch': 0.08}
{'loss': 3.0443, 'grad_norm': 13.900446891784668, 'learning_rate': 4.8023715415019764e-05, 'epoch': 0.12}
{'loss': 1.2564, 'grad_norm': 1.8573697805404663, 'learning_rate': 4.736495388669302e-05, 'epoch': 0.16}
{'loss': 0.7184, 'grad_norm': 0.38899242877960205, 'learning_rate': 4.670619235836627e-05, 'epoch': 0.2}
{'loss': 0.6355, 'grad_norm': 0.24211318790912628, 'learning_rate': 4.6047430830039526e-05, 'epoch': 0.24}
{'loss': 0.5985, 'grad_norm': 0.17091038823127747, 'learning_rate': 4.538866930171278e-05, 'epoch': 0.28}
{'loss': 0.5644, 'grad_norm': 0.15374329686164856, 'learning_rate': 4.472990777338604e-05, 'epoch': 0.32}


### Gradio

In [64]:
# Create Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="Phi-2 Fine-Tuned Model",
    description="Enter text to get predictions from the fine-tuned Phi-2 model."
)

# Launch the interface
iface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


