In [1]:
import pandas as pd
from openai import OpenAI
from tqdm.auto import tqdm
from elasticsearch import Elasticsearch

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()
ollama_api_key = os.getenv("OLLAMA_API_KEY")
client_ollama = OpenAI(base_url="http://localhost:11434/v1/", api_key=ollama_api_key)

In [3]:
from datasets import load_dataset

dataset = load_dataset("Amod/mental_health_counseling_conversations")

In [4]:
# Access the 'train' split of the dataset
train_dataset = dataset["train"]

# Convert to a Pandas DataFrame
df = pd.DataFrame(train_dataset)

# Remove duplicates based on 'Context' and 'Response' columns
df = df.drop_duplicates(subset=["Context", "Response"]).reset_index(drop=True)

# Convert the DataFrame to a list of dictionaries
documents = df.to_dict(orient="records")

In [5]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")

2025-02-10 15:00:13.581892: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-10 15:00:13.581978: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-10 15:00:13.582047: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-10 15:00:13.590118: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
es_client = Elasticsearch("http://localhost:9200")

es_client.info()

ObjectApiResponse({'name': 'a6093b0b66f0', 'cluster_name': 'docker-cluster', 'cluster_uuid': '-sAtmUKZTz6ltxy-y4uXGw', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [7]:
# To create the dense vector using the pre-trained Sentence Transformer model
operations = []

for doc in tqdm(documents):
    context = doc["Context"]
    response = doc["Response"]
    context_embedding = model.encode(context)
    response_embedding = model.encode(response)

    operations.append(
        {
            "Context": context,
            "Response": response,
            "Context_embedding": context_embedding.tolist(),
            "Response_embedding": response_embedding.tolist()
        }
    )

  0%|          | 0/2752 [00:00<?, ?it/s]

In [8]:
es_client.indices.delete(index="conversations", ignore_unavailable=True)

ObjectApiResponse({'acknowledged': True})

In [9]:
index_name = "conversations"

if es_client.indices.exists(index=index_name):
    print(f"Index '{index_name}' already exists.")
else:
    # Define the index mapping
    index_mapping = {
        "settings": {"number_of_shards": 1, "number_of_replicas": 0},
        "mappings": {
            "properties": {
                "Context": {"type": "text"},
                "Response": {"type": "text"},
                "Context_embedding": {"type": "dense_vector", "dims": 768, "index": True, "similarity": "cosine"},
                "Cesponse_embedding": {"type": "dense_vector", "dims": 768, "index": True, "similarity": "cosine"}
            }
        },
    }

    # Create the index with the mapping
    es_client.indices.create(index=index_name, body=index_mapping)

In [10]:
for doc in tqdm(operations):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/2752 [00:00<?, ?it/s]

In [11]:
def llm_ollama(prompt):
    response = client_ollama.chat.completions.create(
        model="llama3.2", messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

In [22]:
def build_prompt_ollama(query, search_re):
    # Define the system-level instructions
    system_prompt = """
You are a highly empathetic and supportive mental health counseling assistant. Your task is to assist the PATIENT by providing detailed, thoughtful, and compassionate responses solely based on the CONTEXT from the Mental Health Counseling Conversations database.

### Guidelines:
1. Always use information explicitly found in the CONTEXT when responding to the PATIENT.
   - Even if the CONTEXT isn't directly aligned, synthesize any insights that might be relevant or helpful to the patient's situation.
2. If the CONTEXT contains multiple examples, leverage as many as necessary to craft a comprehensive, empathetic response.
3. Write your response as a single, cohesive paragraph in the first-person singular perspective (e.g., "I understand that...") and answer like a human being.
4. Maintain a supportive and conversational tone — be understanding, empathetic, and encouraging.
5. Provide thoughtful, comprehensive answers. Avoid short or superficial responses.  
6. Use the response "I'm sorry, I don't have enough information to answer that right now." ONLY if the CONTEXT truly lacks sufficient information.
   - If the CONTEXT contains any helpful insights, you must provide a response.

### Example Response Format:
Response: I understand that suicidal thoughts can be incredibly overwhelming, and I'm truly sorry you're going through this. Based on similar situations in the database, seeking support from a trusted mental health professional can be very important during these difficult moments. Talking to someone you trust, such as a friend or family member, might also help reduce some of the pressure you're feeling. You don't have to face this alone, and there are people who care and can help you.
    """.strip()

    # Build the context content dynamically
    context = "\n".join(
        f"Database Patient: {doc.get('_source', {}).get('Context', 'N/A')}\n"
        f"Database Response: {doc.get('_source', {}).get('Response', 'N/A')}"
        for doc in search_re["hits"]["hits"]
    )

    # Define user-specific content
    user_prompt = f"""
PATIENT: {query}

CONTEXT from Mental Health Counseling Conversations database:
{context}
""".strip()

    return f"<|start_header_id|>system<|end_header_id|>\n{system_prompt}\n<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n{user_prompt}\n<|eot_id|>"

In [26]:
# rag_ollama function with elasticsearch

def rag_ollama(query):

    vector_search_term = model.encode(query)

    search_query = {
        "field": "Context_embedding",
        "query_vector": vector_search_term,
        "k": 5,
        "num_candidates": 10000,
    }

    search_re = es_client.search(
        index=index_name, knn=search_query, source=["Context", "Response"]
    )
    search_re["hits"]["hits"]

    prompt = build_prompt_ollama(query, search_re)
    answer = llm_ollama(prompt)
    print(prompt)
    print("")
    print("-" * 40)
    print("")
    return answer

In [27]:
# To run the rag_ollama function

query = "I moved to a new city and I'm feeling really lonely. I don't know how to make friends here."
print(rag_ollama(query))

InternalServerError: Error code: 500 - {'error': {'message': 'model requires more system memory (3.4 GiB) than is available (2.7 GiB)', 'type': 'api_error', 'param': None, 'code': None}}