In [1]:
import pandas as pd
import re
from openai import OpenAI
from tqdm.auto import tqdm
from elasticsearch import Elasticsearch

In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
# Ollama ignores the API key, but the OpenAI client refuses to be constructed
# without one, so fall back to a placeholder when OLLAMA_API_KEY is unset or empty.
ollama_api_key = os.getenv("OLLAMA_API_KEY") or "ollama"

In [3]:
# OpenAI-compatible client that talks to the Ollama server on localhost.
client_ollama = OpenAI(
    api_key=ollama_api_key,
    base_url="http://localhost:11434/v1/",
)

In [4]:
from datasets import load_dataset

# Load the counseling conversations dataset by its repository identifier.
dataset = load_dataset(path="Amod/mental_health_counseling_conversations")

In [5]:
# Pick the "train" split out of the loaded dataset ...
train_dataset = dataset["train"]

# ... and materialize it as a pandas DataFrame for cleaning.
df = pd.DataFrame(data=train_dataset)


# Define a cleaning function to apply to the context and response columns
def clean_text(text):
    """Normalize a raw text field before indexing.

    The text is lowercased, every character other than ``a-z``, ``0-9``,
    ``.``, ``,``, ``(``, ``)`` and whitespace is dropped, and each run of
    whitespace (spaces, newlines, tabs, non-breaking spaces, ...) is collapsed
    into a single space so neighbouring words are never glued together.

    Args:
        text: Raw string to clean. ``None`` and NaN are treated as empty text.

    Returns:
        The cleaned string without leading or trailing spaces.
    """
    # Missing values (None, or a float NaN, which is the only value that is
    # not equal to itself) become an empty string instead of raising.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    # Strip special characters first but keep all whitespace, so that removing
    # a symbol surrounded by spaces ("a - b") cannot leave a double space.
    text = re.sub(r"[^a-z0-9.,()\s]+", "", text)
    # For str patterns \s also matches "\xa0"; turning newlines and
    # non-breaking spaces into a space keeps the words on either side apart.
    text = re.sub(r"\s+", " ", text)

    return text.strip()


# Clean both text columns, drop rows whose cleaned (Context, Response) pair
# repeats an earlier row, and renumber the index from zero.
df = (
    df.assign(
        Context=df["Context"].apply(clean_text),
        Response=df["Response"].apply(clean_text),
    )
    .drop_duplicates(subset=["Context", "Response"])
    .reset_index(drop=True)
)

# One dictionary per remaining row, keyed by column name.
documents = df.to_dict(orient="records")

In [6]:
# Client for the Elasticsearch node listening on localhost:9200.
es_client = Elasticsearch(hosts="http://localhost:9200")

# Last expression of the cell: displays the node's info response.
es_client.info()

ObjectApiResponse({'name': '4667e37c0da8', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'hNUOD6s_TvWzBpt-q9kmog', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [7]:
# Name of the Elasticsearch index that the cells below delete, create, fill and search.
index_name = "conversations"

In [8]:
# List all indices on the cluster together with their aliases.

es_client.indices.get_alias()

ObjectApiResponse({'conversations2': {'aliases': {}}, 'conversations': {'aliases': {}}})

In [9]:
# Drop any previous copy of the index so it can be rebuilt from scratch.
# ignore_unavailable=True keeps this cell from raising NotFoundError on a
# fresh cluster (or after a kernel restart) where the index does not exist yet.
resp = es_client.indices.delete(index=index_name, ignore_unavailable=True)

In [10]:
# Single-shard, zero-replica index in which both conversation fields are
# mapped as full-text ("text") fields.
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            field: {"type": "text"} for field in ("Context", "Response")
        }
    },
}

# Last expression of the cell: displays the create-index response.
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'conversations'})

In [11]:
# Display the index definition (mappings and settings) as stored by the server.
es_client.indices.get(index=index_name)

ObjectApiResponse({'conversations': {'aliases': {}, 'mappings': {'properties': {'Context': {'type': 'text'}, 'Response': {'type': 'text'}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'conversations', 'creation_date': '1739034636968', 'number_of_replicas': '0', 'uuid': 'kgChYQshQRuMhuLo-64Swg', 'version': {'created': '8040399'}}}}})

In [12]:
# Index the cleaned records one at a time; no explicit id is passed.
for doc in tqdm(documents, desc="Indexing documents"):
    es_client.index(index=index_name, document=doc)

# Indexed documents only become searchable after a refresh. Force one here so
# a search issued right after this cell (e.g. during "Run All") sees every
# document instead of racing the periodic background refresh.
es_client.indices.refresh(index=index_name)

  0%|          | 0/2076 [00:00<?, ?it/s]

In [13]:
def llm_ollama(prompt, model="llama3.2"):
    """Send a prompt to the Ollama server and return the reply text.

    Args:
        prompt: Text sent as the content of a single ``user`` chat message.
        model: Name of the model to query. Defaults to ``"llama3.2"``, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        The message content of the first choice in the chat completion.
    """
    response = client_ollama.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [14]:
def build_prompt_ollama(query, search_re):
    """Assemble the prompt text for one patient query and its retrieved hits.

    Args:
        query: The patient's message.
        search_re: Iterable of search hits (dicts). For each hit the
            ``Context`` and ``Response`` values under ``_source`` are used;
            a missing value is rendered as ``N/A``.

    Returns:
        One string made of a system section (fixed counseling instructions)
        and a user section (the query followed by the retrieved examples),
        each wrapped in ``<|start_header_id|>`` / ``<|eot_id|>`` markers.

    NOTE(review): llm_ollama sends this string as a plain ``user`` message;
    confirm whether the server applies its own chat template on top of the
    markers embedded here.
    """
    # One "Database Patient / Database Response" pair per retrieved hit.
    retrieved_pairs = []
    for hit in search_re:
        source = hit.get("_source", {})
        retrieved_pairs.append(
            f"Database Patient: {source.get('Context', 'N/A')}\n"
            f"Database Response: {source.get('Response', 'N/A')}"
        )
    context = "\n".join(retrieved_pairs)

    # Fixed system-level instructions for the assistant.
    instructions = """
You are a highly empathetic and supportive mental health counseling assistant. Your task is to assist the PATIENT by providing detailed, thoughtful, and compassionate responses solely based on the CONTEXT from the Mental Health Counseling Conversations database.

### Guidelines:
1. Always use information explicitly found in the CONTEXT when responding to the PATIENT.
   - Even if the CONTEXT isn't directly aligned, synthesize any insights that might be relevant or helpful to the patient's situation.
2. If the CONTEXT contains multiple examples, leverage as many as necessary to craft a comprehensive, empathetic response.
3. Write your response as a single, cohesive paragraph in the first-person singular perspective (e.g., "I understand that...").
4. Maintain a supportive and conversational tone — be understanding, empathetic, and encouraging.
5. Provide thoughtful, comprehensive answers. Avoid short or superficial responses.  
6. Use the response "I'm sorry, I don't have enough information to answer that right now." ONLY if the CONTEXT truly lacks sufficient information.
   - If the CONTEXT contains any helpful insights, you must provide a response.

### Example Response Format:
Response: I understand that suicidal thoughts can be incredibly overwhelming, and I'm truly sorry you're going through this. Based on similar situations in the database, seeking support from a trusted mental health professional can be very important during these difficult moments. Talking to someone you trust, such as a friend or family member, might also help reduce some of the pressure you're feeling. You don't have to face this alone, and there are people who care and can help you.
""".strip()

    # Query-specific part; stripping also removes trailing whitespace that the
    # last retrieved response (or an empty context) would leave behind.
    patient_section = (
        f"PATIENT: {query}\n\n"
        f"CONTEXT from Mental Health Counseling Conversations database:\n{context}"
    ).strip()

    return (
        f"<|start_header_id|>system<|end_header_id|>\n{instructions}\n<|eot_id|>\n"
        f"<|start_header_id|>user<|end_header_id|>\n{patient_section}\n<|eot_id|>"
    )

In [15]:
# rag_ollama function with elasticsearch


def rag_ollama(query):
    """Answer a patient query with retrieval from Elasticsearch plus the LLM.

    The query is matched against the ``Context`` (boosted with ``^2``) and
    ``Response`` fields of the index, the top five hits are turned into a
    prompt, and the prompt is sent to the model. After the model has replied,
    the prompt and a dashed separator are printed.

    Args:
        query: The patient's message.

    Returns:
        The model's answer as returned by ``llm_ollama``.
    """
    multi_match = {
        "query": query,
        "fields": ["Context^2", "Response"],
        "type": "most_fields",
    }
    search_query = {
        "size": 5,
        "query": {"bool": {"must": {"multi_match": multi_match}}},
    }

    hits = es_client.search(index=index_name, body=search_query)["hits"]["hits"]

    prompt = build_prompt_ollama(query, hits)
    answer = llm_ollama(prompt)

    # Echo the exact prompt, then a blank line, a 40-dash rule and another
    # blank line, so the caller's output is visually separated from it.
    print(prompt, "", "-" * 40, "", sep="\n")
    return answer

In [16]:
# Example end-to-end run: retrieve similar conversations and print the answer.

query = (
    "I moved to a new city and I'm feeling really lonely. "
    "I don't know how to make friends here."
)
print(rag_ollama(query))

<|start_header_id|>system<|end_header_id|>
You are a highly empathetic and supportive mental health counseling assistant. Your task is to assist the PATIENT by providing detailed, thoughtful, and compassionate responses solely based on the CONTEXT from the Mental Health Counseling Conversations database.

### Guidelines:
1. Always use information explicitly found in the CONTEXT when responding to the PATIENT.
   - Even if the CONTEXT isn't directly aligned, synthesize any insights that might be relevant or helpful to the patient's situation.
2. If the CONTEXT contains multiple examples, leverage as many as necessary to craft a comprehensive, empathetic response.
3. Write your response as a single, cohesive paragraph in the first-person singular perspective (e.g., "I understand that...").
4. Maintain a supportive and conversational tone — be understanding, empathetic, and encouraging.
5. Provide thoughtful, comprehensive answers. Avoid short or superficial responses.  
6. Use the respon