In [None]:
import pandas as pd
from openai import OpenAI
from tqdm.auto import tqdm
from elasticsearch import Elasticsearch

In [2]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment so the API key
# does not have to be written into the notebook itself.
load_dotenv()

# The key is looked up under OPEN_AI_API_KEY; the lookup yields None when that
# variable is not set.
open_ai_api_key = os.environ.get("OPEN_AI_API_KEY")
client = OpenAI(api_key=open_ai_api_key)

In [3]:
from datasets import load_dataset

# Load the counseling-conversations dataset by its identifier; the 'train'
# split of this object is what the rest of the notebook indexes.
dataset = load_dataset("Amod/mental_health_counseling_conversations")

In [12]:
# Only the 'train' split is used.
train_dataset = dataset["train"]

# Build the frame in one chain: materialise the split as a DataFrame, keep the
# first occurrence of every (Context, Response) pair, and renumber the rows so
# the index is contiguous again after the drop.
df = (
    pd.DataFrame(train_dataset)
    .drop_duplicates(subset=["Context", "Response"])
    .reset_index(drop=True)
)

# One dict per row, ready to be sent to Elasticsearch as individual documents.
documents = df.to_dict(orient="records")

In [13]:
# Client for the Elasticsearch node at http://localhost:9200.
es_client = Elasticsearch("http://localhost:9200")

# Last expression of the cell, so the value it returns is shown as the cell
# output; useful as a quick check that the node is reachable.
es_client.info()

ObjectApiResponse({'name': 'a6093b0b66f0', 'cluster_name': 'docker-cluster', 'cluster_uuid': '-sAtmUKZTz6ltxy-y4uXGw', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [14]:
index_name = "conversations"

# Create the index only when it is missing; an existing index is left as it is
# and just reported.
if not es_client.indices.exists(index=index_name):
    # One shard, no replicas, and both conversation fields mapped as full-text.
    index_mapping = dict(
        settings={"number_of_shards": 1, "number_of_replicas": 0},
        mappings={
            "properties": {
                field: {"type": "text"} for field in ("Context", "Response")
            }
        },
    )
    es_client.indices.create(index=index_name, body=index_mapping)
else:
    print(f"Index '{index_name}' already exists.")

Index 'conversations' already exists.


In [15]:
# Index every document under a deterministic _id (its position in `documents`).
# Without an explicit id Elasticsearch generates a fresh one on every call, so
# re-running this cell against an existing index would append a second copy of
# the whole corpus; with a stable id a re-run overwrites the same documents.
# NOTE: documents stored by earlier runs under auto-generated ids are not
# removed by this loop -- delete and recreate the index to get rid of them.
for doc_id, doc in tqdm(enumerate(documents), total=len(documents)):
    es_client.index(index=index_name, id=str(doc_id), document=doc)

  0%|          | 0/2752 [00:00<?, ?it/s]

In [8]:
def build_prompt_api(query, search_re):
    """Assemble the counseling prompt that is sent to the LLM.

    Args:
        query: The patient's question; substituted into the PATIENT slot.
        search_re: Iterable of search hits (dicts). Each hit's ``_source``
            mapping supplies the ``Context`` and ``Response`` text; a missing
            ``_source`` or a missing field is rendered as ``N/A``.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.
    """
    template = """
You are a highly empathetic and supportive mental health counseling assistant. Your task is to assist the PATIENT by providing detailed, thoughtful, and compassionate responses solely based on the CONTEXT from the Mental Health Counseling Conversations database.

### Guidelines:
1. **Always use information explicitly found in the CONTEXT when responding to the PATIENT.**  
   - Even if the CONTEXT isn't directly aligned, synthesize any insights that might be relevant or helpful to the patient's situation.  
2. **If the CONTEXT contains multiple examples, leverage as many as necessary** to craft a comprehensive, empathetic response.  
3. Write your response as a **single, cohesive paragraph** in the **first-person singular** perspective (e.g., "I understand that...") and answer like a human being.
4. Maintain a supportive and conversational tone — be understanding, empathetic, and encouraging.
5. Provide thoughtful, comprehensive answers. Avoid short or superficial responses.  
6. **Use the response "I'm sorry, I don't have enough information to answer that right now." ONLY if the CONTEXT truly lacks sufficient information.**
   - If the CONTEXT contains any helpful insights, you must provide a response.

CONTEXT from Mental Health Counseling Conversations database:
{context}

### Example Response Format:
Response: <Your detailed answer here>

PATIENT: {question}
""".strip()

    # One "patient / response" pair per hit, each followed by a blank line.
    sources = (hit.get("_source", {}) for hit in search_re)
    context_block = "".join(
        f"Database Patient: {src.get('Context', 'N/A')}\n"
        f"Database Response: {src.get('Response', 'N/A')}\n\n"
        for src in sources
    )

    return template.format(context=context_block, question=query).strip()

In [9]:
def llm_api(prompt, model="gpt-4o-mini"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the content of one ``user`` message.
        model: Name of the chat model to query. Defaults to ``"gpt-4o-mini"``,
            the value that used to be hard-coded, so existing one-argument
            calls behave exactly as before.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    # Only the first choice of the completion is used.
    return response.choices[0].message.content

In [10]:
def rag_api(query, size=5, verbose=True):
    """Answer ``query`` with retrieval-augmented generation.

    Searches the Elasticsearch index named by ``index_name`` for conversations
    matching ``query``, builds a prompt from the hits with
    ``build_prompt_api`` and returns the reply produced by ``llm_api``.

    Args:
        query: The patient's question; used both as the search text and as
            the question placed in the prompt.
        size: Maximum number of search hits to retrieve and place in the
            prompt. Defaults to 5, the value that used to be hard-coded.
        verbose: When true (the default, matching the previous unconditional
            behavior) the generated prompt and a separator line are printed
            after the LLM call returns.

    Returns:
        The answer text returned by ``llm_api``.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # "Context^2" boosts matches in the stored patient
                        # message over matches in the stored response.
                        "fields": ["Context^2", "Response"],
                        "type": "most_fields",
                    }
                }
            }
        },
    }

    response = es_client.search(index=index_name, body=search_query)
    search_re = response["hits"]["hits"]
    prompt = build_prompt_api(query, search_re)
    answer = llm_api(prompt)

    if verbose:
        # Debug aid: show exactly what was sent to the model.
        print(prompt)
        print("")
        print("-" * 40)
        print("")

    return answer

In [11]:
# Example question used to exercise the pipeline end to end: rag_api prints the
# prompt it built, and the answer it returns is printed after that.
query = "I moved to a new city and I'm feeling really lonely. I don't know how to make friends here."
print(rag_api(query))

You are a highly empathetic and supportive mental health counseling assistant. Your task is to assist the PATIENT by providing detailed, thoughtful, and compassionate responses solely based on the CONTEXT from the Mental Health Counseling Conversations database.

### Guidelines:
1. **Always use information explicitly found in the CONTEXT when responding to the PATIENT.**  
   - Even if the CONTEXT isn't directly aligned, synthesize any insights that might be relevant or helpful to the patient's situation.  
2. **If the CONTEXT contains multiple examples, leverage as many as necessary** to craft a comprehensive, empathetic response.  
3. Write your response as a **single, cohesive paragraph** in the **first-person singular** perspective (e.g., "I understand that...") and answer like a human being.
4. Maintain a supportive and conversational tone — be understanding, empathetic, and encouraging.
5. Provide thoughtful, comprehensive answers. Avoid short or superficial responses.  
6. **Use