In [1]:
import json
import pandas as pd
import minsearch
import re
from openai import OpenAI
from tqdm.auto import tqdm
from elasticsearch import Elasticsearch

In [None]:
from dotenv import load_dotenv
import os

load_dotenv()
open_ai_api_key = os.getenv("OPEN_AI_API_KEY")

In [None]:
client = OpenAI(api_key=open_ai_api_key)

In [3]:
from datasets import load_dataset

dataset = load_dataset("Amod/mental_health_counseling_conversations")

In [4]:
# Access the 'train' split of the dataset
train_dataset = dataset['train']

# Convert to a Pandas DataFrame
df = pd.DataFrame(train_dataset)

# Define a cleaning function to apply to the context and response columns
def clean_text(text):
    # Convert text to lowercase
    text = text.lower()
    # Remove newline characters, non-breaking spaces, and extra spaces
    text = text.replace("\n", "").replace("\xa0", "")
    # Replace multiple spaces between sentences with a single space
    text = re.sub(r"\s+", " ", text)
    # Remove any special characters except comma, dot, parentheses, and space
    text = re.sub(r"[^a-z0-9.,() ]+", "", text)
    # Strip leading/trailing spaces
    text = text.strip()

    return text

# Apply the cleaning function to both the 'Context' and 'Response' columns
df["Context"] = df["Context"].apply(clean_text)
df["Response"] = df["Response"].apply(clean_text)

# Remove duplicates based on 'Context' and 'Response' columns
df = df.drop_duplicates(subset=["Context", "Response"]).reset_index(drop=True)

# Convert the cleaned DataFrame to JSON format
json_data = df.to_json(orient='records', lines=False)

# Load the JSON data
docs_raw = json.loads(json_data)

# Now your dataset is cleaned with no extra spaces or newlines between sentences

In [5]:
# Load the JSON data
docs_raw = json.loads(json_data)

# Initialize an empty list to hold the documents
documents = []

# Iterate over the loaded JSON
for doc in docs_raw:
    documents.append(doc)

In [6]:
index = minsearch.Index(
    text_fields=["Context", "Response"],
    keyword_fields=[]  # Pass an empty list for keyword fields
)

In [7]:
index.fit(documents)

<minsearch.Index at 0x7f04f44d33a0>

In [8]:
def search(query):
    boost = {"Context": 3.0, "Response": 1.0}

    search_results = index.search(
        query=query,
        boost_dict=boost,
        num_results=5
    )

    # If we've reached max attempts, return what we have
    return search_results

In [9]:
def build_prompt(query, search_re):
    prompt_template = """
You are mental health counseling assistant. Answer the PATIENT based on the CONTEXT from the Mental Health Counseling Conversations database.
Use only the facts from the CONTEXT when aswering the PATIENT.
Give answers with a human-like tone and not short.
If the CONTEXT does not contain the answer, output NONE.

PATIENT: {question}

CONTEXT:
{context}
""".strip()
    context = ""

    for doc in search_re:
        context = context + f"patient: {doc['Context']}\nresponse: {doc['Response']}\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [10]:
def llm(prompt):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [11]:
def rag(query):
    search_re = search(query)
    prompt = build_prompt(query, search_re)
    answer = llm(prompt)
    print(prompt)
    print("")
    print("-" * 40)
    print("")
    return answer

In [12]:
query = "I feel depressed"

In [13]:
print(rag(query))

You are mental health counseling assistant. Answer the PATIENT based on the CONTEXT from the Mental Health Counseling Conversations database.
Use only the facts from the CONTEXT when aswering the PATIENT.
Give answers with a human-like tone and not short.
If the CONTEXT does not contain the answer, output NONE.

PATIENT: I feel depressed

CONTEXT:
patient: im very depressed. how do i find someone to talk to
response: hang in there feeling depressed sucks but talking with someone can really help  so its awesome that youre already trying to figure out how to find that person. tons of good suggestions already but to add my two cents  you can find therapists in your area and read a little bit about them to find someone you like the sound of via this website, and also via www.psychologytoday.com if you have health insurance your insurer might be able to make recommendations as to therapists covered by your plan, or if you happen to be in school, the school itself should be able to connect y

In [14]:
"""
docker run -it \
    --rm \
    --name elasticsearch \
    -m 4GB \
    -p 9200:9200 \
    -p 9300:9300 \
    -e "discovery.type=single-node" \
    -e "xpack.security.enabled=false" \
    docker.elastic.co/elasticsearch/elasticsearch:8.4.3
"""

'\ndocker run -it     --rm     --name elasticsearch     -m 4GB     -p 9200:9200     -p 9300:9300     -e "discovery.type=single-node"     -e "xpack.security.enabled=false"     docker.elastic.co/elasticsearch/elasticsearch:8.4.3\n'

In [15]:
es_client = Elasticsearch('http://localhost:9200')

In [16]:
es_client.info()

ObjectApiResponse({'name': 'c18f6eb81659', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'Z7LbOWGtTGe4-fy6B858nQ', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [17]:
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "Context": {"type": "text"},
            "Response": {"type": "text"},
        }
    },
}

index_name = "conversations"

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'conversations'})

In [18]:
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/2076 [00:00<?, ?it/s]

In [19]:
print(query)

I feel depressed


In [20]:
search_query = {
    "size": 5,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["Context^3", "Response"],
                    "type": "most_fields",
                }
            }
        }
    },
}

In [21]:
response = es_client.search(index=index_name, body=search_query)
response["hits"]

{'total': {'value': 1919, 'relation': 'eq'},
 'max_score': 21.112282,
 'hits': [{'_index': 'conversations',
   '_id': 'hMm1w5IBlh6TdS5ZxOwJ',
   '_score': 21.112282,
   '_source': {'Context': 'im in my late teens and live with my dad. the only time i go out is for my college classes. sometimes when i see my friends i want to talk with them, but sometimes i wont want to talk to them for days or even weeks. sometimes i feel im not worth knowing or im never going to do anything right. are they right, am i depressed',
    'Response': 'if we were in my office together, i would offer that most likely you are feeling somewhat depressed. its most important for me to impress that you dont have to live like this... other helpful information have you experienced similar symptoms before anyone in your family been depressed before based on other physiological signs, like quality of sleep, appetite, energymotivation, i would present you with some treatment options, one of which would be to consult w

In [22]:
'''resp = es_client.indices.delete(
    index="conversations9",
)'''

'resp = es_client.indices.delete(\n    index="conversations9",\n)'

In [None]:
ollama_api_key = os.getenv("OPEN_AI_API_KEY")

In [None]:
client_ollama = OpenAI(base_url="http://localhost:11434/v1/", api_key=ollama_api_key)

In [24]:
def llm(prompt):
    response = client_ollama.chat.completions.create(
        model="llama3.2:1b", messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

In [25]:
llm("I feel depressed")

"I'm so sorry to hear that you're feeling depressed. It's completely understand that it can be a really tough and overwhelming experience.\n\nFirstly, please know that you're not alone, and there are people who care about you and want to help. I'm here to listen and offer support in any way I can.\n\nCan you tell me more about how you're feeling? What's been going on that might be contributing to your depression? Is it related to work, relationships, or something else?\n\nAlso, have you tried anything to talk about your feelings or seek help so far? Sometimes just sharing what you're going through with someone who can offer support and guidance can make a big difference.\n\nRemember, I'm here to listen without judgment, and we can explore this together at our own pace."

In [26]:
print(_)

I'm so sorry to hear that you're feeling depressed. It's completely understand that it can be a really tough and overwhelming experience.

Firstly, please know that you're not alone, and there are people who care about you and want to help. I'm here to listen and offer support in any way I can.

Can you tell me more about how you're feeling? What's been going on that might be contributing to your depression? Is it related to work, relationships, or something else?

Also, have you tried anything to talk about your feelings or seek help so far? Sometimes just sharing what you're going through with someone who can offer support and guidance can make a big difference.

Remember, I'm here to listen without judgment, and we can explore this together at our own pace.
