In [25]:
import os

import pandas as pd
from dotenv import load_dotenv
import psycopg2
from psycopg2.extras import execute_values
from pyarrow.compute import top_k_unstable
from sentence_transformers import SentenceTransformer
from huggingface_hub import login
import json
import pprint

from RAG.retrieval_agent import RetrievalAgent

In [6]:
# Rewrite the knowledge base on disk with its STM entries ordered by timestamp.
KNOWLEDGE_BASE_PATH = "../test_cases/expanded_knowledge_base.json"

# Decode explicitly as UTF-8: the entries contain non-ASCII characters (e.g. the
# curly apostrophe in "it’s"), and the platform default encoding is not UTF-8
# everywhere. A wrong decode here would be written back and saved permanently.
with open(KNOWLEDGE_BASE_PATH, "r", encoding="utf-8") as f:
    knowledge_base = json.load(f)

stm_data = knowledge_base["STM_data"]

# Timestamps are compared as plain strings. The entries printed below use
# ISO-8601 "...Z" values, which order chronologically as text — this assumes
# every entry shares that format.
sorted_stm_data = sorted(stm_data, key=lambda x: x["timestamp"])
print(sorted_stm_data[:5])

knowledge_base['STM_data'] = sorted_stm_data

# Same explicit encoding on the way out so the round trip is symmetric.
with open(KNOWLEDGE_BASE_PATH, "w", encoding="utf-8") as file:
    json.dump(knowledge_base, file, indent=4)


[{'document_id': 'STM_035', 'timestamp': '2025-09-19T10:24:46Z', 'content': 'I joined the online yoga session today morning.'}, {'document_id': 'STM_032', 'timestamp': '2025-09-19T13:42:51Z', 'content': 'I want to try painting this weekend; it’s been a while.'}, {'document_id': 'STM_038', 'timestamp': '2025-09-19T15:59:24Z', 'content': 'I accidentally overcooked the rice today.'}, {'document_id': 'STM_007', 'timestamp': '2025-09-19T19:29:01Z', 'content': 'I forgot where I placed my glasses again this morning.'}, {'document_id': 'STM_034', 'timestamp': '2025-09-19T19:34:55Z', 'content': 'I made toast and eggs for breakfast today.'}]


In [10]:
# Fixed elderly profile id used for every retrieval call in this notebook.
test_elderly_id = "87654321-4321-4321-4321-019876543210"
# Constructing the agent is followed by the embedding-model load logged below,
# so this is presumably the expensive step — build it once and reuse it.
retrieval_agent = RetrievalAgent(test_elderly_id)

INFO:root:Loading embedding model: google/embeddinggemma-300m
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: google/embeddinggemma-300m
INFO:sentence_transformers.SentenceTransformer:14 prompts are loaded, with the keys: ['query', 'document', 'BitextMining', 'Clustering', 'Classification', 'InstructionRetrieval', 'MultilabelClassification', 'PairClassification', 'Reranking', 'Retrieval', 'Retrieval-query', 'Retrieval-document', 'STS', 'Summarization']
INFO:root:embedding model loaded successfully: google/embeddinggemma-300m


In [5]:
# Spot-check short-term-memory retrieval with a single free-text question.
pprint.pprint(
    retrieval_agent.retrieve_similar_stm(query="What should I have for dinner later?")
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[{'content': 'I want to eat salad for lunch today.',
  'created_at': '2025-09-27T12:00:00',
  'similarity': 0.8104},
 {'content': 'I had kaya toast for breakfast today.',
  'created_at': '2025-09-18T09:00:00',
  'similarity': 0.8101},
 {'content': 'I want to go shopping later in the afternoon.',
  'created_at': '2025-09-29T09:30:00',
  'similarity': 0.8063},
 {'content': 'I forgot to take my medicine yesterday evening.',
  'created_at': '2025-09-22T08:00:00',
  'similarity': 0.792},
 {'content': 'Letâ€™s watch TV after dinner.',
  'created_at': '2025-09-25T19:30:00',
  'similarity': 0.7855}]


In [8]:
# Spot-check long-term-memory retrieval with a family-relationship question.
pprint.pprint(
    retrieval_agent.retrieve_similar_ltm(query="Who is my daughter-in-law?")
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[{'category': 'family',
  'key': 'Ganesh Menon Dhoby',
  'similarity': 0.801,
  'value': 'eldest son'},
 {'category': 'lifestyle',
  'key': 'hobby',
  'similarity': 0.7969,
  'value': 'gardening'},
 {'category': 'family',
  'key': 'Daughter-in-law',
  'similarity': 0.7878,
  'value': 'Meena Singh'},
 {'category': 'lifestyle',
  'key': 'friend',
  'similarity': 0.7873,
  'value': 'Mr Lim'},
 {'category': 'lifestyle',
  'key': 'likes',
  'similarity': 0.7759,
  'value': 'jazz music'}]


In [9]:
# Spot-check long-term-memory retrieval with a lifestyle question.
pprint.pprint(
    retrieval_agent.retrieve_similar_ltm(query="What are my hobbies")
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[{'category': 'lifestyle',
  'key': 'hobby',
  'similarity': 0.8986,
  'value': 'gardening'},
 {'category': 'lifestyle',
  'key': 'likes',
  'similarity': 0.8766,
  'value': 'jazz music'},
 {'category': 'lifestyle',
  'key': 'friend',
  'similarity': 0.8586,
  'value': 'Mr Lim'},
 {'category': 'family',
  'key': 'Daughter-in-law',
  'similarity': 0.8537,
  'value': 'Meena Singh'},
 {'category': 'family',
  'key': 'Ganesh Menon Dhoby',
  'similarity': 0.8478,
  'value': 'eldest son'}]


### Test with the augmented_agentic_rag_test_cases.json file

In [11]:
# Same profile id and agent construction as the earlier cell in this notebook.
# NOTE(review): this builds a second agent, and the INFO log below shows the
# embedding model being loaded again — consider reusing the existing agent.
test_elderly_id = "87654321-4321-4321-4321-019876543210"
retrieval_agent = RetrievalAgent(test_elderly_id)

INFO:root:Loading embedding model: google/embeddinggemma-300m
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: google/embeddinggemma-300m
INFO:sentence_transformers.SentenceTransformer:14 prompts are loaded, with the keys: ['query', 'document', 'BitextMining', 'Clustering', 'Classification', 'InstructionRetrieval', 'MultilabelClassification', 'PairClassification', 'Reranking', 'Retrieval', 'Retrieval-query', 'Retrieval-document', 'STS', 'Summarization']
INFO:root:embedding model loaded successfully: google/embeddinggemma-300m


In [23]:
# Run the first few test cases through the retrieval agent and collect, for
# every (query, memory type) pair, what was expected and what came back.
TEST_CASES_PATH = "../test_cases/augmented_agentic_rag_test_cases.json"
MAX_TEST_CASES = 5  # only a small sample is exercised here

# Explicit UTF-8: the test cases contain non-ASCII text (e.g. "I’d").
with open(TEST_CASES_PATH, "r", encoding="utf-8") as f:
    test_cases = json.load(f)

# Maps each key of a test case's "expected_retrieval" to the agent method
# that queries that memory store.
retrievers = {
    "ltm": retrieval_agent.retrieve_similar_ltm,
    "stm": retrieval_agent.retrieve_similar_stm,
    "hcm": retrieval_agent.retrieve_similar_health,
}

# Three parallel lists, later combined into one DataFrame. They must always
# have equal length, so all three are appended to together — one entry per
# retrieval actually performed, not one per test case.
user_query_list = []
results_lst = []
expected_retrievals_lst = []
for idx, test_case in enumerate(test_cases[:MAX_TEST_CASES]):
    print(f"Test Case {idx+1}: {test_case['id']}")
    # Only the first conversation turn is used as the query.
    user_query = test_case["conversation"][0]["user_query"]
    print("Query:", user_query)
    expected_retrievals = test_case.get("expected_retrieval", {})
    print(expected_retrievals.items())
    for key, value in expected_retrievals.items():
        retrieve = retrievers.get(key)
        if retrieve is None:
            # Unknown memory type: report it and record nothing.
            print(f"Unknown retrieval type: {key}")
            continue
        print(f"Retrieving from {key}...")
        # Retrieve before appending, so a failing call cannot leave the
        # three lists with different lengths.
        retrieved = retrieve(user_query)
        user_query_list.append(user_query)
        expected_retrievals_lst.append(value)
        results_lst.append(retrieved)


Test Case 1: LTM_1
Query: Who is your daughter-in-law?
dict_items([('ltm', [{'document_id': 'LTM001', 'key': 'Daughter-in-law', 'value': 'Meena Singh'}])])
Retrieving from ltm...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Test Case 2: LTM_2
Query: Who usually joins me for birthday celebrations?
dict_items([('ltm', [{'document_id': 'LTM005', 'key': 'family_relationship', 'value': 'Children and grandchildren usually join birthday celebrations'}])])
Retrieving from ltm...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Test Case 3: HCM_1
Query: What medications do I take every morning?
dict_items([('hcm', [{'document_id': 'HCM001', 'medication_name': 'Metformin', 'dosage': '500mg'}, {'document_id': 'HCM002', 'medication_name': 'Amlodipine', 'dosage': '10mg'}])])
Retrieving from hcm...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Test Case 4: STM_1
Query: What should I have for dinner later?
dict_items([('stm', [{'document_id': 'STM014', 'timestamp': '2025-10-01T18:30:00Z', 'content': 'I feel like eating fish head curry tonight.'}])])
Retrieving from stm...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Test Case 5: STM_2
Query: What was I supposed to do in the morning?
dict_items([('stm', [{'document_id': 'STM_011', 'timestamp': '2025-09-28T20:00:00Z', 'content': 'I’d like to call my daughter Meena tomorrow morning.'}])])
Retrieving from stm...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [26]:
# Put each query side by side with its expected and actual retrievals.
all_results = pd.DataFrame(
    {
        "user_query": user_query_list,
        "expected_retrievals": expected_retrievals_lst,
        "results": results_lst,
    }
)

display(all_results)

Unnamed: 0,user_query,expected_retrievals,results
0,Who is your daughter-in-law?,"[{'document_id': 'LTM001', 'key': 'Daughter-in...","[{'category': 'family', 'key': 'friend', 'valu..."
1,Who usually joins me for birthday celebrations?,"[{'document_id': 'LTM005', 'key': 'family_rela...","[{'category': 'family', 'key': 'family_relatio..."
2,What medications do I take every morning?,"[{'document_id': 'HCM001', 'medication_name': ...","[{'record_type': 'medication', 'description': ..."
3,What should I have for dinner later?,"[{'document_id': 'STM014', 'timestamp': '2025-...",[{'content': 'I want to eat salad for lunch to...
4,What was I supposed to do in the morning?,"[{'document_id': 'STM_011', 'timestamp': '2025...",[{'content': 'I had kaya toast for breakfast t...
