In [21]:
import json
import os
from dotenv import load_dotenv
from openai import OpenAI
from pprint import pprint
import pandas as pd
import re

### Setup OpenAI API

In [3]:
# Load variables from a local .env file (if one is found) into the process
# environment, then build the OpenAI client from the configured API key.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai_client = OpenAI(api_key=OPENAI_API_KEY)

In [10]:
# Load the seed knowledge base and keep a handful of entries per memory type.
# The samples and the test profile are interpolated into the generation
# prompts, so they are previewed here before the prompts are built.
# The encoding is pinned so the read does not depend on the platform default.
with open("../test_cases/knowledge_base.json", "r", encoding="utf-8") as f:
    knowledge_base = json.load(f)

stm_data = knowledge_base["STM_data"]
ltm_data = knowledge_base["LTM_data"]
hcm_data = knowledge_base["HCM_data"]
test_profile = knowledge_base["test_elderly_profile"]

# First five entries of each list serve as few-shot examples.
sample_stm = stm_data[:5]
sample_ltm = ltm_data[:5]
sample_hcm = hcm_data[:5]

# Each heading names the sample actually printed beneath it
# (previously all three headings read "Sample LTM").
print("\n Sample STM \n")
pprint(sample_stm)
print("\n Sample LTM \n")
pprint(sample_ltm)
print("\n Sample HCM \n")
pprint(sample_hcm)
print("\n Test Profile \n")
pprint(test_profile)

# System prompt for generating long-term (LTM) and healthcare (HCM) memories.
# This is an f-string: test_profile, sample_ltm and sample_hcm are interpolated
# when this statement runs, so re-run it after changing any of them.
# The prompt lists exactly two categories, and the category count stated in the
# prompt now matches that list (it previously said "three").
LTM_HCM_KB_generation_prompt = f"""
You are an expert data generator for generating synthetic memory data for an elderly care conversational RAG System. The elderly person you are emulating is {test_profile}
The memory data should be in JSON format and divided into two categories:
1. **Long-Term Memory (LTM):** stable facts about the elderly person’s life, routines, relationships, hobbies, and preferences.
2. **Healthcare Memory (HCM):** health conditions, medications, or medical procedures.

Each memory entry should resemble something an elderly person might recall naturally and mention conversationally with a companion
Include timestamps and document IDs following existing naming conventions (e.g. `LTM_001`, `HCM_001`).

Try and include a variety of topics relevant to an elderly individuals life in Singapore, such as:
- Shopping experiences -- Focus on elderly people shopping in local markets, hawker centres, wet markets and neighborhood shops. Include interactions with shopkeepers, hawkers, asking about prices or items, commenting on quality or sharing experiences.
- Public transport experiences -- Focus on elderly people using public transport in Singapore, such as buses and MRT. Include interactions or questions, statements or comments about routes, fares schedules, tickeing or interaction with other commuters
- Health and hobbies -- Focus on health exercise and hobbies that are suitable for elderly in Singapore such as tai chi, walking gardening, visiting community centres, senior activity centres, parks and nature reserves. Include interactions or questions, statements or comments about health routines, exercise, hobbies or social activities
- Personal wellness -- Focus on daily wellness and daily wellbeing questions or statements that an elderly might have, such as about diet, sleep, relaxation, mindfulness or mental health. Include interactions or questions, statements or comments about wellness routines, diet, sleep or mental health
- Food and meals -- Focus on food and meals that are relevant to elderly in Singapore, such as local cuisine, hawker food, traditional dishes, cooking or dietary preferences. Include interactions or questions, statements or comments about food, meals, recipes, cooking habits or dining experiences
- Social interactions -- Focus on social interactions and conversations about family, grandchildren, children or relatives. Include questions, statements or stories about family, caregiving or family events.
- Community events -- Focus on participation in local community events, gatherings or volunteer activities. Include interactions or questions, statements or comments about community events, social clubs or volunteering or joining social groups.

Return the result as a single JSON object containing the following keys. Each memory entry should have a unique document_id and strictly follow the existing memory schema:
- `"LTM_data"`: [ ... ]
- `"HCM_data"`: [ ... ]

Sample long term memory entries:
{sample_ltm}
Do not include the personal information from the profile in the LTM entries
For the ltm category use one of the following categories: 'personal','family','education','career','lifestyle','finance','legal'
Lifestyle can include hobbies, routines, preferences, likes and dislikes
Personal can include relations to others, friends, acquaintances, neighbours
Sample healthcare entries:
{sample_hcm}
For the healthcare entries type, use one of the following: 'condition','procedure','appointment','medication'
"""

# System prompt for generating short-term (STM) memories.
# This is an f-string: test_profile and sample_stm are interpolated when this
# statement runs, so re-run it after changing either of them.
# Only one category is requested here, and the prompt now says so (it
# previously announced "three categories" while listing a single one).
STM_KB_generation_prompt = f"""
You are an expert data generator for generating synthetic memory data for an elderly care conversational RAG System. The elderly person you are emulating is {test_profile}
The memory data should be in JSON format and consist of one category:
1. **Short-Term Memory (STM):** recent daily activities or thoughts within the last 14 days.

Each memory entry should resemble something an elderly person might recall naturally and mention conversationally with a companion
Include timestamps and document IDs following existing naming conventions (e.g., `STM_001`).

Try and include a variety of topics relevant to an elderly individuals life in Singapore, such as:
- Shopping experiences -- Focus on elderly people shopping in local markets, hawker centres, wet markets and neighborhood shops. Include interactions with shopkeepers, hawkers, asking about prices or items, commenting on quality or sharing experiences.
- Public transport experiences -- Focus on elderly people using public transport in Singapore, such as buses and MRT. Include interactions or questions, statements or comments about routes, fares schedules, tickeing or interaction with other commuters
- Health and hobbies -- Focus on health exercise and hobbies that are suitable for elderly in Singapore such as tai chi, walking gardening, visiting community centres, senior activity centres, parks and nature reserves. Include interactions or questions, statements or comments about health routines, exercise, hobbies or social activities
- Personal wellness -- Focus on daily wellness and daily wellbeing questions or statements that an elderly might have, such as about diet, sleep, relaxation, mindfulness or mental health. Include interactions or questions, statements or comments about wellness routines, diet, sleep or mental health
- Food and meals -- Focus on food and meals that are relevant to elderly in Singapore, such as local cuisine, hawker food, traditional dishes, cooking or dietary preferences. Include interactions or questions, statements or comments about food, meals, recipes, cooking habits or dining experiences
- Social interactions -- Focus on social interactions and conversations about family, grandchildren, children or relatives. Include questions, statements or stories about family, caregiving or family events.
- Community events -- Focus on participation in local community events, gatherings or volunteer activities. Include interactions or questions, statements or comments about community events, social clubs or volunteering or joining social groups.

Return the result as a single JSON object containing the following keys:
- `"STM_data"`: [ ... ]

Sample short term memory entries:
{sample_stm}
"""

# Echo both rendered prompts so the interpolated profile and samples can be
# checked by eye before any API call is made.
for heading, prompt_text in (
    ("\n LTM and HCM Prompt \n", LTM_HCM_KB_generation_prompt),
    ("\n STM Prompt \n", STM_KB_generation_prompt),
):
    print(heading)
    print(prompt_text)


 Sample LTM 

[{'content': 'I had kaya toast for breakfast today.',
  'document_id': 'STM_001',
  'timestamp': '2025-09-18T09:00:00Z'},
 {'content': 'I joined the online yoga session today morning.',
  'document_id': 'STM_002',
  'timestamp': '2025-09-19T10:24:46Z'},
 {'content': "I want to try painting this weekend; it's been a while.",
  'document_id': 'STM_003',
  'timestamp': '2025-09-19T13:42:51Z'},
 {'content': 'I want to watch a Bollywood movie tonight.',
  'document_id': 'STM_004',
  'timestamp': '2025-09-19T14:00:00Z'},
 {'content': 'I accidentally overcooked the rice today.',
  'document_id': 'STM_005',
  'timestamp': '2025-09-19T15:59:24Z'}]

 Sample LTM 

[{'category': 'family',
  'document_id': 'LTM_001',
  'key': 'Daughter-in-law',
  'value': 'Meena Singh'},
 {'category': 'family',
  'document_id': 'LTM_002',
  'key': 'Eldest son',
  'value': 'Ganesh Menon Dhoby'},
 {'category': 'lifestyle',
  'document_id': 'LTM_003',
  'key': 'friend',
  'value': 'Mr Lim'},
 {'category

### API call to generate LTM and HCM data

In [11]:
# The system turn carries the schema, topics and samples; the user turn only
# states how many entries of each kind to produce.
ltm_hcm_messages = [
    {"role": "system", "content": LTM_HCM_KB_generation_prompt},
    {"role": "user", "content": "Generate 20 HCM entries and 30 LTM entries.  Output only the JSON output"},
]

response = openai_client.chat.completions.create(
    model="gpt-4.1-mini",  # or "gpt-4o"
    messages=ltm_hcm_messages,
    temperature=0.8,
)

# Raw text of the first (and only requested) completion; parsed in a later cell.
generated_ltm_hcm = response.choices[0].message.content

In [36]:
# Parse the raw LTM/HCM reply and persist it as an intermediate result.
#
# The model may wrap its answer in a Markdown code fence (```json ... ```).
# Only a fence that wraps the whole reply is removed; the payload itself is
# never edited. The earlier approach deleted the first "json" substring
# anywhere in the text, which could corrupt an unfenced reply.
raw_ltm_hcm = generated_ltm_hcm.strip()
fenced_ltm_hcm = re.fullmatch(
    r"```(?:json)?\s*(.*?)\s*```",
    raw_ltm_hcm,
    flags=re.DOTALL | re.IGNORECASE,
)
ltm_hcm_payload = fenced_ltm_hcm.group(1) if fenced_ltm_hcm else raw_ltm_hcm

# Raises json.JSONDecodeError if the reply is not valid JSON (e.g. truncated).
parsed_ltm_hcm = json.loads(ltm_hcm_payload)

# Fail here, next to the cause, rather than with a KeyError when the data is
# merged later on.
missing_ltm_hcm_keys = [
    key for key in ("LTM_data", "HCM_data") if key not in parsed_ltm_hcm
]
if missing_ltm_hcm_keys:
    raise ValueError(f"Generated LTM/HCM response is missing keys: {missing_ltm_hcm_keys}")

with open("../test_cases/intermediate_results/111025_augmented_LTM_HCM_response.json", "w", encoding="utf-8") as f:
    json.dump(parsed_ltm_hcm, f, indent=4)

In [14]:
# Generate the STM entries, then parse and persist them.
#
# The merge step reads 111025_augmented_STM_response.json, but no visible cell
# wrote that file, so a fresh Restart & Run All could not reach it. Saving the
# parsed reply here closes that gap and mirrors how the LTM/HCM reply is stored.
# NOTE(review): this overwrites any existing (possibly hand-edited) copy of the
# intermediate file - confirm that is acceptable.
response = openai_client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[
        {"role": "system", "content": STM_KB_generation_prompt},
        {"role": "user", "content": "Generate 300 STM entries.  Output only the JSON output"}
    ],
    temperature=0.8,
)

generated_stm = response.choices[0].message.content

# Remove a Markdown code fence only when it wraps the whole reply; the payload
# itself is left untouched.
raw_stm = generated_stm.strip()
fenced_stm = re.fullmatch(
    r"```(?:json)?\s*(.*?)\s*```",
    raw_stm,
    flags=re.DOTALL | re.IGNORECASE,
)

# Raises json.JSONDecodeError if the reply is not valid JSON (e.g. cut off
# before all 300 entries were emitted).
parsed_stm = json.loads(fenced_stm.group(1) if fenced_stm else raw_stm)

if "STM_data" not in parsed_stm:
    raise ValueError("Generated STM response is missing the 'STM_data' key")

with open("../test_cases/intermediate_results/111025_augmented_STM_response.json", "w", encoding="utf-8") as f:
    json.dump(parsed_stm, f, indent=4)

In [38]:
# Read both intermediate results back from disk and merge them into a single
# augmented knowledge-base file.
with open("../test_cases/intermediate_results/111025_augmented_STM_response.json", "r") as stm_file:
    augmented_stm = json.load(stm_file)
with open("../test_cases/intermediate_results/111025_augmented_LTM_HCM_response.json", "r") as ltm_hcm_file:
    augmented_ltm_hcm = json.load(ltm_hcm_file)

# Key order in the output file is LTM, HCM, then STM.
combined = {key: augmented_ltm_hcm[key] for key in ("LTM_data", "HCM_data")}
combined["STM_data"] = augmented_stm["STM_data"]

with open("../test_cases/111025_augmented_kb.json", "w") as kb_file:
    kb_file.write(json.dumps(combined, indent=4))