In [1]:
!pip install -q sentence-transformers transformers scikit-learn pandas numpy

import time
import torch
import numpy as np
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from sentence_transformers.cross_encoder import CrossEncoder
from transformers import AutoTokenizer
from sklearn.preprocessing import MinMaxScaler

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/486.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m481.3/486.6 kB[0m [31m22.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.6/486.6 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h

  * **h_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` or


Using device: cpu


In [2]:
import logging
import os
from typing import List
from huggingface_hub import login
from sentence_transformers import SentenceTransformer, CrossEncoder

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

class Embedder:
    """
    Lazy loader for SentenceTransformer embeddings.
    Loads model only when explicitly instantiated.
    """

    def __init__(self, model_name: str = "google/embeddinggemma-300m"):
        self.embedding_model_name = model_name
        self.model = None
        self._ensure_model_loaded()

    def _ensure_model_loaded(self):
        """Lazy load embedding model when needed."""
        if self.model is not None:
            return

        # Import heavy modules here, not at top-level


        huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
        if huggingface_token:
            login(token=huggingface_token)
        else:
            logger.warning("⚠️ No HuggingFace token found. Proceeding without login.")

        logger.info(f"Loading embedding model: {self.embedding_model_name}")
        self.model = SentenceTransformer(self.embedding_model_name, device="cpu")
        logger.info(f"✅ Embedding model loaded: {self.embedding_model_name}")

    def embed(self, txt: str) -> list:
        if not isinstance(txt, str) or not txt.strip():
            raise ValueError("Input must be a non-empty string.")
        if not self.model:
            self._ensure_model_loaded()
        embedding = self.model.encode(txt, normalize_embeddings=True, show_progress_bar=False)
        return embedding.tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        if not texts or not all(isinstance(t, str) and t.strip() for t in texts):
            raise ValueError("Input texts must be a list of non-empty strings.")
        if not self.model:
            self._ensure_model_loaded()
        embeddings = self.model.encode(texts, normalize_embeddings=True, show_progress_bar=False)
        return [emb.tolist() for emb in embeddings]


class CrossEmbedder:
    """
    Lazy loader for CrossEncoder reranker.
    """

    def __init__(self, model_name: str = "jinaai/jina-reranker-v1-turbo-en"):
        self.model_name = model_name
        self.model = None
        self._ensure_model_loaded()

    def _ensure_model_loaded(self):
        """Load CrossEncoder model lazily."""
        huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
        if huggingface_token:
            login(token=huggingface_token, trust_remote_code=True)
        else:
            logger.warning("⚠️ No HuggingFace token found. Proceeding with public model access.")

        try:
            logger.info(f"Loading CrossEncoder model: {self.model_name}")
            self.model = CrossEncoder(self.model_name)
            logger.info(f"✅ CrossEncoder loaded: {self.model_name}")
        except Exception as e:
            logger.error(f"❌ Error loading CrossEncoder model: {e}")
            self.model = None

    def predict(self, pairs):
        if not self.model:
            self._ensure_model_loaded()
        return self.model.predict(pairs)


#### Test set

In [3]:
test_set = [
    # ============== Original General Knowledge (Condensed) ==============
    {
        "qid": "factual_01",
        "query": "What is the boiling point of water at standard atmospheric pressure?",
        "documents": [
            "Water is a chemical compound with the formula H2O.",
            "The freezing point of water is 0 degrees Celsius or 32 degrees Fahrenheit.",
            "At sea level, water boils at 100°C (212°F).",
            "Saltwater has a higher boiling point due to the dissolved salts."
        ],
        "ground_truth": {2: 2}
    },
    {
        "qid": "factual_02",
        "query": "Who wrote 'To Kill a Mockingbird'?",
        "documents": [
            "The novel '1984' was written by George Orwell.",
            "Harper Lee is the acclaimed author of the Pulitzer Prize-winning novel 'To Kill a Mockingbird'.",
            "'The Catcher in the Rye' is a novel by J. D. Salinger.",
            "Truman Capote was a childhood friend of Harper Lee."
        ],
        "ground_truth": {1: 2, 3: 1}
    },
    {
        "qid": "comparative_01",
        "query": "Compare Python and Java for web development",
        "documents": [
            "Python is a high-level, interpreted programming language known for its simplicity and readability.",
            "Java is a class-based, object-oriented programming language designed to have as few implementation dependencies as possible.",
            "For web development, Python's frameworks like Django and Flask are often praised for rapid development, while Java's Spring framework is known for its robustness and scalability in enterprise-level applications.",
            "Both Python and Java have large, active communities and extensive libraries."
        ],
        "ground_truth": {2: 2, 3: 1}
    },
    {
        "qid": "negative_01",
        "query": "What are the best hiking trails in Antarctica?",
        "documents": [
            "Antarctica is the Earth's southernmost continent, containing the geographic South Pole.",
            "Hiking is a popular outdoor activity with numerous health benefits.",
            "The Appalachian Trail is a famous hiking trail in the Eastern United States.",
            "Scientific research stations in Antarctica are staffed year-round."
        ],
        "ground_truth": {}
    },
    {
        "qid": "ambiguous_01",
        "query": "What is Java?",
        "documents": [
            "Java is an island of Indonesia, bordered by the Indian Ocean.",
            "Many people enjoy a cup of java in the morning to get their day started.",
            "Java is a high-level, class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible.",
            "A software developer is a person who creates computer software."
        ],
        "ground_truth": {2: 2, 0: 1, 1: 1}
    },

    # ============== Singapore Elderly Companion: Health & Daily Routines ==============
    {
        "qid": "sg_health_01",
        "query": "What time is my blood pressure check tomorrow?",
        "documents": [
            "You mentioned your knee pain has improved after physio.",
            "Your next blood pressure check with Dr. Lim is at 10:30 AM tomorrow at Tan Tock Seng Polyclinic.",
            "You had nasi lemak for breakfast today.",
            "Your daughter reminded you to take your metformin after lunch."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_health_02",
        "query": "Should I walk more if my knees are stiff?",
        "documents": [
            "Walking helps keep joints flexible, but avoid overdoing it if there’s swelling—try 15 minutes twice a day on flat ground.",
            "You told the bot last Tuesday that your knees feel better after using the walking stick.",
            "Hawker centres are getting more expensive these days, lah.",
            "Your grandson sent you a video on how to use WhatsApp."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_health_03",
        "query": "I keep forgetting where I put my keys. Is this normal?",
        "documents": [
            "You’ve misplaced your keys three times this month.",
            "You now keep them in a red bowl by the door—'so I remember.'",
            "Your friend Aunty Lian also forgets things—she uses a keychain with a bell.",
            "You slept only 5 hours last night."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_health_04",
        "query": "My legs feel heavy when I walk to the market. Normal or not?",
        "documents": [
            "You walk to Tekka Market every Tuesday and Friday.",
            "You said your legs feel better after resting on the bench halfway.",
            "The nurse said leg heaviness can happen with age—just don’t overdo it.",
            "You bought new walking shoes last month."
        ],
        "ground_truth": {2: 2, 1: 1}
    },
    {
        "qid": "sg_health_05",
        "query": "Should I take my pills before or after breakfast?",
        "documents": [
            "Your white pill (for blood) is taken after breakfast.",
            "Your blue pill (for sugar) is taken before breakfast.",
            "You mixed them up last Monday and felt dizzy.",
            "You keep your pills in a red box with days of the week."
        ],
        "ground_truth": {0: 1, 1: 2}
    },

    # ============== Family & Grandchildren ==============
    {
        "qid": "sg_family_01",
        "query": "When is my granddaughter’s birthday again?",
        "documents": [
            "Your granddaughter turns 8 on 12 November.",
            "You bought her a Hello Kitty bag last week.",
            "She loves eating ice cream at McDonald’s.",
            "You video-called her last Sunday—she showed you her school drawing."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_family_02",
        "query": "Did my son say he’s coming this weekend?",
        "documents": [
            "Your son messaged: 'Mum, I’ll come Saturday morning to fix your fan.'",
            "Your fan has been making noise for two days.",
            "You hope he brings his wife and kids.",
            "You cooked extra curry last night just in case."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_family_03",
        "query": "Why my grandson not calling me these days?",
        "documents": [
            "Your grandson is busy with exams until end of October.",
            "He called you two weeks ago and said he misses your chicken rice.",
            "You worry he’s eating too much junk food in hostel.",
            "You saved his number under 'Ah Boy Overseas'."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_family_04",
        "query": "My daughter said something about my phone photos—what was it?",
        "documents": [
            "Your daughter said: 'Mum, back up your photos to Google so you don’t lose them if phone spoilt.'",
            "You took 50 photos at the clan association dinner.",
            "You don’t like cloud—'Where is this Google? Can I see it?'",
            "You printed three photos last month at the mall."
        ],
        "ground_truth": {0: 2}
    },

    # ============== Food & Hawker Culture ==============
    {
        "qid": "sg_food_01",
        "query": "Which stall has the best wanton mee near my place?",
        "documents": [
            "You said the wanton mee at Block 45 coffee shop is too salty now.",
            "You prefer the one at Old Airport Road—Uncle Lim’s stall, open till 3 PM.",
            "You haven’t been to Old Airport Road since your knee pain started.",
            "You gave up char kway teow but still eat wanton mee once a week."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_food_02",
        "query": "Can I eat kueh lapis every day?",
        "documents": [
            "You love the pandan kueh lapis from Bengawan Solo.",
            "Your doctor said: 'Sweet kueh okay once or twice a week, not daily.'",
            "You bought a whole box last Sunday for your bridge friends.",
            "Your blood sugar was high after eating too much kueh during CNY."
        ],
        "ground_truth": {1: 2, 3: 1}
    },
    {
        "qid": "sg_food_03",
        "query": "Wah, I miss eating bak chor mee from my old neighbourhood.",
        "documents": [
            "You used to eat bak chor mee at Tiong Bahru every Friday with Ah Ma.",
            "That stall closed 10 years ago after Ah Ma passed.",
            "You tried a new one in Jurong but said it’s not the same.",
            "You still have the old photo of you and Ah Ma at the stall."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_food_04",
        "query": "Can I eat bak kut teh if I got high uric acid?",
        "documents": [
            "Bak kut teh is high in purines—avoid if you have gout or high uric acid.",
            "Your uric acid level was 520 µmol/L last month (normal <420).",
            "You had bak kut teh last Sunday and felt joint pain after.",
            "Your favourite stall is in Kovan."
        ],
        "ground_truth": {0: 2, 1: 1, 2: 1}
    },

    # ============== Hobbies & Pastimes ==============
    {
        "qid": "sg_hobby_01",
        "query": "When is the next chess session at the void deck?",
        "documents": [
            "You play chess with Ah Gu every Monday, Wednesday, and Friday at 4 PM near Block 22.",
            "You lent your chess set to your grandson last week.",
            "You said the new chess table has a wobbly leg.",
            "You enjoy watching birds at Bishan Park in the morning."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_hobby_02",
        "query": "Did I water my orchids today?",
        "documents": [
            "You water your orchids every Tuesday, Thursday, and Saturday morning.",
            "Today is Thursday.",
            "You told the bot: 'Forgot to water yesterday—hope they don’t die!'",
            "Your favourite orchid bloomed last month—pink one."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_hobby_03",
        "query": "Where did I put my knitting needles?",
        "documents": [
            "You were knitting a scarf for your granddaughter.",
            "You left your knitting bag on the dining table after lunch.",
            "You said the yarn is from that shop in Tiong Bahru Plaza.",
            "You watched a drama last night about a knitting club."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_hobby_04",
        "query": "Can I join the tai chi group even if I’m a bit slow?",
        "documents": [
            "The tai chi instructor said everyone is welcome, no matter age or speed.",
            "You watched the group from the bench last week.",
            "They meet at 7 AM daily at the park near your block.",
            "You used to do tai chi in the 90s but stopped after your fall."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_hobby_05",
        "query": "Why my plant leaves turning yellow?",
        "documents": [
            "You water your money plant every day.",
            "The nursery uncle said: 'Too much water—once every 3 days only.'",
            "You keep it near the window for sunlight.",
            "You bought it during Chinese New Year for good luck."
        ],
        "ground_truth": {1: 2}
    },

    # ============== Worries & Emotions ==============
    {
        "qid": "sg_worry_01",
        "query": "I feel so alone since Ah Gu moved to JB.",
        "documents": [
            "Ah Gu was your chess partner for 15 years.",
            "He moved to live with his daughter in Johor Bahru last month.",
            "You said: 'Now no one to talk to in the afternoon.'",
            "The community centre has a befriending program—volunteers visit weekly."
        ],
        "ground_truth": {1: 2, 2: 1, 3: 1}
    },
    {
        "qid": "sg_worry_02",
        "query": "What if I fall again and no one is home?",
        "documents": [
            "You fell in the bathroom last year—scared you.",
            "Your daughter wants you to wear the emergency button necklace.",
            "You said: 'I don’t like wearing it—it’s ugly.'",
            "The HDB has a scheme to install grab bars for seniors."
        ],
        "ground_truth": {1: 2, 3: 1}
    },
    {
        "qid": "sg_worry_03",
        "query": "My bills keep going up. How to save money?",
        "documents": [
            "You switched to LED bulbs last year—saved $15 on electricity.",
            "You now use the senior citizen discount at NTUC every Wednesday.",
            "You cancelled your cable TV and watch free Channel 8 now.",
            "You grow chilli and pandan on your balcony."
        ],
        "ground_truth": {1: 2, 0: 1, 2: 1}
    },
    {
        "qid": "sg_worry_04",
        "query": "I feel tired even after sleeping early.",
        "documents": [
            "You went to bed at 9 PM but woke up three times last night.",
            "You said your new mattress is too soft.",
            "You had sweet potato porridge for dinner—maybe too heavy?",
            "You plan to take a short nap after lunch."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Routines & Practical Life ==============
    {
        "qid": "sg_routine_01",
        "query": "What time is my bus to the polyclinic?",
        "documents": [
            "You take Bus 131 from Stop 52121 every Thursday at 8:45 AM.",
            "The clinic appointment is at 9:30 AM.",
            "You always bring your green umbrella in case of rain.",
            "You like sitting at the front of the bus."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_02",
        "query": "Did I pay my conservancy charges this month?",
        "documents": [
            "You paid your S&CC fee online last Friday with your daughter’s help.",
            "The receipt email is in your Gmail under 'Bills'.",
            "You used to pay at the post office but now do it online.",
            "You keep all payment receipts in a blue folder."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_03",
        "query": "Where is my spectacles? I need to read the newspaper.",
        "documents": [
            "You left your glasses on the coffee table after watching TV.",
            "You said your new glasses are lighter than the old ones.",
            "You read The Straits Times every morning at 7 AM.",
            "You keep a spare pair in your handbag."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_04",
        "query": "Is it raining later? I need to go to the market.",
        "documents": [
            "The weather app says 80% chance of rain after 2 PM.",
            "You plan to go to the market at 10 AM.",
            "You always bring your foldable umbrella.",
            "The market has a roof, but the walk there is open."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Technology & Modern Life ==============
    {
        "qid": "sg_tech_01",
        "query": "How to send photo to my daughter in WhatsApp?",
        "documents": [
            "Tap the '+' icon → choose photo → select picture → tap send.",
            "Your daughter’s name is 'Mei Ling' in your contacts.",
            "You sent her a photo of your orchids last week.",
            "You said WhatsApp is easier than Telegram."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_tech_02",
        "query": "Why my phone keep showing ‘storage full’?",
        "documents": [
            "You have 2,000+ photos and 50 videos.",
            "Your daughter said: 'Delete old photos or move to Google Photos.'",
            "You don’t know how to delete—afraid to press wrong button.",
            "You took 100 photos at the clan dinner alone."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_tech_03",
        "query": "So blur how to use this new phone leh!",
        "documents": [
            "Your daughter set up your new phone last weekend.",
            "She showed you how to make calls, send WhatsApp, and take photos.",
            "You said: 'So many buttons—my old Nokia better!'",
            "You accidentally called your neighbour three times yesterday."
        ],
        "ground_truth": {1: 2, 2: 1}
    },

    # ============== Singlish & Casual Chat ==============
    {
        "qid": "sg_singlish_01",
        "query": "Wah, my leg damn pain again leh. Last time you say go see doctor?",
        "documents": [
            "On 5 Oct, you said your right knee swelled up after walking to the market.",
            "The bot advised: 'Better see doctor if pain lasts more than 3 days or if you can’t bend knee.'",
            "You told Ah Ma the pain is worse in the morning.",
            "You bought Tiger Balm from the pharmacy."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_singlish_02",
        "query": "Why the weather so shiok today?",
        "documents": [
            "It’s cloudy and 26°C—perfect for walking.",
            "You said: 'Not too hot, not too cold—just nice!'",
            "You went to the park and fed the pigeons.",
            "You bought ice lemon tea from the coffee shop."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_singlish_03",
        "query": "Aiyo, my back damn pain after cleaning house!",
        "documents": [
            "You cleaned your flat thoroughly on Sunday—washed curtains and mopped floors.",
            "You said: 'Next time ask my son to help—too heavy for me.'",
            "You used the long-handled mop your daughter bought you.",
            "You rested all afternoon and applied Tiger Balm."
        ],
        "ground_truth": {0: 2, 3: 1}
    },

    # ============== Nostalgia & Memories ==============
    {
        "qid": "sg_memory_01",
        "query": "Remember when we used to go to the cinema at Yio Chu Kang?",
        "documents": [
            "You and Ah Gu watched every Bruce Lee movie at Yio Chu Kang Cinema in the 70s.",
            "The cinema closed in 1985 and became a supermarket.",
            "You still have the ticket stub from 'Enter the Dragon'.",
            "You said: 'Those were the days—50 cents for a ticket!'"
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_memory_02",
        "query": "What did I cook for CNY last year?",
        "documents": [
            "You made pineapple tarts, bak kwa, and yee sang.",
            "Your grandchildren helped roll the tarts.",
            "You gave half to your neighbours.",
            "You said your hands were sore after three days of cooking."
        ],
        "ground_truth": {0: 2}
    },

    # ============== Community & Neighbours ==============
    {
        "qid": "sg_community_01",
        "query": "Is the senior activity centre open on public holidays?",
        "documents": [
            "The senior centre at Block 30 is closed on public holidays.",
            "It’s open Mon–Sat, 8 AM to 5 PM.",
            "They have bingo every Thursday afternoon.",
            "You made new friends there during the craft workshop."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_community_02",
        "query": "My neighbour’s dog keep barking at night. What to do?",
        "documents": [
            "You spoke to the neighbour last week—she said she’ll keep the dog inside.",
            "The barking stopped for two nights but started again last night.",
            "You called the NParks hotline—they said they’ll send a letter.",
            "You sleep with earplugs now."
        ],
        "ground_truth": {0: 1, 2: 2}
    },

    # ============== Multi-hop & Temporal Reasoning ==============
    {
        "qid": "sg_multihop_01",
        "query": "When was the last time I checked my HbA1c?",
        "documents": [
            "On 12 March 2025, you visited Raffles Medical for a diabetes follow-up.",
            "Your HbA1c result was 7.1%—slightly above target.",
            "You mentioned you forgot to bring your glucose meter to the clinic.",
            "You like eating kueh lapis from the bakery near your flat."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_multihop_02",
        "query": "Did I take my blood pressure pill this morning?",
        "documents": [
            "At 8:15 AM today, you logged: 'Took BP pill and had kopi-O.'",
            "Your usual BP meds are amlodipine 5mg once daily in the morning.",
            "Yesterday you skipped your pill because you felt dizzy.",
            "You watered your orchids after breakfast."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_temporal_01",
        "query": "Before I started taking the new pills, was my blood pressure higher?",
        "documents": [
            "In August 2025, your average BP was 158/92.",
            "You started telmisartan on 1 Sept 2025.",
            "In October 2025, your average BP dropped to 138/84.",
            "You began walking 20 minutes daily in July."
        ],
        "ground_truth": {0: 2, 1: 1, 2: 1}
    },

    # ============== Ambiguous & Vague Queries ==============
    {
        "qid": "sg_ambiguous_01",
        "query": "What’s the update on my ‘appointment’?",
        "documents": [
            "Your dental cleaning is next Monday at 3 PM.",
            "Your daughter’s job interview went well—she got the offer!",
            "You have a physiotherapy session this Friday.",
            "The void deck chess group meets every evening."
        ],
        "ground_truth": {0: 1, 2: 1}
    },
    {
        "qid": "sg_ambiguous_02",
        "query": "You know that thing I take for sugar?",
        "documents": [
            "You take metformin 500mg twice daily for type 2 diabetes.",
            "You like adding less sugar to your kopi now.",
            "Your blood sugar log shows fasting levels between 6.0–7.2 mmol/L.",
            "You gave up eating sweet kueh last month."
        ],
        "ground_truth": {0: 2, 2: 1}
    },

    # ============== No Relevant Answer (False Positive Tests) ==============
    {
        "qid": "sg_negative_01",
        "query": "Can I get a refund for my expired bus card?",
        "documents": [
            "You topped up $20 on your EZ-Link card last Monday.",
            "Expired EZ-Link cards cannot be refunded, but you can transfer remaining value to a new card at TransitLink offices.",
            "You complained the bus was late twice this week.",
            "You like sitting at the back of the bus."
        ],
        "ground_truth": {}  # Note: doc[1] is a common misconception—value is lost, not transferable
    },
    {
        "qid": "sg_negative_02",
        "query": "How do I apply for a passport for my cat?",
        "documents": [
            "You took your cat to the vet last month for vaccination.",
            "Singapore doesn’t issue passports for pets—only health certificates for travel.",
            "Your cat’s name is Mimi.",
            "You feed Mimi wet food twice a day."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_03",
        "query": "Can I give my leftover medicine to my friend with same problem?",
        "documents": [
            "The nurse said: 'Never share medicine—even if symptoms look same.'",
            "Your friend has high blood pressure like you.",
            "You have extra pills from your last refill.",
            "You said your friend can’t afford to see a doctor."
        ],
        "ground_truth": {0: 2}
    },
    # ============== Singapore Elderly Companion: Health & Daily Routines ==============
    {
        "qid": "sg_health_01",
        "query": "What time is my blood pressure check tomorrow?",
        "documents": [
            "You mentioned your knee pain has improved after physio.",
            "Your next blood pressure check with Dr. Lim is at 10:30 AM tomorrow at Tan Tock Seng Polyclinic.",
            "You had nasi lemak for breakfast today.",
            "Your daughter reminded you to take your metformin after lunch."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_health_02",
        "query": "Should I walk more if my knees are stiff?",
        "documents": [
            "Walking helps keep joints flexible, but avoid overdoing it if there’s swelling—try 15 minutes twice a day on flat ground.",
            "You told the bot last Tuesday that your knees feel better after using the walking stick.",
            "Hawker centres are getting more expensive these days, lah.",
            "Your grandson sent you a video on how to use WhatsApp."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_health_03",
        "query": "I keep forgetting where I put my keys. Is this normal?",
        "documents": [
            "You’ve misplaced your keys three times this month.",
            "You now keep them in a red bowl by the door—'so I remember.'",
            "Your friend Aunty Lian also forgets things—she uses a keychain with a bell.",
            "You slept only 5 hours last night."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_health_04",
        "query": "My legs feel heavy when I walk to the market. Normal or not?",
        "documents": [
            "You walk to Tekka Market every Tuesday and Friday.",
            "You said your legs feel better after resting on the bench halfway.",
            "The nurse said leg heaviness can happen with age—just don’t overdo it.",
            "You bought new walking shoes last month."
        ],
        "ground_truth": {2: 2, 1: 1}
    },
    {
        "qid": "sg_health_05",
        "query": "Should I take my pills before or after breakfast?",
        "documents": [
            "Your white pill (for blood) is taken after breakfast.",
            "Your blue pill (for sugar) is taken before breakfast.",
            "You mixed them up last Monday and felt dizzy.",
            "You keep your pills in a red box with days of the week."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_health_06",
        "query": "What's the name of the pill I take for my cholesterol?",
        "documents": [
            "You take Simvastatin 20mg every night for high cholesterol.",
            "The doctor said to eat less char siew and more fish.",
            "You missed your dose last Tuesday after visiting your sister.",
            "Your last blood test showed your LDL was slightly high."
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_health_07",
        "query": "Is it okay to stop my pain killer if my knee feels fine now?",
        "documents": [
            "Dr. Lee advised to take the Tramadol only when the pain is severe (above 5/10).",
            "You said your pain level has been 2/10 since you started swimming.",
            "You usually take it before bed but stopped two days ago.",
            "The pharmacy label says 'Do not stop abruptly without doctor's advice.'"
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_health_08",
        "query": "The doctor say my liver enzyme high. Is that from the herbal drink I started?",
        "documents": [
            "Your liver enzyme test on 5 Nov 2025 showed elevated AST/ALT.",
            "Dr. Chen warned you to stop the 'Lingzhi Cleansing Brew' immediately.",
            "You started drinking the herbal brew in early October after Aunty Mui recommended it.",
            "You felt very lethargic the week before the blood test."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_health_09",
        "query": "Why does my stomach feel bloated after I take the white pill?",
        "documents": [
            "The white pill (Metformin) often causes initial bloating or gas—the doctor said to take it with food.",
            "You started taking it two weeks ago.",
            "You had plain porridge for dinner last night.",
            "Your doctor scheduled a review for your digestive issues next month."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_health_10",
        "query": "I need to do my eye drops. Which is the left eye?",
        "documents": [
            "The prescription says 'OD' for the right eye (Right Drop) and 'OS' for the left eye (Left Drop).",
            "You need to apply the glaucoma drops (blue cap) to your left eye (OS) only, twice a day.",
            "You keep the drops in the fridge door.",
            "You found the instructions very confusing at the polyclinic."
        ],
        "ground_truth": {0: 1, 1: 2}
    },

    # ============== Family & Grandchildren ==============
    {
        "qid": "sg_family_01",
        "query": "When is my granddaughter’s birthday again?",
        "documents": [
            "Your granddaughter turns 8 on 12 November.",
            "You bought her a Hello Kitty bag last week.",
            "She loves eating ice cream at McDonald’s.",
            "You video-called her last Sunday—she showed you her school drawing."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_family_02",
        "query": "Did my son say he’s coming this weekend?",
        "documents": [
            "Your son messaged: 'Mum, I’ll come Saturday morning to fix your fan.'",
            "Your fan has been making noise for two days.",
            "You hope he brings his wife and kids.",
            "You cooked extra curry last night just in case."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_family_03",
        "query": "Why my grandson not calling me these days?",
        "documents": [
            "Your grandson is busy with exams until end of October.",
            "He called you two weeks ago and said he misses your chicken rice.",
            "You worry he’s eating too much junk food in hostel.",
            "You saved his number under 'Ah Boy Overseas'."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_family_04",
        "query": "My daughter said something about my phone photos—what was it?",
        "documents": [
            "Your daughter said: 'Mum, back up your photos to Google so you don’t lose them if phone spoilt.'",
            "You took 50 photos at the clan association dinner.",
            "You don’t like cloud—'Where is this Google? Can I see it?'",
            "You printed three photos last month at the mall."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_family_05",
        "query": "My son said I shouldn't wear the same shirt three days. Why he say that?",
        "documents": [
            "Your son mentioned: 'Mum, you need to change your shirt—it's laundry day tomorrow.'",
            "You replied: 'Aiyo, this shirt is clean, just wore it for two hours!'",
            "You feel he is too fussy about clothing these days.",
            "He reminded you to put all your dirty clothes into the blue basket."
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_family_06",
        "query": "Did I send the ang bao to my nephew’s wedding already?",
        "documents": [
            "You placed the ang bao (red packet) in a small white envelope on your dining table.",
            "Your daughter-in-law collected it last Sunday to deliver for you.",
            "The wedding banquet is next Saturday at the Fairmont Hotel.",
            "You complained the ang bao money is getting more expensive every year."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_family_07",
        "query": "When is my daughter coming back from her Australia trip?",
        "documents": [
            "Your daughter is scheduled to land at Changi Airport next Tuesday at 4:30 PM.",
            "You wanted to cook her favourite curry chicken when she returns.",
            "She video-called you from Sydney Opera House last week.",
            "She reminded you to water her succulent plants while she is away."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_family_08",
        "query": "What did I promise to cook for my grandson when he comes over?",
        "documents": [
            "You promised to cook your special homemade *tau yew bak* (braised pork) and white rice for him.",
            "He is coming over next Sunday after his tennis lesson.",
            "Your daughter asked you to cook less gravy for the dish because of your blood pressure.",
            "He doesn't like the canned mushrooms you sometimes add."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Food & Hawker Culture ==============
    {
        "qid": "sg_food_01",
        "query": "Which stall has the best wanton mee near my place?",
        "documents": [
            "You said the wanton mee at Block 45 coffee shop is too salty now.",
            "You prefer the one at Old Airport Road—Uncle Lim’s stall, open till 3 PM.",
            "You haven’t been to Old Airport Road since your knee pain started.",
            "You gave up char kway teow but still eat wanton mee once a week."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_food_02",
        "query": "Can I eat kueh lapis every day?",
        "documents": [
            "You love the pandan kueh lapis from Bengawan Solo.",
            "Your doctor said: 'Sweet kueh okay once or twice a week, not daily.'",
            "You bought a whole box last Sunday for your bridge friends.",
            "Your blood sugar was high after eating too much kueh during CNY."
        ],
        "ground_truth": {1: 2, 3: 1}
    },
    {
        "qid": "sg_food_03",
        "query": "Wah, I miss eating bak chor mee from my old neighbourhood.",
        "documents": [
            "You used to eat bak chor mee at Tiong Bahru every Friday with Ah Ma.",
            "That stall closed 10 years ago after Ah Ma passed.",
            "You tried a new one in Jurong but said it’s not the same.",
            "You still have the old photo of you and Ah Ma at the stall."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_food_04",
        "query": "Can I eat bak kut teh if I got high uric acid?",
        "documents": [
            "Bak kut teh is high in purines—avoid if you have gout or high uric acid.",
            "Your uric acid level was 520 µmol/L last month (normal <420).",
            "You had bak kut teh last Sunday and felt joint pain after.",
            "Your favourite stall is in Kovan."
        ],
        "ground_truth": {0: 2, 1: 1, 2: 1}
    },
    {
        "qid": "sg_food_05",
        "query": "Can I still eat oyster omelette even if my doctor told me no shellfish?",
        "documents": [
            "Your doctor strictly advised avoiding all shellfish due to a severe allergy risk (Hives last year).",
            "Oyster omelette contains oysters, which are a type of shellfish.",
            "You ate a small portion of it at the hawker centre last week and felt a slight itch.",
            "You prefer the chilli sauce from the Boon Lay Power Nasi Lemak stall."
        ],
        "ground_truth": {0: 2, 1: 2, 2: 1}
    },
    {
        "qid": "sg_food_06",
        "query": "What food should I eat to make my bones stronger?",
        "documents": [
            "The nutritionist recommended green leafy vegetables (kai lan, chye sim) and milk.",
            "You said: 'Cannot drink too much milk, my stomach a bit uneasy.'",
            "You had chicken rice for dinner yesterday.",
            "Your daughter bought you a vitamin D supplement last month."
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_food_07",
        "query": "Is it okay to drink one cup of kopi-O every day?",
        "documents": [
            "Your doctor said moderate caffeine is fine—one cup of kopi-O in the morning is okay.",
            "You usually buy your kopi from the stall at Block 105 at 8 AM.",
            "You used to drink two cups a day but felt palpitations.",
            "Kopi-O has no milk or sugar."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_food_08",
        "query": "The chicken rice uncle gave me so much gravy. Is the gravy too salty?",
        "documents": [
            "You should limit high-sodium liquids like sauces and gravy because of your blood pressure (last reading 150/90).",
            "You prefer the dark sauce and chili, not the gravy.",
            "The doctor advised reducing salt intake to help with leg swelling.",
            "You told your companion you always ask the uncle for 'more black sauce, less gravy'."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_food_09",
        "query": "I want a snack, but I cannot eat sweet things. What did I buy last time?",
        "documents": [
            "Last week, you bought plain unsalted peanuts and a packet of Hup Seng crackers.",
            "Your blood sugar was high after eating a kaya toast set last Tuesday.",
            "The doctor suggested you swap sweet snacks for small portions of nuts or wholemeal biscuits.",
            "You threw away your leftover pineapple tarts from CNY."
        ],
        "ground_truth": {0: 2, 2: 1}
    },

    # ============== Hobbies & Pastimes ==============
    {
        "qid": "sg_hobby_01",
        "query": "When is the next chess session at the void deck?",
        "documents": [
            "You play chess with Ah Gu every Monday, Wednesday, and Friday at 4 PM near Block 22.",
            "You lent your chess set to your grandson last week.",
            "You said the new chess table has a wobbly leg.",
            "You enjoy watching birds at Bishan Park in the morning."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_hobby_02",
        "query": "Did I water my orchids today?",
        "documents": [
            "You water your orchids every Tuesday, Thursday, and Saturday morning.",
            "Today is Thursday.",
            "You told the bot: 'Forgot to water yesterday—hope they don’t die!'",
            "Your favourite orchid bloomed last month—pink one."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_hobby_03",
        "query": "Where did I put my knitting needles?",
        "documents": [
            "You were knitting a scarf for your granddaughter.",
            "You left your knitting bag on the dining table after lunch.",
            "You said the yarn is from that shop in Tiong Bahru Plaza.",
            "You watched a drama last night about a knitting club."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_hobby_04",
        "query": "Can I join the tai chi group even if I’m a bit slow?",
        "documents": [
            "The tai chi instructor said everyone is welcome, no matter age or speed.",
            "You watched the group from the bench last week.",
            "They meet at 7 AM daily at the park near your block.",
            "You used to do tai chi in the 90s but stopped after your fall."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_hobby_05",
        "query": "Why my plant leaves turning yellow?",
        "documents": [
            "You water your money plant every day.",
            "The nursery uncle said: 'Too much water—once every 3 days only.'",
            "You keep it near the window for sunlight.",
            "You bought it during Chinese New Year for good luck."
        ],
        "ground_truth": {1: 2}
    },
    {
        "qid": "sg_hobby_06",
        "query": "I want to start a new hobby. What did I say I wanted to try?",
        "documents": [
            "You mentioned wanting to learn how to play the harmonica like your old friend.",
            "You also considered joining the Community Centre's basic calligraphy class.",
            "You said your hands are too stiff for knitting these days.",
            "You bought a book on local bird-watching but haven't opened it."
        ],
        "ground_truth": {0: 2, 1: 2}
    },
    {
        "qid": "sg_hobby_07",
        "query": "When did the senior karaoke session start last time?",
        "documents": [
            "The karaoke session at the CC starts at 2:30 PM every second and fourth Tuesday of the month.",
            "You sang a Teresa Teng song last time and everyone clapped for you.",
            "You need to register at the counter before 2:00 PM.",
            "You lost your voice the day after the last session."
        ],
        "ground_truth": {0: 2}
    },

    # ============== Worries & Emotions ==============
    {
        "qid": "sg_worry_01",
        "query": "I feel so alone since Ah Gu moved to JB.",
        "documents": [
            "Ah Gu was your chess partner for 15 years.",
            "He moved to live with his daughter in Johor Bahru last month.",
            "You said: 'Now no one to talk to in the afternoon.'",
            "The community centre has a befriending program—volunteers visit weekly."
        ],
        "ground_truth": {1: 2, 2: 1, 3: 1}
    },
    {
        "qid": "sg_worry_02",
        "query": "What if I fall again and no one is home?",
        "documents": [
            "You fell in the bathroom last year—scared you.",
            "Your daughter wants you to wear the emergency button necklace.",
            "You said: 'I don’t like wearing it—it’s ugly.'",
            "The HDB has a scheme to install grab bars for seniors."
        ],
        "ground_truth": {1: 2, 3: 1}
    },
    {
        "qid": "sg_worry_03",
        "query": "My bills keep going up. How to save money?",
        "documents": [
            "You switched to LED bulbs last year—saved $15 on electricity.",
            "You now use the senior citizen discount at NTUC every Wednesday.",
            "You cancelled your cable TV and watch free Channel 8 now.",
            "You grow chilli and pandan on your balcony."
        ],
        "ground_truth": {1: 2, 0: 1, 2: 1}
    },
    {
        "qid": "sg_worry_04",
        "query": "I feel tired even after sleeping early.",
        "documents": [
            "You went to bed at 9 PM but woke up three times last night.",
            "You said your new mattress is too soft.",
            "You had sweet potato porridge for dinner—maybe too heavy?",
            "You plan to take a short nap after lunch."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_worry_05",
        "query": "I saw a message about HDB upgrading. Is it a scam?",
        "documents": [
            "Your neighbour, Uncle Tan, said there is a genuine HDB Lift Upgrading Program (LUP) going on in your block.",
            "He showed you the official HDB letter that came in the mail last week.",
            "You are worried that the construction noise will disturb your afternoon naps.",
            "The fake messages usually ask you to pay money upfront via a link."
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_worry_06",
        "query": "I feel like I don't remember things as well as my friend Aunty Lian.",
        "documents": [
            "You sometimes forget names but you can remember things from your childhood very well.",
            "Your doctor advised that occasional forgetfulness is normal with age.",
            "You said Aunty Lian uses many notes and lists to remember things.",
            "You worry about getting Alzheimer’s because your father had it."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_worry_07",
        "query": "I saw the news about the high dengue cases near my home. Should I be worried?",
        "documents": [
            "The NEA's map shows a high-risk cluster in the next block (Block 123), but your block (Block 124) is clear.",
            "You have applied mosquito repellent every morning before going to the market.",
            "Remember to wear long sleeves and pants in the evening when gardening.",
            "Your neighbour Aunty Sally had dengue last year and was hospitalized."
        ],
        "ground_truth": {0: 2, 2: 1}
    },

    # ============== Routines & Practical Life ==============
    {
        "qid": "sg_routine_01",
        "query": "What time is my bus to the polyclinic?",
        "documents": [
            "You take Bus 131 from Stop 52121 every Thursday at 8:45 AM.",
            "The clinic appointment is at 9:30 AM.",
            "You always bring your green umbrella in case of rain.",
            "You like sitting at the front of the bus."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_02",
        "query": "Did I pay my conservancy charges this month?",
        "documents": [
            "You paid your S&CC fee online last Friday with your daughter’s help.",
            "The receipt email is in your Gmail under 'Bills'.",
            "You used to pay at the post office but now do it online.",
            "You keep all payment receipts in a blue folder."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_03",
        "query": "Where is my spectacles? I need to read the newspaper.",
        "documents": [
            "You left your glasses on the coffee table after watching TV.",
            "You said your new glasses are lighter than the old ones.",
            "You read The Straits Times every morning at 7 AM.",
            "You keep a spare pair in your handbag."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_04",
        "query": "Is it raining later? I need to go to the market.",
        "documents": [
            "The weather app says 80% chance of rain after 2 PM.",
            "You plan to go to the market at 10 AM.",
            "You always bring your foldable umbrella.",
            "The market has a roof, but the walk there is open."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_routine_05",
        "query": "Did I get the government money they promised this year?",
        "documents": [
            "The $300 Senior Support Grant was credited to your DBS bank account on 22 September.",
            "Your daughter helped you check your bank book and confirmed the entry.",
            "You used the money to buy a new fan for the living room.",
            "You need to renew your Pioneer Generation card next year."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_routine_06",
        "query": "My toilet is always leaking. Who did I call last time?",
        "documents": [
            "You called Mr. Leong (Plumber, contact 9123 4567) who fixed your kitchen sink last month.",
            "He charged you $60 cash for the service.",
            "The leak is from the toilet bowl flush and started two days ago.",
            "You found a flyer for a new handyman at the void deck."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_routine_07",
        "query": "When is the next mass cleaning for my corridor?",
        "documents": [
            "The HDB cleaning schedule says the next quarterly corridor washing is this coming Saturday morning, starting at 9:00 AM.",
            "You need to keep your potted plants and shoes inside before Friday night.",
            "You complained about the neighbour leaving rubbish outside their door.",
            "The block cleaning used to be done monthly, not quarterly."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_routine_08",
        "query": "I need to get the spare key. Where did my son say he put it?",
        "documents": [
            "Your son put the spare key in a small, magnetic key box hidden behind the metal shoe rack outside the main door.",
            "He reminded you to only use it in an emergency.",
            "You lost your main house key last month while at the market.",
            "The spare key is tied with a red string so you can feel it."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Technology & Modern Life ==============
    {
        "qid": "sg_tech_01",
        "query": "How to send photo to my daughter in WhatsApp?",
        "documents": [
            "Tap the '+' icon → choose photo → select picture → tap send.",
            "Your daughter’s name is 'Mei Ling' in your contacts.",
            "You sent her a photo of your orchids last week.",
            "You said WhatsApp is easier than Telegram."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_tech_02",
        "query": "Why my phone keep showing ‘storage full’?",
        "documents": [
            "You have 2,000+ photos and 50 videos.",
            "Your daughter said: 'Delete old photos or move to Google Photos.'",
            "You don’t know how to delete—afraid to press wrong button.",
            "You took 100 photos at the clan dinner alone."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_tech_03",
        "query": "So blur how to use this new phone leh!",
        "documents": [
            "Your daughter set up your new phone last weekend.",
            "She showed you how to make calls, send WhatsApp, and take photos.",
            "You said: 'So many buttons—my old Nokia better!'",
            "You accidentally called your neighbour three times yesterday."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_tech_04",
        "query": "That message about my bank account being frozen is real or not?",
        "documents": [
            "Your bank (OCBC) confirmed that genuine messages will *never* ask for your PIN or OTP.",
            "Your daughter warned you: 'Do not click on any links in suspicious text messages!'",
            "You received a text message today saying 'Urgent: Account Frozen. Click here to verify.'",
            "You are worried your CPF money is lost."
        ],
        "ground_truth": {1: 2, 0: 2, 2: 1}
    },
    {
        "qid": "sg_tech_05",
        "query": "How to make the letters bigger on my phone?",
        "documents": [
            "Your son set the font size to 'Maximum' last week because you complained you cannot see.",
            "To make it bigger, go to Settings → Display → Font Size.",
            "You usually read the news on the phone at night.",
            "You also increased the phone volume to maximum."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_tech_06",
        "query": "How to video call my daughter? I only see her picture.",
        "documents": [
            "Find your daughter's chat, then tap the video camera icon at the top right corner of the screen.",
            "You usually call her every Saturday morning.",
            "You found the microphone button muted on your last call, so she couldn't hear you.",
            "She is stored as 'Mei Ling' in your contacts."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_tech_07",
        "query": "My phone keeps making noise from an app. How to stop it?",
        "documents": [
            "The app 'Candy Rush Saga' keeps sending notifications—your grandson installed it last month.",
            "To stop the noise, go to Settings → Notifications → find 'Candy Rush Saga' → turn off.",
            "You said: 'Always making *ding dong* sound, very annoying!'",
            "You only use your phone for calls and WhatsApp."
        ],
        "ground_truth": {1: 2, 2: 1}
    },

    # ============== Singlish & Casual Chat ==============
    {
        "qid": "sg_singlish_01",
        "query": "Wah, my leg damn pain again leh. Last time you say go see doctor?",
        "documents": [
            "On 5 Oct, you said your right knee swelled up after walking to the market.",
            "The bot advised: 'Better see doctor if pain lasts more than 3 days or if you can’t bend knee.'",
            "You told Ah Ma the pain is worse in the morning.",
            "You bought Tiger Balm from the pharmacy."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_singlish_02",
        "query": "Why the weather so shiok today?",
        "documents": [
            "It’s cloudy and 26°C—perfect for walking.",
            "You said: 'Not too hot, not too cold—just nice!'",
            "You went to the park and fed the pigeons.",
            "You bought ice lemon tea from the coffee shop."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_singlish_03",
        "query": "Aiyo, my back damn pain after cleaning house!",
        "documents": [
            "You cleaned your flat thoroughly on Sunday—washed curtains and mopped floors.",
            "You said: 'Next time ask my son to help—too heavy for me.'",
            "You used the long-handled mop your daughter bought you.",
            "You rested all afternoon and applied Tiger Balm."
        ],
        "ground_truth": {0: 2, 3: 1}
    },
    {
        "qid": "sg_singlish_04",
        "query": "So *pai seh* to go to the Community Centre alone leh. Any friend going?",
        "documents": [
            "You and Aunty Lian planned to go for the free health talk on Friday at 3 PM.",
            "Aunty Lian messaged you to say she cannot make it—her daughter needs help with the baby.",
            "You feel *pai seh* (shy/embarrassed) to sit alone.",
            "The community centre has a shuttle bus from your block."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_singlish_05",
        "query": "Heng ah, I bought the market chicken early. Why I say this?",
        "documents": [
            "The market chicken stall usually sells out by 10 AM on Saturdays.",
            "You went to the market at 8:30 AM today (Saturday) and got the last whole chicken.",
            "You wanted to cook Hainanese Chicken Rice for your son’s visit.",
            "Heng ah means 'lucky' or 'thank goodness' in this context."
        ],
        "ground_truth": {0: 2, 1: 2}
    },
    {
        "qid": "sg_singlish_06",
        "query": "Jialat lah, I cannot remember if I locked the door.",
        "documents": [
            "You said 'jialat' because you often worry about forgetting things when leaving the house.",
            "Your habit is to lock the main door twice and check the gate latch.",
            "The time is 11:00 AM; you left the house 10 minutes ago.",
            "You bought a new digital lock, but your son hasn't installed it yet."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_singlish_07",
        "query": "Is the new hawker centre too *ulu* for me to go?",
        "documents": [
            "The new hawker centre is 4 bus stops away—Bus 154 goes directly there and back.",
            "*Ulu* means 'remote' or 'far off the beaten track'.",
            "You said: 'Aiyo, so far, later if fall down, who help me?'",
            "You promised Aunty Lily you would meet her there next week for economic rice."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Nostalgia & Memories ==============
    {
        "qid": "sg_memory_01",
        "query": "Remember when we used to go to the cinema at Yio Chu Kang?",
        "documents": [
            "You and Ah Gu watched every Bruce Lee movie at Yio Chu Kang Cinema in the 70s.",
            "The cinema closed in 1985 and became a supermarket.",
            "You still have the ticket stub from 'Enter the Dragon'.",
            "You said: 'Those were the days—50 cents for a ticket!'"
        ],
        "ground_truth": {0: 2, 1: 1, 3: 1}
    },
    {
        "qid": "sg_memory_02",
        "query": "What did I cook for CNY last year?",
        "documents": [
            "You made pineapple tarts, bak kwa, and yee sang.",
            "Your grandchildren helped roll the tarts.",
            "You gave half to your neighbours.",
            "You said your hands were sore after three days of cooking."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_memory_03",
        "query": "What did I say about the firecrackers during Chinese New Year?",
        "documents": [
            "You told your daughter you miss the sound of real firecrackers in the old days (banned since 1972).",
            "You said the smell of gunpowder reminded you of your childhood home.",
            "Your neighbours now use electronic firecracker sound effects instead.",
            "You helped your grandmother make paper offerings for 7th Month."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Community & Neighbours ==============
    {
        "qid": "sg_community_01",
        "query": "Is the senior activity centre open on public holidays?",
        "documents": [
            "The senior centre at Block 30 is closed on public holidays.",
            "It’s open Mon–Sat, 8 AM to 5 PM.",
            "They have bingo every Thursday afternoon.",
            "You made new friends there during the craft workshop."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_community_02",
        "query": "My neighbour’s dog keep barking at night. What to do?",
        "documents": [
            "You spoke to the neighbour last week—she said she’ll keep the dog inside.",
            "The barking stopped for two nights but started again last night.",
            "You called the NParks hotline—they said they’ll send a letter.",
            "You sleep with earplugs now."
        ],
        "ground_truth": {0: 1, 2: 2}
    },
    {
        "qid": "sg_community_03",
        "query": "When is the next Residents' Committee (RC) meeting for seniors?",
        "documents": [
            "The RC is holding a focus group on HDB maintenance issues for seniors next Friday at 7:30 PM.",
            "It will be held in the Block 22 common room.",
            "You want to complain about the faulty lift button on the 4th floor.",
            "Uncle Tan from the RC usually drives you home afterwards."
        ],
        "ground_truth": {0: 2, 1: 1, 2: 1}
    },

    # ============== Financial & Government Schemes (New Category) ==============
    {
        "qid": "sg_gov_01",
        "query": "Did I get the free bus rides with the new card?",
        "documents": [
            "You are eligible for the Merdeka Generation (MG) concession, giving 25% off public transport fares, not free rides.",
            "You need to tap your MG Card (the blue one) on the bus and train gantries.",
            "You complained about the bus being too crowded during peak hours.",
            "Your friend Aunty Lily also thought the MG card gave free rides."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_gov_02",
        "query": "What is the minimum retirement sum for CPF again? I forgot.",
        "documents": [
            "The current Full Retirement Sum (FRS) is $205,000 for people turning 55 in 2024.",
            "You have $180,000 in your CPF Retirement Account currently.",
            "You told your son you worry about having enough money when you reach 65.",
            "The money market fund your financial planner suggested performed poorly last quarter."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Multi-hop & Temporal Reasoning ==============
    {
        "qid": "sg_multihop_01",
        "query": "When was the last time I checked my HbA1c?",
        "documents": [
            "On 12 March 2025, you visited Raffles Medical for a diabetes follow-up.",
            "Your HbA1c result was 7.1%—slightly above target.",
            "You mentioned you forgot to bring your glucose meter to the clinic.",
            "You like eating kueh lapis from the bakery near your flat."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_multihop_02",
        "query": "Did I take my blood pressure pill this morning?",
        "documents": [
            "At 8:15 AM today, you logged: 'Took BP pill and had kopi-O.'",
            "Your usual BP meds are amlodipine 5mg once daily in the morning.",
            "Yesterday you skipped your pill because you felt dizzy.",
            "You watered your orchids after breakfast."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_multihop_03",
        "query": "I need to take Bus 131. Is it a long wait?",
        "documents": [
            "You take Bus 131 from Stop 52121 every Thursday at 8:45 AM.",
            "The bus arrival app shows Bus 131 is 5 minutes away from Stop 52121 now (Current time is 8:40 AM on Thursday).",
            "You don't like taking the train because the stairs are too much.",
            "The clinic appointment is at 9:30 AM."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_multihop_04",
        "query": "Did eating that char kway teow last night affect my blood sugar reading this morning?",
        "documents": [
            "You ate char kway teow with extra lard at 7 PM last night.",
            "Your fasting blood glucose reading this morning (8 AM) was 9.5 mmol/L (Target <7.0).",
            "The nurse told you high-carb, high-fat meals the night before can spike morning sugar.",
            "You normally have your highest reading on Mondays after a weekend treat."
        ],
        "ground_truth": {0: 1, 1: 2, 2: 2}
    },
    {
        "qid": "sg_temporal_01",
        "query": "Before I started taking the new pills, was my blood pressure higher?",
        "documents": [
            "In August 2025, your average BP was 158/92.",
            "You started telmisartan on 1 Sept 2025.",
            "In October 2025, your average BP dropped to 138/84.",
            "You began walking 20 minutes daily in July."
        ],
        "ground_truth": {0: 2, 1: 1, 2: 1}
    },

    # ============== Ambiguous & Vague Queries ==============
    {
        "qid": "sg_ambiguous_01",
        "query": "What’s the update on my ‘appointment’?",
        "documents": [
            "Your dental cleaning is next Monday at 3 PM.",
            "Your daughter’s job interview went well—she got the offer!",
            "You have a physiotherapy session this Friday.",
            "The void deck chess group meets every evening."
        ],
        "ground_truth": {0: 1, 2: 1}
    },
    {
        "qid": "sg_ambiguous_02",
        "query": "You know that thing I take for sugar?",
        "documents": [
            "You take metformin 500mg twice daily for type 2 diabetes.",
            "You like adding less sugar to your kopi now.",
            "Your blood sugar log shows fasting levels between 6.0–7.2 mmol/L.",
            "You gave up eating sweet kueh last month."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_ambiguous_03",
        "query": "Which 'yellow' vaccine did the nurse tell me to get last time?",
        "documents": [
            "The nurse reminded you about the Influenza (Flu) vaccine and the Pneumonia vaccine.",
            "You got your Pneumonia vaccine (Prevenar) on 15 May 2025.",
            "You need to get the Flu shot next month.",
            "Yellow Fever vaccine is only needed for travel to specific countries, which you are not doing."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_ambiguous_04",
        "query": "Where is the thing I use for my garden?",
        "documents": [
            "You were pruning your chilli plant earlier and put the small green shears on the balcony shelf.",
            "Your son bought you new gardening gloves last Sunday.",
            "You were looking for the small shovel this morning to re-pot your orchid.",
            "The large watering can is kept by the kitchen sink."
        ],
        "ground_truth": {0: 2, 2: 1}
    },

    # ============== No Relevant Answer (False Positive Tests) ==============
    {
        "qid": "sg_negative_01",
        "query": "Can I get a refund for my expired bus card?",
        "documents": [
            "You topped up $20 on your EZ-Link card last Monday.",
            "Expired EZ-Link cards cannot be refunded, but you can transfer remaining value to a new card at TransitLink offices.",
            "You complained the bus was late twice this week.",
            "You like sitting at the back of the bus."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_02",
        "query": "How do I apply for a passport for my cat?",
        "documents": [
            "You took your cat to the vet last month for vaccination.",
            "Singapore doesn’t issue passports for pets—only health certificates for travel.",
            "Your cat’s name is Mimi.",
            "You feed Mimi wet food twice a day."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_03",
        "query": "Can I give my leftover medicine to my friend with same problem?",
        "documents": [
            "The nurse said: 'Never share medicine—even if symptoms look same.'",
            "Your friend has high blood pressure like you.",
            "You have extra pills from your last refill.",
            "You said your friend can’t afford to see a doctor."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_negative_04",
        "query": "Where can I buy a lottery ticket for tomorrow's draw?",
        "documents": [
            "You usually buy your 4D tickets from the Singapore Pools outlet near Block 55.",
            "You won $500 with the numbers from your deceased husband's car plate last month.",
            "The staff there know your usual numbers.",
            "The documents do not contain information about tomorrow's draw or lottery operations."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_05",
        "query": "Who is the Prime Minister of Singapore?",
        "documents": [
            "You recently complained about the rising cost of utilities and COE prices.",
            "You said your son likes to watch the Prime Minister's National Day Rally speech.",
            "You remembered the first Prime Minister, Mr. Lee Kuan Yew, very clearly.",
            "The documents contain conversational references but no factual answer to the query."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_06",
        "query": "What time is the next plane landing at Changi Airport?",
        "documents": [
            "Your daughter is scheduled to land at Changi Airport next Tuesday at 4:30 PM.",
            "You like watching the planes fly overhead near the park.",
            "You took a taxi there last time because you were rushing.",
            "The documents do not contain real-time or general flight information."
        ],
        "ground_truth": {}
    },
        {
        "qid": "sg_health_11",
        "query": "The clinic gave me a new inhaler—use how many puffs?",
        "documents": [
            "You were prescribed Salbutamol inhaler at the polyclinic yesterday.",
            "Doctor said: 'Use 2 puffs when breathless, up to every 4–6 hours as needed.'",
            "You keep it in your handbag next to your tissue packet.",
            "You’ve never used an inhaler before."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_health_12",
        "query": "Can I drink herbal tea with my blood thinner?",
        "documents": [
            "You are on warfarin for atrial fibrillation.",
            "The pharmacist warned certain herbal teas (like ginkgo) can interact with warfarin.",
            "You like to drink chrysanthemum tea at night.",
            "Your INR test is due next week."
        ],
        "ground_truth": {1: 2, 0: 1}
    },
    {
        "qid": "sg_health_13",
        "query": "What time should I check my sugar tomorrow morning?",
        "documents": [
            "Your nurse advised to check fasting glucose before breakfast at around 7:00 AM.",
            "You usually wake up at 6:45 AM to boil water for kopi.",
            "You record the reading in your small blue notebook.",
            "You sometimes forget and check after breakfast."
        ],
        "ground_truth": {0: 2, 1: 1, 2: 1}
    },
    {
        "qid": "sg_health_14",
        "query": "I have a bit of cough—should I still go for my flu jab?",
        "documents": [
            "You booked your flu shot at the CC health booth this Saturday.",
            "Mild cough without fever is usually okay for vaccination; postpone if you develop fever or feel very unwell.",
            "You had 37.1°C last night, no fever.",
            "You slept better after honey lemon."
        ],
        "ground_truth": {1: 2, 0: 1, 2: 1}
    },
    {
        "qid": "sg_health_15",
        "query": "The physio taught me ankle pumps—how many sets ah?",
        "documents": [
            "Your physiotherapy homework: ankle pumps 10 reps × 3 sets, twice daily.",
            "You prefer to do them while watching the 7 PM drama.",
            "You said it helps with the leg swelling.",
            "You misplaced the exercise paper."
        ],
        "ground_truth": {0: 2}
    },

    # ============== Family & Relationships (New) ==============
    {
        "qid": "sg_family_09",
        "query": "My sister’s 70th birthday—when did we say we are meeting?",
        "documents": [
            "Family dinner at Serangoon Gardens this Sunday, 6:30 PM.",
            "Your nephew booked a table for 10 at a zi char restaurant.",
            "You planned to bring a small pandan cake.",
            "You are taking Bus 317 with Aunty Lian."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_family_10",
        "query": "Did I argue with my daughter yesterday or not?",
        "documents": [
            "You felt upset when she reminded you to stop taking the 'Lingzhi' drink.",
            "Later she called back and both of you apologized.",
            "You agreed to show her all supplements at the next visit.",
            "You sent her a heart emoji before sleeping."
        ],
        "ground_truth": {1: 2, 0: 1, 2: 1}
    },

    # ============== Food & Hawker Culture (New) ==============
    {
        "qid": "sg_food_10",
        "query": "Can I still order fish soup if my stomach is sensitive?",
        "documents": [
            "You told the bot milky soups give you gas.",
            "Clear sliced fish soup without milk and less fried garlic suits you better.",
            "You prefer mee sua over rice when your stomach is upset.",
            "Your favourite stall closes at 2 PM."
        ],
        "ground_truth": {1: 2, 0: 1, 3: 1}
    },
    {
        "qid": "sg_food_11",
        "query": "Deepavali coming—can I eat murukku if I got high cholesterol?",
        "documents": [
            "Your dietitian said fried snacks should be limited; small portions only.",
            "You plan to visit your neighbour’s open house.",
            "You enjoyed murukku last year and had heartburn after.",
            "You also like payasam but avoid extra sugar."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_food_12",
        "query": "Is brown rice really better for me?",
        "documents": [
            "Your doctor explained brown rice has more fibre and may help with sugar control.",
            "You find it a bit hard to chew unless mixed with white rice.",
            "You bought a 5 kg bag of brown rice last month.",
            "Your grandson prefers white rice."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Hobbies & Pastimes (New) ==============
    {
        "qid": "sg_hobby_08",
        "query": "What time the bird-watching group meets?",
        "documents": [
            "The Bishan Park birders meet every Sunday at 7:15 AM by Carpark A.",
            "You joined them once and saw kingfishers.",
            "They share photos on a WhatsApp group.",
            "You plan to bring your small binoculars."
        ],
        "ground_truth": {0: 2}
    },
    {
        "qid": "sg_hobby_09",
        "query": "I want to try calligraphy—need to bring what?",
        "documents": [
            "The CC class says bring a brush, ink, and A3 practice paper.",
            "You already have a brush your son bought in Chinatown.",
            "The first lesson is this Thursday 10 AM.",
            "You worry ink will stain your table."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_hobby_10",
        "query": "Can I play mahjong if my eyes get tired easily?",
        "documents": [
            "You said bright overhead lights make your eyes strain.",
            "Consider taking breaks every 30–45 minutes and use a brighter desk lamp instead of dim ceiling light.",
            "You only play friendly rounds on Sunday afternoons.",
            "You wear your new reading glasses."
        ],
        "ground_truth": {1: 2, 0: 1}
    },

    # ============== Worries & Emotions (New) ==============
    {
        "qid": "sg_worry_08",
        "query": "I keep thinking about my late husband at night—cannot sleep.",
        "documents": [
            "You said your mind becomes busy after lights off.",
            "The counsellor suggested a simple wind-down routine: light stretching, warm shower, and writing thoughts in a small notebook.",
            "You find listening to old Hokkien songs calming.",
            "Your bedtime is usually 10 PM."
        ],
        "ground_truth": {1: 2, 2: 1}
    },
    {
        "qid": "sg_worry_09",
        "query": "Neighbour’s renovation so noisy—how to complain properly?",
        "documents": [
            "You noted drilling past 7 PM last night.",
            "Town Council accepts feedback with unit number, date, and time; HDB guidelines limit noisy works to approved hours.",
            "You kept a diary of the noise for three days.",
            "Security uncle at the void deck suggested calling the hotline."
        ],
        "ground_truth": {1: 2, 2: 1}
    },

    # ============== Routines & Practical Life (New) ==============
    {
        "qid": "sg_routine_09",
        "query": "What time is my rubbish chute washing this week?",
        "documents": [
            "Block notice said chute washing on Thursday at 11:00 AM.",
            "You planned to double-tie your trash bags the night before.",
            "Last month the crew came late.",
            "You keep your pails along the corridor during drying."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_routine_10",
        "query": "Which MRT exit is nearest to the hospital clinic?",
        "documents": [
            "For Tan Tock Seng Hospital, Novena MRT Exit A leads to the underpass.",
            "You prefer sheltered paths when it rains.",
            "You sometimes get confused by Exit B signage.",
            "Your appointment is at 10:30 AM."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_routine_11",
        "query": "When must I refill my medicine?",
        "documents": [
            "Your metformin supply will finish in 6 days (you have 12 tablets left, 2 per day).",
            "The polyclinic pharmacy opens at 8:30 AM weekdays.",
            "You like to avoid Monday crowds.",
            "Your daughter can pick up after work on Thursday."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Technology & Modern Life (New) ==============
    {
        "qid": "sg_tech_08",
        "query": "How to check my bus timing without opening too many apps?",
        "documents": [
            "You pinned 'SG BusNow' to your home screen last month.",
            "From the home screen, tap the widget that shows Bus 131 → it updates arrival times.",
            "You often forget which app is for bus and which for weather.",
            "Your favourite stop is 52121."
        ],
        "ground_truth": {1: 2, 0: 1, 3: 1}
    },
    {
        "qid": "sg_tech_09",
        "query": "My WhatsApp keep forwarding fake news—how to stop auto-download?",
        "documents": [
            "Settings → Storage and Data → Media auto-download → uncheck for Photos/Videos/Documents.",
            "You joined two family groups that forward many clips.",
            "You worry your data plan finishes fast.",
            "Your daughter taught you to long-press and 'Report spam' if needed."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_tech_10",
        "query": "How to see subtitles on the TV news ah?",
        "documents": [
            "Your remote has a 'Sub' or 'CC' button—press to toggle closed captions.",
            "On Channel 8 news, subtitles are available during prime time.",
            "You sit 2 metres from the TV.",
            "Your hearing aid battery was changed yesterday."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Singlish & Casual Chat (New) ==============
    {
        "qid": "sg_singlish_08",
        "query": "Today I walk a bit only already *peng san* (want to faint). Why ah?",
        "documents": [
            "You skipped breakfast and only drank kopi-O.",
            "The weather was humid; you didn’t bring water.",
            "You felt better after resting at the void deck and sipping water.",
            "You have an appointment to review blood pressure next week."
        ],
        "ground_truth": {2: 1, 0: 1, 1: 1}
    },
    {
        "qid": "sg_singlish_09",
        "query": "Later go CC alone very *sian*. Got any lobang (good tip) to make friends?",
        "documents": [
            "Try the befriending corner near the notice board at 3 PM; volunteers introduce newcomers.",
            "You enjoy chatting with other orchid growers.",
            "They also have a simple tea session after the talk.",
            "You feel shy to start conversations."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_singlish_10",
        "query": "My slippers very slippery—later rain sure *kena* fall.",
        "documents": [
            "You mentioned the soles are worn smooth.",
            "The podiatry nurse suggested sandals with better grip and a back strap.",
            "You almost slipped at the wet market last week.",
            "Your daughter offered to buy you a new pair on Sunday."
        ],
        "ground_truth": {1: 2, 0: 1, 2: 1}
    },

    # ============== Nostalgia & Memories (New) ==============
    {
        "qid": "sg_memory_04",
        "query": "Which choir song did I like to sing in school days?",
        "documents": [
            "You loved 'Chan Mali Chan' during secondary school choir practice.",
            "Your teacher made the class perform at National Day.",
            "You still remember the actions for the chorus.",
            "You kept the old programme booklet in a shoebox."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_memory_05",
        "query": "Last Mid-Autumn we went where to see lanterns?",
        "documents": [
            "You and your grandchildren walked around Gardens by the Bay to see the lantern displays.",
            "You bought mooncakes from the pop-up stall.",
            "You took photos with the giant rabbit lantern.",
            "You went home by taxi because it rained."
        ],
        "ground_truth": {0: 2, 2: 1}
    },

    # ============== Community & Neighbours (New) ==============
    {
        "qid": "sg_community_04",
        "query": "The RC said got fire drill—what time ah?",
        "documents": [
            "Block fire drill this Wednesday at 10:00 AM; assembly point at the playground.",
            "CERT volunteers will demonstrate using a fire extinguisher.",
            "You plan to wear comfortable shoes.",
            "Last year you missed it because of a clinic visit."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_community_05",
        "query": "Who helped me carry groceries last time?",
        "documents": [
            "Your neighbour’s son, Alan, carried them from the lift to your door.",
            "You thanked him with a packet of pandan cake.",
            "You met him again at the void deck yesterday.",
            "He studies at ITE College Central."
        ],
        "ground_truth": {0: 2}
    },

    # ============== Financial & Government Schemes (New) ==============
    {
        "qid": "sg_gov_03",
        "query": "My CDC vouchers can use at which shop downstairs?",
        "documents": [
            "The fruit stall and the economic rice stall at your hawker centre both accept CDC vouchers.",
            "The sundry shop auntie prefers PayNow but also accepts the vouchers.",
            "You used $8 worth last week on vegetables.",
            "You keep the QR in your phone gallery."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_gov_04",
        "query": "Do I need to renew my PG card this year?",
        "documents": [
            "Your Pioneer Generation card expiry is stated as 2027.",
            "You only need to replace it if lost or damaged before then.",
            "You last used it at the clinic for subsidy.",
            "You keep it behind your IC in the wallet."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== Multi-hop & Temporal Reasoning (New) ==============
    {
        "qid": "sg_multihop_05",
        "query": "Did I drink enough water before my blood test this morning?",
        "documents": [
            "You had to fast from food but water was allowed before the 9 AM test.",
            "You told the bot you drank one full cup at 7:30 AM.",
            "You reached the clinic at 8:50 AM.",
            "You avoided coffee because it may affect fasting."
        ],
        "ground_truth": {0: 1, 1: 2}
    },
    {
        "qid": "sg_multihop_06",
        "query": "When was the last time I changed my hearing aid battery?",
        "documents": [
            "You wrote: 'Changed left hearing aid battery on 3 Oct 2025' in your calendar.",
            "You complained of softer TV volume yesterday.",
            "The spare batteries are in the top drawer.",
            "Your right hearing aid was serviced in June."
        ],
        "ground_truth": {0: 2, 1: 1}
    },
    {
        "qid": "sg_multihop_07",
        "query": "I’m going temple then market—what bus route is easier?",
        "documents": [
            "You usually take Bus 31 to the temple at Paya Lebar first.",
            "From there, Bus 40 goes directly to the wet market near your block.",
            "You prefer not to cross many roads.",
            "You carry a small trolley for groceries."
        ],
        "ground_truth": {1: 2, 0: 1}
    },

    # ============== Ambiguous & Vague (New) ==============
    {
        "qid": "sg_ambiguous_05",
        "query": "Where’s the 'blue bottle' I use at night?",
        "documents": [
            "You keep the blue cap glaucoma eye drops in the fridge door.",
            "You also have a blue bottle of medicated oil on the bedside table.",
            "The nurse said to use the eye drops at 9 PM daily.",
            "You sometimes mix up the two blue bottles."
        ],
        "ground_truth": {0: 2, 2: 1}
    },
    {
        "qid": "sg_ambiguous_06",
        "query": "Did we say Friday or the 'other one' for my appointment?",
        "documents": [
            "Your dental scaling is this Friday at 11 AM.",
            "You also have physiotherapy next Wednesday at 2 PM.",
            "You told the bot: 'I always mix up these two.'",
            "Your daughter can only drive you on Fridays."
        ],
        "ground_truth": {0: 2, 1: 1}
    },

    # ============== No Relevant Answer / False Positive (New) ==============
    {
        "qid": "sg_negative_07",
        "query": "What’s the exchange rate for AUD to SGD right now?",
        "documents": [
            "Your daughter travelled to Australia last month.",
            "You changed $200 at the Money Changer last week.",
            "You keep the receipts in your blue folder.",
            "The documents do not contain real-time exchange rates."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_08",
        "query": "What time is the fireworks at Marina Bay tonight?",
        "documents": [
            "You like to watch fireworks on TV instead of going out.",
            "You once went to Marina Bay for countdown with your family.",
            "You bought snacks for New Year’s Eve last year.",
            "The documents do not contain event timing for tonight."
        ],
        "ground_truth": {}
    },
    {
        "qid": "sg_negative_09",
        "query": "Who is the President of Singapore now?",
        "documents": [
            "You watched the National Day Parade on TV.",
            "You discussed politics with your neighbour last week.",
            "You keep newspaper clippings of past presidents.",
            "The documents do not state the current office holder."
        ],
        "ground_truth": {}
    }
]

#### Experiments

In [4]:
model_names = {
    "GTE-Multilingual": "Alibaba-NLP/gte-multilingual-reranker-base",
    "GTE-ModernBERT": "Alibaba-NLP/gte-reranker-modernbert-base",
    "Jina-v2-Base": "jinaai/jina-reranker-v2-base-multilingual",
    "Jina-v1-Turbo": "jinaai/jina-reranker-v1-turbo-en",
    "Jina-v1-Tiny": "jinaai/jina-reranker-v1-tiny-en",
    "BGE-v2-M3": "BAAI/bge-reranker-v2-m3",
    "BGE-Base": "BAAI/bge-reranker-base",
    "MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
}

def load_model(name, path):
    try:
        model = CrossEncoder(model_name_or_path=path, trust_remote_code=True, device=device)
        tokenizer = AutoTokenizer.from_pretrained(path)

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token or tokenizer.unk_token
        model.model.config.pad_token_id = tokenizer.pad_token_id

        print(f"✅ Loaded {name}")
        return name, model
    except Exception as e:
        print(f"❌ Failed to load {name}: {e}")
        return name, None


In [5]:
models = {}
batch_size = 2  # ✅ Adjust to avoid OOM

print("Loading models in mini-batches...")
model_items = list(model_names.items())

for i in range(0, len(model_items), batch_size):
    batch = model_items[i:i+batch_size]
    with ThreadPoolExecutor(max_workers=batch_size) as executor:
        futures = [executor.submit(load_model, name, path) for name, path in batch]
        for future in as_completed(futures):
            name, model = future.result()
            if model:
                models[name] = model

print(f"\nLoaded {len(models)} models successfully.")


Loading models in mini-batches...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/598M [00:00<?, ?B/s]

modeling.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/612M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded GTE-ModernBERT


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded GTE-Multilingual


config.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_bert.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v1-turbo-en:
- configuration_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


configuration_xlm_roberta.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- configuration_xlm_roberta.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`torch_dtype` is deprecated! Use `dtype` instead!


modeling_bert.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v1-turbo-en:
- modeling_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_xlm_roberta.py: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/75.6M [00:00<?, ?B/s]

mha.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- mha.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


block.py: 0.00B [00:00, ?B/s]

mlp.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- block.py
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


embedding.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- embedding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


xlm_padding.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- xlm_padding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual:
- modeling_xlm_roberta.py
- mha.py
- block.py
- embedding.py
- xlm_padding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/557M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded Jina-v1-Turbo


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded Jina-v2-Base


config.json:   0%|          | 0.00/795 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_bert.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v1-tiny-en:
- configuration_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

modeling_bert.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-reranker-v1-tiny-en:
- modeling_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/66.1M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded Jina-v1-Tiny


tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded BGE-v2-M3


config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/all-MiniLM-L6-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded MiniLM-L6-v2


tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Loaded BGE-Base

Loaded 8 models successfully.


In [6]:
def evaluate_reranker(reranker_model, test_cases, top_k=2):
    reciprocal_ranks, latencies = [], []
    hit_count = 0

    for test_case in test_cases:
        query = test_case["query"]
        documents = test_case["documents"]
        ground_truth_indices = set(test_case["ground_truth"].keys())
        pairs = [[query, doc] for doc in documents]

        start = time.time()
        scores = reranker_model.predict(pairs, show_progress_bar=False)
        latencies.append(time.time() - start)
        ranked = np.argsort(scores)[::-1]

        first_rel = next((r for r, idx in enumerate(ranked, 1) if idx in ground_truth_indices), 0)
        reciprocal_ranks.append(1.0 / first_rel if first_rel else 0)
        if any(idx in ground_truth_indices for idx in ranked[:top_k]):
            hit_count += 1

    return {
        "mrr": np.mean(reciprocal_ranks),
        f"hit_rate@{top_k}": hit_count / len(test_cases),
        "avg_latency": np.mean(latencies),
    }

In [7]:
results = []
for name, model in models.items():
    print(f"Evaluating {name}...")
    res = evaluate_reranker(model, test_set)
    res["model"] = name
    results.append(res)

df = pd.DataFrame(results)
df.rename(columns={
    'model': 'Model',
    'mrr': 'MRR',
    'hit_rate@2': 'Hit Rate@2',
    'avg_latency': 'Avg Latency (s)',
}, inplace=True)

print("\n--- Base Metrics ---")
print(df.to_string(index=False, float_format="%.4f"))


Evaluating GTE-ModernBERT...
Evaluating GTE-Multilingual...
Evaluating Jina-v1-Turbo...
Evaluating Jina-v2-Base...
Evaluating Jina-v1-Tiny...
Evaluating BGE-v2-M3...
Evaluating MiniLM-L6-v2...
Evaluating BGE-Base...

--- Base Metrics ---
   MRR  Hit Rate@2  Avg Latency (s)            Model
0.8661      0.9306           0.3843   GTE-ModernBERT
0.8468      0.9191           0.3005 GTE-Multilingual
0.7803      0.8555           0.0619    Jina-v1-Turbo
0.8603      0.9133           1.2080     Jina-v2-Base
0.8189      0.8555           0.0438     Jina-v1-Tiny
0.8314      0.8844           0.7555        BGE-v2-M3
0.6989      0.8266           0.0502     MiniLM-L6-v2
0.8247      0.8960           0.2369         BGE-Base


In [8]:
# Ensure the normalized + heuristic columns exist
if 'Heuristic Score' not in df.columns:
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()

    df['MRR_norm'] = scaler.fit_transform(df[['MRR']])
    df['Hit Rate@2_norm'] = scaler.fit_transform(df[['Hit Rate@2']])
    df['Latency_norm'] = scaler.fit_transform(-df[['Avg Latency (s)']])

    w_mrr, w_hit, w_latency = 0.33333, 0.33333, 0.33333
    df['Heuristic Score'] = (
        w_mrr * df['MRR_norm'] +
        w_hit * df['Hit Rate@2_norm'] +
        w_latency * df['Latency_norm']
    )

    scenarios = {
        "Accuracy-Focused": (0.45, 0.45, 0.1),
        "Balanced": (0.333333, 0.33333, 0.333333),
        "Latency-Focused": (0.25, 0.25, 0.5),
    }
    for label, (wm, wh, wl) in scenarios.items():
        df[label] = wm * df['MRR_norm'] + wh * df['Hit Rate@2_norm'] + wl * df['Latency_norm']

# Now safe to display
df = df.sort_values(by='Heuristic Score', ascending=False)
display(df[['Model', 'MRR', 'Hit Rate@2', 'Avg Latency (s)', 'Heuristic Score']])
display(df[['Model', 'Accuracy-Focused', 'Balanced', 'Latency-Focused']])


Unnamed: 0,Model,MRR,Hit Rate@2,Avg Latency (s),Heuristic Score
0,GTE-ModernBERT,0.866089,0.930636,0.384334,0.769494
1,GTE-Multilingual,0.846821,0.919075,0.300473,0.603266
7,BGE-Base,0.824663,0.895954,0.236934,0.28753
4,Jina-v1-Tiny,0.818882,0.855491,0.043827,0.022326
3,Jina-v2-Base,0.860308,0.913295,1.20801,-0.153705
5,BGE-v2-M3,0.831407,0.884393,0.755535,-0.231995
2,Jina-v1-Turbo,0.780347,0.855491,0.06185,-0.245758
6,MiniLM-L6-v2,0.69894,0.82659,0.050212,-1.051158


Unnamed: 0,Model,Accuracy-Focused,Balanced,Latency-Focused
0,GTE-ModernBERT,1.042649,0.769497,0.574396
1,GTE-Multilingual,0.741699,0.603269,0.504396
7,BGE-Base,0.257458,0.287532,0.309015
4,Jina-v1-Tiny,-0.27682,0.022329,0.236003
3,Jina-v2-Base,0.54809,-0.153709,-0.65499
5,BGE-v2-M3,0.029422,-0.231997,-0.418725
2,Jina-v1-Turbo,-0.622288,-0.245758,0.023187
6,MiniLM-L6-v2,-1.72021,-1.051162,-0.573282
