# Further experiments with the RAG model building using a modular approach. 
 

> **Note:**  
> - The experiments in this file served as the basis of the final RAG model  
> - Tested retrieval/generation parameters (e.g., fuzzy match threshold, number of retrieved documents).  
> - Helped setup the core evaluation metrics   
> - Did not include later metric additions (e.g., Jaccard similarity).  
> - Served as the basis for the final modular RAG implementation, where clearer metric evaluation and where more controlled experiments were conducted.


In [56]:
# imports
import psycopg2
import pandas as pd
from sentence_transformers import SentenceTransformer 
import faiss
from fuzzywuzzy import process
import torch
import os
import re
from fuzzywuzzy import fuzz
from pydantic import Field
import shutil
from fuzzywuzzy import process

In [57]:
# for llamaindex
from llama_index.core import VectorStoreIndex, ServiceContext
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.schema import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding as LlamaIndexHuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore

In [58]:
# for retrieval
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_core.documents import Document as LangchainDocument
from langchain_core.retrievers import BaseRetriever as LangchainBaseRetriever # Alias LangChain's BaseRetriever
from typing import List, Dict, Any

In [59]:
# evaluation
from sklearn.metrics import precision_score, recall_score
from evaluate import load

In [83]:
SCRIPT_DIR = os.path.dirname(os.path.abspath("My_RAG_model.ipynb"))


In [85]:
SCRIPT_DIR

'C:\\Users\\utsav\\Desktop\\research\\updated'

In [60]:
#  postgres database connection strings
DB_CONFIG = {
    "host": "localhost",
    "database": "postgres",
    "user": "postgres",
    "password": "postgres",
    "port": "5432"
}

# model names
BIOBERT_MODEL_NAME = "dmis-lab/biobert-base-cased-v1.1"
LLAMA3_MODEL_PATH = "Meta-Llama-3.1-8B-Instruct-Q6_K.gguf"
VECTOR_DB_PATH = "./faiss_index"
PREPROCESSED_DATA_CSV = "./preprocessed_faers_data.csv"

In [61]:
# I have 10 reaction categories, but if someone uses a variation of these reaction category in their query, it should still map accurately
category_mappings = {
    "product/administration issues": "Product/Administration Issues", "dermatological/allergic": "Dermatological/Allergic",
    "neurological": "Neurological", "gastrointestinal": "Gastrointestinal", "psychiatric": "Psychiatric",
    "cardiovascular": "Cardiovascular", "respiratory": "Respiratory", "general/systemic disorders": "General/Systemic Disorders",
    "musculoskeletal": "Musculoskeletal", "other organ systems/conditions": "Other Organ Systems/Conditions",
    "dermatological": "Dermatological/Allergic", "allergic": "Dermatological/Allergic", "allergy": "Dermatological/Allergic",
    "skin": "Dermatological/Allergic", "anaphylactic": "Dermatological/Allergic", "immune": "Dermatological/Allergic",
    "nervous system": "Neurological", "brain": "Neurological", "head": "Neurological", "cognitive": "Neurological",
    "mental health": "Psychiatric", "mental": "Psychiatric", "psychological": "Psychiatric", "behavioral": "Psychiatric",
    "stomach": "Gastrointestinal", "gut": "Gastrointestinal", "digestive": "Gastrointestinal",
    "heart": "Cardiovascular", "circulatory": "Cardiovascular",
    "lung": "Respiratory", "breathing": "Respiratory",
    "general symptoms": "General/Systemic Disorders", "systemic": "General/Systemic Disorders", "overall health": "General/Systemic Disorders",
    "pain": "General/Systemic Disorders", "functional impairment": "General/Systemic Disorders",
    "bone": "Musculoskeletal", "joint": "Musculoskeletal", "muscle": "Musculoskeletal", "muscular": "Musculoskeletal",
    "product issues": "Product/Administration Issues", "administration issues": "Product/Administration Issues",
    "drug administration": "Product/Administration Issues", "medication error": "Product/Administration Issues",
    "drug use": "Product/Administration Issues",
    "metabolic": "Other Organ Systems/Conditions", "metabolism": "Other Organ Systems/Conditions",
    "metabolic issues": "Other Organ Systems/Conditions", "infection": "Other Organ Systems/Conditions",
    "infectious": "Other Organ Systems/Conditions", "blood": "Other Organ Systems/Conditions",
    "liver": "Other Organ Systems/Conditions", "kidney": "Other Organ Systems/Conditions", "urinary": "Other Organ Systems/Conditions",
    "eye": "Other Organ Systems/Conditions", "vision": "Other Organ Systems/Conditions", "pregnancy": "Other Organ Systems/Conditions",
    "fetal": "Other Organ Systems/Conditions", "injury": "Other Organ Systems/Conditions", "investigations": "Other Organ Systems/Conditions",
    "social": "Other Organ Systems/Conditions", "ear": "Other Organ Systems/Conditions", "throat": "Other Organ Systems/Conditions",
}

### Test cases built by querying the database

In [79]:
# test cases
test_cases = [
    {
        "query": "What skin reactions are reported for adult females who used desloratadine?",
        "filters": {'sex': 'F', 'age_group': 'adult', 'prod_ai': 'DESLORATADINE', 'reaction_category': 'Dermatological/Allergic'},
        "ground_truth_reactions": { 
            "pruritus",
            "rash",
            "erythema",
            "hyperhidrosis",
            "hypersensitivity",
            "skin disorder",
            "guttate psoriasis",
            "photosensitivity reaction",
            "rash erythematous",
            "skin dystrophy",
            "skin lesion",
            "dermatitis exfoliative generalised",
            "erythema nodosum",
            "generalised erythema",
            "pallor",
            "pruritus generalised",
            "rash maculo-papular",
            "skin plaque",
            "skin ulcer",
            "skin wound"
        },
        "expected_retrieved_pts": {
            "erythema",
            "rash erythematous",
            "photosensitivity reaction",
            "rash maculo-papular",
            "rash",
            "skin dystrophy",
            "hypersensitivity",
            "skin plaque"
}
    },
    {
        "query": "List all dermatological adverse events for males over 60 years old who took FEXOFENADINE.",
        "filters": {'sex': 'M', 'age_group': 'elderly', 'prod_ai': 'FEXOFENADINE', 'reaction_category': 'Dermatological/Allergic'},
        "ground_truth_reactions": {
            "acute generalised exanthematous pustulosis",
            "eczema",
            "erythema",
            "hypersensitivity",
            "lip swelling",
            "pruritus",
            "rash pruritic",
            "skin disorder",
            "skin irritation",
            "skin mass"
        },
        "expected_retrieved_pts": {
            "erythema",
            "rash pruritic",
            "eczema",
            "pruritus",
            "skin irritation",
            "lip swelling",
            "skin disorder",
            "acute generalised exanthematous pustulosis"
        }

    },
    {
        "query": "Are there any gastrointestinal reactions reported for children using CETIRIZINE?",
        "filters": {'age_group': 'child', 'prod_ai': 'CETIRIZINE', 'reaction_category': 'Gastrointestinal'},
        "ground_truth_reactions": {
            "abdominal discomfort",
            "abdominal pain",
            "abdominal pain upper",
            "coeliac disease",
            "diarrhoea",
            "gastrooesophageal reflux disease",
            "nausea",
            "vomiting"
            
        },
        "expected_retrieved_pts": {
                "nausea",
                "gastrooesophageal reflux disease",
                "abdominal pain",
                "diarrhoea",
                "abdominal discomfort",
                "vomiting",
                "abdominal pain upper"

        }
    },
    {
        "query": "What are the common adverse reactions reported for adult patients using LORATADINE, regardless of sex or reaction type?",
        "filters": {'age_group': 'adult', 'prod_ai': 'LORATADINE'}, 
        "ground_truth_reactions": { "abdominal discomfort"
                        ,"abdominal distension"
                        ,"abdominal pain"
                        ,"abdominal pain lower"
                        ,"abdominal pain upper"
                        ,"abdominal rigidity"
                        ,"abnormal behaviour"
                        ,"abnormal dreams"
                        ,"abortion induced"
                        ,"abortion spontaneous"
                        ,"accidental exposure to product"
                        ,"accidental exposure to product by child"
                        ,"accidental overdose"
                        ,"accommodation disorder"
                        ,"activated partial thromboplastin time prolonged"
                        ,"acute kidney injury"
                        ,"adverse drug reaction"
                        ,"adverse reaction"
                        ,"ageusia"
                        ,"aggression"
                        ,"agitated depression"
                        ,"agitation"
                        ,"alanine aminotransferase increased"
                        ,"alcohol interaction"
                        ,"alopecia"
                        ,"alopecia areata"
                        ,"alopecia universalis"
                        ,"altered state of consciousness"
                        ,"amnesia"
                        ,"amnestic disorder"
                        ,"anaemia"
                        ,"anal haemorrhage"
                        ,"anaphylactic reaction"
                        ,"anaphylactic shock"
                        ,"anger"
                        ,"angina pectoris"
                        ,"angioedema"
                        ,"anhedonia"
                        ,"anosmia"
                        ,"anterograde amnesia"
                        ,"anxiety"
                        ,"aphthous ulcer"
                        ,"arrhythmia"
                        ,"arteriospasm coronary"
                        ,"arthralgia"
                        ,"arthritis"
                        ,"aspartate aminotransferase increased"
                        ,"asphyxia"
                        ,"asthenia"
                        ,"asthma"
                        ,"asthma exercise induced"
                        ,"asthmatic crisis"
                        ,"atrial fibrillation"
                        ,"autoimmune disorder"
                        ,"back pain"
                        ,"balance disorder"
                        ,"bell's palsy"
                        ,"bipolar disorder"
                        ,"bladder disorder"
                        ,"blister"
                        ,"blood bicarbonate decreased"
                        ,"blood bilirubin abnormal"
                        ,"blood calcium decreased"
                        ,"blood cholesterol increased"
                        ,"blood immunoglobulin e increased"
                        ,"blood iron decreased"
                        ,"blood magnesium decreased"
                        ,"blood ph decreased"
                        ,"blood potassium decreased"
                        ,"blood pressure decreased"
                        ,"blood pressure fluctuation"
                        ,"blood pressure increased"
                        ,"body temperature decreased"
                        ,"body temperature increased"
                        ,"bone pain"
                        ,"boredom"
                        ,"bradycardia"
                        ,"brain fog"
                        ,"burn oral cavity"
                        ,"burning sensation"
                        ,"calculus urinary"
                        ,"cardiac arrest"
                        ,"cardiac failure"
                        ,"cardiac ventricular thrombosis"
                        ,"cardio-respiratory arrest"
                        ,"cardiovascular disorder"
                        ,"cardiovascular insufficiency"
                        ,"cerebral haemorrhage"
                        ,"cerebral thrombosis"
                        ,"cerebrovascular accident"
                        ,"chest discomfort"
                        ,"chest pain"
                        ,"chills"
                        ,"choking"
                        ,"choking sensation"
                        ,"cholestasis"
                        ,"chronic fatigue syndrome"
                        ,"chronic obstructive pulmonary disease"
                        ,"chronic sinusitis"
                        ,"cold sweat"
                        ,"coma"
                        ,"completed suicide"
                        ,"condition aggravated"
                        ,"confusional state"
                        ,"conjunctivitis"
                        ,"connective tissue disorder"
                        ,"constipation"
                        ,"contraindicated product administered"
                        ,"contusion"
                        ,"convulsions local"
                        ,"coombs negative haemolytic anaemia"
                        ,"cough"
                        ,"covid-19"
                        ,"covid-19 immunisation"
                        ,"cystitis"
                        ,"cystitis noninfective"
                        ,"deafness"
                        ,"death"
                        ,"decreased appetite"
                        ,"deep vein thrombosis"
                        ,"dehydration"
                        ,"depressed level of consciousness"
                        ,"depressed mood"
                        ,"depression"
                        ,"dermatitis bullous"
                        ,"dermatitis exfoliative generalised"
                        ,"diarrhoea"
                        ,"diplopia"
                        ,"discomfort"
                        ,"discouragement"
                        ,"disorientation"
                        ,"disseminated intravascular coagulation"
                        ,"disturbance in attention"
                        ,"dizziness"
                        ,"dizziness postural"
                        ,"drug-induced liver injury"
                        ,"drug abuse"
                        ,"drug administered in wrong device"
                        ,"drug dose titration not performed"
                        ,"drug effect incomplete"
                        ,"drug effect less than expected"
                        ,"drug effective for unapproved indication"
                        ,"drug eruption"
                        ,"drug hypersensitivity"
                        ,"drug ineffective"
                        ,"drug ineffective for unapproved indication"
                        ,"drug interaction"
                        ,"drug intolerance"
                        ,"drug level increased"
                        ,"drug reaction with eosinophilia and systemic symptoms"
                        ,"drug screen positive"
                        ,"drug withdrawal syndrome"
                        ,"dry eye"
                        ,"dry mouth"
                        ,"dry skin"
                        ,"dry throat"
                        ,"dyschezia"
                        ,"dysgeusia"
                        ,"dyskinesia"
                        ,"dyspepsia"
                        ,"dysphagia"
                        ,"dysphonia"
                        ,"dyspnoea"
                        ,"dysstasia"
                        ,"ear discomfort"
                        ,"ear pain"
                        ,"ear swelling"
                        ,"eczema"
                        ,"electrocardiogram qrs complex prolonged"
                        ,"electrocardiogram qt prolonged"
                        ,"eosinophilia"
                        ,"epigastric discomfort"
                        ,"epilepsy"
                        ,"epistaxis"
                        ,"erectile dysfunction"
                        ,"erythema"
                        ,"erythema nodosum"
                        ,"euphoric mood"
                        ,"exercise tolerance decreased"
                        ,"expired product administered"
                        ,"exposure during pregnancy"
                        ,"exposure to fungus"
                        ,"exposure to unspecified agent"
                        ,"extra dose administered"
                        ,"extrasystoles"
                        ,"eye allergy"
                        ,"eye discharge"
                        ,"eye infection"
                        ,"eye irritation"
                        ,"eye oedema"
                        ,"eye pain"
                        ,"eye pruritus"
                        ,"eye swelling"
                        ,"face oedema"
                        ,"facial pain"
                        ,"faecal calprotectin increased"
                        ,"faeces discoloured"
                        ,"fall"
                        ,"fatigue"
                        ,"fear"
                        ,"feeding disorder"
                        ,"feeling abnormal"
                        ,"feeling cold"
                        ,"feeling drunk"
                        ,"feeling hot"
                        ,"feeling jittery"
                        ,"feeling of relaxation"
                        ,"flatulence"
                        ,"flushing"
                        ,"foetal death"
                        ,"food allergy"
                        ,"foreign body in mouth"
                        ,"foreign body in throat"
                        ,"fungal infection"
                        ,"gait disturbance"
                        ,"galactorrhoea"
                        ,"gastric disorder"
                        ,"gastric ulcer"
                        ,"gastroenteritis viral"
                        ,"gastrointestinal disorder"
                        ,"gastrointestinal haemorrhage"
                        ,"gastrointestinal tract irritation"
                        ,"gastrooesophageal reflux disease"
                        ,"general physical health deterioration"
                        ,"generalised erythema"
                        ,"generalised tonic-clonic seizure"
                        ,"glycosylated haemoglobin decreased"
                        ,"glycosylated haemoglobin increased"
                        ,"guttate psoriasis"
                        ,"haematemesis"
                        ,"haematochezia"
                        ,"haematotoxicity"
                        ,"haemorrhage"
                        ,"haemorrhage intracranial"
                        ,"hallucination"
                        ,"hallucination, visual"
                        ,"hangover"
                        ,"head discomfort"
                        ,"head injury"
                        ,"headache"
                        ,"heart rate abnormal"
                        ,"heart rate increased"
                        ,"heart rate irregular"
                        ,"helicobacter infection"
                        ,"hepatic cytolysis"
                        ,"hepatic encephalopathy"
                        ,"hepatic enzyme abnormal"
                        ,"hepatic failure"
                        ,"hepatic function abnormal"
                        ,"hepatic pain"
                        ,"hepatitis"
                        ,"hepatocellular injury"
                        ,"hepatotoxicity"
                        ,"hla-b*27 positive"
                        ,"hot flush"
                        ,"hyperacusis"
                        ,"hyperbilirubinaemia"
                        ,"hyperhidrosis"
                        ,"hyperkalaemia"
                        ,"hyperprolactinaemia"
                        ,"hypersensitivity"
                        ,"hypertension"
                        ,"hypertensive crisis"
                        ,"hypoaesthesia"
                        ,"hypocoagulable state"
                        ,"hypogeusia"
                        ,"hypopnoea"
                        ,"hyposmia"
                        ,"hypotension"
                        ,"hypotonia"
                        ,"hypoxia"
                        ,"ileus"
                        ,"ill-defined disorder"
                        ,"illness"
                        ,"immune thrombocytopenia"
                        ,"impaired work ability"
                        ,"imprisonment"
                        ,"inappropriate schedule of product administration"
                        ,"incorrect dosage administered"
                        ,"incorrect dose administered"
                        ,"incorrect product administration duration"
                        ,"incorrect route of product administration"
                        ,"increased appetite"
                        ,"increased upper airway secretion"
                        ,"infection"
                        ,"inflammation"
                        ,"inflammatory bowel disease"
                        ,"influenza"
                        ,"injection site erythema"
                        ,"injection site pruritus"
                        ,"injection site warmth"
                        ,"insomnia"
                        ,"intentional overdose"
                        ,"intentional product misuse"
                        ,"intentional self-injury"
                        ,"internal haemorrhage"
                        ,"international normalised ratio increased"
                        ,"irritability"
                        ,"jaundice"
                        ,"joint range of motion decreased"
                        ,"joint swelling"
                        ,"labyrinthitis"
                        ,"lacrimation increased"
                        ,"laryngeal oedema"
                        ,"lethargy"
                        ,"leukaemia"
                        ,"leukaemia recurrent"
                        ,"leukocytosis"
                        ,"libido decreased"
                        ,"lice infestation"
                        ,"lid lag"
                        ,"ligament disorder"
                        ,"ligament pain"
                        ,"limb discomfort"
                        ,"lip oedema"
                        ,"lip pain"
                        ,"lip swelling"
                        ,"localised oedema"
                        ,"loss of consciousness"
                        ,"lymphadenopathy"
                        ,"madarosis"
                        ,"major depression"
                        ,"malaise"
                        ,"mania"
                        ,"maternal exposure during pregnancy"
                        ,"medication error"
                        ,"mental fatigue"
                        ,"mental impairment"
                        ,"mental status changes"
                        ,"metamorphopsia"
                        ,"middle insomnia"
                        ,"migraine"
                        ,"migraine with aura"
                        ,"miosis"
                        ,"mobility decreased"
                        ,"mood swings"
                        ,"mouth haemorrhage"
                        ,"multiple organ dysfunction syndrome"
                        ,"muscle contractions involuntary"
                        ,"muscle oedema"
                        ,"muscle spasms"
                        ,"muscle twitching"
                        ,"muscular weakness"
                        ,"musculoskeletal chest pain"
                        ,"musculoskeletal discomfort"
                        ,"musculoskeletal pain"
                        ,"musculoskeletal stiffness"
                        ,"myalgia"
                        ,"mydriasis"
                        ,"narcolepsy"
                        ,"nasal congestion"
                        ,"nasal dryness"
                        ,"nasal obstruction"
                        ,"nasal pruritus"
                        ,"nasal septum perforation"
                        ,"nasopharyngitis"
                        ,"nausea"
                        ,"nephrolithiasis"
                        ,"nervous system disorder"
                        ,"nervousness"
                        ,"neuralgia"
                        ,"neuropathy peripheral"
                        ,"neurosis"
                        ,"neutrophil count decreased"
                        ,"nightmare"
                        ,"no adverse event"
                        ,"obstructive airways disorder"
                        ,"ocular hyperaemia"
                        ,"ocular icterus"
                        ,"oedema"
                        ,"oedema genital"
                        ,"oedema peripheral"
                        ,"off label use"
                        ,"oliguria"
                        ,"onycholysis"
                        ,"optic ischaemic neuropathy"
                        ,"oral discomfort"
                        ,"oral mucosal blistering"
                        ,"oral pruritus"
                        ,"oropharyngeal discomfort"
                        ,"oropharyngeal pain"
                        ,"orthostatic hypotension"
                        ,"overdose"
                        ,"oxygen saturation decreased"
                        ,"pain"
                        ,"pain in extremity"
                        ,"pain in jaw"
                        ,"pallor"
                        ,"palpitations"
                        ,"pancytopenia"
                        ,"panic attack"
                        ,"papule"
                        ,"paraesthesia"
                        ,"paraesthesia oral"
                        ,"paranasal sinus discomfort"
                        ,"paranasal sinus hyposecretion"
                        ,"paranoia"
                        ,"parasitic gastroenteritis"
                        ,"parkinsonism"
                        ,"pelvic haemorrhage"
                        ,"periarthritis"
                        ,"periorbital swelling"
                        ,"peripheral swelling"
                        ,"peyronie's disease"
                        ,"pharyngeal oedema"
                        ,"pharyngeal swelling"
                        ,"pharyngitis"
                        ,"photophobia"
                        ,"photosensitivity reaction"
                        ,"piloerection"
                        ,"platelet count abnormal"
                        ,"platelet count decreased"
                        ,"platelet count increased"
                        ,"pneumonia aspiration"
                        ,"poisoning"
                        ,"poisoning deliberate"
                        ,"pollakiuria"
                        ,"polycythaemia"
                        ,"polycythaemia vera"
                        ,"poor quality sleep"
                        ,"postmenopausal haemorrhage"
                        ,"potentiating drug interaction"
                        ,"premature ejaculation"
                        ,"prescribed overdose"
                        ,"presyncope"
                        ,"product administered at inappropriate site"
                        ,"product administration error"
                        ,"product after taste"
                        ,"product commingling"
                        ,"product dose omission issue"
                        ,"product expiration date issue"
                        ,"product formulation issue"
                        ,"product lot number issue"
                        ,"product name confusion"
                        ,"product odour abnormal"
                        ,"product physical issue"
                        ,"product prescribing error"
                        ,"product prescribing issue"
                        ,"product quality issue"
                        ,"product shape issue"
                        ,"product size issue"
                        ,"product solubility abnormal"
                        ,"product substitution issue"
                        ,"product taste abnormal"
                        ,"product use complaint"
                        ,"product use in unapproved indication"
                        ,"product use issue"
                        ,"productive cough"
                        ,"prothrombin time prolonged"
                        ,"pruritus"
                        ,"pruritus generalised"
                        ,"psychiatric symptom"
                        ,"pulmonary embolism"
                        ,"pulmonary sarcoidosis"
                        ,"pulse abnormal"
                        ,"pupillary reflex impaired"
                        ,"pupils unequal"
                        ,"pyrexia"
                        ,"rales"
                        ,"rash"
                        ,"rash erythematous"
                        ,"rash macular"
                        ,"rash maculo-papular"
                        ,"rash maculovesicular"
                        ,"rash pruritic"
                        ,"rash pustular"
                        ,"rash vesicular"
                        ,"reaction to excipient"
                        ,"rectal haemorrhage"
                        ,"renal colic"
                        ,"renal failure"
                        ,"respiration abnormal"
                        ,"respiratory disorder"
                        ,"respiratory distress"
                        ,"respiratory rate increased"
                        ,"restlessness"
                        ,"retching"
                        ,"rhabdomyolysis"
                        ,"rheumatic disorder"
                        ,"rhinalgia"
                        ,"rhinitis allergic"
                        ,"rhinorrhoea"
                        ,"sacral pain"
                        ,"salivary gland enlargement"
                        ,"scar"
                        ,"sciatica"
                        ,"seasonal allergy"
                        ,"sedation"
                        ,"seizure"
                        ,"sinus congestion"
                        ,"sinusitis"
                        ,"skin burning sensation"
                        ,"skin disorder"
                        ,"skin dystrophy"
                        ,"skin haemorrhage"
                        ,"skin induration"
                        ,"skin lesion"
                        ,"skin plaque"
                        ,"skin ulcer"
                        ,"skin wound"
                        ,"sleep disorder"
                        ,"slow speech"
                        ,"sneezing"
                        ,"snoring"
                        ,"somnolence"
                        ,"speech disorder"
                        ,"speech sound disorder"
                        ,"spinal pain"
                        ,"spleen disorder"
                        ,"splenomegaly"
                        ,"stevens-johnson syndrome"
                        ,"stress"
                        ,"stress cardiomyopathy"
                        ,"suicidal ideation"
                        ,"suicide attempt"
                        ,"supraventricular tachycardia"
                        ,"suspected suicide attempt"
                        ,"swelling"
                        ,"swelling face"
                        ,"swelling of eyelid"
                        ,"swollen tongue"
                        ,"syncope"
                        ,"syndactyly"
                        ,"systemic lupus erythematosus"
                        ,"tachycardia"
                        ,"talipes"
                        ,"tenderness"
                        ,"tendon disorder"
                        ,"tendon pain"
                        ,"tension headache"
                        ,"terminal dribbling"
                        ,"terminal state"
                        ,"therapeutic product cross-reactivity"
                        ,"therapeutic product effect decreased"
                        ,"therapeutic product effect incomplete"
                        ,"therapeutic product effect variable"
                        ,"therapeutic product ineffective"
                        ,"therapeutic response unexpected"
                        ,"therapy cessation"
                        ,"therapy change"
                        ,"thinking abnormal"
                        ,"throat irritation"
                        ,"throat tightness"
                        ,"thrombocytopenia"
                        ,"thrombosis"
                        ,"thyroid disorder"
                        ,"tinnitus"
                        ,"tongue pruritus"
                        ,"tongue spasm"
                        ,"tonsillar hypertrophy"
                        ,"tonsillolith"
                        ,"toxic skin eruption"
                        ,"toxicity to various agents"
                        ,"transaminases increased"
                        ,"transient ischaemic attack"
                        ,"treatment failure"
                        ,"tremor"
                        ,"trismus"
                        ,"tuberculosis"
                        ,"underdose"
                        ,"unevaluable event"
                        ,"urge incontinence"
                        ,"urinary incontinence"
                        ,"urinary retention"
                        ,"urinary tract infection"
                        ,"urticaria"
                        ,"vaccination site vesicles"
                        ,"ventricular extrasystoles"
                        ,"ventricular tachycardia"
                        ,"vertigo"
                        ,"victim of chemical submission"
                        ,"violence-related symptom"
                        ,"viral infection"
                        ,"vision blurred"
                        ,"visual impairment"
                        ,"vitamin d deficiency"
                        ,"vitreous floaters"
                        ,"vocal cord disorder"
                        ,"vomiting"
                        ,"weight decreased"
                        ,"weight increased"
                        ,"wheezing"
                        ,"white blood cell count increased"
                        ,"withdrawal syndrome"
                        ,"wound"
                        ,"wrong dose"
                        ,"wrong patient received product"
                        ,"wrong product administered"
                        ,"wrong technique in product usage process"
        },
        "expected_retrieved_pts": { 
                "epistaxis",
                "hypoaesthesia",
                "paranasal sinus hyposecretion",
                "rash erythematous",
                "product quality issue",
                "drug hypersensitivity",
                "pharyngeal swelling",
                "ocular hyperaemia",
                "irritability"

        }
    },
    {
        "query": "List any adverse effects of Pseudoephedrine in infants.",
        "filters": {'age_group': 'infant', 'prod_ai': 'PSEUDOEPHEDRINE'},
        "ground_truth_reactions": set(), 
        "expected_retrieved_pts": set() 
    },
    {
        "query": "What are the psychiatric adverse events for young adults using MONTELUKAST?",
        "filters": {'age_group': 'adolescent', 'prod_ai': 'MONTELUKAST', 'reaction_category': 'Psychiatric'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "Report any adverse reactions to a fictional drug called 'ZYX-987' for any patient group.",
        "filters": {'prod_ai': 'ZYX-987'}, 
        "ground_truth_reactions": set(), 
        "expected_retrieved_pts": set()
    },
    {
        "query": "Show respiratory reactions to DESLORATADINE in males under 12 years old.",
        "filters": {'sex': 'M', 'age_group': 'child', 'prod_ai': 'DESLORATADINE', 'reaction_category': 'Respiratory'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    # --- Start of 20 New Test Cases ---
    {
        "query": "What are the cardiovascular side effects of CETIRIZINE in elderly patients?",
        "filters": {'age_group': 'elderly', 'prod_ai': 'CETIRIZINE', 'reaction_category': 'Cardiovascular'},
        "ground_truth_reactions": { "angina pectoris",
                    "arrhythmia",
                    "arrhythmia supraventricular",
                    "atrial fibrillation",
                    "atrial tachycardia",
                    "atrioventricular block",
                    "atrioventricular block complete",
                    "bradycardia",
                    "bradyphrenia",
                    "bradypnoea",
                    "cardiac arrest",
                    "cardiac disorder",
                    "cardiac failure",
                    "cardiac failure chronic",
                    "cardiospasm",
                    "electrocardiogram qt prolonged",
                    "heart rate increased",
                    "heart rate irregular",
                    "hypertension",
                    "myocardial infarction",
                    "palpitations",
                    "presyncope",
                    "syncope",
                    "tachycardia",
                    "ventricular tachycardia"
        },
        "expected_retrieved_pts": {
            "palpitations",
            "bradypnoea",
            "atrial tachycardia",
            "ventricular tachycardia",
            "tachycardia"
        }
    },
    {
        "query": "Show neurological reactions for adult males taking LORATADINE.",
        "filters": {'sex': 'M', 'age_group': 'adult', 'prod_ai': 'LORATADINE', 'reaction_category': 'Neurological'},
        "ground_truth_reactions": { "ageusia",
                                    "anosmia",
                                    "asthenia",
                                    "dizziness",
                                    "dizziness postural",
                                    "dyskinesia",
                                    "headache",
                                    "impaired work ability",
                                    "insomnia",
                                    "muscle contractions involuntary",
                                    "muscle oedema",
                                    "muscle spasms",
                                    "muscular weakness",
                                    "musculoskeletal chest pain",
                                    "musculoskeletal discomfort",
                                    "myalgia",
                                    "paraesthesia",
                                    "paranoia",
                                    "parkinsonism",
                                    "rhabdomyolysis",
                                    "somnolence"
        },
        "expected_retrieved_pts": {
            "muscle spasms",
            "somnolence",
            "dizziness",
            "insomnia",
            "paraesthesia"
        }
    },
    {
        "query": "Are there any general systemic disorders reported for infants on DESLORATADINE?",
        "filters": {'age_group': 'infant', 'prod_ai': 'DESLORATADINE', 'reaction_category': 'General/Systemic Disorders'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "What are the musculoskeletal adverse events for females using FEXOFENADINE?",
        "filters": {'sex': 'F', 'prod_ai': 'FEXOFENADINE', 'reaction_category': 'Musculoskeletal'},
        "ground_truth_reactions": { "arthralgia",
                            "arthritis",
                            "back pain",
                            "costochondritis",
                            "joint swelling",
                            "pain in extremity"
        },
        "expected_retrieved_pts": {
            "arthralgia",
            "costochondritis",
            "arthritis",
            "joint swelling",
            "pain in extremity",
            "back pain"
        }
    },
    {
        "query": "Report product administration issues for any patient taking MONTELUKAST.",
        "filters": {'prod_ai': 'MONTELUKAST', 'reaction_category': 'Product/Administration Issues'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "What are the most common reactions to CETIRIZINE in adolescent females?",
        "filters": {'sex': 'F', 'age_group': 'adolescent', 'prod_ai': 'CETIRIZINE'}, # No category filter
        "ground_truth_reactions": { "abdominal pain"
                            ,"accidental overdose"
                            ,"accommodation disorder"
                            ,"agitation"
                            ,"amenorrhoea"
                            ,"amnesia"
                            ,"analgesic drug level increased"
                            ,"anaphylactic reaction"
                            ,"asthenia"
                            ,"back pain"
                            ,"bronchial obstruction"
                            ,"bronchitis"
                            ,"c-reactive protein increased"
                            ,"cardio-respiratory arrest"
                            ,"cardiovascular disorder"
                            ,"chest pain"
                            ,"chromaturia"
                            ,"completed suicide"
                            ,"condition aggravated"
                            ,"confusional state"
                            ,"coronavirus infection"
                            ,"dark circles under eyes"
                            ,"decreased appetite"
                            ,"depression"
                            ,"dialysis"
                            ,"diarrhoea"
                            ,"disturbance in attention"
                            ,"dizziness"
                            ,"drug abuse"
                            ,"drug hypersensitivity"
                            ,"drug ineffective"
                            ,"drug ineffective for unapproved indication"
                            ,"drug interaction"
                            ,"dyskinesia"
                            ,"dyspnoea"
                            ,"electrocardiogram qt prolonged"
                            ,"enuresis"
                            ,"epidermolysis bullosa"
                            ,"expired product administered"
                            ,"exposure to toxic agent"
                            ,"extra dose administered"
                            ,"eye pain"
                            ,"eye swelling"
                            ,"fatigue"
                            ,"foreign body in respiratory tract"
                            ,"hallucination, visual"
                            ,"headache"
                            ,"heart rate increased"
                            ,"hospitalisation"
                            ,"hydrothorax"
                            ,"hyperbilirubinaemia"
                            ,"hypertension"
                            ,"hyporesponsive to stimuli"
                            ,"illness"
                            ,"inappropriate schedule of product administration"
                            ,"incoherent"
                            ,"incorrect dose administered"
                            ,"incorrect product administration duration"
                            ,"intentional overdose"
                            ,"intentional product misuse"
                            ,"intentional self-injury"
                            ,"intrusive thoughts"
                            ,"jaundice"
                            ,"leukopenia"
                            ,"loss of consciousness"
                            ,"lymphopenia"
                            ,"mania"
                            ,"maternal exposure during pregnancy"
                            ,"muscle spasms"
                            ,"mydriasis"
                            ,"nausea"
                            ,"off label use"
                            ,"overdose"
                            ,"pallor"
                            ,"palpitations"
                            ,"panic attack"
                            ,"pleural disorder"
                            ,"pneumonia"
                            ,"poisoning"
                            ,"presyncope"
                            ,"product use in unapproved indication"
                            ,"pruritus"
                            ,"pyrexia"
                            ,"rash"
                            ,"renal failure"
                            ,"seizure"
                            ,"sluggishness"
                            ,"somnolence"
                            ,"sopor"
                            ,"suicidal ideation"
                            ,"suicide attempt"
                            ,"swelling face"
                            ,"syncope"
                            ,"tachycardia"
                            ,"thirst"
                            ,"toxic epidermal necrolysis"
                            ,"toxicity to various agents"
                            ,"transaminases increased"
                            ,"tremor"
                            ,"unevaluable event"
                            ,"urine amphetamine positive"
                            ,"victim of chemical submission"
                            ,"vision blurred"
                            ,"visual impairment"
                            ,"vomiting"
                            ,"white blood cell count increased"
                            ,"wrong product administered"
        },
        "expected_retrieved_pts": {
            "pruritus",
            "rash",
            "drug hypersensitivity"
        }
    },
    {
        "query": "Are there any adverse effects for a drug called 'XYZ-123'?",
        "filters": {'prod_ai': 'XYZ-123'}, 
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "Show me reactions related to the liver for adult males taking LORATADINE.",
        "filters": {'sex': 'M', 'age_group': 'adult', 'prod_ai': 'LORATADINE', 'reaction_category': 'Other Organ Systems/Conditions'}, 
        "ground_truth_reactions": { "hepatic encephalopathy",
                "hepatic enzyme abnormal",
                "hepatic failure",
                "hepatitis",
                "hepatocellular injury",
                "drug-induced liver injury"
        },
        "expected_retrieved_pts":  {
                "epistaxis",
                "hepatocellular injury",
                "hepatitis",
                "cold sweat",
                "hyperkalaemia",
                "toxicity to various agents",
                "haematochezia"
            }
    },
    {
        "query": "What are the respiratory issues for children using FEXOFENADINE?",
        "filters": {'age_group': 'child', 'prod_ai': 'FEXOFENADINE', 'reaction_category': 'Respiratory'},
        "ground_truth_reactions": { "dyspnoea"
        },
        "expected_retrieved_pts": { "dyspnoea"
        }
    },
    {
        "query": "List allergic reactions to DESLORATADINE in elderly females.",
        "filters": {'sex': 'F', 'age_group': 'elderly', 'prod_ai': 'DESLORATADINE', 'reaction_category': 'Dermatological/Allergic'},
        "ground_truth_reactions": { "eczema",
                "hyperhidrosis",
                "hypersensitivity",
                "pallor",
                "pruritus"
        },
        "expected_retrieved_pts":{
            "hypersensitivity",
            "hyperhidrosis",
            "eczema",
            "pruritus",
            "pallor"
        }
    },
    {
        "query": "Are there any general disorders for adult males taking MONTELUKAST?",
        "filters": {'sex': 'M', 'age_group': 'adult', 'prod_ai': 'MONTELUKAST', 'reaction_category': 'General/Systemic Disorders'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "What are the gastrointestinal effects of cetirizine in infants?",
        "filters": {'age_group': 'infant', 'prod_ai': 'CETIRIZINE', 'reaction_category': 'Gastrointestinal'},
        "ground_truth_reactions": { "abdominal discomfort",
                    "abdominal distension",
                    "abdominal pain",
                    "abnormal faeces"
        },
        "expected_retrieved_pts": {
                "abdominal pain",
                "abdominal distension",
                "abnormal faeces"
            }
    },
    {
        "query": "Report any psychiatric issues with LORATADINE in children.",
        "filters": {'age_group': 'child', 'prod_ai': 'LORATADINE', 'reaction_category': 'Psychiatric'},
        "ground_truth_reactions": { "abnormal behaviour"
                    ,"aggression"
                    ,"agitation"
                    ,"altered state of consciousness"
                    ,"antisocial behaviour"
                    ,"anxiety"
                    ,"choking"
                    ,"confusional state"
                    ,"delirium"
                    ,"depressed level of consciousness"
                    ,"depression"
                    ,"hallucination"
                    ,"loss of consciousness"
                    ,"panic reaction"
                    ,"psychomotor hyperactivity"
                    ,"self-injurious ideation"
                    ,"shock"
                    ,"suicidal ideation"
                    ,"suicide attempt"
        },
        "expected_retrieved_pts": {
            "antisocial behaviour",
            "suicidal ideation",
            "confusional state",
            "anxiety",
            "psychomotor hyperactivity",
            "abnormal behaviour",
            "agitation",
            "delirium",
            "panic reaction"
        }
    },
    {
        "query": "Are there any cardiovascular reactions for adolescent females using DESLORATADINE?",
        "filters": {'sex': 'F', 'age_group': 'adolescent', 'prod_ai': 'DESLORATADINE', 'reaction_category': 'Cardiovascular'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "What are the common reactions for adult patients taking FEXOFENADINE?",
        "filters": {'age_group': 'adult', 'prod_ai': 'FEXOFENADINE'}, # no sex or category filter
        "ground_truth_reactions": { "abdominal discomfort"
            ,"abdominal distension"
            ,"abdominal pain"
            ,"abdominal pain upper"
            ,"abnormal dreams"
            ,"abortion spontaneous"
            ,"accidental exposure to product"
            ,"accidental overdose"
            ,"acute generalised exanthematous pustulosis"
            ,"adverse drug reaction"
            ,"affective disorder"
            ,"agitation"
            ,"allergic reaction to excipient"
            ,"alopecia"
            ,"alopecia areata"
            ,"alopecia universalis"
            ,"altered visual depth perception"
            ,"amnesia"
            ,"anaphylactic reaction"
            ,"anger"
            ,"angina pectoris"
            ,"angioedema"
            ,"angle closure glaucoma"
            ,"anxiety"
            ,"apathy"
            ,"aphonia"
            ,"arrhythmia"
            ,"arthralgia"
            ,"arthritis"
            ,"asthenia"
            ,"asthma"
            ,"atrial fibrillation"
            ,"atrial septal defect"
            ,"back pain"
            ,"balance disorder"
            ,"blepharospasm"
            ,"blister"
            ,"blood pressure decreased"
            ,"blood pressure increased"
            ,"bradycardia"
            ,"brain fog"
            ,"breast cancer"
            ,"burning feet syndrome"
            ,"burning sensation"
            ,"c-kit gene mutation"
            ,"chest discomfort"
            ,"chest pain"
            ,"chills"
            ,"choking"
            ,"chromosomal deletion"
            ,"clonic convulsion"
            ,"clostridium difficile infection"
            ,"cognitive disorder"
            ,"cold sweat"
            ,"concomitant disease aggravated"
            ,"condition aggravated"
            ,"confusional state"
            ,"conjunctivitis"
            ,"constipation"
            ,"contraindicated product administered"
            ,"costochondritis"
            ,"cough"
            ,"crying"
            ,"cyanosis"
            ,"cystitis"
            ,"deafness"
            ,"decreased appetite"
            ,"dehydration"
            ,"dependence"
            ,"depressed mood"
            ,"depression"
            ,"depression suicidal"
            ,"dermatitis contact"
            ,"diaphragmalgia"
            ,"diarrhoea"
            ,"discomfort"
            ,"discouragement"
            ,"disorientation"
            ,"disturbance in attention"
            ,"dizziness"
            ,"dizziness postural"
            ,"drug effect less than expected"
            ,"drug effective for unapproved indication"
            ,"drug hypersensitivity"
            ,"drug ineffective"
            ,"drug ineffective for unapproved indication"
            ,"drug interaction"
            ,"drug screen false positive"
            ,"dry eye"
            ,"dry skin"
            ,"dysarthria"
            ,"dysgeusia"
            ,"dyskinesia"
            ,"dyspepsia"
            ,"dysphagia"
            ,"dysphonia"
            ,"dyspnoea"
            ,"dysuria"
            ,"ear pain"
            ,"electrocardiogram qt prolonged"
            ,"embolic stroke"
            ,"emotional distress"
            ,"epiglottic oedema"
            ,"epiglottitis"
            ,"epistaxis"
            ,"erectile dysfunction"
            ,"erythema"
            ,"euphoric mood"
            ,"exercise tolerance decreased"
            ,"expired product administered"
            ,"exposure during pregnancy"
            ,"exposure to unspecified agent"
            ,"extra dose administered"
            ,"extrasystoles"
            ,"eye injury"
            ,"eye irritation"
            ,"eye pain"
            ,"eye swelling"
            ,"eyelid oedema"
            ,"face oedema"
            ,"facial paralysis"
            ,"faeces discoloured"
            ,"fatigue"
            ,"fear of death"
            ,"feeding disorder"
            ,"feeling abnormal"
            ,"feeling cold"
            ,"feeling drunk"
            ,"feeling hot"
            ,"feeling jittery"
            ,"flatulence"
            ,"fluid retention"
            ,"flushing"
            ,"foetal death"
            ,"foetal exposure during pregnancy"
            ,"foreign body"
            ,"foreign body in respiratory tract"
            ,"gait disturbance"
            ,"generalised tonic-clonic seizure"
            ,"genital haemorrhage"
            ,"glossitis"
            ,"grip strength decreased"
            ,"groin pain"
            ,"gynaecomastia"
            ,"haematuria"
            ,"hallucination"
            ,"headache"
            ,"heart rate increased"
            ,"heart rate irregular"
            ,"hepatic failure"
            ,"hepatitis"
            ,"hepatosplenomegaly"
            ,"hot flush"
            ,"hyperhidrosis"
            ,"hypersensitivity"
            ,"hypertension"
            ,"hypertensive crisis"
            ,"hypertonic bladder"
            ,"hypnagogic hallucination"
            ,"hypoacusis"
            ,"hypoaesthesia oral"
            ,"hypocalcaemia"
            ,"hypohidrosis"
            ,"hypokalaemia"
            ,"hyponatraemia"
            ,"hypoplastic left heart syndrome"
            ,"hypothermia"
            ,"ill-defined disorder"
            ,"illness"
            ,"inappropriate schedule of product administration"
            ,"incorrect dose administered"
            ,"incorrect route of product administration"
            ,"infection"
            ,"inflammation"
            ,"influenza"
            ,"initial insomnia"
            ,"injury"
            ,"insomnia"
            ,"intentional overdose"
            ,"irritability"
            ,"joint swelling"
            ,"lacrimation increased"
            ,"leukopenia"
            ,"leukoplakia oral"
            ,"lip dry"
            ,"lip swelling"
            ,"loss of consciousness"
            ,"loss of personal independence in daily activities"
            ,"lower respiratory tract infection"
            ,"lung disorder"
            ,"lymphadenopathy"
            ,"malaise"
            ,"mastocytosis"
            ,"maternal drugs affecting foetus"
            ,"maternal exposure during pregnancy"
            ,"medication error"
            ,"menstruation irregular"
            ,"migraine"
            ,"miosis"
            ,"mood swings"
            ,"mouth swelling"
            ,"mouth ulceration"
            ,"muscle spasms"
            ,"muscle strain"
            ,"muscle twitching"
            ,"musculoskeletal stiffness"
            ,"myalgia"
            ,"mydriasis"
            ,"myoclonus"
            ,"myopathy"
            ,"nasal congestion"
            ,"nasal discomfort"
            ,"nasal inflammation"
            ,"nasal pruritus"
            ,"nausea"
            ,"neck pain"
            ,"negative thoughts"
            ,"nervousness"
            ,"night sweats"
            ,"nightmare"
            ,"no adverse event"
            ,"non-cardiac chest pain"
            ,"odynophagia"
            ,"oedema"
            ,"oedema peripheral"
            ,"oesophageal mucosal tear"
            ,"off label use"
            ,"oral pruritus"
            ,"oropharyngeal pain"
            ,"overdose"
            ,"pain"
            ,"pain in extremity"
            ,"pain of skin"
            ,"palpitations"
            ,"panic attack"
            ,"paraesthesia"
            ,"paranasal sinus discomfort"
            ,"parosmia"
            ,"periorbital pain"
            ,"peripheral swelling"
            ,"pharyngeal erythema"
            ,"pharyngeal swelling"
            ,"pharyngitis streptococcal"
            ,"photophobia"
            ,"photopsia"
            ,"pneumonitis"
            ,"polyneuropathy"
            ,"post-tussive vomiting"
            ,"prescribed overdose"
            ,"product administered to patient of inappropriate age"
            ,"product administration error"
            ,"product complaint"
            ,"product contamination microbial"
            ,"product formulation issue"
            ,"product quality issue"
            ,"product residue present"
            ,"product size issue"
            ,"product solubility abnormal"
            ,"product substitution issue"
            ,"product use complaint"
            ,"product use in unapproved indication"
            ,"productive cough"
            ,"pruritus"
            ,"pruritus generalised"
            ,"psychomotor hyperactivity"
            ,"pyrexia"
            ,"rash"
            ,"rash erythematous"
            ,"rash papular"
            ,"rash pruritic"
            ,"reaction to excipient"
            ,"renal pain"
            ,"rhabdomyolysis"
            ,"rhinorrhoea"
            ,"scleritis"
            ,"seizure"
            ,"self-injurious ideation"
            ,"sense of oppression"
            ,"serotonin syndrome"
            ,"sinus headache"
            ,"sinusitis"
            ,"skin disorder"
            ,"skin exfoliation"
            ,"skin irritation"
            ,"skin lesion"
            ,"skin reaction"
            ,"sleep disorder"
            ,"sleep talking"
            ,"sleep terror"
            ,"sluggishness"
            ,"sneezing"
            ,"somnolence"
            ,"suicidal behaviour"
            ,"suicidal ideation"
            ,"suicide attempt"
            ,"suspected product quality issue"
            ,"swelling"
            ,"swelling face"
            ,"swelling of eyelid"
            ,"swollen tongue"
            ,"syncope"
            ,"systemic mastocytosis"
            ,"systemic scleroderma"
            ,"tachycardia"
            ,"testicular pain"
            ,"therapeutic product effect decreased"
            ,"therapeutic product effect delayed"
            ,"therapeutic product effect incomplete"
            ,"therapeutic response decreased"
            ,"therapeutic response unexpected"
            ,"therapy cessation"
            ,"thinking abnormal"
            ,"throat irritation"
            ,"throat tightness"
            ,"thrombosis"
            ,"tinnitus"
            ,"tongue disorder"
            ,"tremor"
            ,"tryptase increased"
            ,"tympanic membrane perforation"
            ,"upper-airway cough syndrome"
            ,"upper respiratory tract infection"
            ,"urinary retention"
            ,"urticaria"
            ,"urticaria aquagenic"
            ,"urticaria cholinergic"
            ,"urticaria vesiculosa"
            ,"urticarial vasculitis"
            ,"vaccination site vesicles"
            ,"ventricular septal defect"
            ,"vertigo"
            ,"viral infection"
            ,"vision blurred"
            ,"visual impairment"
            ,"vitreous detachment"
            ,"vitreous floaters"
            ,"vomiting"
            ,"wheezing"
            ,"withdrawal syndrome"
            ,"wrong product administered"
            ,"wrong technique in product usage process"
        },
        "expected_retrieved_pts":   {
                "epistaxis",
                "rash pruritic",
                "rash",
                "irritability",
                "erythema",
                "odynophagia",
                "pruritus"
            }
    },
    {
        "query": "List any adverse effects of a combination drug like 'CETIRIZINE\\PSEUDOEPHEDRINE' for any age or sex.",
        "filters": {'prod_ai': 'CETIRIZINE\\PSEUDOEPHEDRINE'},
        "ground_truth_reactions": {"abdominal pain"
        ,"abnormal dreams"
        ,"angioedema"
        ,"anxiety"
        ,"asthenia"
        ,"bezoar"
        ,"blood glucose abnormal"
        ,"blood pressure decreased"
        ,"cardiac flutter"
        ,"chronic spontaneous urticaria"
        ,"cold sweat"
        ,"coma"
        ,"condition aggravated"
        ,"constipation"
        ,"decreased appetite"
        ,"diarrhoea"
        ,"dizziness"
        ,"drug abuse"
        ,"drug effect less than expected"
        ,"drug ineffective"
        ,"drug ineffective for unapproved indication"
        ,"drug screen positive"
        ,"dry mouth"
        ,"dysphonia"
        ,"dysuria"
        ,"epistaxis"
        ,"expired product administered"
        ,"fatigue"
        ,"feeling abnormal"
        ,"feeling hot"
        ,"feeling jittery"
        ,"foreign body in respiratory tract"
        ,"gait disturbance"
        ,"hangover"
        ,"headache"
        ,"heart rate increased"
        ,"hyperhidrosis"
        ,"hypertension"
        ,"hypotension"
        ,"inappropriate schedule of product administration"
        ,"incorrect dose administered"
        ,"incorrect product administration duration"
        ,"insomnia"
        ,"intentional overdose"
        ,"malaise"
        ,"micturition disorder"
        ,"nausea"
        ,"nervousness"
        ,"odynophagia"
        ,"off label use"
        ,"palpitations"
        ,"paradoxical drug reaction"
        ,"paranasal sinus hypersecretion"
        ,"product administered to patient of inappropriate age"
        ,"product dose omission issue"
        ,"product use in unapproved indication"
        ,"pruritus"
        ,"rash macular"
        ,"rectal haemorrhage"
        ,"respiratory disorder"
        ,"self-injurious ideation"
        ,"sinus tachycardia"
        ,"skin burning sensation"
        ,"somnolence"
        ,"stridor"
        ,"suicide attempt"
        ,"swelling face"
        ,"therapeutic product effect decreased"
        ,"therapeutic product effect delayed"
        ,"therapeutic product effect incomplete"
        ,"tinnitus"
        ,"tremor"
        ,"underdose"
        ,"urine amphetamine positive"
        ,"urine output decreased"
        ,"urticaria"
        ,"vertigo"
        ,"vomiting"
        ,"wrong technique in product usage process"},
        "expected_retrieved_pts": {
            "prescription drug used without a prescription",
            "rash pruritic",
            "irritability",
            "erythema",
            "oral pruritus",
            "hidradenitis"
        }
    },
    {
        "query": "Show me musculoskeletal reactions for elderly females using MONTELUKAST.",
        "filters": {'sex': 'F', 'age_group': 'elderly', 'prod_ai': 'MONTELUKAST', 'reaction_category': 'Musculoskeletal'},
        "ground_truth_reactions":set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "Are there any adverse effects for a drug not in the database, like 'NONEXISTENTDRUG'?",
        "filters": {'prod_ai': 'NONEXISTENTDRUG'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "What are the most common reactions reported for adolescents using LORATADINE?",
        "filters": {'age_group': 'adolescent', 'prod_ai': 'LORATADINE'},
        "ground_truth_reactions": { "abdominal pain upper"
        ,"accidental exposure to product"
        ,"accidental overdose"
        ,"acute kidney injury"
        ,"aggression"
        ,"alopecia areata"
        ,"alopecia totalis"
        ,"altered state of consciousness"
        ,"amnesia"
        ,"analgesic drug level increased"
        ,"anxiety"
        ,"asthenia"
        ,"biopsy liver"
        ,"blood creatine phosphokinase increased"
        ,"body temperature decreased"
        ,"burn oral cavity"
        ,"cheilitis"
        ,"chemical submission"
        ,"chest discomfort"
        ,"chest pain"
        ,"chills"
        ,"cholestatic liver injury"
        ,"coeliac disease"
        ,"colitis ulcerative"
        ,"condition aggravated"
        ,"confusional state"
        ,"cough"
        ,"depressive symptom"
        ,"diabetes insipidus"
        ,"disorientation"
        ,"dizziness"
        ,"drug abuse"
        ,"drug ineffective"
        ,"drug ineffective for unapproved indication"
        ,"drug interaction"
        ,"drug level increased"
        ,"dry mouth"
        ,"dysphagia"
        ,"dyspnoea"
        ,"dystonia"
        ,"eosinophilia"
        ,"erythema"
        ,"expired product administered"
        ,"exposure to toxic agent"
        ,"extra dose administered"
        ,"eye movement disorder"
        ,"eye swelling"
        ,"facial pain"
        ,"fall"
        ,"fatigue"
        ,"feeling abnormal"
        ,"feeling cold"
        ,"feeling hot"
        ,"foaming at mouth"
        ,"foreign body in respiratory tract"
        ,"gait disturbance"
        ,"gait inability"
        ,"generalised tonic-clonic seizure"
        ,"hallucination"
        ,"hallucination, auditory"
        ,"headache"
        ,"heart rate irregular"
        ,"hypersensitivity"
        ,"hypoaesthesia"
        ,"hypotension"
        ,"idiosyncratic drug reaction"
        ,"inappropriate schedule of product administration"
        ,"incorrect dose administered"
        ,"incorrect product administration duration"
        ,"insomnia"
        ,"intentional overdose"
        ,"intentional product use issue"
        ,"intentional self-injury"
        ,"lethargy"
        ,"leukopenia"
        ,"lip dry"
        ,"lip swelling"
        ,"lower limb fracture"
        ,"lymphocytosis"
        ,"malaise"
        ,"medication error"
        ,"mental disorder"
        ,"metabolic acidosis"
        ,"movement disorder"
        ,"mydriasis"
        ,"myoglobin blood increased"
        ,"nausea"
        ,"neurological examination normal"
        ,"neutropenia"
        ,"no adverse event"
        ,"oedema"
        ,"off label use"
        ,"oral discomfort"
        ,"oropharyngeal pain"
        ,"overdose"
        ,"paranoia"
        ,"persecutory delusion"
        ,"pharyngeal oedema"
        ,"poisoning deliberate"
        ,"product use in unapproved indication"
        ,"pruritus"
        ,"pyrexia"
        ,"rash"
        ,"rash erythematous"
        ,"recalled product"
        ,"recalled product administered"
        ,"restlessness"
        ,"retching"
        ,"road traffic accident"
        ,"salivary hypersecretion"
        ,"secretion discharge"
        ,"seizure"
        ,"skin lesion"
        ,"sluggishness"
        ,"somnambulism"
        ,"somnolence"
        ,"sopor"
        ,"substance use disorder"
        ,"suicide attempt"
        ,"swelling face"
        ,"syncope"
        ,"therapeutic product effect decreased"
        ,"therapy cessation"
        ,"thirst"
        ,"throat irritation"
        ,"toxicity to various agents"
        ,"tubulointerstitial nephritis"
        ,"unresponsive to stimuli"
        ,"urticaria"
        ,"viral rash"
        ,"vomiting"
        ,"white blood cell count decreased"
        ,"wrong technique in product usage process"
        },
        "expected_retrieved_pts": {
                "hypersensitivity",
                "chills",
                "somnambulism",
                "pruritus",
                "therapy cessation",
                "erythema",
                "rash"
            }
    },
    {
        "query": "Report any product issues for children taking DESLORATADINE.",
        "filters": {'age_group': 'child', 'prod_ai': 'DESLORATADINE', 'reaction_category': 'Product/Administration Issues'},
        "ground_truth_reactions": { "expired product administered"
        ,"off label use"
        },
        "expected_retrieved_pts": { "expired product administered"
        ,"off label use"}
    },
    {
        "query": "What are the neurological reactions for adult females using FEXOFENADINE?",
        "filters": {'sex': 'F', 'age_group': 'adult', 'prod_ai': 'FEXOFENADINE', 'reaction_category': 'Neurological'},
        "ground_truth_reactions": {"amnesia"
            ,"aphonia"
            ,"asthenia"
            ,"balance disorder"
            ,"disorientation"
            ,"disturbance in attention"
            ,"dizziness"
            ,"dizziness postural"
            ,"dyskinesia"
            ,"gait disturbance"
            ,"headache"
            ,"insomnia"
            ,"loss of personal independence in daily activities"
            ,"muscle spasms"
            ,"muscle twitching"
            ,"musculoskeletal stiffness"
            ,"myalgia"
            ,"myopathy"
            ,"paraesthesia"
            ,"parosmia"
            ,"polyneuropathy"
            ,"rhabdomyolysis"
            ,"somnolence"},
        "expected_retrieved_pts": {
                "myalgia",
                "muscle spasms",
                "polyneuropathy",
                "dizziness",
                "somnolence",
                "insomnia",
                "paraesthesia",
                "headache",
                "dyskinesia"
            }
    },
    {
        "query": "Show me general disorders for males under 12 years old taking CETIRIZINE.",
        "filters": {'sex': 'M', 'age_group': 'child', 'prod_ai': 'CETIRIZINE', 'reaction_category': 'General/Systemic Disorders'},
        "ground_truth_reactions": { "condition aggravated"
            ,"feeling abnormal"
            ,"oral discomfort"
            ,"tongue discomfort"
        },
        "expected_retrieved_pts": {
            "mood swings",
            "feeling abnormal",
            "oral discomfort",
            "abnormal behaviour",
            "aggression",
            "distractibility",
            "agitation",
            "ocular hypertension",
            "anxiety",
            "headache",
            "anger"
        }
    },
    {
        "query": "Are there any psychiatric adverse events for infants using MONTELUKAST?",
        "filters": {'age_group': 'infant', 'prod_ai': 'MONTELUKAST', 'reaction_category': 'Psychiatric'},
        "ground_truth_reactions": set(),
        "expected_retrieved_pts": set()
    },
    {
        "query": "List all cardiovascular reactions for elderly patients taking LORATADINE.",
        "filters": {'age_group': 'elderly', 'prod_ai': 'LORATADINE', 'reaction_category': 'Cardiovascular'},
        "ground_truth_reactions": { "angina pectoris"
            ,"arrhythmia"
            ,"atrial fibrillation"
            ,"bradyphrenia"
            ,"cardiac disorder"
            ,"cardiac pacemaker insertion"
            ,"electrocardiogram qt prolonged"
            ,"heart rate increased"
            ,"hypertension"
            ,"nodal arrhythmia"
            ,"palpitations"
            ,"presyncope"
            ,"sinus bradycardia"
            ,"syncope"
            ,"tachycardia"
            ,"tachyphrenia"
            ,"torsade de pointes"
        },
        "expected_retrieved_pts": {
        "palpitations",
        "presyncope",
        "tachyphrenia"
    }
	}
]

In [63]:
# data extraction method
def extract_data_from_postgres(db_config, table_name='merged_faers_data_new'): 
    """Extracts data from PostgreSQL and returns a pandas DataFrame."""
    conn = None
    try:
        conn = psycopg2.connect(**db_config)
        cursor = conn.cursor()
        query = f"""
        SELECT primaryid, pt, drugname, prod_ai, age, sex, reaction_category
            FROM {table_name}
            WHERE pt IS NOT NULL AND prod_ai IS NOT NULL
        """
        df = pd.read_sql(query, conn)
        return df
    except Exception as e:
        print(f"Error extracting data from {table_name}: {e}")
        return None
    finally:
        if conn:
            conn.close()

In [64]:
def preprocess_data(df):
    """
    preprocessings for RAG model preparation
    """
    documents = []
    

    if 'reaction_category' in df.columns:
        df['reaction_category'] = df['reaction_category'].replace('[null]', 'Unknown Category').str.strip().str.title()
    
    df.fillna({
        'age': 'unknown', 'sex': 'unknown', 'drugname': 'unknown',
        'prod_ai': 'unknown', 'pt': 'unknown reaction', 'primaryid': 'unknown_id'
    }, inplace=True)

    #  handeling the age group logic 
    df['age_group'] = "unknown age group"
    for index, row in df.iterrows():
        if pd.notna(row['age']) and row['age'] != 'unknown':
            try:
                age_val = float(row['age'])
                if age_val <= 2:
                    df.at[index, 'age_group'] = "infant"
                elif age_val <= 11: 
                    df.at[index, 'age_group'] = "child"
                elif age_val <= 17: 
                    df.at[index, 'age_group'] = "adolescent"
                elif age_val >= 18 and age_val <= 64:
                    df.at[index, 'age_group'] = "adult"
                else: 
                    df.at[index, 'age_group'] = "elderly"
            except ValueError:
                pass

    df['age_group'] = df['age_group'].astype(str)
    df['age_group'] = df['age_group'].fillna('Unknown') 

    required_cols = ['primaryid', 'drugname', 'prod_ai', 'pt', 'reaction_category', 'age', 'sex', 'age_group']
    for col in required_cols:
        if col not in df.columns:
            raise ValueError(f"missing {col}")

    for _, row in df.iterrows():
        document_content = (
            f"Adverse Drug Reaction Report:\n"
            f"Product Active Ingredient: {row['prod_ai']}. "
            f"Drug Name: {row['drugname']}. "
            f"Patient Age: {row['age']} years ({row['age_group']}). "
            f"Patient Sex: {row['sex']}. "
            f"Reported Reaction: {row['pt']}. "
            f"Reaction System Category: {row['reaction_category']}."
        )
        
        metadata = {
            'primaryid': row['primaryid'], 'drugname': row['drugname'], 'prod_ai': row['prod_ai'],
            'pt': row['pt'], 'reaction_category': row['reaction_category'],
            'age': row['age'], 'sex': row['sex'], 'age_group': row['age_group']
        }
        documents.append(Document(text=document_content, metadata=metadata))

    return documents, df

In [65]:
# additional drugname normalization
def normalize_drug_name(drug_name):
    normalized_names = []
    components = drug_name.split('\\')
    for component in components:
        component = component.strip()
        component = re.sub(r' HYDROCHLORIDE', '', component, flags=re.IGNORECASE)
        component = re.sub(r' DIHYDROCHLORIDE', '', component, flags=re.IGNORECASE)
        component = re.sub(r' SULFATE', '', component, flags=re.IGNORECASE)
        component = re.sub(r' SODIUM', '', component, flags=re.IGNORECASE)
        normalized_names.append(component.strip())
    return " ".join(normalized_names).strip()

## Fuzzy Matching for Drug Names

mapping a user’s raw drug name query to the correct `prod_ai` (active ingredient) in the dataset.  
- first checks for exact matches against the known normalized list.  
- if no exact match, uses fuzzy string matching (`fuzz.token_set_ratio`) to handle typos or partial matches.  
- returns the single-ingredient active ingredient where possible (avoiding combination drugs), otherwise the first available match.


In [66]:
def fuzzy_match_drug(query_drug_name_raw, available_prod_ai_for_fuzzy, normalized_prod_ai_map, threshold=90):
    lower_query_drug = query_drug_name_raw.lower()
    
    # checking for the if there is an exact match with the normalized drugname list
    if lower_query_drug in [name.lower() for name in available_prod_ai_for_fuzzy]:
        print(f"DEBUG: Exact match of raw query '{query_drug_name_raw}' to normalized '{lower_query_drug}'.")
        original_ais = normalized_prod_ai_map.get(lower_query_drug.upper(), []) 
        if original_ais:
            single_ingredient_ai = next((ai for ai in original_ais if '\\' not in ai), None)
            return single_ingredient_ai if single_ingredient_ai else original_ais[0]
        return None

    # performing fuzzy matching of the drugname
    best_match_tuple = None
    if available_prod_ai_for_fuzzy:
        # i have tried querying by slightly missspelling the drug name, fuzzy match should handle partial matches
        best_match_tuple = process.extractOne(
            lower_query_drug, 
            available_prod_ai_for_fuzzy, 
            scorer=fuzz.token_set_ratio, 
            score_cutoff=threshold 
        )
    # original active ingredient lookup for matched key
    if best_match_tuple:
        normalized_matched_drug_key = best_match_tuple[0]
        score = best_match_tuple[1]
        
        print(f"DEBUG: matched '{query_drug_name_raw}' (score: {score}) to '{normalized_matched_drug_key}'.")
        
        original_ais = normalized_prod_ai_map.get(normalized_matched_drug_key.upper(), []) 
        # preferring single active ingredient if available
        if original_ais:
            single_ingredient_ai = next((ai for ai in original_ais if '\\' not in ai), None)
            return single_ingredient_ai if single_ingredient_ai else original_ais[0]
    
    print(f"DEBUG: no fuzzy match found '{query_drug_name_raw}' with threshold {threshold}.")
    return None

## Create Embeddings and Build/Load FAISS Index

This function either:
1. loads an existing FAISS index (and associated LlamaIndex stores) from disk, or  
2. builds a new one from scratch using BioBERT embeddings via `LlamaIndexHuggingFaceEmbedding`.


In [67]:
def create_embeddings_and_index(llama_index_documents, model_name, faiss_index_dir, embedding_dimension=768):
    """
    creating Biobert embeddings and build/load the faiss index
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # embedding model
    embed_model = LlamaIndexHuggingFaceEmbedding(model_name=model_name, device=device)

    faiss_file = os.path.join(faiss_index_dir, "faiss.index")
    docstore_dir = os.path.join(faiss_index_dir, "docstore")
    index_store_dir = os.path.join(faiss_index_dir, "index_store")
    # trying to load the existing faiss index if it exitsts
    if os.path.exists(faiss_file) and os.path.exists(docstore_dir) and os.path.exists(index_store_dir):
        print(f"loading {faiss_index_dir}...")
        try:
            faiss_index = faiss.read_index(faiss_file)
            vector_store = FaissVectorStore(faiss_index=faiss_index)
            # restoring storage context
            storage_context = StorageContext.from_defaults(
                vector_store=vector_store,
                docstore=SimpleDocumentStore.from_persist_dir(docstore_dir),
                index_store=SimpleIndexStore.from_persist_dir(index_store_dir),
            )
            # creating vector index from stored vectors
            index = VectorStoreIndex.from_documents(
                [],
                storage_context=storage_context,
                embed_model=embed_model,
                show_progress=False
            )
            return index, embed_model
        except Exception as e:
            print(f"Error loading FAISS index: {e}. rebuilding index.")
            shutil.rmtree(faiss_index_dir, ignore_errors=True)
            pass
    # case when new FAISS index is created
    print(f"FAISS index not found at {faiss_index_dir}, building new one")
    # flat L2 indexing
    faiss_index = faiss.IndexFlatL2(embedding_dimension)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # new vector store
    index = VectorStoreIndex.from_documents(
        llama_index_documents,
        storage_context=storage_context,
        embed_model=embed_model,
        show_progress=True
    )
    print(f"built and saving to {faiss_index_dir}...")
    
    if not os.path.exists(faiss_index_dir):
        os.makedirs(faiss_index_dir)
    faiss.write_index(faiss_index, faiss_file)
    
    index.storage_context.persist(persist_dir=faiss_index_dir)

    return index, embed_model

## Custom LlamaIndex Retriever With Metadata Filtering

The default FAISS + LlamaIndex retrieval doesn’t support filtering by custom metadata fields (e.g., `prod_ai`, `sex`, `age_group`) stored in the dataframe.  
This subclass of `LangchainBaseRetriever` adds a **post-retrieval filtering step**:
- Pulls a large set of top semantic matches (`similarity_top_k=2000`).
- Filters them based on key–value pairs in `current_filters`.
- Returns only the top `desired_k` matches after filtering.


In [68]:
class LlamaIndexFilteredRetriever(LangchainBaseRetriever):
    """
    for post retieval filtering as we have metadata in the dataframe and that is not directly supported by faiss vector store
    """
    llama_index_index: Any = Field(...)
    # final number of results to return after filtering
    desired_k: int = Field(7) 
    current_filters: Dict[str, Any] = Field(default_factory=dict)

    def __init__(self, llama_index_index: Any, desired_k: int = 15, **kwargs: Any): 
        super().__init__(
            llama_index_index=llama_index_index,
            desired_k=desired_k,
            **kwargs
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager=None
        # returning a large batch of smeantically similar matches
    ) -> List[LangchainDocument]:
        base_retriever = self.llama_index_index.as_retriever(
            similarity_top_k=2000
        )
        retrieved_nodes_raw = base_retriever.retrieve(query)
        print(f"DEBUG: initial semantic retrieval returning {len(retrieved_nodes_raw)} nodes.")
        #applying metadata filters if provided
        filtered_nodes = []
        if self.current_filters:
            print(f"DEBUG: applying metadata filters: {self.current_filters}")
            for node_with_score in retrieved_nodes_raw:
                doc = node_with_score.node
                metadata = doc.metadata
                match = True
                #comparing each filter key and value with metadata node
                for key, value in self.current_filters.items():
                    metadata_value = str(metadata.get(key, '')).lower()
                    filter_value = str(value).lower()

                    if metadata_value != filter_value:
                        match = False
                        break
                
                if match:
                    filtered_nodes.append(node_with_score)
            print(f"DEBUG: after filtering, {len(filtered_nodes)} nodes remain.")
        else:
            filtered_nodes = retrieved_nodes_raw
        # limiting the code to desired value
        final_retrieved_nodes = filtered_nodes[:self.desired_k]
        print(f"DEBUG: retrieved nodes after limiting ({self.desired_k}): {len(final_retrieved_nodes)}")
        # converting llamaindex nodes to langchain documents
        langchain_docs = []
        for node_with_score in final_retrieved_nodes:
            langchain_docs.append(LangchainDocument(
                page_content=node_with_score.node.text,
                metadata=node_with_score.node.metadata
            ))
        return langchain_docs

## Parsing Query for Structured Filters

given a free-text user query (e.g., “Show me adverse reactions for elderly female patients taking cetirizine”),  
this function extracts structured filters for:
- **Sex** (`F` or `M`)
- **Age group** (`infant`, `child`, `adolescent`, `adult`, `elderly`)
- **Drug active ingredient** (`prod_ai`) — using both fuzzy matching and keyword extraction
- **Reaction category** — via manual phrase-to-category mapping

The output is a dictionary of filters that can be applied to narrow retrieval results.


In [69]:
def parse_query_for_filters(user_query, available_prod_ai_for_fuzzy, normalized_prod_ai_map):
    # to parse the user query and apply filters to them
    filters = {}
    lower_query = user_query.lower()
    print(f"DEBUG: received query: '{user_query}'")
    print(f"DEBUG: lower_query: '{lower_query}'")

    # sex extraction
    if re.search(r'\bfemale\b|\bwomen\b|\bfemales\b|\bwomens\b', lower_query):
        filters["sex"] = "F"
    elif re.search(r'\bmale\b|\bmen\b|\bmales\b|\bmens\b', lower_query):
        filters["sex"] = "M"

    # age group extraction 
    age_extracted = False
    
    # for conditions when the query specifies over x years old
    over_age_match = re.search(r'(?:over|older than)\s*(\d+)\s*years?\s*old', lower_query)
    if over_age_match:
        age_limit = int(over_age_match.group(1))
        if age_limit >= 60: 
            filters["age_group"] = "elderly"
        elif age_limit >= 18:
            filters["age_group"] = "adult"
        age_extracted = True

    #  for conditions when the query specifies under x years old
    if not age_extracted:
        under_age_match = re.search(r'(?:under|less than)\s*(\d+)\s*years?\s*old', lower_query)
        if under_age_match:
            age_limit_upper_exclusive = int(under_age_match.group(1)) 

            if age_limit_upper_exclusive <= 3: 
                filters["age_group"] = "infant"
            elif age_limit_upper_exclusive <= 12:
                filters["age_group"] = "child"
            elif age_limit_upper_exclusive <= 18: 
                filters["age_group"] = "adolescent"
            elif age_limit_upper_exclusive <= 65: 
                filters["age_group"] = "adult"
            else: 
                filters["age_group"] = "elderly" 
            age_extracted = True

    # if specific age is given
    if not age_extracted:
        exact_age_match = re.search(r'\b(\d+)\s*years?\s*old\b', lower_query)
        if exact_age_match:
            age_val = int(exact_age_match.group(1))
            if age_val <= 2: filters["age_group"] = "infant"
            elif age_val <= 11: filters["age_group"] = "child"
            elif age_val <= 17: filters["age_group"] = "adolescent"
            elif age_val <= 64: filters["age_group"] = "adult"
            else: filters["age_group"] = "elderly"
            age_extracted = True

    # fallback
    if not age_extracted:
        if "infant" in lower_query:
            filters["age_group"] = "infant"
        elif "child" in lower_query or "children" in lower_query or "pediatric" in lower_query:
            filters["age_group"] = "child"
        elif "adolescent" in lower_query or "teen" in lower_query:
            filters["age_group"] = "adolescent"
        elif "adult" in lower_query:
            filters["age_group"] = "adult"
        elif "elderly" in lower_query or "senior" in lower_query or "over 65" in lower_query:
            filters["age_group"] = "elderly"


    potential_drug_name_raw = None
    
    # extracting the drug name
    best_overall_score = 0
    matched_drug_from_list = None

    # the word in the list were being misclassified as drugnames, when I was running the code so added them to be in not in list
    query_words = [word for word in re.findall(r'\b[a-zA-Z0-9-]+\b', lower_query) if word not in ["show", "report", "list", "adverse", "events", "reactions", "to", "for", "any", "patient", "group", "who", "took"]]

    for known_drug_normalized in available_prod_ai_for_fuzzy:
        # checking if known drugname is in the substring with perfect match
        if known_drug_normalized.lower() in lower_query:
            score = 100 
        else:
            # flexible matching
            score = fuzz.token_sort_ratio(known_drug_normalized.lower(), lower_query) 

        # finding a better match
        if score > best_overall_score: 
            best_overall_score = score
            matched_drug_from_list = known_drug_normalized
            
            if score >= 95: 
                break
    
    # strong match found using fuzzy match
    if matched_drug_from_list and best_overall_score >= 80:
        potential_drug_name_raw = matched_drug_from_list
        print(f"DEBUG:  potential drug name (direct fuzzy match in query - score {best_overall_score}): '{potential_drug_name_raw}'")
    else:
        # keyword based extraction as a fallback if fuzzymatch fails
        match_after_keyword = re.search(r'(?:used|taken|with|for|called|drug(?: name)?|to|a|an|the|of)\s+([a-zA-Z0-9\s\\-]+?)(?:\s+(?:in|for|what|common|adverse|reactions|side effects|\?|$|\'|\.|,|$))', lower_query)
        if match_after_keyword:
            keyword_extracted_drug = match_after_keyword.group(1).strip()
            # again using the not in list
            common_query_words_strict = ["show", "report", "list", "drug", "reactions", "adverse", "common", "events", "side", "effects", "what", "to", "a", "an", "the", "of", "patient", "group"]
            if keyword_extracted_drug.lower() not in common_query_words_strict:
                potential_drug_name_raw = keyword_extracted_drug
                print(f"DEBUG:  potential drug name (after keyword fallback): '{potential_drug_name_raw}'")
            else:
                print(f"DEBUG: discarding '{keyword_extracted_drug}' because it is incommon query word.")

    # confirmation of the drugname
    matched_drug_ai = None
    if potential_drug_name_raw:
        # testing with threshold of 90
        matched_drug_ai = fuzzy_match_drug(potential_drug_name_raw, available_prod_ai_for_fuzzy, normalized_prod_ai_map, threshold=90) 
        
    if matched_drug_ai:
        print(f"fuzzy matched '{potential_drug_name_raw}' to active ingredient: {matched_drug_ai}")
        filters['prod_ai'] = matched_drug_ai
    else:
        print(f"DEBUG: no prod_ai matched: '{user_query}'")

    # filtering for the manual category mapping
    found_category = None
    for phrase, category_name in category_mappings.items():
        if re.search(r'\b' + re.escape(phrase) + r'\b', lower_query):
            filters["reaction_category"] = category_name
            found_category = category_name
            break

    print(f"DEBUG: returning filters: {filters}")
    return filters

In [10]:
torch.cuda.empty_cache()

## LLM and Prompt Setup for RAG Pipeline

These are the global variables and constants that are initialized once when `_initialize_rag_components()` is called.  
- Stores references to the LLM instance, retriever, and embedding/index objects for reuse.
- Define a strict prompt template (`QA_CHAIN_PROMPT_CLEAN`) instructing the LLM to extract **only** reaction terms from the retrieved context, without any extra information.


In [72]:
# LLM and prompt setup
# global RAG components
llm_instance = None
_qa_chain_global = None
_dynamic_filtered_retriever_global = None
_available_prod_ai_for_fuzzy_global = None
_normalized_prod_ai_map_global = None

QA_CHAIN_PROMPT_CLEAN = PromptTemplate.from_template(
"""Context:
{context}
Question: {question}
Extract and list ALL unique reported reactions *STRICTLY AND ONLY* from the "Reported Reaction" field within the provided Context.
DO NOT include any other information, such as drug names, patient demographics (age, sex), or reaction categories.
Each reaction must be on a NEW, SEPARATE LINE, prefixed with a hyphen and a single space (e.g., "- reaction name").
DO NOT provide any introductory phrases, concluding remarks, explanations, questions, or code blocks.
If no relevant reactions are found in the Context, respond ONLY with "No reactions found."

Reported Reactions:
"""
)

# global RAG state variables
_rag_initialized = False
_data_df_global = None 
_llama_index_index_global = None 
_llama_index_embed_model_global = None # Store embed model globally
_available_prod_ai_for_fuzzy_global = None 
_normalized_prod_ai_map_global = None 
_dynamic_filtered_retriever_global = None 
_qa_chain_global = None 



In [None]:
## Initializing the RAG components

In [73]:
def _initialize_rag_components():
    """Initializes all heavy RAG components (data, index, LLM chain)."""
    global _rag_initialized, _data_df_global, _llama_index_index_global, \
           _available_prod_ai_for_fuzzy_global, _normalized_prod_ai_map_global, \
           _dynamic_filtered_retriever_global, _qa_chain_global, llm_instance, \
           _llama_index_embed_model_global # Added embed model to global

    if _rag_initialized:
        print("RAG components already initialized.")
        return

    print("Initializing RAG components for the first time...")
    
    # load data if it exists already
    if os.path.exists(PREPROCESSED_DATA_CSV):
        print(f"Loading preprocessed data from {PREPROCESSED_DATA_CSV}...")
        try:
            data_df_preprocessed = pd.read_csv(PREPROCESSED_DATA_CSV)
            # converting each row of dataframe to llamaindex document object
            processed_documents = []
            for _, row in data_df_preprocessed.iterrows():
                doc_text = (
                    f"Adverse Drug Reaction Report:\n"
                    f"Product Active Ingredient: {row['prod_ai']}. "
                    f"Drug Name: {row['drugname']}. "
                    f"Patient Age: {row['age']} years ({row['age_group']}). "
                    f"Patient Sex: {row['sex']}. "
                    f"Reported Reaction: {row['pt']}. "
                    f"Reaction System Category: {row['reaction_category']}."
                )
                metadata = {
                    'primaryid': row['primaryid'], 'drugname': row['drugname'], 'prod_ai': row['prod_ai'],
                    'pt': row['pt'], 'reaction_category': row['reaction_category'],
                    'age': row['age'], 'sex': row['sex'], 'age_group': row['age_group']
                }
                processed_documents.append(Document(text=doc_text, metadata=metadata))
            _data_df_global = data_df_preprocessed # Assign the loaded DF to global
            print("Preprocessed data loaded successfully from CSV.")
        except Exception as e:
            print(f"Error loading preprocessed data from CSV: {e}. Falling back to PostgreSQL extraction and reprocessing.")
            # if CSV loading fails, extract from DB
            data_df_raw = extract_data_from_postgres(DB_CONFIG, table_name='merged_faers_data_new')
            if data_df_raw is None:
                raise Exception("Failed to extract raw data from PostgreSQL. Check DB_CONFIG and database status.")
            # simplified preprocess_data call
            processed_documents, _data_df_global = preprocess_data(data_df_raw.copy())
            # saving to csv after reprocessing
            _data_df_global.to_csv(PREPROCESSED_DATA_CSV, index=False)
            print(f"Reprocessed data and saved to {PREPROCESSED_DATA_CSV}.")
    else:
        # if csv is not found, extracting fully from the database
        data_df_raw = extract_data_from_postgres(DB_CONFIG, table_name='merged_faers_data_new')
        if data_df_raw is None:
            raise Exception("Failed to extract raw data from PostgreSQL. Check DB_CONFIG and database status.")
        processed_documents, _data_df_global = preprocess_data(data_df_raw.copy())
        _data_df_global.to_csv(PREPROCESSED_DATA_CSV, index=False)
        print(f"Preprocessed data and saved to {PREPROCESSED_DATA_CSV}.")

    if processed_documents is None or _data_df_global is None:
        raise Exception("Failed to prepare data for RAG components.")

    # normalized drugname mapping for fuzzy search
    available_prod_ai = _data_df_global['prod_ai'].unique().tolist()
    _normalized_prod_ai_map_global = {}
    for original_ai in _data_df_global['prod_ai'].unique():
        components_to_normalize = original_ai.split('\\')
        for comp in components_to_normalize:
            normalized_base_name = normalize_drug_name(comp)
            if normalized_base_name:
                normalized_base_name = normalized_base_name.upper()
                if normalized_base_name not in _normalized_prod_ai_map_global:
                    _normalized_prod_ai_map_global[normalized_base_name] = []
                if original_ai not in _normalized_prod_ai_map_global[normalized_base_name]:
                    _normalized_prod_ai_map_global[normalized_base_name].append(original_ai)
    _available_prod_ai_for_fuzzy_global = list(_normalized_prod_ai_map_global.keys())

    # creating/loading embeddings and FAISS vectorIndex
    _llama_index_index_global, _llama_index_embed_model_global = create_embeddings_and_index(
        processed_documents, BIOBERT_MODEL_NAME, VECTOR_DB_PATH, embedding_dimension=768
    )
    if _llama_index_index_global is None:
        raise Exception("Failed to create/load LlamaIndex. Check model paths and disk space.")

    # metatdata filtered retriever initialization
    _dynamic_filtered_retriever_global = LlamaIndexFilteredRetriever(
        llama_index_index=_llama_index_index_global,
        desired_k=15
    )

    # initializing the llm and binding to the retrievalqa chain
    # the stop sequences were built by observing the output from the llm multiple time so that the final output came as desired

    llm_instance = LlamaCpp(
        model_path=LLAMA3_MODEL_PATH,
        temperature=0.2, 
        max_tokens=500, 
        n_ctx=2048,
        n_gpu_layers=-1, 
        verbose=False,
        stop=[
            "```", "\n```", "\n\n```", "```python", "```json", "```text", "```yaml", "```bash",
            "\n\nNote:", "\nNote:", 
            "\n\nAnswer:", "\nAnswer:",
            "\n\nExplanation:", "\nExplanation:",
            "\n\nResponse:", "\nResponse:",
            "\n\nBased on the provided Context,",
            "\n\nList all reported reactions",
            "List of Adverse Events for Males over 60 years old who took Fexofenadine:", 
            "No further information is required.", 
            "The list above already contains all the relevant reactions.", 
            "\"No reactions found.\" was removed from the output as it is not applicable to this problem.",
            "\"No reactions found.\" was removed from the output", 
            "not applicable to this problem.",
            "was removed from the output",
            "It should be removed from the list.",
            "The list is now correct.",
            "There are ", 
            "\n\nIf no relevant reactions are found",
            "\n\nTherefore, the Answer is correct.",
            "\n\nThis indicates that the system has correctly identified",
            "\n\nFinally, the system has correctly prefixed",
            "\n- dermatological/Allergic.", 
            "\n- respiratory.", 
            "\n- neurological.",
            "\n\nI hope this helps.",
            "\n\nLet me know if you have any other questions.",
            "\n\nIf there are no reactions found,",
            "\n\nHere are the reactions:",
            "\n\nHere is a list of reactions:",
            "\n\nHere's the information:",
            "adverse reaction report,
            "\n\n\n\n",
        ],
    )
    if torch.cuda.is_available():
        print(f"CUDA is available. LlamaCpp will attempt to use GPU with n_gpu_layers={llm_instance.n_gpu_layers}.")
        print(f"Current CUDA device: {torch.cuda.get_device_name(0)}")
    else:
        print("CUDA is NOT available. LlamaCpp will run on CPU. This may be slow for large models.")

    _qa_chain_global = RetrievalQA.from_chain_type(
        llm_instance, 
        retriever=_dynamic_filtered_retriever_global,
        return_source_documents=True,
        chain_type="stuff",
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT_CLEAN, "document_variable_name": "context"} 
    )
    
    _rag_initialized = True
    print("RAG components initialized successfully.")

## Extracting and Cleaning Reaction Terms from LLM Output

After the LLM generates its list of reactions, this function:
- Parses bullet-point lines (`"- reaction"`) from the output.
- Filters out common metadata terms that are **not** actual reactions (e.g., patient age, sex, categories).
- Aggressively cleans formatting artifacts from the LLM output.


In [74]:
# function for extracting and filtering reactions (used by run_rag_query and for direct console output)
def _extract_and_filter_reactions(llm_output: str) -> set:
    cleaned_reactions = set()
    lines = llm_output.split('\n')

    # common non-reaction metadata terms to filter out (case-insensitive)
    non_reaction_keywords = {
        "patient age", "patient sex", "drug name", "product active ingredient",
        "reaction system category", "unknown patient age", "unknown patient sex",
        "unknown reaction system category", "prescription drug used without a prescription",
        "cetirizine 10 mg tablets", "cetirizine hydrochloride", 
        "adult patient", "elderly patient", "female patient", "male patient",
        "product/administration issues", "dermatological/allergic", "neurological",
        "gastrointestinal", "psychiatric", "cardiovascular", "respiratory",
        "general/systemic disorders", "musculoskeletal", "other organ systems/conditions","adverse reaction report",
    }

    for cat_phrase in category_mappings.keys():
        non_reaction_keywords.add(cat_phrase.lower())
    for cat_value in category_mappings.values():
        non_reaction_keywords.add(cat_value.lower())
    # grabbing the known preferred terms from the global dataframe for normalizatoin
    global _data_df_global
    known_pts = set(_data_df_global['pt'].str.lower().unique()) if _data_df_global is not None else set()
    # looping through the LLM output lines and only looking at those that start with "-"
    for line in lines:
        line = line.strip()
        if line.startswith('- ') and len(line) > 2:
            potential_reaction = line[2:].strip().lower()

            is_non_reaction = False
            # skipping if non reaction keyword

            for keyword in non_reaction_keywords:
                if keyword in potential_reaction:
                    is_non_reaction = True
                    break
            # cleanups
            if not is_non_reaction:
                potential_reaction = re.sub(r'\s*\|\s*$', '', potential_reaction).strip() 
                potential_reaction = re.sub(r'\s+', ' ', potential_reaction).strip() 

                # cleanup the LLM formatting artifacts
                if known_pts: 
                    best_match, score = process.extractOne(potential_reaction, known_pts, scorer=fuzz.ratio) # using fuzz.ration for exactness
                    if score >= 90: # high threshold for confident normalization
                        cleaned_reactions.add(best_match)
                    else:
                        # if no strong match keeping as it is

                        cleaned_reactions.add(potential_reaction)
                else:
                    # if not a known pt, keeping the clear reaction term
                    cleaned_reactions.add(potential_reaction)
    return cleaned_reactions


## Method to run the RAG model

In [75]:
def run_rag_query(query: str, progress: Any = None) -> Dict[str, str]:
    """
    processes a user query through the RAG model and returns
    the extracted reactions and source metadata. 
    """
    
    if progress and hasattr(progress, 'update'):
        progress.update(0, desc="Initializing RAG components (if first run).")
    
    _initialize_rag_components() 
    # default fallbacks
    model_response = "Error: Something went wrong."
    sources_used_text = "No sources available."

    try:
        # parsing filter from the query
        if progress and hasattr(progress, 'update'):
            progress.update(0.2, desc="Parsing query and extracting filters.")
        # extracting the search filters
        retriever_filters = parse_query_for_filters(query, _available_prod_ai_for_fuzzy_global, _normalized_prod_ai_map_global)
        # if valid drug is not found, setting up a custom response
        if 'prod_ai' not in retriever_filters:
            model_response = "No reactions found for this drug. The drug name was not recognized or is not in the database."
            sources_used_text = "No source documents were retrieved as the drug was not recognized."
            return model_response, sources_used_text 
        # document retrieval
        if progress and hasattr(progress, 'update'):
            progress.update(0.4, desc="Retrieving relevant documents.")
        # applying the filters to retrieve and fetch the matching documnets
        _dynamic_filtered_retriever_global.current_filters = retriever_filters
        initial_retrieved_docs_for_llm = _dynamic_filtered_retriever_global._get_relevant_documents(query)
        # condition if no matching document is found
        if not initial_retrieved_docs_for_llm:
            model_response = "No reactions found for the specified criteria."
            sources_used_text = "No source documents were returned as no relevant documents were found after filtering."
            return model_response, sources_used_text 
        # llm generation stage
        if progress and hasattr(progress, 'update'):
            progress.update(0.7, desc="Generating response with LLM.")
        # passing to the global qa chain to get an extraction
        result = _qa_chain_global({"query": query})
        # post-processing of the result
        if progress and hasattr(progress, 'update'):
            progress.update(0.9, desc="Post-processing LLM output.")
        # raw llm outputs
        raw_llm_result = result["result"]
        # cleaning and filtering the extracted reactions
        generated_reactions = _extract_and_filter_reactions(raw_llm_result)
        # sorting and formatting as bullet points
        final_response_list = sorted(list(generated_reactions))
        if final_response_list:
            model_response = "\n".join([f"- {r}" for r in final_response_list])
        else:
            if "no reactions found" in raw_llm_result.lower():
                model_response = "No reactions found for the specified criteria."
            else:
                model_response = "No reactions found in the provided context documents."

        # building a reliable list of source documents
        if "source_documents" in result and result["source_documents"]:
            sources_list = []
            for i, doc in enumerate(result["source_documents"]):
                sources_list.append(f"Document {i+1}:\n  Content snippet: {doc.page_content[:200]}...\n  Metadata: {doc.metadata}")
            sources_used_text = "\n\n".join(sources_list)
        else:
            sources_used_text = "No source documents were returned."

    except Exception as e:
        model_response = f"An internal error occurred: {str(e)}"
        sources_used_text = "No sources available due to error."
        print(f"Error in run_rag_query: {e}")
    # completion phase
    if progress and hasattr(progress, 'update'):
        progress.update(1.0, desc="Done!")
    return model_response, sources_used_text 

# evaluation metrics initialization
bertscore = load("bertscore")
rouge = load("rouge")
bleu = load("bleu")

RAG model evaluation block

In [76]:
def evaluate_rag_model(qa_chain, retriever_instance, test_cases): 
    """
    evaluates the RAG model on a set of defined test cases defined i
    """
    print("\n--- Starting RAG Model Evaluation ---")

    # RAG components are assumed to be initialized globally before calling 
    print("RAG components ready for evaluation.")

    # variables to store the test case metrics to calculate the average
    all_retrieval_precisions = []
    all_retrieval_recalls = []
    all_hit_rates = []
    all_bert_f1_scores = []
    all_rouge_l_scores = []
    all_bleu_scores = []
    # looping through each test cases and evaluating everyone
    for i, test_case in enumerate(test_cases):
        print(f"\n--- Running Test Case {i+1}/{len(test_cases)} ---")
        query = test_case["query"]
        ground_truth_reactions = test_case["ground_truth_reactions"]
        expected_retrieved_pts = test_case["expected_retrieved_pts"]
        # debug to print what is being evaluated
        print(f"Query: {query}")
        print(f"Ground Truth Reactions: {ground_truth_reactions}")
        print(f"Expected Retrieved PTs (for retrieval eval): {expected_retrieved_pts}")

        # parsing the query to extract filters
        retriever_filters = parse_query_for_filters(query, _available_prod_ai_for_fuzzy_global, _normalized_prod_ai_map_global)
        
        model_response = "Error: Something went wrong."
        sources_used_text = "No sources available."
        actual_retrieved_docs_for_llm = [] 

        try:
            # if a valid prod_ai is not found, skipping the retrieval and generation, here the actual retrieved pts will be an empty set
            if 'prod_ai' not in retriever_filters:
                model_response = "No reactions found for this drug. The drug name was not recognized or is not in the database."
                sources_used_text = "No source documents were retrieved as the drug was not recognized."
               
                # apply filters to the retriever instance
                retriever_instance.current_filters = retriever_filters
                # retrieving the relevant documents
                actual_retrieved_docs_for_llm = retriever_instance._get_relevant_documents(query)

                if not actual_retrieved_docs_for_llm:
                    model_response = "No reactions found for the specified criteria."
                    sources_used_text = "No source documents were returned as no relevant documents were found after filtering."
                else:
                    # generating the response from the llm using QA chain
                    result = qa_chain({"query": query})
                    raw_llm_result = result["result"]
                    
                    # extracting and cleaning up the generated outputs
                    generated_reactions = _extract_and_filter_reactions(raw_llm_result)
                    
                    final_response_list = sorted(list(generated_reactions))
                    if final_response_list:
                        model_response = "\n".join([f"- {r}" for r in final_response_list])
                    else:
                        if "no reactions found" in raw_llm_result.lower():
                            model_response = "No reactions found for the specified criteria."
                        else:
                            model_response = "No reactions found in the provided context documents."

                    if "source_documents" in result and result["source_documents"]:
                        sources_list = []
                        for j, doc in enumerate(result["source_documents"]):
                            sources_list.append(f"Document {j+1}:\n  Content snippet: {doc.page_content[:200]}...\n  Metadata: {doc.metadata}")
                        sources_used_text = "\n\n".join(sources_list)
                    else:
                        sources_used_text = "No source documents were returned."

        except Exception as e:
            model_response = f"An internal error occurred: {str(e)}"
            sources_used_text = "No sources available due to error."
            print(f"Error during RAG execution in evaluate_rag_model: {e}")

        # checking the model response for manual review
        print(f"\nModel Response:\n{model_response}")
        print(f"\nSources Used:\n{sources_used_text}")

        # collecting the retrieved pts for retrieval metrics
        retrieved_pts_from_rag = set()
        for doc in actual_retrieved_docs_for_llm: 
            if 'pt' in doc.metadata:
                retrieved_pts_from_rag.add(doc.metadata['pt'].lower())

        print(f"Actual Retrieved PTs (from RAG sources): {retrieved_pts_from_rag}")

        # extracting the generated reactoins for final response
        generated_reactions_for_eval = _extract_and_filter_reactions(model_response) 
        print(f"Generated Reactions (from LLM output): {generated_reactions_for_eval}")

        # Metric calculations
        if len(retrieved_pts_from_rag) > 0:
            relevant_retrieved_count = len(expected_retrieved_pts.intersection(retrieved_pts_from_rag))
            retrieval_precision = relevant_retrieved_count / len(retrieved_pts_from_rag)
            all_retrieval_precisions.append(retrieval_precision)
        else:
            retrieval_precision = 0.0 # no documents retrieved, so 0 precision
            all_retrieval_precisions.append(retrieval_precision)
        print(f"Retrieval Precision: {retrieval_precision:.2f}")

        # Context Recall
        if len(expected_retrieved_pts) > 0:
            relevant_retrieved_count = len(expected_retrieved_pts.intersection(retrieved_pts_from_rag))
            retrieval_recall = relevant_retrieved_count / len(expected_retrieved_pts)
            all_retrieval_recalls.append(retrieval_recall)
        else:
            # if no ground truth relevant docs, recall is 1 if no docs retrieved, 0 otherwise
            retrieval_recall = 1.0 if len(retrieved_pts_from_rag) == 0 else 0.0
            all_retrieval_recalls.append(retrieval_recall)
        print(f"Retrieval Recall: {retrieval_recall:.2f}")

        # Hit Rate
        hit_rate = 1 if len(expected_retrieved_pts.intersection(retrieved_pts_from_rag)) > 0 else 0
        all_hit_rates.append(hit_rate)
        print(f"Hit Rate: {hit_rate}")

        # generation metrics
        predictions_list_for_bert = list(generated_reactions_for_eval)
        references_list_for_bert = list(ground_truth_reactions)

        if not predictions_list_for_bert or not references_list_for_bert:
            print("Skipping BERTScore, ROUGE, BLEU for this test case due to empty generated/ground truth reactions.")
            all_bert_f1_scores.append(0.0)
            all_rouge_l_scores.append(0.0)
            all_bleu_scores.append(0.0)
        else:
            try:
                results_bert = bertscore.compute(predictions=predictions_list_for_bert, references=[references_list_for_bert]*len(predictions_list_for_bert), lang="en")
                bert_f1 = sum(results_bert['f1']) / len(results_bert['f1']) 
                all_bert_f1_scores.append(bert_f1)
                print(f"BERTScore F1: {bert_f1:.2f}")
            except Exception as e:
                print(f"Error calculating BERTScore: {e}")
                all_bert_f1_scores.append(0.0)

            single_prediction_str = " ".join(sorted(predictions_list_for_bert))
            single_reference_str = " ".join(sorted(references_list_for_bert))

            try:
                results_rouge = rouge.compute(predictions=[single_prediction_str], references=[single_reference_str])
                rouge_l = results_rouge['rougeL']
                all_rouge_l_scores.append(rouge_l)
                print(f"ROUGE-L: {rouge_l:.2f}")
            except Exception as e:
                print(f"Error calculating ROUGE: {e}")
                all_rouge_l_scores.append(0.0)

            try:
                tokenized_prediction = single_prediction_str.split()
                tokenized_reference = single_reference_str.split()

                results_bleu = bleu.compute(predictions=[tokenized_prediction], references=[[tokenized_reference]])
                bleu_score = results_bleu['bleu']
                all_bleu_scores.append(bleu_score)
                print(f"BLEU Score: {bleu_score:.2f}")
            except Exception as e:
                print(f"Error calculating BLEU: {e}")
                all_bleu_scores.append(0.0)

    print("\n--- Overall RAG Model Evaluation Results ---")
    if all_retrieval_precisions:
        print(f"Average Retrieval Precision: {sum(all_retrieval_precisions) / len(all_retrieval_precisions):.2f}")
    if all_retrieval_recalls:
        print(f"Average Retrieval Recall: {sum(all_retrieval_recalls) / len(all_retrieval_recalls):.2f}")
    if all_hit_rates:
        print(f"Average Hit Rate: {sum(all_hit_rates) / len(all_hit_rates):.2f}")
    if all_bert_f1_scores:
        print(f"Average BERTScore F1: {sum(all_bert_f1_scores) / len(all_bert_f1_scores):.2f}")
    if all_rouge_l_scores:
        print(f"Average ROUGE-L: {sum(all_rouge_l_scores) / len(all_rouge_l_scores):.2f}")
    if all_bleu_scores:
        print(f"Average BLEU Score: {sum(all_bleu_scores) / len(all_bleu_scores):.2f}")

    return {}

### Main method to execute the pipeline

In [80]:
# main method block for direct console interaction or evaluation 
if __name__ == "__main__":
    print("\n--- Starting RAG Model ---")
    print("Initializing RAG components (this may take a few minutes on first run)...")
    try:
        # initializing global components
        _initialize_rag_components() 
    except Exception as e:
        # failure in initialization case
        print(f"FATAL ERROR during RAG initialization: {e}")
        sys.exit(1) 
    # prompt for using the model (to evaluate or in the console mode)
    print("\n--- RAG System Ready. Choose mode: 'console' or 'evaluate' ---")
    mode = input("Enter 'console' for direct interaction or 'evaluate' to run evaluation: ").lower()

    if mode == 'console':
        print("\n--- Starting Console Interaction Mode ---")
        while True:
            user_query = input("\nEnter your query (e.g., 'What are common adverse reactions in adult males that used cetirizine?'):\n")
            if user_query.lower() == 'exit':
                break
            
         
            model_response, sources_used_text = run_rag_query(user_query, progress=type('obj', (object,), {'update' : lambda *args, **kwargs: None})())

            print("\n--- Model Response ---")
            print(model_response)

            print("\n--- Sources Used (Metadata) ---")
            print(sources_used_text)
    # running the pre-defined set of test cases
    elif mode == 'evaluate':
        print("\n--- Starting Automated RAG Model Evaluation ---")
        evaluation_results = evaluate_rag_model(
            _qa_chain_global, 
            _dynamic_filtered_retriever_global, 
            test_cases
        )
        print("\n--- Automated RAG Model Evaluation Complete ---")


    else:
        print("Invalid mode selected. Exiting.")



--- Starting RAG Model ---
Initializing RAG components (this may take a few minutes on first run)...
RAG components already initialized.

--- RAG System Ready. Choose mode: 'console' or 'evaluate' ---


Enter 'console' for direct interaction or 'evaluate' to run evaluation:  evaluate



--- Starting Automated RAG Model Evaluation ---

--- Starting RAG Model Evaluation ---
RAG components ready for evaluation.

--- Running Test Case 1/32 ---
Query: What skin reactions are reported for adult females who used desloratadine?
Ground Truth Reactions: {'pruritus', 'hyperhidrosis', 'rash erythematous', 'photosensitivity reaction', 'erythema nodosum', 'pallor', 'skin plaque', 'hypersensitivity', 'skin ulcer', 'dermatitis exfoliative generalised', 'generalised erythema', 'skin wound', 'skin disorder', 'pruritus generalised', 'rash maculo-papular', 'guttate psoriasis', 'erythema', 'rash', 'skin dystrophy', 'skin lesion'}
Expected Retrieved PTs (for retrieval eval): {'rash erythematous', 'photosensitivity reaction', 'rash maculo-papular', 'hypersensitivity', 'erythema', 'skin plaque', 'rash', 'skin dystrophy'}
DEBUG: received query: 'What skin reactions are reported for adult females who used desloratadine?'
DEBUG: lower_query: 'what skin reactions are reported for adult females 

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BERTScore F1: 1.00
ROUGE-L: 0.56
Error calculating BLEU: Predictions and/or references don't match the expected format.
Expected format:
Feature option 0: {'predictions': Value(dtype='string', id='sequence'), 'references': Sequence(feature=Value(dtype='string', id='sequence'), length=-1, id='references')}
Feature option 1: {'predictions': Value(dtype='string', id='sequence'), 'references': Value(dtype='string', id='sequence')},
Input predictions: ['erythema', 'erythema', 'nodosum', ..., 'erythematous', 'rash', 'maculo-papular'],
Input references: [['dermatitis', 'exfoliative', 'generalised', 'erythema', 'erythema', 'nodosum', 'generalised', 'erythema', 'guttate', 'psoriasis', 'hyperhidrosis', 'hypersensitivity', 'pallor', 'photosensitivity', 'reaction', 'pruritus', 'pruritus', 'generalised', 'rash', 'rash', 'erythematous', 'rash', 'maculo-papular', 'skin', 'disorder', 'skin', 'dystrophy', 'skin', 'lesion', 'skin', 'plaque', 'skin', 'ulcer', 'skin', 'wound']]

--- Running Test Case 2/32