Run few-shot inference against the EHR FHIR PostgreSQL database












In [None]:
from google.colab import drive

# Make Google Drive visible under /content/drive before any path below is used.
drive.mount('/content/drive')

# Root folders on Drive: raw EHR datasets and the development workspace.
DATA_PATH = "/content/drive/MyDrive/210_Capstone/210_Data/210_DataSets/210_DataSets-EHR"
DEV_PATH = "/content/drive/MyDrive/210_Capstone/210_Factory/210_dev"

# Artifacts that live inside the development workspace.
FAISS_DB_PATH = f"{DEV_PATH}/vectorstores/medintellagent_faiss_v1"
POSTGRES_DB_PATH = f"{DEV_PATH}/synthea_ehr_backup.sql"

# OpenAI models used for SQL generation and for embedding questions.
LLM_MODEL = "gpt-4o-mini"
EMBEDDING_MODEL = "text-embedding-3-large"



Mounted at /content/drive


In [None]:
%%capture
!pip -q install --upgrade openai langchain langchain-community langchain-openai faiss-cpu


In [None]:
# PostgreSQL connection settings for the database server on localhost.
DB_HOST = "localhost"
DB_NAME = "synthea_ehr"
DB_USER = "postgres"
DB_PASSWORD = "postgres"

# OpenAI model selection (same values as assigned in the first cell).
EMBEDDING_MODEL = "text-embedding-3-large"
LLM_MODEL = "gpt-4o-mini"

In [None]:
import os
# Step 1: try the Colab secret store. Any failure (module unavailable, lookup
# error, ...) is swallowed on purpose so the interactive prompt below can
# take over instead of aborting the cell.
try:
    from google.colab import userdata
    key = userdata.get('OPENAI_API_KEY')
    if key: os.environ['OPENAI_API_KEY'] = key
except Exception:
    pass

# Step 2: if the environment still has no (non-empty) key, ask for it
# interactively without echoing it to the notebook output.
if not os.environ.get("OPENAI_API_KEY"):
    import getpass
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OPENAI_API_KEY: ")

# Initialize OpenAI client
# NOTE(review): no key is passed explicitly; presumably the client picks up
# OPENAI_API_KEY from the environment set above - confirm against the SDK docs.
from openai import OpenAI
client = OpenAI()

In [None]:
from google.colab import drive
import os
# Current import locations; the `langchain.vectorstores` / `langchain.embeddings`
# shims are deprecated, and the packages below are the ones the install cell pulls in.
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Timestamped backup copy of the main FAISS store. Deriving it from
# FAISS_DB_PATH keeps a single source of truth for the Drive location
# (it resolves to the same absolute path that was hard-coded here before).
vectorstore_path = FAISS_DB_PATH + ".backup_20251009_140444"

# The LangChain FAISS loader expects a folder path and looks for index.faiss inside it
faiss_index_file = os.path.join(vectorstore_path, "index.faiss")

# Perform pre-flight checks before loading
if not os.path.isdir(vectorstore_path):
    print(f"‚ùå FATAL ERROR: The directory '{vectorstore_path}' does not exist.")
elif not os.path.isfile(faiss_index_file):
    print(f"‚ùå FATAL ERROR: The file '{faiss_index_file}' was not found inside the directory.")
else:
    print("‚úÖ Checks passed. Directory and index file found. Attempting to load...")
    try:
        # Use the configured embedding model instead of the library default so
        # query vectors have the same dimensionality as the stored ones.
        # NOTE(review): assumes this backup was built with EMBEDDING_MODEL,
        # like the main store loaded in the next cell - confirm.
        embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

        # Load the vector store.
        # SECURITY: allow_dangerous_deserialization=True unpickles the stored
        # docstore; only load index folders that you created yourself.
        db = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)

        print(" Vector store loaded successfully!")

    except Exception as e:
        print(f"An error occurred during Faiss loading: {e}")

‚úÖ Checks passed. Directory and index file found. Attempting to load...


  embeddings = OpenAIEmbeddings()


 Vector store loaded successfully!


In [None]:
# Load the main FAISS vector store and sanity-check its immunization coverage
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

print("üîÑ Loading FAISS vector store...")
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
vs = FAISS.load_local(FAISS_DB_PATH, embeddings, allow_dangerous_deserialization=True)

# Number of stored few-shot examples, read from the in-memory docstore.
total_examples = len(vs.docstore._dict)
print("‚úÖ FAISS loaded successfully")
print(f"üìä Total examples available: {total_examples}")

# Smoke test: a vaccine question should retrieve examples tagged with the
# immunizations table in their metadata.
test_query = "What vaccines have I received?"
similar_docs = vs.similarity_search_with_score(test_query, k=3)

immunizations_examples = sum(
    1
    for hit, _distance in similar_docs
    if 'immunizations' in hit.metadata.get('tables', [])
)

print(f"üíâ Immunizations examples found: {immunizations_examples}/3 in top results")

coverage_message = (
    "‚úÖ Vector store includes immunizations support"
    if immunizations_examples > 0
    else "  Vector store may need immunizations examples"
)
print(coverage_message)

üîÑ Loading FAISS vector store...
‚úÖ FAISS loaded successfully
üìä Total examples available: 90
üíâ Immunizations examples found: 3/3 in top results
‚úÖ Vector store includes immunizations support


In [None]:
# Prompt Building Helpers

# Instruction header prepended to every few-shot prompt: generation rules
# first, then a blank line, then per-table column hints, then the output rule.
PREFIX = "\n".join([
    "Return a single PostgreSQL SELECT only.",
    "Use only tables: patients, encounters, conditions, observations, medication_requests, procedures, immunizations.",
    "Use only parameter :patient_id. Prefer DISTINCT ON with ORDER BY for 'latest per X'; no CTEs or window functions.",
    "Do not mix GROUP BY with DISTINCT ON. If aggregation is needed (e.g., pairing BP), use GROUP BY + MAX(CASE...).",
    "Important: medication_requests has no rxnorm_code (use med_name only). Encounters has no location.",
    "If the question mentions 'blood pressure' or 'BP', return only systolic (8480-6) and diastolic (8462-4) results and prefer paired rows grouped by effective_datetime.",
    "For vaccine/immunization questions, use the immunizations table (columns: date, patient_id, display, code, base_cost).",
    "",
    "Schema hints:",
    "  conditions(display, code, onset_datetime, abatement_datetime, encounter_id, patient_id, condition_id)",
    "  observations(display, loinc_code, value_num, value_unit, effective_datetime, encounter_id, patient_id, observation_id)",
    "  medication_requests(med_name, dose, route, start_datetime, end_datetime, refills, encounter_id, patient_id, med_request_id)",
    "  encounters(start_datetime, end_datetime, reason_text, class, encounter_id, patient_id)",
    "  procedures(display, code, performed_datetime, encounter_id, patient_id, procedure_id)",
    "  immunizations(date, patient_id, encounter_id, code, display, base_cost)",
    "Output only the raw SQL, no markdown fences.",
])

def get_few_shots(user_question: str, k: int = 3):
    """Look up the k stored examples most similar to the question.

    Args:
        user_question: Natural-language question to search the store with.
        k: Number of nearest examples to fetch.

    Returns:
        A list of ``{"question": ..., "sql": ...}`` dicts, one per hit, where
        the question is the hit's page content and the SQL comes from its
        ``"sql"`` metadata entry (empty string when that entry is absent).
    """
    return [
        {"question": hit.page_content, "sql": hit.metadata.get("sql", "")}
        for hit in vs.similarity_search(user_question, k=k)
    ]

def format_examples(examples):
    """Render few-shot examples as ``Question:`` / ``SQL:`` text blocks.

    Each example becomes ``Question: <q>\\nSQL:\\n<sql>\\n`` and the blocks are
    joined with a newline; an empty list yields an empty string.
    """
    rendered = []
    for shot in examples:
        rendered.append(f"Question: {shot['question']}\nSQL:\n{shot['sql']}\n")
    return "\n".join(rendered)

def build_prompt(user_question: str, k: int = 3) -> str:
    """Assemble the full few-shot prompt for one question.

    Layout: the PREFIX instructions, the k retrieved examples, then the
    user's question followed by a trailing ``SQL:`` cue, each separated by a
    newline.
    """
    shots_text = format_examples(get_few_shots(user_question, k=k))
    return "\n".join([PREFIX, shots_text, f"Question: {user_question}", "SQL:"])

# Status line confirming that PREFIX and the prompt helpers above are defined.
print("‚úÖ Updated prompt system with immunizations support")

‚úÖ Updated prompt system with immunizations support


In [None]:
# SQL generation + a tiny safety check
import re

# Matches text whose first keyword (after optional leading whitespace) is
# SELECT as a whole word, case-insensitively.
SELECT_ONLY = re.compile(r"^\s*select\b", re.IGNORECASE | re.DOTALL)

# First fenced code block anywhere in the text. An optional SQL language tag
# is accepted on the fence line itself; group 1 is the fence content.
_FENCED_SQL = re.compile(
    r"```(?:[ \t]*(?:postgresql|postgres|pgsql|sql)\b)?\s*([\s\S]*?)\s*```",
    re.IGNORECASE,
)


def clean_sql(text: str) -> str:
    """Extract the bare SQL statement from raw model output.

    Handles, in order:
      * ``None`` / empty input (returns an empty string),
      * BOM and zero-width-space characters, removed *before* any matching so
        they cannot hide a fence or a label,
      * a fenced code block (with or without a language tag), even when the
        model wrapped it in explanatory prose before or after the fence,
      * a leading ``SQL:`` label, outside or inside the fence.

    Args:
        text: Raw completion text.

    Returns:
        The statement with fences, label and surrounding whitespace removed.
        Text without fences or label is returned stripped but otherwise
        unchanged.
    """
    s = (text or "").replace("\ufeff", "").replace("\u200b", "").strip()

    # Prefer the content of the first fenced block; searching (instead of
    # anchoring at both ends) tolerates prose around the fence and never
    # leaves the language tag behind.
    fenced = _FENCED_SQL.search(s)
    if fenced:
        s = fenced.group(1).strip()

    # strip a leading "SQL:" label if present
    if s.lower().startswith("sql:"):
        s = s[4:].strip()

    return s

# Write/DDL keywords matched as whole words. Word boundaries (instead of
# requiring a literal space on both sides) also catch keywords delimited by
# newlines, tabs or parentheses, which is how multi-line model output looks.
_WRITE_KEYWORDS = re.compile(
    r"\b(?:insert|update|delete|drop|alter|create|grant|revoke|truncate)\b",
    re.IGNORECASE,
)


def is_safe_select(text: str) -> bool:
    """Heuristic check that model output is a single read-only SELECT.

    The text is cleaned with ``clean_sql`` first. It is rejected when it is
    empty, contains a semicolon anywhere except as the final character,
    does not start with SELECT, or contains a write/DDL keyword as a whole
    word. Identifiers that merely contain a keyword (``updated_at``) pass.

    Args:
        text: Raw or cleaned SQL text.

    Returns:
        True when none of the rejection rules fire, False otherwise.
    """
    trimmed = clean_sql(text).strip()
    if not trimmed:
        return False

    # A semicolon before the last character means more than one statement.
    if ";" in trimmed[:-1]:
        return False

    if not SELECT_ONLY.match(trimmed):
        return False

    return _WRITE_KEYWORDS.search(trimmed) is None

def generate_sql(user_question: str, k: int = 3, max_tokens: int = 400):
    """Ask the chat model to translate a patient question into SQL.

    Args:
        user_question: Natural-language question from the patient.
        k: Number of few-shot examples to include in the prompt.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The model's reply with surrounding whitespace stripped. The text is
        NOT cleaned or validated here; an empty string is returned when the
        reply carries no text content.
    """
    prompt = build_prompt(user_question, k=k)
    resp = client.chat.completions.create(
        model=LLM_MODEL,
        temperature=0,  # keep generation as repeatable as the API allows
        messages=[
            {"role":"system","content":"A precise SQL generator for a patient portal."},
            {"role":"user","content": prompt}
        ],
        max_tokens=max_tokens,
    )
    # `content` can be None (for example on a refusal); treat that as empty
    # output instead of failing with AttributeError on .strip().
    content = resp.choices[0].message.content
    return (content or "").strip()

In [None]:
# Demo: generate SQL for one sample question and show the safety verdict.
# The query is only printed here; nothing is executed against the database.
demo_q = "Which medications am I currently taking?"
sql = generate_sql(demo_q, k=3)
print(sql, "\n\nSAFE:", is_safe_select(sql))

SELECT DISTINCT ON (mr.patient_id, mr.med_name)
  mr.patient_id,
  mr.med_name AS medication,
  mr.dose,
  mr.route,
  mr.start_datetime,
  mr.end_datetime,
  mr.refills
FROM medication_requests mr
WHERE mr.patient_id = :patient_id
  AND (mr.end_datetime IS NULL OR mr.end_datetime >= NOW())
ORDER BY mr.patient_id,
         mr.med_name,
         COALESCE(mr.end_datetime, mr.start_datetime) DESC NULLS LAST; 

SAFE: True


# Load PostgreSQL EHR FHIR Database

In [None]:
%%capture
# Install the PostgreSQL server packages into the notebook VM.
!apt-get -y update
!apt-get -y install postgresql postgresql-contrib

# Start the server, set the `postgres` role's password to the value stored in
# DB_PASSWORD, and create the (initially empty) synthea_ehr database.
!service postgresql start
!sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres';"
!sudo -u postgres createdb synthea_ehr

# This cell runs under %%capture, so the confirmation below is captured
# rather than displayed.
!echo "PostgreSQL installed, service started, user password set to 'postgres', and DB 'synthea_ehr' created."


In [None]:
import subprocess
import os

# Connection settings for the PostgreSQL server on localhost
DB_HOST = "localhost"
DB_NAME = "synthea_ehr"
DB_USER = "postgres"
DB_PASSWORD = "postgres"

# Timestamped SQL dump on Google Drive that restore_database() tries first
BACKUP_PATH = f"{DEV_PATH}/synthea_ehr_backup.sql.backup_20251009_140104"

def restore_database():
    """Recreate the synthea_ehr database from the SQL backup file.

    Steps:
      1. Resolve the backup file (BACKUP_PATH, else the un-timestamped
         fallback under DEV_PATH). This happens BEFORE anything destructive,
         so a missing backup no longer leaves a dropped, empty database.
      2. Drop and re-create the database to ensure a clean state.
      3. Feed the backup to ``psql`` and run ``verify_database()``.

    Side effects:
        Rebinds the module-level BACKUP_PATH when the fallback file is used.

    Returns:
        True when the restore ran to completion, False when a handled error
        occurred (the error is printed, not raised).
    """
    global BACKUP_PATH  # rebound below when the fallback backup is used
    try:
        print(" Starting database restore...")
        print("=" * 60)

        # Verify the backup file exists before touching the database.
        if not os.path.exists(BACKUP_PATH):
            print(f"    Updated backup not found: {BACKUP_PATH}")
            # Fallback to original backup location
            fallback_backup = DEV_PATH + "/synthea_ehr_backup.sql"
            if not os.path.exists(fallback_backup):
                raise FileNotFoundError("Neither updated nor original backup file found")
            print(f"   Using fallback backup: {fallback_backup}")
            BACKUP_PATH = fallback_backup

        # Drop and re-create the database to ensure a clean state
        print(" Preparing clean database environment...")
        env = os.environ.copy()
        env['PGPASSWORD'] = DB_PASSWORD  # lets the CLI tools authenticate non-interactively
        connection_args = ['--host', DB_HOST, '--username', DB_USER]

        # --if-exists makes dropdb succeed when the database is absent, so a
        # non-zero exit code is a real failure; report what the tool said
        # instead of claiming there was nothing to drop.
        drop_result = subprocess.run(
            ['dropdb', *connection_args, '--if-exists', DB_NAME],
            env=env, capture_output=True, text=True,
        )
        if drop_result.returncode == 0:
            print("   ‚úÖ Existing database dropped successfully")
        else:
            print(f"  dropdb failed (continuing anyway): {(drop_result.stderr or '').strip()}")

        subprocess.run(
            ['createdb', *connection_args, DB_NAME],
            env=env, check=True, capture_output=True, text=True,
        )
        print("   ‚úÖ New database created successfully")

        print("  Restoring data from backup...")
        print(f"    Backup file: {BACKUP_PATH}")
        print(f"   Backup file size: {os.path.getsize(BACKUP_PATH) / (1024*1024):.1f} MB")

        # Use subprocess to run the psql command to restore the backup
        restore_command = [
            'psql',
            *connection_args,
            '--dbname', DB_NAME,
            '--file', BACKUP_PATH,
            '--quiet'  # Reduce output noise
        ]

        print("   üîÑ Restoring database ...")
        restore_result = subprocess.run(
            restore_command, env=env, check=True, capture_output=True, text=True
        )
        # The captured stderr would otherwise be discarded; surface anything
        # psql reported so problems during the restore are visible.
        restore_messages = (restore_result.stderr or "").strip()
        if restore_messages:
            print("   psql reported messages during restore:")
            print(restore_messages)

        print(" DATABASE RESTORE SUCCESSFUL!")
        print("=" * 60)
        print("‚úÖ Database is ready for SQL-RAG queries")

        # Quick verification
        verify_database()
        return True

    except FileNotFoundError as e:
        print(f"‚ùå File Error: {e}")
    except subprocess.CalledProcessError as e:
        print("‚ùå Database Restore Error:")
        print(f"   Command: {' '.join(e.cmd)}")
        print(f"   Return code: {e.returncode}")
        print(f"   STDOUT: {e.stdout}")
        print(f"   STDERR: {e.stderr}")
    except Exception as e:
        print(f"‚ùå Unexpected error: {e}")
    return False

In [None]:
def verify_database(test_patient_id: str = "8c8e1c9a-b310-43c6-33a7-ad11bad21c40"):
    """Print row counts for the key tables plus a vaccine-data spot check.

    A single connection is used for all checks and is always closed. After a
    failed count (for example a missing table) the transaction is rolled
    back, so the remaining tables can still be counted.

    Args:
        test_patient_id: Patient whose immunization rows are counted in the
            spot check. Defaults to the id this notebook has been using.

    Returns:
        None. All results are printed; errors are reported, never raised.
    """
    conn = None
    try:
        import psycopg2

        conn = psycopg2.connect(
            dbname=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            host=DB_HOST,
            port=5432
        )
        cursor = conn.cursor()

        # Check key tables exist and have data
        tables_to_check = ['patients', 'conditions', 'medication_requests', 'observations', 'procedures', 'immunizations']

        print(" Database Verification:")
        total_records = 0
        failed_tables = []

        for table in tables_to_check:
            try:
                # Table names come from the fixed list above, never from input.
                cursor.execute(f"SELECT COUNT(*) FROM {table};")
                count = cursor.fetchone()[0]
            except Exception as e:
                # A failed statement aborts the open transaction; without a
                # rollback every later query on this connection fails too.
                conn.rollback()
                failed_tables.append(table)
                if table == 'immunizations':
                    print(f"     {table}: Table not found")
                else:
                    print(f"     {table}: Error - {e}")
                continue

            total_records += count
            # An empty immunizations table is printed with one extra leading space.
            if table == 'immunizations' and count <= 0:
                print(f"    {table}: {count:,} records")
            else:
                print(f"   {table}: {count:,} records")

        print(f" Total records across all tables: {total_records:,}")
        if failed_tables:
            print(f"  Database verification finished with problems in: {', '.join(failed_tables)}")
        else:
            print("‚úÖ Database verification completed successfully!")
        print("=" * 60)

        # Additional check for vaccine-ready patient (best effort)
        try:
            # Get a sample patient ID for testing
            cursor.execute("SELECT patient_id FROM patients LIMIT 1;")
            sample_patient = cursor.fetchone()
            if sample_patient:
                print(f" Sample patient ID for testing: {sample_patient[0]}")

            cursor.execute(
                "SELECT COUNT(*) FROM immunizations WHERE patient_id = %s;",
                (test_patient_id,),
            )
            vaccine_count = cursor.fetchone()[0]

            if vaccine_count > 0:
                print(f" Test patient has {vaccine_count} vaccine records ready!")
            else:
                print("  Test patient vaccine data not loaded yet")
        except Exception as e:
            # Still non-fatal, but no longer silent.
            conn.rollback()
            print(f"  Skipped vaccine spot check: {e}")

    except Exception as e:
        print(f"  Database verification failed: {e}")
    finally:
        if conn is not None:
            conn.close()

In [None]:
def _immunization_records(df):
    """Convert the immunizations CSV frame into tuples in INSERT column order.

    Column mapping (CSV -> table):
        DATE -> date, PATIENT -> patient_id, ENCOUNTER -> encounter_id,
        CODE -> code, DESCRIPTION -> display, BASE_COST -> base_cost
    Missing DATE / ENCOUNTER / BASE_COST values become None (SQL NULL).
    """
    import pandas as pd

    records = []
    for row in df.to_dict("records"):
        encounter = row.get('ENCOUNTER')
        records.append((
            pd.to_datetime(row['DATE']) if pd.notna(row['DATE']) else None,
            row['PATIENT'],
            encounter if pd.notna(encounter) else None,
            row['CODE'],
            row['DESCRIPTION'],
            float(row['BASE_COST']) if pd.notna(row['BASE_COST']) else None,
        ))
    return records


def load_immunizations_data():
    """Load immunizations.csv into the ``immunizations`` table.

    When the table already exists its rows are deleted and replaced; when it
    does not exist it is created (with indexes on patient_id and date) first.
    The whole load runs in one transaction: on any error nothing is
    committed, and the connection is closed in every case.

    Returns:
        True on success; False when the CSV is missing or the load failed
        (the error and traceback are printed, not raised).
    """
    conn = None
    try:
        # Path to immunizations CSV
        immunizations_csv = DATA_PATH + "/synthea_sample_data_csv_latest/immunizations.csv"

        if not os.path.exists(immunizations_csv):
            print(f"  Immunizations CSV not found: {immunizations_csv}")
            return False

        import pandas as pd
        import psycopg2
        from psycopg2.extras import execute_values

        # Read and convert the CSV once; both branches below insert the same rows.
        records = _immunization_records(pd.read_csv(immunizations_csv))

        # Connect to database
        conn = psycopg2.connect(
            dbname=DB_NAME, user=DB_USER, password=DB_PASSWORD, host=DB_HOST, port=5432
        )
        cursor = conn.cursor()

        # Check existing table structure
        cursor.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name = 'immunizations' AND table_schema = 'public'
            ORDER BY ordinal_position;
        """)
        existing_columns = [col[0] for col in cursor.fetchall()]

        if existing_columns:
            print(f" Existing table structure: {', '.join(existing_columns)}")

            # Clear existing data first to avoid duplicates
            cursor.execute("DELETE FROM immunizations;")
            print(" Cleared existing immunizations data")
        else:
            # Table doesn't exist, create it first (without immunization_id)
            print(" Creating immunizations table...")
            cursor.execute("""
                CREATE TABLE immunizations (
                    date TIMESTAMP WITH TIME ZONE,
                    patient_id VARCHAR(255) NOT NULL,
                    encounter_id VARCHAR(255),
                    code VARCHAR(255),
                    display VARCHAR(500),
                    base_cost DECIMAL(10,2),
                    FOREIGN KEY (patient_id) REFERENCES patients(patient_id)
                );
            """)

            # Create indexes
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_immunizations_patient_id ON immunizations(patient_id);
                CREATE INDEX IF NOT EXISTS idx_immunizations_date ON immunizations(date);
            """)

        execute_values(
            cursor,
            """INSERT INTO immunizations
               (date, patient_id, encounter_id, code, display, base_cost)
               VALUES %s""",
            records
        )

        conn.commit()
        cursor.close()

        print(f"‚úÖ Successfully loaded {len(records)} immunization records")
        print(" Immunizations table ready for vaccine queries!")
        return True

    except Exception as e:
        print(f"‚ùå Error loading immunizations: {e}")
        import traceback
        print(f" Debug info: {traceback.format_exc()}")
        return False
    finally:
        # Closing without a commit discards any partial work, so a failed
        # load leaves the table as it was.
        if conn is not None:
            conn.close()

# Execute the restore (drops and re-creates the database, then runs verify_database)
restore_database()

# Load immunizations data if CSV is available.
# load_immunizations_data() is the cell's last expression, so its boolean
# result is shown as the cell output.
print(" üíâ  Checking for immunizations data...")
print("=" * 60)
load_immunizations_data()

 Starting database restore...
 Preparing clean database environment...
   ‚úÖ Existing database dropped successfully
   ‚úÖ New database created successfully
  Restoring data from backup...
    Backup file: /content/drive/MyDrive/210_Capstone/210_Factory/210_dev/synthea_ehr_backup.sql.backup_20251009_140104
   Backup file size: 16.9 MB
   üîÑ Restoring database ...
 DATABASE RESTORE SUCCESSFUL!
‚úÖ Database is ready for SQL-RAG queries
 Database Verification:
   patients: 111 records
   conditions: 4,140 records
   medication_requests: 4,926 records
   observations: 60,597 records
   procedures: 17,993 records
   immunizations: 1,549 records
 Total records across all tables: 89,316
‚úÖ Database verification completed successfully!
 Sample patient ID for testing: 8c8e1c9a-b310-43c6-33a7-ad11bad21c40
 Test patient has 6 vaccine records ready!
 üíâ  Checking for immunizations data...
 Existing table structure: date, patient_id, encounter_id, code, display, base_cost
 Cleared existing im

True

In [None]:
# PRODUCTION-READY SQL EXECUTION SYSTEM
!pip -q install psycopg2-binary

import re
import psycopg2
import psycopg2.extras
import time
import logging

# Configure logging for production
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Enhanced SQL Safety and Processing
_SELECT_ONLY = re.compile(r"^\s*select\b", re.IGNORECASE | re.DOTALL)
_BANNED = (" insert ", " update ", " delete ", " drop ", " alter ",
           " create ", " grant ", " revoke ", " truncate ", " copy ", " do ")

# More precise regex to match :name parameters
_PARAM = re.compile(r'(?<!:):([a-zA-Z_]\w*)')

def _clean_sql(text: str) -> str:
    """Remove code fences / labels and invisible chars - PRODUCTION VERSION."""
    s = (text or "").strip()
    if s.lower().startswith("sql:"):
        s = s[4:].strip()
    m = re.match(r"^```(?:\s*sql)?\s*([\s\S]*?)\s*```$", s, flags=re.IGNORECASE)
    if m:
        s = m.group(1).strip()
    return s.replace("\ufeff","").replace("\u200b","").strip()

def _is_safe_select(sql: str) -> bool:
    """Enhanced SQL safety validation - PRODUCTION VERSION."""
    s = sql.strip()
    if not s:
        return False

    # Check for multiple statements (allow single trailing semicolon only)
    if ";" in s[:-1]:
        logger.warning("SQL contains multiple statements - blocked for security")
        return False

    if not _SELECT_ONLY.match(s):
        logger.warning("SQL does not start with SELECT - blocked for security")
        return False

    # Check for dangerous keywords
    low = f" {s.lower()} "
    dangerous_found = [keyword.strip() for keyword in _BANNED if keyword in low]
    if dangerous_found:
        logger.warning(f"SQL contains dangerous keywords: {dangerous_found} - blocked for security")
        return False

    # Additional production checks
    if "information_schema" not in s.lower():
        if ":patient_id" not in s and "%(patient_id)s" not in s:
            logger.warning("SQL query does not include patient_id parameter - this may return too much data")

    return True


[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/4.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[90m‚ï∫[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.5/4.2 MB[0m [31m43.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m4.2/4.2 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
def _to_psycopg2_named(sql: str) -> str:
    """
    Convert :name placeholders to %(name)s for psycopg2
    This function handles parameter format conversion more robustly
    """
    # Not to have mixed formats by standardizing everything to psycopg2 format
    converted = sql

    # Find all parameter patterns and convert them
    def replace_param(match):
        param_name = match.group(1)
        return f"%({param_name})s"

    converted = _PARAM.sub(replace_param, converted)

    # Log the conversion for debugging
    if ":patient_id" in sql or ":patient_id" in converted:
        logger.debug(f"Parameter conversion: '{sql}' -> '{converted}'")

    return converted

In [None]:
def get_database_connection():
    """Open a new PostgreSQL connection using the notebook's DB_* settings.

    The connection attempt times out after 10 seconds, and the session is
    started with a 30 second statement timeout.

    Returns:
        A new psycopg2 connection; the caller is responsible for closing it.

    Raises:
        psycopg2.Error: when the connection cannot be established (the error
            is logged before being re-raised).
    """
    settings = {
        "dbname": DB_NAME,
        "user": DB_USER,
        "password": DB_PASSWORD,
        "host": DB_HOST,
        "port": 5432,
        "connect_timeout": 10,  # seconds allowed for establishing the connection
        "options": "-c statement_timeout=30000",  # per-statement limit, in ms
    }
    try:
        return psycopg2.connect(**settings)
    except psycopg2.Error as e:
        logger.error(f"Database connection failed: {e}")
        raise

In [None]:
# Rows pulled from the cursor per round trip.
_FETCH_BATCH_SIZE = 100

# A '%' that is neither the start of a %(name)s placeholder nor part of an
# already-escaped '%%'. Such characters (e.g. in LIKE '%term%') must be
# doubled when parameters are bound, or psycopg2's formatting step fails.
_UNESCAPED_PERCENT = re.compile(r"%%|%(?!\(\w+\)s)")


def execute_sql(sql: str, params: dict = None, timeout_ms: int = 30000, max_rows: int = 1000):
    """
    Execute a single SELECT query safely and return rows as a list of dicts

    The statement is cleaned, validated as a single SELECT, converted to
    psycopg2's named-parameter format and run on a fresh connection whose
    session is switched to read-only as defence in depth (the keyword
    filter is only a heuristic).

    Args:
      sql: SQL string (can use :param style placeholders, e.g., :patient_id)
      params: dict of parameters if placeholders are used
      timeout_ms: statement timeout in milliseconds (default 30000 = 30 seconds)
      max_rows: maximum number of rows to return (default 1000)

    Returns:
      List[Dict]: each row as a dict; never more than max_rows entries

    Raises:
      ValueError: the statement failed the safety validation
      Exception: a PostgreSQL error occurred (original error chained as cause)
    """
    start_time = time.time()

    # Clean and validate SQL
    raw = _clean_sql(sql)
    if not _is_safe_select(raw):
        raise ValueError("Blocked: SQL must be a single SELECT without DDL/DML keywords.")

    # Convert parameters to handle parameter format consistently
    query = _to_psycopg2_named(raw)
    params = params or {}
    if params:
        # Only needed when parameters are bound: without them the query text
        # is sent as-is and doubling '%' would change it.
        query = _UNESCAPED_PERCENT.sub("%%", query)

    # Additional debug logging for the vaccine query issue
    if "vaccine" in raw.lower():
        logger.debug(f"VACCINE QUERY DEBUG:")
        logger.debug(f"  Original SQL: {raw}")
        logger.debug(f"  Converted SQL: {query}")
        logger.debug(f"  Parameters: {params}")

    logger.info(f"Executing SQL query for patient: {params.get('patient_id', 'N/A')}")

    conn = None
    try:
        # Get connection with production settings
        conn = get_database_connection()
        # Generated SQL is untrusted: make the server refuse any write.
        conn.set_session(readonly=True)

        # Set query timeout; int() guarantees only a number is interpolated
        with conn.cursor() as cursor:
            cursor.execute(f"SET statement_timeout = {int(timeout_ms)}")

        # Execute query with explicit parameter handling
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            if params:
                cur.execute(query, params)
            else:
                cur.execute(query)

            # Fetch in batches, never asking for more than is still allowed,
            # so the result cannot overshoot max_rows.
            results = []
            while len(results) < max_rows:
                batch = cur.fetchmany(min(_FETCH_BATCH_SIZE, max_rows - len(results)))
                if not batch:
                    break
                results.extend(dict(r) for r in batch)

            # Truncation is real only if at least one more row is available.
            truncated = len(results) >= max_rows and cur.fetchone() is not None

            # Log performance metrics
            execution_time = time.time() - start_time
            logger.info(f"Query completed: {len(results)} rows in {execution_time:.2f}s")

            if truncated:
                logger.warning(f"Result set truncated to {max_rows} rows")

            return results

    except psycopg2.Error as e:
        execution_time = time.time() - start_time
        logger.error(f"PostgreSQL error after {execution_time:.2f}s: {e}")

        # Enhanced error handling for parameter format issues
        if "argument formats can't be mixed" in str(e):
            logger.error("PARAMETER FORMAT ERROR DETECTED:")
            logger.error(f"  Query that failed: {query}")
            logger.error(f"  Parameters: {params}")
            logger.error("  This indicates a parameter binding format issue")

        raise Exception(f"Database error for patient {params.get('patient_id', 'N/A')}: {e}") from e
    except Exception as e:
        execution_time = time.time() - start_time
        logger.error(f"Unexpected error after {execution_time:.2f}s: {e}")
        raise
    finally:
        if conn:
            conn.close()

In [None]:
def get_patient_ids(limit: int = 10) -> list:
    """Return up to ``limit`` patient IDs for testing - PRODUCTION HELPER.

    Any failure is logged and reported as an empty list rather than raised.
    """
    query = "SELECT patient_id FROM patients LIMIT %(limit)s;"
    try:
        found = [record['patient_id'] for record in execute_sql(query, {"limit": limit})]
    except Exception as e:
        logger.error(f"Error retrieving patient IDs: {e}")
        return []
    logger.info(f"Retrieved {len(found)} patient IDs for testing")
    return found

def validate_patient_exists(patient_id: str) -> bool:
    """Report whether ``patients`` contains a row with the given ``patient_id``.

    Args:
        patient_id: Identifier bound as the ``patient_id`` query parameter.

    Returns:
        True when the row count for that ID is greater than zero. Any
        exception (including a database failure) is logged and reported as
        False, so False does not distinguish "unknown patient" from "lookup failed".
    """
    count_query = "SELECT COUNT(*) as count FROM patients WHERE patient_id = %(patient_id)s;"
    try:
        count_rows = execute_sql(count_query, {"patient_id": patient_id})
        found = count_rows[0]['count'] > 0
        logger.debug(f"Patient {patient_id} exists: {found}")
    except Exception as exc:
        logger.error(f"Error validating patient: {exc}")
        return False
    return found

# Production logging: emitted once each time this cell runs, after the two
# helper functions above have been (re)defined.
logger.info("üè• Ready for patient queries")

In [None]:
# Simple test: one unparameterized query to confirm execute_sql() can reach
# the database. The result is printed as a list with one dict keyed by "n".

rows = execute_sql("SELECT COUNT(*) AS n FROM patients;")
print(rows)




[{'n': 111}]


In [None]:
# PRODUCTION DATABASE CONNECTION TEST
# Smoke test of the database as seen through execute_sql(): connectivity,
# a sample of patient IDs, per-table row counts, and two patient-scoped queries.
# Any exception raised along the way is reported by the except clause at the end.
print(" Testing Production Database Connection")
print("=" * 50)

try:
    # Test 1: Basic connection
    print(" Test 1: Database Connection")
    rows = execute_sql("SELECT COUNT(*) AS total_patients FROM patients;")
    total_patients = rows[0]['total_patients']
    print("   ‚úÖ Connected successfully")
    print(f"   Total patients in database: {total_patients:,}")

    # Test 2: Get sample patient IDs
    print(" Test 2: Sample Patient IDs")
    patient_ids = get_patient_ids(5)
    for i, pid in enumerate(patient_ids, 1):
        print(f"   Patient {i}: {pid}")

    # Test 3: Validate schema
    # For every table the lateral subquery yields one row per UNION branch
    # (six in total): the real row count from the branch whose name matches
    # and 0 from the others. Summing those values gives the table's row count;
    # counting the joined rows instead reports 6 for every table.
    # CAST(...) is used rather than the :: shorthand to keep colons out of the
    # SQL text handed to execute_sql.
    print("  Test 3: Database Schema Validation")
    schema_check = execute_sql("""
        SELECT t.table_name,
               CAST(SUM(counts.n) AS BIGINT) AS record_count
        FROM information_schema.tables t
        LEFT JOIN LATERAL (
            SELECT COUNT(*) AS n FROM patients WHERE t.table_name = 'patients'
            UNION ALL
            SELECT COUNT(*) FROM conditions WHERE t.table_name = 'conditions'
            UNION ALL
            SELECT COUNT(*) FROM medication_requests WHERE t.table_name = 'medication_requests'
            UNION ALL
            SELECT COUNT(*) FROM observations WHERE t.table_name = 'observations'
            UNION ALL
            SELECT COUNT(*) FROM procedures WHERE t.table_name = 'procedures'
            UNION ALL
            SELECT COUNT(*) FROM immunizations WHERE t.table_name = 'immunizations'
        ) counts ON true
        WHERE t.table_schema = 'public'
        AND t.table_name IN ('patients', 'conditions', 'medication_requests', 'observations', 'procedures', 'immunizations')
        GROUP BY t.table_name
        ORDER BY t.table_name;
    """)

    if schema_check:
        for table in schema_check:
            table_name = table['table_name']
            record_count = table['record_count']  # summed count from the lateral join
            print(f"   {table_name}: Available with {record_count:,} records")
    else:
        # Fallback when the catalog query returns no rows: probe each expected
        # table on its own. Table names come from this fixed list, never from
        # user input, so interpolating them into the SQL text is safe here.
        key_tables = ['patients', 'conditions', 'medication_requests', 'observations', 'procedures', 'immunizations']
        print("     Detailed schema check failed, performing basic table existence check:")
        for table in key_tables:
            try:
                count_result = execute_sql(f"SELECT COUNT(*) as count FROM {table} LIMIT 1;")
                print(f"   {table}: Available ({count_result[0]['count']:,} records)")
            except Exception as e:
                print(f"   ‚ùå {table}: Error - {e}")


    # Test 4: Sample query with patient parameter
    if patient_ids:
        print(" Test 4: Patient-Specific Query Test")
        test_patient = patient_ids[0]
        print(f"    Testing with patient: {test_patient}")

        # Test medication query
        med_results = execute_sql("""
            SELECT med_name, dose, start_datetime, end_datetime
            FROM medication_requests
            WHERE patient_id = %(patient_id)s
            LIMIT 3;
        """, {"patient_id": test_patient})

        print(f"   Medications found: {len(med_results)}")
        for med in med_results[:2]:  # Show first 2
            # A missing end_datetime is treated as "still active"
            status = "Active" if not med['end_datetime'] else "Ended"
            print(f"      ‚Ä¢ {med['med_name']} {med['dose']} ({status})")

        # Test vaccine query using immunizations table; the bind parameter is
        # named patient_id to match the column and the medication query above.
        print("    Testing Vaccine Query ")
        vaccine_results = execute_sql("""
             SELECT display, date, code
             FROM immunizations
             WHERE patient_id = %(patient_id)s
             ORDER BY date DESC
             LIMIT 3;
        """, {"patient_id": test_patient})

        print(f"   Vaccines found: {len(vaccine_results)}")
        for vac in vaccine_results[:2]: # Show first 2
             date_str = vac['date'].strftime('%Y-%m-%d') if vac['date'] else 'No date'
             print(f"      ‚Ä¢ {vac['display']} - {date_str}")


    print("‚úÖ Database connection is working")

except Exception as e:
    # "\n" (not "\\n") so a real blank line separates the failure message
    print(f"\n‚ùå DATABASE TEST FAILED: {e}")




 Testing Production Database Connection
 Test 1: Database Connection
   ‚úÖ Connected successfully
   Total patients in database: 111
 Test 2: Sample Patient IDs
   Patient 1: 8c8e1c9a-b310-43c6-33a7-ad11bad21c40
   Patient 2: 782001bc-f712-50ae-04f5-9a488f3ef4aa
   Patient 3: 80e7f50a-3e99-d5ac-cf97-f8a4b4f9e6c7
   Patient 4: edc17058-55fb-08c7-12df-ece93a402e50
   Patient 5: 9f9dbdcb-23a1-82cc-b7bc-e0e420a95bd1
  Test 3: Database Schema Validation
   conditions: Available with 6 records
   immunizations: Available with 6 records
   medication_requests: Available with 6 records
   observations: Available with 6 records
   patients: Available with 6 records
   procedures: Available with 6 records
 Test 4: Patient-Specific Query Test
    Testing with patient: 8c8e1c9a-b310-43c6-33a7-ad11bad21c40
   Medications found: 3
      ‚Ä¢ Acetaminophen 325 MG Oral Tablet None (Active)
      ‚Ä¢ Naproxen sodium 220 MG Oral Tablet None (Active)
    Testing Vaccine Query 
   Vaccines found: 3
      

In [None]:
# Peek at the first three patient IDs. The query has no ORDER BY, so which
# rows come first is decided by the database.
rows = execute_sql("select patient_id from patients;")
print(rows[:3])



[{'patient_id': '8c8e1c9a-b310-43c6-33a7-ad11bad21c40'}, {'patient_id': '782001bc-f712-50ae-04f5-9a488f3ef4aa'}, {'patient_id': '80e7f50a-3e99-d5ac-cf97-f8a4b4f9e6c7'}]


In [None]:
# Schema inspection: list the column names of public.encounters, sorted by name.
execute_sql("""
SELECT column_name
FROM information_schema.columns
WHERE table_schema='public' AND table_name='encounters'
ORDER BY 1;
""")

[{'column_name': 'class'},
 {'column_name': 'encounter_id'},
 {'column_name': 'end_datetime'},
 {'column_name': 'patient_id'},
 {'column_name': 'reason_text'},
 {'column_name': 'start_datetime'}]

In [None]:
# Schema inspection: every (table_name, column_name) pair in the public schema.
# ORDER BY 1 sorts by table_name only, so column order within a table is
# whatever the database returns.
execute_sql("""
SELECT table_name, column_name
FROM information_schema.columns
WHERE table_schema='public'
ORDER BY 1;
""")

[{'table_name': 'conditions', 'column_name': 'onset_datetime'},
 {'table_name': 'conditions', 'column_name': 'patient_id'},
 {'table_name': 'conditions', 'column_name': 'encounter_id'},
 {'table_name': 'conditions', 'column_name': 'code'},
 {'table_name': 'conditions', 'column_name': 'display'},
 {'table_name': 'conditions', 'column_name': 'condition_id'},
 {'table_name': 'conditions', 'column_name': 'abatement_datetime'},
 {'table_name': 'encounters', 'column_name': 'reason_text'},
 {'table_name': 'encounters', 'column_name': 'start_datetime'},
 {'table_name': 'encounters', 'column_name': 'end_datetime'},
 {'table_name': 'encounters', 'column_name': 'encounter_id'},
 {'table_name': 'encounters', 'column_name': 'patient_id'},
 {'table_name': 'encounters', 'column_name': 'class'},
 {'table_name': 'immunizations', 'column_name': 'base_cost'},
 {'table_name': 'immunizations', 'column_name': 'code'},
 {'table_name': 'immunizations', 'column_name': 'date'},
 {'table_name': 'immunizations', 

In [None]:
# With parameter (canonical pattern)
# Current medications for one patient: rows whose end_datetime is NULL or not
# yet in the past. DISTINCT ON keeps one row per (patient_id, med_name); the
# ORDER BY decides which one, preferring the latest
# COALESCE(end_datetime, start_datetime) and putting NULLs last.
# NOTE(review): this query binds `:patient_id` while other cells use
# `%(patient_id)s`; presumably execute_sql accepts both styles - confirm.
q = """
SELECT DISTINCT ON (mr.patient_id, mr.med_name)
  mr.patient_id,
  mr.med_name AS medication,
  mr.dose,
  mr.route,
  mr.start_datetime,
  mr.end_datetime,
  mr.refills
FROM medication_requests mr
WHERE mr.patient_id = :patient_id
  AND (mr.end_datetime IS NULL OR mr.end_datetime >= NOW())
ORDER BY mr.patient_id,
         mr.med_name,
         COALESCE(mr.end_datetime, mr.start_datetime) DESC NULLS LAST;

"""
rows = execute_sql(q, {"patient_id": '8c8e1c9a-b310-43c6-33a7-ad11bad21c40'})
print(rows[:3])

[{'patient_id': '8c8e1c9a-b310-43c6-33a7-ad11bad21c40', 'medication': 'Acetaminophen 325 MG Oral Tablet', 'dose': None, 'route': None, 'start_datetime': None, 'end_datetime': None, 'refills': None}, {'patient_id': '8c8e1c9a-b310-43c6-33a7-ad11bad21c40', 'medication': 'Naproxen sodium 220 MG Oral Tablet', 'dose': None, 'route': None, 'start_datetime': None, 'end_datetime': None, 'refills': None}]


In [None]:
# IMMUNIZATIONS INTEGRATION VERIFICATION
print(" Verifying immunizations integration system...")
print("=" * 60)

# Retrieval-side check: do vaccine questions pull immunizations examples?
def verify_immunizations_integration():
    """Check that vaccine questions retrieve immunizations examples from ``vs``.

    For each of five fixed vaccine-related questions, the top three matches
    from ``vs.similarity_search_with_score`` are inspected and a match counts
    as a hit when ``'immunizations'`` appears in its ``tables`` metadata.
    A per-question report and the overall hit rate are printed.

    Returns:
        True when at least 60% of the inspected matches are hits, else False.
    """
    print(" Testing immunizations vector store integration...")

    sample_questions = [
        "What vaccines have I received?",
        "Show me my immunization history",
        "When was my last flu shot?",
        "What COVID vaccines have I had?",
        "List all my shots"
    ]

    hits = 0       # matches whose metadata lists 'immunizations'
    examined = 0   # matches that were fully processed

    for question in sample_questions:
        print(f"\n Testing: '{question}'")
        try:
            matches = vs.similarity_search_with_score(question, k=3)

            # Lines are buffered so a question that fails midway prints only
            # its error, not a partial report.
            report = []
            for position, (example, match_score) in enumerate(matches, 1):
                is_hit = 'immunizations' in example.metadata.get("tables", [])
                if is_hit:
                    hits += 1

                label = "‚úÖ immunizations" if is_hit else "üìã other tables"
                report.append(f"   #{position} score={match_score:.4f} ({label})")
                examined += 1

            if report:
                print("\n".join(report))

        except Exception as exc:
            print(f"   ‚ùå Error testing query: {exc}")

    if examined > 0:
        pct = hits / examined * 100
    else:
        pct = 0

    print(f"\n Integration Results:")
    print(f"    Immunizations examples found: {hits}/{examined}")
    print(f"    Success rate: {pct:.1f}%")

    passed = pct >= 60
    if passed:
        print("‚úÖ IMMUNIZATIONS INTEGRATION SUCCESSFUL!")
    else:
        print("  Low immunizations integration")
    return passed

# Database-side check: is the immunizations table populated and queryable?
def verify_immunizations_database():
    """Check that ``immunizations`` has rows and returns data for a sample patient.

    Prints the table's row count, then (when the table is non-empty) the five
    most recent immunizations of one hard-coded test patient.

    Returns:
        True when the sample patient has at least one immunization row.
        False when the table is empty, the patient has none, or any exception
        occurs (the exception is printed).
    """
    print("\n Testing immunizations database integration...")

    try:
        count_rows = execute_sql("""
            SELECT COUNT(*) as record_count
            FROM immunizations;
        """)

        total = count_rows[0]['record_count']
        print(f"    Immunizations table: {total:,} records")

        if not total > 0:
            print("    No immunizations data found")
            return False

        # Spot-check a known patient from the sample data
        sample_patient = '8c8e1c9a-b310-43c6-33a7-ad11bad21c40'
        vaccine_rows = execute_sql("""
                SELECT display, date, code
                FROM immunizations
                WHERE patient_id = %(patient_id)s
                ORDER BY date DESC
                LIMIT 5;
            """, {"patient_id": sample_patient})

        print(f"    Test patient vaccines: {len(vaccine_rows)} found")

        position = 0
        for entry in vaccine_rows:
            position += 1
            if entry['date']:
                when = entry['date'].strftime('%Y-%m-%d')
            else:
                when = 'No date'
            print(f"      {position}. {entry['display']} - {when}")

        return len(vaccine_rows) > 0

    except Exception as exc:
        print(f"   ‚ùå Database error: {exc}")
        return False

# Run verification tests: both checks print their own reports and return a bool.
vector_store_ok = verify_immunizations_integration()
database_ok = verify_immunizations_database()

print(f"\n INTEGRATION STATUS:")
print(f"    Vector Store: {'‚úÖ Ready' if vector_store_ok else '‚ùå Needs Update'}")
print(f"    Database: {'‚úÖ Ready' if database_ok else '‚ùå Needs Data'}")

# The end-to-end SQL generation check only runs when both checks passed.
if vector_store_ok and database_ok:
    print("\n IMMUNIZATIONS FULLY INTEGRATED!")

    # Test end-to-end workflow: generate SQL for a vaccine question and check
    # (by substring) that the generated text mentions the immunizations table.
    # The SQL is only printed here, not executed.
    print("\n Testing end-to-end vaccine query...")
    try:
        test_sql = generate_sql("What vaccines have I received?", k=5)
        print("Generated SQL:")
        print("-" * 40)
        print(test_sql)
        print("-" * 40)

        if "immunizations" in test_sql.lower():
            print("‚úÖ LLM correctly uses immunizations table!")
        else:
            print("  LLM not using immunizations table")

    except Exception as e:
        print(f"‚ùå End-to-end test failed: {e}")

else:
    # Which check failed is shown in the status lines printed above.
    print("\n  INTEGRATION INCOMPLETE:")

print("\n" + "="*60)

 Verifying immunizations integration system...
 Testing immunizations vector store integration...

 Testing: 'What vaccines have I received?'
   #1 score=0.0010 (‚úÖ immunizations)
   #2 score=0.2647 (‚úÖ immunizations)
   #3 score=0.3152 (‚úÖ immunizations)

 Testing: 'Show me my immunization history'
   #1 score=0.1375 (‚úÖ immunizations)
   #2 score=0.1601 (‚úÖ immunizations)
   #3 score=0.6545 (‚úÖ immunizations)

 Testing: 'When was my last flu shot?'
   #1 score=0.0000 (‚úÖ immunizations)
   #2 score=0.7976 (üìã other tables)
   #3 score=0.9069 (üìã other tables)

 Testing: 'What COVID vaccines have I had?'
   #1 score=0.1106 (‚úÖ immunizations)
   #2 score=0.4147 (‚úÖ immunizations)
   #3 score=0.5655 (‚úÖ immunizations)

 Testing: 'List all my shots'




   #1 score=0.4720 (‚úÖ immunizations)
   #2 score=1.1060 (üìã other tables)
   #3 score=1.1150 (‚úÖ immunizations)

 Integration Results:
    Immunizations examples found: 12/15
    Success rate: 80.0%
‚úÖ IMMUNIZATIONS INTEGRATION SUCCESSFUL!

 Testing immunizations database integration...
    Immunizations table: 1,549 records
    Test patient vaccines: 5 found
      1. Influenza  seasonal  injectable  preservative free - 2024-10-16
      2. Influenza  seasonal  injectable  preservative free - 2024-04-10
      3. Influenza  seasonal  injectable  preservative free - 2021-04-07
      4. COVID-19 vaccine  vector-nr  rS-Ad26  PF  0.5 mL - 2021-04-07
      5. Influenza  seasonal  injectable  preservative free - 2018-04-04

 INTEGRATION STATUS:
    Vector Store: ‚úÖ Ready
    Database: ‚úÖ Ready

 IMMUNIZATIONS FULLY INTEGRATED!

 Testing end-to-end vaccine query...
Generated SQL:
----------------------------------------
SELECT DISTINCT ON (i.patient_id, i.display, i.date)
  i.display AS

In [None]:
# ANSWER_PATIENT_QUESTION FUNCTION WITH IMMUNIZATIONS SUPPORT
print(" Creating updated answer_patient_question function ...")
print("=" * 60)

def answer_patient_question(user_question: str, patient_id: str, k: int = 3, max_tokens: int = 400, validate_patient: bool = True):
    """
    Answer a patient's question using SQL-RAG: generate SQL, vet it, run it.

    Steps: optionally confirm the patient exists, generate SQL with
    generate_sql(), reject SQL that is empty, fails is_safe_select(), or never
    references the ``patient_id`` bind parameter, then execute it with
    ``patient_id`` bound.

    Args:
        user_question: Natural language question from patient
        patient_id: Patient identifier, bound as the ``patient_id`` parameter
        k: Number of similar examples to retrieve from FAISS
        max_tokens: Maximum tokens for LLM response
        validate_patient: Whether to validate patient exists

    Returns:
        tuple: (generated_sql, query_results). Every failure (unknown patient,
        rejected SQL, or any exception) is logged and reported as (None, []).
    """
    try:
        # Patient validation
        if validate_patient and not validate_patient_exists(patient_id):
            logger.warning(f"Patient validation failed for: {patient_id}")
            return None, []

        # Generate SQL using updated system with immunizations support
        sql = generate_sql(user_question, k=k, max_tokens=max_tokens)

        if not sql or not is_safe_select(sql):
            logger.error(f"Generated unsafe or invalid SQL: {sql}")
            return None, []

        # The SQL comes from an LLM. If it never references the patient_id
        # parameter, the bind dict below is ignored and the query would run
        # unfiltered across all patients, so refuse to execute it. This is a
        # baseline guard only: it checks the placeholder is present, not where
        # in the statement it is used.
        if ":patient_id" not in sql and "%(patient_id)s" not in sql:
            logger.error(f"Generated SQL is not scoped to patient_id, refusing to execute: {sql}")
            return None, []

        # Execute SQL with patient parameter
        results = execute_sql(sql, {"patient_id": patient_id})

        # Log for debugging (only the first 8 characters of the ID)
        logger.info(f"Query for patient {patient_id[:8]}...: {len(results)} results")

        # Special handling for vaccine queries: warn when a vaccine-sounding
        # question produced SQL that does not mention the immunizations table
        if any(keyword in user_question.lower() for keyword in ['vaccine', 'immunization', 'shot', 'vaccination']):
            if 'immunizations' not in sql.lower():
                logger.warning("Vaccine query not using immunizations table - FAISS may need updating")
            else:
                logger.info("‚úÖ Vaccine query correctly using immunizations table")

        return sql, results

    except Exception as e:
        logger.error(f"Error in answer_patient_question: {e}")
        return None, []

def patient_query_with_summary(user_question: str, patient_id: str, k: int = 5, max_tokens: int = 800):
    """
    Run the full workflow for one question: query, results, plain-language summary.

    Delegates SQL generation and execution to answer_patient_question(), loads
    the rows into a DataFrame, and summarizes them with summarize_df_with_llm()
    when that function is defined (otherwise a one-line fallback is used).
    Progress is printed along the way.

    Args:
        user_question: Natural language question from the patient
        patient_id: Patient identifier
        k: Passed through to answer_patient_question()
        max_tokens: Passed through to answer_patient_question(); the summary
            call uses its own fixed limit of 600

    Returns:
        dict: on success, keys success/question/patient_id/sql_generated/
        records_found/data_summary/raw_data/execution_time; on failure,
        success=False with an 'error' message.
    """
    print(f" Processing: {user_question}")
    print(f" Patient: {patient_id}")
    print("-" * 50)

    started = time.time()

    try:
        sql, rows = answer_patient_question(user_question, patient_id, k=k, max_tokens=max_tokens)

        # answer_patient_question reports every failure as sql=None
        if sql is None:
            return dict(
                success=False,
                error='Failed to generate SQL query',
                question=user_question,
                patient_id=patient_id,
            )

        print(f" Found {len(rows)} records")

        # Tabular view of the rows (empty frame when nothing came back)
        df = pd.DataFrame(rows) if rows else pd.DataFrame()
        if rows:
            print(f"üìã Data columns: {list(df.columns)}")

        # Patient-friendly summary, or a terse fallback when the LLM
        # summarizer has not been defined yet in this session
        if 'summarize_df_with_llm' in globals():
            summary = summarize_df_with_llm(
                df,
                patient_id=patient_id,
                user_question=user_question,
                model=LLM_MODEL,
                max_tokens=600
            )
        elif df.empty:
            summary = f"No data found for your question: '{user_question}'"
        else:
            summary = f"Found {len(df)} records. Data includes: {', '.join(df.columns)}"

        elapsed = time.time() - started

        outcome = dict(
            success=True,
            question=user_question,
            patient_id=patient_id,
            sql_generated=sql,
            records_found=len(rows),
            data_summary=summary,
            raw_data=rows,
            execution_time=elapsed,
        )

        print(f"‚úÖ Query completed in {elapsed:.2f}s")

        return outcome

    except Exception as exc:
        elapsed = time.time() - started
        logger.error(f"Complete workflow error: {exc}")

        return dict(
            success=False,
            error=str(exc),
            question=user_question,
            patient_id=patient_id,
            execution_time=elapsed,
        )

# Smoke test: run one vaccine question for a known sample patient and show up
# to three of the returned rows.
print("\n Testing updated function with vaccine query...")
test_patient = '8c8e1c9a-b310-43c6-33a7-ad11bad21c40'
test_question = "What vaccines have I received?"

try:
    sql, results = answer_patient_question(test_question, test_patient, k=5, max_tokens=800)

    if sql is None:
        # answer_patient_question reports every failure as (None, []); handle
        # it here instead of failing on None.lower() with a misleading message.
        print("‚ùå Test failed: no SQL returned (patient validation, SQL generation, or execution failed - see log)")
    else:
        print(f" Generated SQL uses immunizations: {'‚úÖ Yes' if 'immunizations' in sql.lower() else '‚ùå No'}")
        print(f" Results found: {len(results)}")

        if results:
            print(" SUCCESS: Updated function working with immunizations!")
            for i, result in enumerate(results[:3], 1):
                # Column names depend on the aliases the generated SQL chose;
                # the trailing `or` also covers a key that is present but None.
                vaccine_name = result.get('display') or result.get('vaccine_name') or 'Unknown'
                date_field = result.get('date') or result.get('vaccination_date')
                date_str = date_field.strftime('%Y-%m-%d') if date_field else 'No date'
                print(f"   {i}. {vaccine_name} - {date_str}")
        else:
            print("  No results - check immunizations data loading")

except Exception as e:
    print(f"‚ùå Test failed: {e}")

print("\n" + "="*60)

 Creating updated answer_patient_question function ...

 Testing updated function with vaccine query...
 Generated SQL uses immunizations: ‚úÖ Yes
 Results found: 6
 SUCCESS: Updated function working with immunizations!
   1. COVID-19 vaccine  vector-nr  rS-Ad26  PF  0.5 mL - 2021-04-07
   2. Influenza  seasonal  injectable  preservative free - 2024-10-16
   3. Influenza  seasonal  injectable  preservative free - 2024-04-10



In [None]:
#  VERIFY FAISS - Test Query Generation
print(" Verifying FAISS update and query generation...")
print("=" * 60)

def test_updated_faiss_queries():
    """Print, for five vaccine questions, the retrieved examples and generated SQL.

    For each question this lists the three few-shot examples returned by
    get_few_shots() (flagging those whose SQL mentions 'immunizations'), then
    calls generate_sql() and reports whether its output mentions that table.
    Failures are printed per question; nothing is returned.
    """
    vaccine_prompts = [
        "What vaccines have I received?",
        "Show me my immunization history",
        "When was my last flu shot?",
        "What COVID vaccines have I had?",
        "List all my shots"
    ]

    print(" Testing vaccine query generation with FAISS...")

    for number, prompt in enumerate(vaccine_prompts, start=1):
        print(f"\n Test {number}: {prompt}")
        print("-" * 40)

        try:
            # Retrieval step
            shots = get_few_shots(prompt, k=3)
            print(f" Retrieved {len(shots)} examples from FAISS:")

            for idx, shot in enumerate(shots, start=1):
                if 'immunizations' in shot.get('sql', '').lower():
                    tag = "‚úÖ immunizations"
                else:
                    tag = "üìã other table"
                print(f"   {idx}. {shot['question'][:50]}... ({tag})")

            # Generation step
            candidate_sql = generate_sql(prompt, k=3, max_tokens=600)
            mentions_table = 'immunizations' in candidate_sql.lower()
            verdict = '‚úÖ YES' if mentions_table else '‚ùå NO'

            print(f" Generated SQL uses immunizations table: {verdict}")

            if not mentions_table:
                print("  SQL still uses other tables - may need more examples")
            else:
                print(" Generated SQL preview:")
                print(f"   {candidate_sql[:150]}...")

        except Exception as exc:
            print(f"‚ùå Error testing question {number}: {exc}")

# Run the verification tests (prints its own report; returns nothing)
test_updated_faiss_queries()

# Test end-to-end with actual patient: generate SQL, execute it for one
# hard-coded sample patient, and print up to three rows plus the SQL itself.
print(f"\n END-TO-END TEST with real patient data")
print("=" * 50)

test_patient_id = '8c8e1c9a-b310-43c6-33a7-ad11bad21c40'
test_question = "What vaccines have I received?"

try:
    print(f" Patient: {test_patient_id[:8]}...")
    print(f" Question: {test_question}")

    # Test the complete workflow
    sql, results = answer_patient_question(test_question, test_patient_id, k=5, max_tokens=800)

    # sql is None whenever answer_patient_question failed for any reason
    if sql:
        uses_immunizations = 'immunizations' in sql.lower()
        print(f" Generated SQL uses immunizations: {'‚úÖ YES' if uses_immunizations else '‚ùå NO'}")
        print(f" Results found: {len(results)}")

        if results and len(results) > 0:
            print(" Sample results:")
            for i, result in enumerate(results[:3], 1):
                # Try several keys because the column aliases are chosen by
                # the generated SQL and vary between runs
                vaccine_name = (result.get('display') or
                              result.get('vaccine_name') or
                              'Unknown vaccine')
                date_field = (result.get('date') or
                             result.get('vaccination_date') or
                             result.get('performed_datetime'))
                date_str = date_field.strftime('%Y-%m-%d') if date_field else 'No date'
                print(f"   {i}. {vaccine_name} - {date_str}")
        else:
            print("  No results returned - check SQL generation")

        # Show the actual SQL for debugging
        print(f"\n Actual SQL generated:")
        print("-" * 30)
        print(sql)
        print("-" * 30)

    else:
        print("‚ùå SQL generation failed")

except Exception as e:
    print(f"‚ùå End-to-end test error: {e}")

print("\n" + "="*60)

 Verifying FAISS update and query generation...
 Testing vaccine query generation with FAISS...

 Test 1: What vaccines have I received?
----------------------------------------
 Retrieved 3 examples from FAISS:
   1. What vaccines have I received?... (‚úÖ immunizations)
   2. What immunizations have I had?... (‚úÖ immunizations)
   3. What COVID vaccines have I received?... (‚úÖ immunizations)
 Generated SQL uses immunizations table: ‚úÖ YES
 Generated SQL preview:
   SELECT DISTINCT ON (i.patient_id, i.display, i.date)
  i.display AS vaccine_name,
  i.date AS vaccination_date,
  i.code AS vaccine_code
FROM immuniza...

 Test 2: Show me my immunization history
----------------------------------------
 Retrieved 3 examples from FAISS:
   1. Show me my immunization record... (‚úÖ immunizations)
   2. Show me my vaccination history... (‚úÖ immunizations)
   3. What immunizations have I had?... (‚úÖ immunizations)
 Generated SQL uses immunizations table: ‚úÖ YES
 Generated SQL preview:
  

# Tie LLM output to results returned from the PostgreSQL database

In [None]:
# List the patient_id of every row in patients. There is no LIMIT in the
# query, so the cell output grows with the table.
execute_sql("select p.patient_id from patients p")



[{'patient_id': '8c8e1c9a-b310-43c6-33a7-ad11bad21c40'},
 {'patient_id': '782001bc-f712-50ae-04f5-9a488f3ef4aa'},
 {'patient_id': '80e7f50a-3e99-d5ac-cf97-f8a4b4f9e6c7'},
 {'patient_id': 'edc17058-55fb-08c7-12df-ece93a402e50'},
 {'patient_id': '9f9dbdcb-23a1-82cc-b7bc-e0e420a95bd1'},
 {'patient_id': 'be874504-c868-ebfd-9a77-df6b1e5ff6cc'},
 {'patient_id': '30e48e16-2df7-207e-7a3d-1650ef0c1ed8'},
 {'patient_id': '57b21dea-ff00-6c3e-92d9-91c7627f53b2'},
 {'patient_id': 'a3d34c1f-5421-e078-38ec-1498a5941dbe'},
 {'patient_id': 'e83fe1b3-f94f-5591-f851-1da300e24e99'},
 {'patient_id': 'e6705c33-7349-8b12-484d-3b1f93227178'},
 {'patient_id': '2da86d63-34ae-b887-ddff-8f6f1e6990f1'},
 {'patient_id': '04181caa-fcc1-c6c8-743e-a903eff368de'},
 {'patient_id': '20802592-1c31-7339-4c4c-2fe648e1a716'},
 {'patient_id': '406e8bad-81b5-7624-5b8a-4aeeb74028f5'},
 {'patient_id': 'a331b5bc-cbea-a205-a8bf-dbf3255ef36a'},
 {'patient_id': '641efcda-7397-4172-c6ac-8231342fa53e'},
 {'patient_id': 'e64918a6-528c-

In [None]:
# Encounters that have a non-NULL reason_text, across all patients (the query
# has no patient filter). Note that this rebinds the notebook-level name `sql`.
sql = (
    "SELECT e.patient_id, e.start_datetime, e.end_datetime, "
    "e.class AS encounter_class, e.reason_text AS reason "
    "FROM encounters e "
    "WHERE e.reason_text IS NOT NULL;"
)
execute_sql(sql)



[]

In [None]:
# NOTE(review): patient_query_with_summary, defined in an earlier cell, also
# references `pd`; confirm pandas is imported before that cell runs on a
# fresh kernel.
import pandas as pd

# Example end-to-end call: generate and run SQL for one question and one
# hard-coded patient, then show up to ten of the returned rows as a DataFrame.
patient_id = '0fca905f-391c-08d3-4b93-b53f69b9da53'
user_q = "What has been my highest weight"

sql, rows = answer_patient_question(user_q, patient_id, k=5, max_tokens=1000)
print("Generated SQL:\n", sql, "\n")
print("Rows:", len(rows))
if rows:
    display(pd.DataFrame(rows).head(10))

Generated SQL:
 SELECT DISTINCT ON (o.patient_id)
  o.patient_id,
  COALESCE(o.display, o.loinc_code) AS vital_name,
  o.value_num AS value,
  o.value_unit AS unit,
  o.effective_datetime
FROM observations AS o
WHERE o.patient_id = :patient_id
  AND (
    o.loinc_code = '29463-7'
    OR LOWER(o.display) IN ('body weight', 'weight')
  )
ORDER BY o.patient_id, o.value_num DESC NULLS LAST; 

Rows: 1


Unnamed: 0,patient_id,vital_name,value,unit,effective_datetime
0,0fca905f-391c-08d3-4b93-b53f69b9da53,Body Weight,102,kg,2018-03-12 14:19:34+00:00


# Tie LLM output to results returned from the PostgreSQL database (after the DataFrame is built, the LLM generates a summary)

In [None]:
import io

def df_to_csv_for_llm(df: pd.DataFrame, max_rows: int = 200, null_marker: str = "‚Äî") -> tuple[str, bool]:
    """
    Convert a DataFrame to CSV text for inclusion in an LLM prompt.

    - Keeps only the first ``max_rows`` rows so prompts stay manageable.
    - Writes missing values (NaN / None / NaT / pd.NA) as ``null_marker``.
    - Column order and the caller's DataFrame are left untouched.

    Args:
        df: Data to serialize.
        max_rows: Maximum number of rows to include.
        null_marker: Text written in place of missing values.

    Returns:
        (csv_text, truncated_flag) where truncated_flag is True when rows
        were dropped to honor ``max_rows``.
    """
    truncated = len(df) > max_rows
    if truncated:
        df = df.head(max_rows)

    # Let the CSV writer substitute the marker via na_rep. Filling the frame
    # with a string first raises on nullable-integer and categorical columns
    # and needlessly copies the data.
    csv_buf = io.StringIO()
    df.to_csv(csv_buf, index=False, na_rep=null_marker)
    return csv_buf.getvalue(), truncated

def summarize_df_with_llm(
    df: pd.DataFrame,
    patient_id: str,
    user_question: str,
    model: str = "gpt-4o-mini",
    max_rows: int = 200,
    null_marker: str = "‚Äî",
    max_tokens: int = 600
) -> str:
    """
    Ask the LLM for a patient-friendly summary of a query result.

    The DataFrame is serialized to CSV (at most ``max_rows`` rows) and embedded
    in a prompt together with the patient ID and the original question. The
    function never raises: every failure path returns a fallback summary.

    Args:
        df: Query result to summarize; None or empty short-circuits to a
            fixed "No data found" message without calling the LLM.
        patient_id: Patient identifier, included verbatim in the prompt.
        user_question: The patient's original question.
        model: Chat model name passed to the client.
        max_rows: Row cap applied when serializing ``df``.
        null_marker: Text used for missing values in the CSV; the prompt tells
            the model it means "not recorded".
        max_tokens: Completion token limit passed to the client.

    Returns:
        The model's response text, or the result of
        create_structured_fallback_summary() when the global ``client`` is
        missing, the response is shorter than 100 characters or mentions
        'technical issue', or any exception occurs.

    NOTE(review): the prompt sends the full patient_id and the record data to
    the OpenAI client; confirm that is acceptable for the data in use.
    """
    if df is None or df.empty:
        return f"No data found for your question: '{user_question}'. This could mean there are no relevant records for your account, or the information might be stored differently. Please contact your healthcare provider if you believe this information should be available."

    try:
        # Check if OpenAI client is available (a notebook-level global)
        if 'client' not in globals():
            logger.warning("OpenAI client not available, creating fallback summary")
            return create_structured_fallback_summary(df, user_question, patient_id)

        csv_text, truncated = df_to_csv_for_llm(df, max_rows=max_rows, null_marker=null_marker)

        # Enhanced medical-focused prompt; when the data was truncated the
        # "Data Retrieved" line states how many of the total rows are shown
        user_prompt = f"""You are a medical data interpreter helping a patient understand their health records.

PATIENT CONTEXT:
- Patient ID: {patient_id}
- Patient Question: "{user_question}"
- Data Retrieved: {min(len(df), max_rows)} records{' (showing first ' + str(max_rows) + ' of ' + str(len(df)) + ')' if truncated else ''}

DATA TO INTERPRET:
{csv_text}

INSTRUCTIONS:
1. **Direct Answer**: First, directly answer the patient's specific question
2. **Key Findings**: Highlight the most important medical information
3. **Context & Trends**: Note any patterns, dates, or changes over time
4. **Patient-Friendly Language**: Use clear, non-technical terms when possible
5. **Missing Data**: If some fields show "{null_marker}", explain this means "not recorded"
6. **Medical Disclaimer**: Always end with appropriate healthcare guidance

TONE: Professional but caring, informative but not alarming

RESPONSE FORMAT:
## Your [Question Type] Summary

**Direct Answer:** [Answer the specific question]

**Key Findings:**
- [Most important points]
- [Notable values or dates]

**Additional Context:**
- [Trends, patterns, or explanations]

**Next Steps:**
- Consult your healthcare provider for detailed interpretation
- [Any specific recommendations based on the data]

Remember: This information is from your medical records but should not replace professional medical advice."""

        resp = client.chat.completions.create(
            model=model,
            temperature=0.1,
            messages=[
                {"role": "system", "content": "You are a helpful medical data interpreter who explains patient health records in clear, patient-friendly language while maintaining medical accuracy."},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=max_tokens,
        )

        # A response with no text content raises here and is handled by the
        # except clause below
        llm_response = resp.choices[0].message.content.strip()

        # Validate the response isn't too generic
        if len(llm_response) < 100 or 'technical issue' in llm_response.lower():
            logger.warning("LLM response seems inadequate, using structured fallback")
            return create_structured_fallback_summary(df, user_question, patient_id)

        return llm_response

    except Exception as e:
        # Deliberate catch-all: summarization is best-effort, so any failure
        # degrades to the structured fallback rather than propagating
        logger.error(f"Error in LLM summarization: {e}")
        return create_structured_fallback_summary(df, user_question, patient_id)

def create_structured_fallback_summary(df: pd.DataFrame, user_question: str, patient_id: str) -> str:
    """
    Build a Markdown summary of query results without calling the LLM.

    The summary contains a header (question, truncated patient ID, record
    count), at most one topic section selected by keywords in the question,
    a date range for up to two date/time-like columns, and fixed
    "Data Details", "Next Steps" and disclaimer sections.

    Args:
        df: Query results; may be empty. Column labels need not be strings
            (e.g. integer positions when the frame was built from row tuples).
        user_question: The patient's question; its keywords pick the topic section.
        patient_id: Patient identifier; only its first 8 characters are shown.

    Returns:
        Markdown text. If building the structured summary raises, a short
        message starting with "## Summary for:" is returned instead.
    """
    # (question keywords, section heading, candidate columns in priority order, counted noun)
    # The first entry whose keywords match the question wins.
    topic_sections = (
        (('condition', 'diagnos'), "### 🏥 Your Diagnoses", ('condition_name', 'display'), 'record'),
        (('medication', 'drug'), "### 💊 Your Medications", ('med_name',), 'prescription'),
        (('procedure', 'surgery'), "### 🔬 Your Procedures", ('display',), 'time'),
        (('vaccine', 'immunization'), "### 💉 Your Vaccinations", ('display',), 'dose'),
        (('observation', 'lab', 'test'), "### 🔬 Your Lab Results & Observations", ('display',), 'result'),
    )

    try:
        record_count = len(df)
        columns = list(df.columns)
        # Stringify labels once so non-string labels cannot break .lower() / join below.
        column_names = [str(col) for col in columns]
        date_columns = [
            col for col, name in zip(columns, column_names)
            if 'date' in name.lower() or 'time' in name.lower()
        ]

        summary = f"""## Your Medical Records Summary

**Question:** {user_question}
**Patient ID:** {patient_id[:8]}...
**Records Found:** {record_count} entries

"""

        # Add specific insights based on question type and available data
        question_lower = user_question.lower()

        for keywords, heading, candidates, noun in topic_sections:
            if not any(keyword in question_lower for keyword in keywords):
                continue
            summary += heading + "\n"
            source_col = next((col for col in candidates if col in columns), None)
            if source_col is not None:
                # value_counts() is sorted by frequency, so this lists the 10 most common entries.
                for value, count in df[source_col].value_counts().head(10).items():
                    summary += f"• **{value}** ({count} {noun}{'s' if count > 1 else ''})\n"
            summary += "\n"
            break

        # Add date information if available
        if date_columns and not df.empty:
            timeline_lines = []
            try:
                for date_col in date_columns[:2]:  # Show up to 2 date columns
                    dates = pd.to_datetime(df[date_col], errors='coerce').dropna()
                    if not dates.empty:
                        label = str(date_col).replace('_', ' ').title()
                        timeline_lines.append(
                            f"• **{label}:** {dates.min().strftime('%B %d, %Y')} "
                            f"to {dates.max().strftime('%B %d, %Y')}\n"
                        )
            except Exception as date_error:
                # The timeline is best-effort: skip it, but leave a trace in the logs.
                logger.warning(f"Skipping timeline section in fallback summary: {date_error}")
                timeline_lines = []
            # Only emit the heading when there is at least one parsable date range.
            if timeline_lines:
                summary += "### 📅 Timeline Information\n" + "".join(timeline_lines) + "\n"

        # Add data structure info
        summary += f"""### 📊 Data Details
• **Total Records:** {record_count}
• **Information Fields:** {', '.join(column_names)}
• **Data Source:** Electronic Health Records Database

### 🏥 Next Steps
1. **Review the Raw Data:** Check the "Raw Data" tab above for complete details
2. **Consult Your Healthcare Provider:** For medical interpretation and guidance
3. **Ask Follow-up Questions:** You can ask more specific questions about this data

### ⚠️ Important Medical Disclaimer
This information is retrieved directly from your electronic health records but should be interpreted by qualified healthcare professionals. Always consult with your healthcare provider for medical advice, diagnosis, or treatment decisions.

*Note: This summary was generated using structured data analysis. For AI-powered interpretation, ensure your OpenAI API connection is properly configured.*"""

        return summary

    except Exception as e:
        logger.error(f"Error creating structured fallback summary: {e}")
        # map(str, ...) keeps this last-resort message from raising on non-string column labels.
        available = ', '.join(map(str, df.columns)) if df is not None and not df.empty else 'No data columns'
        return f"""## Summary for: {user_question}

**Records Found:** {len(df) if df is not None else 0} entries

We successfully retrieved your medical records but encountered an issue creating the detailed summary.

**Available Data:** {available}

**Next Steps:**
- Review the complete data in the "Raw Data" tab above
- Consult your healthcare provider for interpretation of this medical information

**Technical Note:** There was an issue with both AI summarization and structured analysis. Please check system logs or contact technical support."""

def patient_query_with_summary(user_question: str, patient_id: str, k: int = 5, max_tokens: int = 800,
                               summary_max_tokens: int = 600):
    """
    COMPLETE WORKFLOW: Query + Results + Natural Language Summary
    With improved error handling and fallback mechanisms.

    Args:
        user_question: Natural-language question about the patient's records.
        patient_id: Patient identifier the query is scoped to.
        k: Forwarded to ``answer_patient_question``.
        max_tokens: Forwarded to ``answer_patient_question`` (SQL generation step).
        summary_max_tokens: Token budget passed to ``summarize_df_with_llm``.
            Defaults to 600, the value that was previously hard-coded.

    Returns:
        dict. On success: ``success=True`` plus ``question``, ``patient_id``,
        ``sql_generated``, ``records_found``, ``data_summary``, ``raw_data``
        and ``execution_time``. On failure: ``success=False`` plus ``error``,
        ``question``, ``patient_id`` and ``execution_time``.
    """
    print(f" Processing: {user_question}")
    print(f" Patient: {patient_id}")
    print("-" * 50)

    start_time = time.time()

    try:
        # Step 1: Generate SQL and get results
        sql, rows = answer_patient_question(user_question, patient_id, k=k, max_tokens=max_tokens)

        if sql is None:
            return {
                'success': False,
                'error': 'Failed to generate SQL query - check FAISS vector store and LLM connection',
                'question': user_question,
                'patient_id': patient_id,
                # Included so both failure payloads expose the same keys.
                'execution_time': time.time() - start_time
            }

        print(f"📊 Found {len(rows)} records")

        # Step 2: Convert to DataFrame for analysis
        if rows:
            df = pd.DataFrame(rows)
            print(f"📋 Data columns: {list(df.columns)}")
        else:
            df = pd.DataFrame()

        # Step 3: Generate patient-friendly summary with improved error handling
        try:
            summary = summarize_df_with_llm(
                df,
                patient_id=patient_id,
                user_question=user_question,
                model=LLM_MODEL,
                max_tokens=summary_max_tokens
            )
            print("✅ Summary generated successfully")
        except Exception as summary_error:
            # A failed summary must not fail the whole query: fall back to the structured summary.
            print(f" Summary generation failed: {summary_error}")
            summary = create_structured_fallback_summary(df, user_question, patient_id)

        execution_time = time.time() - start_time

        # Step 4: Return comprehensive results
        result = {
            'success': True,
            'question': user_question,
            'patient_id': patient_id,
            'sql_generated': sql,
            'records_found': len(rows),
            'data_summary': summary,
            'raw_data': rows,
            'execution_time': execution_time
        }

        print(f"✅ Query completed in {execution_time:.2f}s")

        return result

    except Exception as e:
        execution_time = time.time() - start_time
        logger.error(f"Complete workflow error: {e}")

        return {
            'success': False,
            'error': str(e),
            'question': user_question,
            'patient_id': patient_id,
            'execution_time': execution_time
        }

# Production logging: runs when this cell is executed, after the summarization
# helpers above have been defined.
logger.info("‚úÖ Summarization system loaded")


# Gradio UI

In [None]:
# ENHANCED GRADIO PATIENT PORTAL UI
#!pip -q install gradio

import pandas as pd
import gradio as gr
import time

def production_patient_portal(patient_id: str, user_question: str, k: int = 5, max_tokens: int = 800):
    """
    PRODUCTION PATIENT PORTAL: Gradio submit handler for the complete workflow.

    Validates the inputs, runs ``patient_query_with_summary`` and formats its
    result for the UI.

    Args:
        patient_id: Patient identifier typed into the portal.
        user_question: Natural-language question about the patient's records.
        k: Forwarded to ``patient_query_with_summary``.
        max_tokens: Forwarded to ``patient_query_with_summary``.

    Returns:
        A 4-tuple ``(sql_text, results_table, summary_markdown, status_text)``.
        Every return path, including validation and error paths, has this shape.
    """
    # Normalize once so whitespace-only input is treated as missing and the
    # same cleaned values are used for the query and for display.
    patient_id = (patient_id or "").strip()
    user_question = (user_question or "").strip()

    if not patient_id or not user_question:
        return (
            " Please provide both Patient ID and your question.",
            pd.DataFrame(),
            "Both fields are required to process your request.",
            ""
        )

    # Validate input format
    if len(patient_id) < 10:
        return (
            " Patient ID appears to be too short. Please check your Patient ID.",
            pd.DataFrame(),
            "Patient IDs are typically longer identifiers. Please verify with your healthcare provider.",
            ""
        )

    # Bound before the try block so the error handler can always report elapsed time.
    start_time = time.time()

    try:
        # Process the complete workflow.
        # int(): slider widgets may deliver numeric values as floats.
        result = patient_query_with_summary(
            user_question,
            patient_id,
            k=int(k),
            max_tokens=int(max_tokens)
        )

        if not result['success']:
            error_msg = result.get('error', 'Unknown error occurred')
            return (
                f"❌ Query Error: {error_msg}",
                pd.DataFrame(),
                "We encountered an issue processing your request. Please try again or contact support if the problem persists.",
                f"❌ Error: {error_msg}"
            )

        # The SQL is shown in a code component, which renders its value verbatim,
        # so return the raw statement rather than Markdown-fenced text.
        sql_display = result['sql_generated']

        # Create DataFrame for display
        if result['raw_data']:
            df = pd.DataFrame(result['raw_data'])
            # Limit display to reasonable size
            display_df = df.head(50)
        else:
            display_df = pd.DataFrame({'Message': ['No records found for your query']})

        # Checking summary
        data_summary = result.get('data_summary', '')

        # Check if we got a minimal fallback summary (indicating both the LLM and
        # the structured summary failed)
        is_fallback = (
            'technical issue generating the detailed summary' in data_summary
            or ('Summary for:' in data_summary and len(data_summary) < 500)
        )

        if is_fallback and result['raw_data']:
            # Try to create a better summary directly
            data_summary = create_enhanced_summary(
                df, user_question, patient_id, result['records_found']
            )

        # Summary with metadata
        summary_with_meta = f"""## Query Results for Patient {patient_id[:8]}...

{data_summary}

---
**Query Information:**
- Records Found: {result['records_found']}
- Processing Time: {result['execution_time']:.2f} seconds
- Query Type: SQL-RAG (Natural Language to Database)

*This information is retrieved directly from your medical records database.*
"""

        return (
            sql_display,
            display_df,
            summary_with_meta,
            f"✅ Success - Found {result['records_found']} records in {result['execution_time']:.2f}s"
        )

    except Exception as e:
        execution_time = time.time() - start_time
        logger.error(f"UI error: {e}")
        return (
            f"❌ System Error: {str(e)}",
            pd.DataFrame({'Error': ['System encountered an unexpected error']}),
            f"""## System Error

We apologize, but there was a technical issue processing your request.

**Error Details:** {str(e)}

**Possible causes:**
- Database connectivity issues
- Invalid Patient ID format
- Query complexity exceeded limits
- OpenAI API issues

**Next steps:**
- Verify your Patient ID is correct
- Try a simpler question
- Contact technical support if issues persist

**Processing Time:** {execution_time:.2f} seconds
""",
            f"❌ Error occurred during processing: {str(e)}"
        )

def create_enhanced_summary(df: pd.DataFrame, user_question: str, patient_id: str, record_count: int) -> str:
    """
    Create an enhanced Markdown summary when LLM summarization fails.

    The summary contains a header, at most one topic section selected by
    keywords in the question (a numbered list of up to 10 distinct values),
    an overall timeline across all date/time-like columns, and fixed
    "Data Details" / "Next Steps" text.

    Args:
        df: Query results. Column labels need not be strings.
        user_question: The patient's question; its keywords pick the topic section.
        patient_id: Not used in the generated text; kept for interface compatibility.
        record_count: Number of records reported in the header.

    Returns:
        Markdown text. If building the summary raises, a short message
        starting with "## Summary for:" is returned instead.
    """
    # (question keywords, section heading, candidate columns in priority order)
    # The first entry whose keywords match the question wins.
    topic_sections = (
        (('condition', 'diagnos'), "**Your Diagnoses:**", ('condition_name', 'display')),
        (('medication', 'drug'), "**Your Medications:**", ('med_name',)),
        (('procedure', 'surgery'), "**Your Procedures:**", ('display',)),
        (('vaccine', 'immunization'), "**Your Vaccinations:**", ('display',)),
    )

    try:
        # Analyze the data structure
        columns = list(df.columns)
        # Stringify labels once so non-string labels cannot break .lower() / join below.
        column_names = [str(col) for col in columns]
        date_columns = [
            col for col, name in zip(columns, column_names)
            if 'date' in name.lower() or 'time' in name.lower()
        ]

        summary = f"""## Your Medical Records Summary

**Question:** {user_question}
**Records Found:** {record_count} entries in your medical history

"""

        # Add specific insights based on question type
        question_lower = user_question.lower()

        for keywords, heading, candidates in topic_sections:
            if not any(keyword in question_lower for keyword in keywords):
                continue
            summary += heading + "\n"
            source_col = next((col for col in candidates if col in columns), None)
            if source_col is not None:
                # First 10 distinct values in row order; nulls are dropped so
                # they are not listed as "nan"/"None".
                for i, value in enumerate(df[source_col].dropna().unique()[:10], 1):
                    summary += f"{i}. {value}\n"
            summary += "\n"
            break

        # Add date information if available
        if date_columns and not df.empty:
            try:
                latest_date = None
                earliest_date = None

                # Track the overall earliest/latest timestamp across every date-like column.
                for date_col in date_columns:
                    dates = pd.to_datetime(df[date_col], errors='coerce').dropna()
                    if not dates.empty:
                        col_latest = dates.max()
                        col_earliest = dates.min()

                        if latest_date is None or col_latest > latest_date:
                            latest_date = col_latest
                        if earliest_date is None or col_earliest < earliest_date:
                            earliest_date = col_earliest

                if latest_date is not None and earliest_date is not None:
                    summary += f"""**Timeline:**
- Most Recent Entry: {latest_date.strftime('%B %d, %Y')}
- Earliest Entry: {earliest_date.strftime('%B %d, %Y')}
- Time Span: {(latest_date - earliest_date).days} days

"""
            except Exception as date_error:
                # The timeline is best-effort: skip it, but leave a trace in the logs.
                logger.warning(f"Skipping timeline section in enhanced summary: {date_error}")

        # Add data structure info
        summary += f"""**Data Details:**
- Available Information: {', '.join(column_names)}
- Total Records: {len(df)}

**Next Steps:**
Please review the detailed data in the "Raw Data" tab above. For medical interpretation and next steps, consult with your healthcare provider.

**Important:** This is your actual medical data retrieved from electronic health records. While we've organized it for you, always discuss these findings with your healthcare team for proper medical guidance."""

        return summary

    except Exception as e:
        logger.error(f"Error creating enhanced summary: {e}")
        return f"""## Summary for: {user_question}

**Records Found:** {record_count} entries

We successfully retrieved your medical records, but encountered an issue creating the detailed summary. Please review the data in the "Raw Data" tab for the complete information.

**Next Steps:** Consult your healthcare provider for interpretation of this medical information."""

# Enhanced Gradio Interface with Better Error Handling
# Portal layout: header, inputs, quick-question buttons, advanced options,
# result tabs and a privacy notice. Component creation order defines the
# on-screen order, so statements here are intentionally kept in sequence.
with gr.Blocks(
    css="""
    footer {visibility: hidden}
    .gradio-container {max-width: 1200px !important}
    .patient-header {background: linear-gradient(45deg, #2196F3, #21CBF3); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
    .error-box {background: #ffebee; border: 1px solid #f44336; padding: 15px; border-radius: 5px; color: #d32f2f;}
    .success-box {background: #e8f5e8; border: 1px solid #4caf50; padding: 15px; border-radius: 5px; color: #2e7d32;}
    """,
    title="MedIntellAgent - Patient Portal"
) as demo:

    # Header
    gr.HTML("""
    <div class="patient-header">
        <h1>🏥 MedIntellAgent - Patient Medical Records Portal</h1>
        <p>Ask questions about your medical history in natural language. Your data is retrieved securely from your electronic health records.</p>
    </div>
    """)


    # Input Section
    with gr.Row():
        with gr.Column(scale=1):
            patient_input = gr.Textbox(
                label="Patient ID",
                placeholder="Enter your unique Patient ID (e.g., 8c8e1c9a-b310-43c6-33a7-ad11bad21c40)",
                info="This is provided by your healthcare provider",
                value="8c8e1c9a-b310-43c6-33a7-ad11bad21c40"  # Pre-fill for testing
            )

    # Real newlines ("\n") so the examples appear on separate lines in the 3-line textbox.
    question_input = gr.Textbox(
        label="Your Medical Question",
        lines=3,
        placeholder="Examples:\n• What medications am I currently taking?\n• What vaccines have I received?\n• What conditions have I been diagnosed with?\n• Show me my recent lab results",
        info="Ask about your medications, conditions, lab results, procedures, vaccines, or appointments"
    )

    # Quick Question Buttons
    with gr.Row():
        medications_btn = gr.Button("💊 My Medications", size="sm")
        conditions_btn = gr.Button("🏥 My Conditions", size="sm")
        vaccines_btn = gr.Button("💉 My Vaccines", size="sm")
        procedures_btn = gr.Button("🔬 My Procedures", size="sm")

    # Advanced Options (collapsed by default)
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            k_input = gr.Slider(
                1, 10, value=5, step=1,
                label="Query Precision (k)",
                info="Higher values may provide more accurate SQL generation"
            )
            max_tokens_input = gr.Slider(
                200, 1500, value=800, step=100,
                label="Response Detail Level",
                info="Higher values allow for more detailed responses"
            )

    # Action Button
    submit_btn = gr.Button("🔍 Search My Medical Records", variant="primary", size="lg")

    # Output Section
    gr.Markdown("## Query Results")

    with gr.Row():
        with gr.Column(scale=1):
            status_output = gr.Textbox(label="Status", interactive=False)

    with gr.Tabs():
        with gr.TabItem("📋 Patient Summary"):
            summary_output = gr.Markdown(label="Medical Summary")

        with gr.TabItem("📊 Raw Data"):
            table_output = gr.Dataframe(
                label="Database Results",
                interactive=False,
                wrap=True,
            )

        with gr.TabItem("🔍 Generated SQL"):
            sql_output = gr.Code(label="SQL Query", language="sql")

    # Event handlers for quick buttons: each one only fills the question box;
    # the user still has to press the search button to run the query.
    medications_btn.click(
        lambda: "What medications am I currently taking?",
        outputs=question_input
    )
    conditions_btn.click(
        lambda: "What conditions have I been diagnosed with?",
        outputs=question_input
    )
    vaccines_btn.click(
        lambda: "What vaccines have I received?",
        outputs=question_input
    )
    procedures_btn.click(
        lambda: "What procedures have I had?",
        outputs=question_input
    )

    # Main submission handler. The outputs list must stay in the same order as
    # the tuple returned by production_patient_portal: (sql, table, summary, status).
    submit_btn.click(
        fn=production_patient_portal,
        inputs=[patient_input, question_input, k_input, max_tokens_input],
        outputs=[sql_output, table_output, summary_output, status_output]
    )

    # Privacy and Security Notice
    gr.HTML("""
    <div style="background: #f5f5f5; padding: 15px; border-radius: 5px; margin-top: 20px;">
        <h4>🔒 Privacy & Security</h4>
        <ul>
            <li><strong>Secure:</strong> All queries are processed securely and logged for audit purposes</li>
            <li><strong>Private:</strong> Only you can access your medical records using your unique Patient ID</li>
            <li><strong>Accurate:</strong> Information is retrieved directly from your electronic health records</li>
            <li><strong>Support:</strong> For technical issues or questions about your data, contact your healthcare provider</li>
        </ul>
    </div>
    """)

# Launch the interface
# NOTE(review): share=True requests a public share link (the cell output below
# shows a *.gradio.live URL). The portal looks up records from a Patient ID
# alone, so confirm that exposing it publicly is intended before using
# non-synthetic data.
# The cell output also notes that, in Colab, errors are only shown when
# debug=True is passed to launch().
demo.launch(
    share=True,
    show_error=True,
    debug=False
)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4ed6866b623d533031.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


