In [1]:
pip install pandas pyarrow fsspec huggingface_hub datasets sqlalchemy-iris llama_iris llama-index-legacy


Note: you may need to restart the kernel to use updated packages.


In [None]:
import os

from sqlalchemy import Column, MetaData, Table, select
from sqlalchemy.sql.sqltypes import Integer
from sqlalchemy_iris import IRISVector
from sqlalchemy import create_engine,text
from sqlalchemy.orm import DeclarativeBase


# Connection URL format: iris://<username>:<password>@<host>:<port>/<namespace>
# Set the IRIS_CONNECTION_URL environment variable to point at your own
# InterSystems IRIS instance without editing credentials into the notebook.
# When it is unset (or empty) the original local URL below is used.
url = os.getenv("IRIS_CONNECTION_URL") or "iris://superuser:SYS@localhost:1972/USER"
engine = create_engine(url, echo=False)

# Create a table metadata
metadata = MetaData()

In [90]:
import getpass
import os
from dotenv import load_dotenv

# Read the OpenAI key from the environment. OPENAI_API_KEY_Fan is the variable
# this notebook was written against; the conventional OPENAI_API_KEY is accepted
# as a fallback so the notebook also runs in a standard setup.
OPENAI_API_KEY_Fan = os.getenv("OPENAI_API_KEY_Fan") or os.getenv("OPENAI_API_KEY")
OPENAI_API_KEY = OPENAI_API_KEY_Fan

# The index built later does not receive the key explicitly for embeddings;
# the default OpenAI embedding model is expected to pick it up from the
# OPENAI_API_KEY environment variable, so export it when it is not already set.
if OPENAI_API_KEY and not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY



In [None]:
# Load the healthcare CSV from the Hugging Face Hub into a pandas DataFrame.

import pandas as pd

# Reading an hf:// path may require authentication first,
# e.g. run `huggingface-cli login` to access this dataset.
HEALTHCARE_CSV_URL = "hf://datasets/nissenbaumbrad/MergedhealthcareProjectData/clean_healthcare_data.csv"

df = pd.read_csv(HEALTHCARE_CSV_URL)
df.head()


Unnamed: 0,PATIENT_ID,FIRST,BIRTHDATE,CLINICAL_NOTES
0,0.0,Kareem959,2019-12-03,well child visit at age 3 including medication...
1,1.0,Dante562,2006-05-26,well child visit conducted at age 7. no proced...
2,2.0,Mitchel812,1989-02-12,general exam conducted for 18-year-old patient...
3,3.0,Arie325,1975-02-15,"patient, age 4, presented to emergency room wi..."
4,4.0,Everette494,1974-02-19,patient presented to the emergency room with a...


In [None]:
# Turn Data Frame into Documents so we can send to IRIS Vector Store:

from llama_index.legacy import Document

def dataframe_to_documents(df):
    """Build one ``Document`` per DataFrame row.

    Each document's text has four labelled lines, in this order: ``Index``
    (``PATIENT_ID`` converted to ``int``), ``Name`` (``FIRST``), ``DOB``
    (``BIRTHDATE``) and ``Notes`` (``CLINICAL_NOTES``).

    Args:
        df: DataFrame providing the PATIENT_ID, FIRST, BIRTHDATE and
            CLINICAL_NOTES columns.

    Returns:
        A list of ``Document`` objects in row order (empty for an empty frame).
    """
    docs = []
    for _, record in df.iterrows():
        # Assemble the labelled lines in the same order they appear in the text.
        body = "\n".join(
            (
                f"Index: {int(record['PATIENT_ID'])}",
                f"Name: {record['FIRST']}",
                f"DOB: {record['BIRTHDATE']}",
                f"Notes: {record['CLINICAL_NOTES']}",
            )
        )
        docs.append(Document(text=body))
    return docs

# Convert every row of the loaded DataFrame into a Document; the resulting
# `documents` list is what gets embedded and indexed in a later cell.
documents = dataframe_to_documents(df)


In [93]:
# Sanity check: show the first 1000 characters of the first document,
# then display how many documents were built.
first_doc_preview = documents[0].text[:1000]
print(first_doc_preview)
len(documents)

Index: 0
Name: Kareem959
DOB: 2019-12-03
Notes: well child visit at age 3 including medication reconciliation. med review due to assess current medications and overall health. follow-up well child visit with medication reconciliation performed. continued monitoring required; med review is due. routine well child visit; med review remains due for updated medication and health status assessment. standard well child visit completed with no specific procedures or medication changes reported. well child visit with medication reconciliation; patient remains stable without active conditions. well child visit focusing on medication reconciliation; med review is indicated to ensure up-to-date management. routine well child visit, with med review still pending to evaluate any ongoing medication needs. well child visit including medication reconciliation, no new issues identified. regular well child examination completed; medication review remains due. well child visit with a dental referral issu

34

In [None]:
# Use Llama Index llama IRIS to create IRIS Vector Store:

from llama_index.legacy import StorageContext,VectorStoreIndex
from llama_iris import IRISVectorStore

# Parameters for the IRIS-backed vector store that will hold the embeddings.
iris_store_params = dict(
    connection_string=url,
    table_name="combined_data_new",
    embed_dim=1536,  # openai embedding dimensionality
    engine_args={"connect_args": {"sslcontext": None}},
)
vector_store = IRISVectorStore.from_params(**iris_store_params)

# StorageContext captures how vectors will be stored
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
# Create the prompt, using prompt engineering to fine-tune response from LLM:

from llama_index.legacy.prompts.prompts import QuestionAnswerPrompt

# Question-answering template filled in with the retrieved context and the
# user's question ({context_str} / {query_str} are the only placeholders).
# The birthdate instruction matches the documents built by
# dataframe_to_documents: a "DOB:" line holding a YYYY-MM-DD date.
# NOTE(review): "today's date" is pinned in the prompt text, so computed ages
# drift as time passes — update it when re-running.
custom_prompt = QuestionAnswerPrompt(
"""You are a helpful assistant analyzing healthcare patient data. Follow the structure below.

✅ Do NOT return data about deceased patients.  
✅ If the question refers to 1 patient, return ONLY that 1 patient.  
✅ If the question refers to multiple patients, return each one separately with a blank line between.  
✅ Today's date is **July 10, 2025**. Use it to calculate ages from the patient's birthdate (the **DOB** field, YYYY-MM-DD).  
✅ The patient name is the **second field** in the record.

---

📋 Use this exact format for **each patient**:

Patient Name: <name>  
Patient Age: <age>  
Risk Score: <1-10>  
Risk Score Reasoning: 
<explanation>  

Resources:  
Florida Health Resources: 
<Provide the specific resources relevant to this patient here>

---

🎯 Purpose: Help caseworkers identify high-risk individuals for prioritized care.

🏥 Risk Score Guidance:  
Base the score on medical history, behavioral health, social factors (e.g., food/housing insecurity), and engagement patterns (e.g., missed appointments). Use 1 (least risk) to 10 (highest).

---

Context:
{context_str}

Question:
{query_str}

Answer:""")


In [None]:
from langchain_openai.chat_models import ChatOpenAI
from llama_index.legacy import ServiceContext

# Chat model that writes the final answer from the retrieved context.
llm = ChatOpenAI(model_name="gpt-4o", temperature=0.3, api_key=OPENAI_API_KEY_Fan)
service_context = ServiceContext.from_defaults(llm=llm)

# Build the index from the documents, storing vectors via the storage context.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
    show_progress=True,
)

# Use similarity search with IRIS to return the top 5 most similar matches to the query:
query_engine_options = {
    "text_qa_template": custom_prompt,
    "similarity_top_k": 5,
}
query_engine = index.as_query_engine(**query_engine_options)

Parsing nodes: 100%|██████████| 34/34 [00:00<00:00, 587.59it/s]
Generating embeddings: 100%|██████████| 47/47 [00:00<00:00, 68.56it/s]


In [97]:
def rag_interface(question: str) -> str:
    """Answer a free-text question using the notebook's query engine.

    This is the callback wired into the Gradio interface, so it always returns
    a string instead of raising.

    Args:
        question: The user's question. Blank, whitespace-only or ``None``
            input is rejected up front, without calling the query engine.

    Returns:
        The query engine's response converted with ``str()``, a short prompt
        asking for input when the question is blank, or an error message
        prefixed with "❌ Error:" if the query raises.
    """
    # Skip the retrieval/LLM round trip when there is nothing to ask.
    if not question or not question.strip():
        return "Please enter a question about the patient data."
    try:
        response = query_engine.query(question)
        return str(response)  # don't touch formatting
    except Exception as e:
        # Deliberately broad: surface any failure as text in the UI.
        return f"❌ Error: {e}"



In [98]:
import gradio as gr

# Logo shown in the footer below the interface.
logo_path = "https://media.licdn.com/dms/image/v2/C4E0BAQFhEni_JauxUg/company-logo_200_200/company-logo_200_200/0/1631300968602?e=2147483647&v=beta&t=K11wdBoJDGxHHjZSS2YTaKBwH-YH043hgS6LatMmkjs"

# Sample questions offered as clickable examples.
example_questions = [
    "Tell me about patient Kareem959",
    "Which patients are socially isolated?",
    "Who has the highest mental health risk?",
    "Who is likely to be hospitalized soon?"
]

# HTML footer rendered under the interface.
footer_html = f"""
    <hr>
    <div style="display: flex; align-items: center; gap: 10px;">
        <img src="{logo_path}" alt="InterSystems IRIS" width="60">
        <span><strong>Powered by InterSystems IRIS</strong></span>
    </div>
    """

question_box = gr.Textbox(label="Enter your question about the patient data:")
answer_box = gr.Textbox(label="RAG Response:")

iface = gr.Interface(
    fn=rag_interface,
    inputs=question_box,
    outputs=answer_box,
    title="☤ Care 🩺 Compass 🧭",
    description="Ask healthcare-related questions based on patient clinical notes.",
    examples=example_questions,
    submit_btn="Ask",
    flagging_mode="never",
    article=footer_html,
)

# share=True asks Gradio for a public share link in addition to the local URL.
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7884

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.







In [None]:
# try again with a different dataset:
