In [104]:
import pandas as pd
import torch
import json
import chromadb
import requests
from pymongo import MongoClient
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel

In [105]:
import dns.resolver
from pymongo import MongoClient

resolver = dns.resolver.Resolver()
resolver.nameservers = ["1.1.1.1"]  # or ["8.8.8.8"]
dns.resolver.default_resolver = resolver

In [106]:
# === CONFIG ===
MONGO_URI = "mongodb+srv://ishaanroopesh0102:6eShFuC0pNnFFNGm@cluster0.biujjg4.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
UNIT_NO = 71957719
CHROMA_PATH = "../vector_db/chroma"
NUM_RESULTS = 5
OLLAMA_MODEL = "llama3"

In [107]:

# === Load BioBERT ===
print("Loading Bio ClinicalBERT...")
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model.eval()


Loading Bio ClinicalBERT...


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(28996, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [108]:

def embed_text(text):
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        outputs = model(**inputs)
        cls_embedding = outputs.last_hidden_state[:, 0, :]
        return cls_embedding.squeeze(0).tolist()

def format_x_fields(record):
    fields = [
        "name", "unit no", "admission date", "date of birth", "sex", "service",
        "allergies", "attending", "chief complaint", "major surgical or invasive procedure",
        "history of present illness", "past medical history", "social history",
        "family history", "physical exam", "pertinent results", "medications on admission",
        "brief hospital course", "discharge medications", "discharge diagnosis",
        "discharge condition", "discharge instructions", "follow-up", "discharge disposition"
    ]
    parts = [f"{field.title()}: {record.get(field, '')}" for field in fields if record.get(field)]
    return " ".join(parts)


In [109]:

# === Step 1: Get patient record from MongoDB ===
# client = MongoClient(MONGO_URI)
client = MongoClient("mongodb+srv://ishaanroopesh0102:6eShFuC0pNnFFNGm@cluster0.biujjg4.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
record = client["hospital_db"]["test_patients"].find_one({"unit no": UNIT_NO})
if record:
    print("Patient Found")
if not record:
    raise Exception(f"❌ Patient with unit no '{UNIT_NO}' not found in MongoDB.")


query_text = format_x_fields(record)
query_embedding = embed_text(query_text)
query_text

Patient Found


"Name: Myra Shah Unit No: 71957719 Admission Date: 2022-06-22 Date Of Birth: 1977-08-01 Sex: F Service: ORTHOPAEDICS Allergies: Penicillins / Amoxicillin / Ultram / hydrocodone / meloxicam / \nomnipague 240 Attending: Dr. Sharma Chief Complaint: left shoulder osteoarthritis/pain Major Surgical Or Invasive Procedure: nan History Of Present Illness: nan Past Medical History: dyslipidemia, heart murmur, OSA (remote hx, resolved w/weight \nloss), migraines, spinal stenosis, vertigo, hypothyroidism, \nGERD, pancreatic cyst, anemia, depression, s/p B/L TKRs, R TSR \n(___), tonsillectomy, L hand ___ digit arthrodesis (___) Social History: nan Family History: Non-contributory Physical Exam: Well appearing in no acute distress  \n Afebrile with stable vital signs  \n Pain well-controlled  \n Respiratory: CTAB  \n Cardiovascular: RRR  \n Gastrointestinal: NT/ND  \n Genitourinary: Voiding independently  \n Neurologic: Intact with no focal deficits  \n Psychiatric: Pleasant, A&O x3  \n Musculoskel

In [110]:
# === Step 2: Query ChromaDB ===
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = chroma_client.get_or_create_collection("patient_embeddings")

results = collection.query(
    query_embeddings=[query_embedding],
    n_results=NUM_RESULTS,
    include=["documents", "metadatas"]
)

In [111]:
system_prompt = """You are an expert medical AI assistant tasked with generating a structured, clinically accurate, and concise discharge summary for the 'QUERY PATIENT'.
Base your summary on the 'INPUT PATIENT DATA' provided for the query patient.
Use the 'EXAMPLES' (past patient data and their summaries) to guide the style, tone, and appropriate level of detail for each section.

The discharge summary MUST include all the following sections. Populate each section with relevant information from the 'INPUT PATIENT DATA'.
If essential information for a required section is genuinely absent from the input for the query patient, clearly state "[Information not directly available in input]" or a similar brief note for that specific part, but still include the section header.

**REQUIRED DISCHARGE SUMMARY STRUCTURE:**

**Patient Information:** (Include Name, Unit No, Sex, Service. Extract Admission Date and Date of Birth from input if available under these headings too.)
**Admission/Discharge Dates:** (Official Admission Date, Discharge Date. If dates are also in Patient Information, ensure consistency or use the primary source.)
**Attending:**
**Chief Complaint:**
**Procedure (or Major Surgical or Invasive Procedure):**
**History:** (History of Present Illness; Past Medical History - list relevant conditions.)
**Physical Exam (on Admission):** (Summarize key objective findings by system. Be brief but clinically relevant.)
**Pertinent Results:** (Highlight truly significant lab values, imaging findings, or other test results. Categorize if appropriate and concise, e.g., "Labs:", "Imaging:". Avoid listing all normal results unless their normality is significant.)
**Brief Hospital Course:** (Provide a clear narrative of the patient's hospital stay, including key events, problems encountered, significant interventions, and patient's progress. This should be a focused summary.)
**Medications on Admission:** (List as provided in input.)
**Discharge Medications:** (List key discharge medications. Focus on changes from admission meds or new critical medications. Be concise with details unless specific counseling points are provided in input.)
**Discharge Instructions:**
[From the 'INPUT PATIENT DATA', synthesize a concise, numbered list of the MOST CRITICAL discharge instructions for the patient. Prioritize safety and essential actions. Generally include:
1.  Urgent Warning Signs: Key symptoms requiring immediate medical attention (e.g., ER visit, call physician).
2.  Activity & Self-Care: Essential restrictions or instructions (e.g., mobility, wound care, driving).
3.  Key Medication Points: Critical advice related to discharge medications (e.g., duration of anticoagulants, vital administration instructions for new meds, key opioid advice). Do not detail routine refill procedures unless explicitly part of critical discharge counseling in the input.
4.  Important Follow-up Actions: Patient-actionable follow-up (this can complement the 'Follow-up' section).
Aim for clarity, actionability, and conciseness. Ensure the level of detail matches well-crafted, brief patient instructions as seen in the examples.]
**Discharge Disposition:**
**Discharge Diagnosis:** (Primary diagnosis; pertinent secondary diagnoses.)
**Discharge Condition:** (Summarize mental status, consciousness, and activity level.)
**Follow-up:** (List specific scheduled appointments, necessary tests the patient needs to arrange, or critical monitoring tasks.)

Maintain a professional, objective medical tone throughout. Do not add conversational phrases or explanations beyond what is typical in a medical summary.
The goal is a summary that is useful for handoff and patient understanding, reflecting the style of the provided examples.
"""

# User Prompt: This provides the examples and the actual query.
user_prompt_parts = []
user_prompt_parts.append("Here are some examples of patient input data and their corresponding discharge summaries. Use these to understand the desired style, tone, and level of detail.\n")

for i in range(NUM_RESULTS):
    example_x = results["documents"][0][i]  # This is the input data for the example
    example_y = results["metadatas"][0][i].get("summary", "[No summary available for this example]") # This is the gold summary
    user_prompt_parts.append(f"--- EXAMPLE {i+1} ---")
    user_prompt_parts.append("**INPUT PATIENT DATA (Example):**")
    user_prompt_parts.append(example_x)
    user_prompt_parts.append("\n**DISCHARGE SUMMARY (Example):**")
    user_prompt_parts.append(example_y)
    user_prompt_parts.append("\n") # Extra newline for separation

user_prompt_parts.append("--- QUERY PATIENT ---")
user_prompt_parts.append("Now, using the same structured approach and level of detail, generate a discharge summary for the following patient:")
user_prompt_parts.append("**INPUT PATIENT DATA (Query):**")
user_prompt_parts.append(query_text) # Your formatted data from format_x_fields(record)
user_prompt_parts.append("\n**DISCHARGE SUMMARY (Query):**") # The LLM will complete from here

final_user_prompt = "\n".join(user_prompt_parts)

In [112]:
print("📡 Sending to LLaMA 3 via Ollama...")
response = requests.post(
    "http://localhost:11434/api/chat",
    json={
        "model": OLLAMA_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": final_user_prompt}  # Use the combined prompt
        ],
        "stream": True # Assuming you want streaming
    }
)

# === Step 5: Parse & Print the Response ===
if response.ok:
    print("\n📝 Generated Discharge Summary:\n")
    full_response = ""
    for line in response.iter_lines(decode_unicode=True):
        if line:
            try:
                json_data = json.loads(line)
                if 'message' in json_data and 'content' in json_data['message']:
                    full_response += json_data['message']['content']
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e} - {line}")
    print(full_response.strip())
else:
    print("❌ Ollama call failed:", response.text)

📡 Sending to LLaMA 3 via Ollama...

📝 Generated Discharge Summary:

Here is the discharge summary for the patient:

**Patient Information**

* Name: Myra Shah
* Unit Number: 71957719
* Admission Date: June 22, 2022
* Date of Birth: August 1, 1977
* Sex: Female
* Service: Orthopedic Surgery

**Chief Complaint**

* Left shoulder osteoarthritis/pain

**Major Surgical or Invasive Procedure**

* None (patient underwent left shoulder procedure)

**History of Present Illness**

* N/A

**Past Medical History**

* Dyslipidemia, heart murmur, OSA (resolved with weight loss), migraines, spinal stenosis, vertigo, hypothyroidism, GERD, pancreatic cyst, anemia, depression, s/p B/L TKRs, R TSR (___), tonsillectomy, L hand ___ digit arthrodesis (___)

**Physical Exam**

* Well appearing in no acute distress
* Afebrile with stable vital signs
* Pain well-controlled
* Respiratory: CTAB
* Cardiovascular: RRR
* Gastrointestinal: NT/ND
* Genitourinary: Voiding independently
* Neurologic: Intact with no foc