In [1]:
import pandas as pd
import kagglehub
import requests
import json
import os
import random
import ollama
import re

In [4]:
# Fetch the MIMIC-III demo dataset through KaggleHub. The returned value is
# the local location of the downloaded files and is reused by later cells.
path = kagglehub.dataset_download("asjad99/mimiciii")
print(f"Path to dataset files: {path}")

Path to dataset files: /Users/harinivaranasi/.cache/kagglehub/datasets/asjad99/mimiciii/versions/1


In [7]:
# Define file paths
# Every table lives in the same versioned sub-directory of the download, so
# the directory is built once and reused for each CSV.
mimic_demo_dir = os.path.join(path, "mimic-iii-clinical-database-demo-1.4")

admissions_path = os.path.join(mimic_demo_dir, "ADMISSIONS.csv")
diagnoses_path = os.path.join(mimic_demo_dir, "DIAGNOSES_ICD.csv")
patients_path = os.path.join(mimic_demo_dir, "PATIENTS.csv")
procedures_path = os.path.join(mimic_demo_dir, "PROCEDUREEVENTS_MV.csv")
prescriptions_path = os.path.join(mimic_demo_dir, "PRESCRIPTIONS.csv")
labevents_path = os.path.join(mimic_demo_dir, "LABEVENTS.csv")
noteevents_path = os.path.join(mimic_demo_dir, "NOTEEVENTS.csv")


# Ensure the files exist before proceeding.
# All seven paths are checked (not only admissions/diagnoses/patients) so a
# missing table is reported here with a clear message rather than failing
# later inside pd.read_csv.
required_csv_paths = [
    admissions_path,
    diagnoses_path,
    patients_path,
    procedures_path,
    prescriptions_path,
    labevents_path,
    noteevents_path,
]
for csv_path in required_csv_paths:
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Missing file: {csv_path}")


In [11]:
# Read each MIMIC-III CSV into its own pandas DataFrame.
def _load_table(csv_path):
    """Read one CSV file into a DataFrame using pandas' default options."""
    return pd.read_csv(csv_path)


admissions_df = _load_table(admissions_path)
diagnoses_df = _load_table(diagnoses_path)
noteevents_df = _load_table(noteevents_path)
patients_df = _load_table(patients_path)
procedures_df = _load_table(procedures_path)
prescriptions_df = _load_table(prescriptions_path)
labevents_df = _load_table(labevents_path)

In [14]:
# Pick one subject_id at random from the distinct ids in the patients table.
random_subject_id = random.choice(patients_df["subject_id"].unique())


def _rows_for_subject(table, subject):
    """Return the rows of `table` whose subject_id column equals `subject`."""
    return table.loc[table["subject_id"] == subject]


# Slice every loaded table down to the chosen patient's rows.
patient_info = _rows_for_subject(patients_df, random_subject_id)
admission_info = _rows_for_subject(admissions_df, random_subject_id)
diagnoses_info = _rows_for_subject(diagnoses_df, random_subject_id)
procedures_info = _rows_for_subject(procedures_df, random_subject_id)
prescriptions_info = _rows_for_subject(prescriptions_df, random_subject_id)
lab_results_info = _rows_for_subject(labevents_df, random_subject_id)



In [17]:


# Select a sample patient (replace with filtering logic).
# The admission used is simply the first row of the diagnoses table; the
# subject id is read from the same table for that admission.
hadm_id = diagnoses_df["hadm_id"].iloc[0]
subject_id = diagnoses_df.loc[diagnoses_df["hadm_id"] == hadm_id, "subject_id"].iloc[0]

# Extract patient information.
# Missing values are dropped *before* the string conversion so that an absent
# ICD-9 code does not end up in the prompt as the literal text "nan".
patient_diagnoses = (
    diagnoses_df.loc[diagnoses_df["hadm_id"] == hadm_id, "icd9_code"]
    .dropna()
    .astype(str)
    .tolist()
)
patient_procedures = (
    procedures_df.loc[procedures_df["hadm_id"] == hadm_id, "ordercategorydescription"]
    .dropna()
    .astype(str)  # str.join below requires string items
    .tolist()
)


def _first_value_or_unknown(frame, column):
    """Return the value of `column` in the first row of `frame`.

    Falls back to the string "Unknown" when the column is absent, when
    `frame` has no rows (so `.iloc[0]` would raise IndexError), or when the
    first value is missing.
    """
    if column not in frame.columns or frame.empty:
        return "Unknown"
    first_value = frame[column].iloc[0]
    return "Unknown" if pd.isna(first_value) else first_value


# Get admission details
admission_details = admissions_df[admissions_df["hadm_id"] == hadm_id]
admission_date = _first_value_or_unknown(admission_details, "admittime")
discharge_date = _first_value_or_unknown(admission_details, "dischtime")

# Render the lists for the prompt. An explicit marker is used for empty lists
# so the model is told that nothing was recorded instead of seeing a blank field.
diagnoses_text = ", ".join(patient_diagnoses) if patient_diagnoses else "None recorded"
procedures_text = ", ".join(patient_procedures) if patient_procedures else "None recorded"

# Construct a structured prompt
prompt = f"""
Generate a **medical discharge summary** based on the following patient records:

**Patient ID:** {subject_id}
**Admission ID:** {hadm_id}
**Admission Date:** {admission_date}
**Discharge Date:** {discharge_date}

**Diagnoses (ICD-9 Codes):** {diagnoses_text}
**Procedures Performed:** {procedures_text}

### **Discharge Summary Sections:**
- **Chief Complaint:** (Summarize based on diagnoses)
- **History of Present Illness:** (Use ICD-9 descriptions)
- **Procedures Performed:** (Summarize from procedures)
- **Hospital Course:** (Include major events)
- **Medications on Discharge:** (Use placeholders if unavailable)
- **Follow-up Instructions:** (General recommendations)

 **Evaluation Criteria with Scoring Guidelines**
Generate a **detailed and professional discharge summary** based on the above records.

After generating the discharge summary, evaluate it based on these four criteria and assign a **score (0-10) for each**, following the detailed scoring guide:

Please make sure you read and understand these instructions carefully. Please
keep this document open while reviewing,
and refer to it as needed.###

#### **1. Clinical Accuracy (0-10)**
- **10:** All diagnoses, procedures, medications, and lab results are completely accurate and well-integrated.
- **8-9:** Mostly accurate, but minor issues in phrasing or terminology.
- **6-7:** Some inaccuracies or missing minor details.
- **4-5:** Noticeable errors in medical data representation.
- **1-3:** Significant errors or contradictions in diagnoses, procedures, or medications.
- **0:** Severe misrepresentation of medical information.

#### **2. Completeness (0-10)**
- **10:** Includes all required sections (chief complaint, history, hospital course, medications, follow-up).
- **8-9:** Minor omissions that do not affect understanding.
- **6-7:** Some missing sections or incomplete descriptions.
- **4-5:** Major missing sections that impact usefulness.
- **1-3:** Very incomplete summary.
- **0:** No useful information included.

#### **3. Readability & Fluency (0-10)**
- **10:** Very clear, well-structured, professional, and easy to understand.
- **8-9:** Mostly readable, but minor grammar or structure issues.
- **6-7:** Some sections are unclear or require rewording.
- **4-5:** Difficult to read due to poor structure or unclear wording.
- **1-3:** Highly unreadable with major grammatical issues.
- **0:** Completely unreadable.

#### **4. Actionability (0-10)**
- **10:** Provides clear, detailed, and practical follow-up instructions for the patient.
- **8-9:** Actionable but missing minor details.
- **6-7:** Somewhat actionable but could be clearer.
- **4-5:** Lacks clarity or missing critical instructions.
- **1-3:** Very vague or unhelpful follow-up recommendations.
- **0:** No actionable guidance provided.

---
### **Instructions**
- **Step 1:** Generate a **detailed and professional discharge summary** based on the above records.
- **Step 2:** Evaluate the discharge summary **based on the scoring criteria** and assign a score (0-10) for each category.
- **Step 3:** Explain the reasoning behind each score.

---
### **Output Format:**
1. **Generated Discharge Summary**
2. **Evaluation Scores with Explanations**
   - **Clinical Accuracy:** (Score & Reason)
   - **Completeness:** (Score & Reason)
   - **Readability & Fluency:** (Score & Reason)
   - **Actionability:** (Score & Reason)

---
Now generate the discharge summary and provide the evaluation scores.
"""

# Query LLaMA 2-7B for text generation: the whole prompt is sent as a single
# user message.
response = ollama.chat(
    model="llama2:7b",
    messages=[{"role": "user", "content": prompt}]
)

# Print the generated discharge summary
print(response["message"]["content"])

### **Generated Discharge Summary**

Patient ID: 10006
Admission ID: 142345
Admission Date: 2164-10-23 21:09:00
Discharge Date: 2164-11-01 17:15:00

Chief Complaint: The patient presents with a 2-day history of worsening headache, fever, and nausea.

History of Present Illness: The patient was transferred from the emergency room due to worsening symptoms. The patient has a history of migraines but has never experienced anything like this before. The patient reported feeling dizzy and having blurred vision. The patient's vital signs were notable for a temperature of 103.5°F, blood pressure of 186/94 mmHg, and tachycardia.

Procedures Performed:

* CT scan of the brain: The results showed a large cerebral infarction in the left hemisphere.
* Blood cultures: The patient's blood cultures grew Streptococcus pneumoniae.
* Antibiotics were started for possible meningitis.

Hospital Course: The patient was admitted to the ICU due to worsening symptoms and a CT scan showing a large cerebral inf