In [11]:
import pandas as pd

# Module-level holder for the notes table. It stays None until load_data()
# assigns a DataFrame to it; generate_notes_for_type() checks for None and
# raises if the data has not been loaded yet.
df = None

def load_data(filepath):
    """Read the notes CSV at ``filepath`` into the module-level ``df``.

    A ``table_name`` column, if present, is renamed to ``note_type`` so the
    rest of the notebook can keep using that name. ``note_type`` values are
    stripped and lower-cased; ``patient_id`` values are cast to ``str`` and
    stripped.

    Args:
        filepath: Location of the CSV, passed straight to ``pandas.read_csv``.

    Raises:
        ValueError: If, after the rename, the file has no ``note_type`` or
            no ``patient_id`` column.
    """
    global df
    loaded = pd.read_csv(filepath)

    # Rename 'table_name' to 'note_type' to keep function logic the same.
    # Skipped when 'note_type' already exists, because renaming would then
    # produce two columns with the same label.
    if "table_name" in loaded.columns and "note_type" not in loaded.columns:
        loaded = loaded.rename(columns={"table_name": "note_type"})

    # Confirm presence of required columns *before* touching them; indexing
    # a missing column first would surface as a KeyError instead of this
    # ValueError.
    if "note_type" not in loaded.columns or "patient_id" not in loaded.columns:
        raise ValueError("CSV must contain 'note_type' and 'patient_id' columns.")

    # Clean any stray whitespace and normalise case so lookups are exact.
    loaded["note_type"] = loaded["note_type"].str.strip().str.lower()
    loaded["patient_id"] = loaded["patient_id"].astype(str).str.strip()

    # Publish to the global only once the frame is fully validated and
    # cleaned, so a failed load never leaves a half-processed table behind.
    df = loaded

    print("Data loaded with", df.shape[0], "rows.")

def generate_notes_for_type(patient_id, note_type):
    """Return all notes of one type for one patient as a single string.

    The lookup values are normalised the same way ``load_data`` normalises
    the table (``patient_id`` cast to ``str`` and stripped, ``note_type``
    stripped and lower-cased), so ``"Medications "`` matches rows stored as
    ``"medications"``. Rows whose ``note_text`` is missing are skipped.

    Args:
        patient_id: Identifier to match against the ``patient_id`` column.
        note_type: Category to match against the ``note_type`` column.

    Returns:
        A header line followed by the matching note texts separated by blank
        lines, or a "No ... notes found" message when nothing matches.

    Raises:
        ValueError: If the module-level ``df`` has not been loaded yet.
    """
    if df is None:
        raise ValueError("Data not loaded. Run `load_data()` first.")

    wanted_patient = str(patient_id).strip()
    wanted_type = str(note_type).strip().lower()

    patient_notes = df[
        (df["patient_id"] == wanted_patient) &
        (df["note_type"] == wanted_type)
    ]

    if patient_notes.empty:
        return f"No {note_type} notes found for patient {patient_id}."

    # Missing texts load as NaN (a float); str.join would raise TypeError on
    # them, so drop them and coerce whatever remains to str.
    texts = patient_notes["note_text"].dropna().astype(str).tolist()
    if not texts:
        return f"No {note_type} notes found for patient {patient_id}."

    combined = "\n\n".join(texts)
    return f"{wanted_type.capitalize()} Notes for patient {wanted_patient}:\n\n{combined}"

In [12]:
# Populate the module-level `df` from the notes CSV; load_data() prints the
# number of rows it read.
load_data("synthea_notes_top42_patients.csv")

Data loaded with 45191 rows.


In [9]:
# Print two note categories for a single sample patient.
sample_patient_id = "e7a5d3dc-3484-a24e-6ef3-0737b403f950"
for sample_note_type in ("medications", "conditions"):
    print(generate_notes_for_type(sample_patient_id, sample_note_type))

Medications Notes for patient e7a5d3dc-3484-a24e-6ef3-0737b403f950:

patient_id: e7a5d3dc-3484-a24e-6ef3-0737b403f950; full_name: Marni Tremblay; medication_start_ts: 2024-12-13 09:00:18; birthdate: 1976-10-20; ssn: 999-36-6337; drivers: S99999779; passport: X42697334X; prefix: Mrs.; marital: D; race: white; ethnicity: nonhispanic; gender: F; birthplace: Park River  North Dakota  US; patient_address: 118 Kiehn Gardens Suite 40; patient_city: West Fargo; patient_state: North Dakota; county: Cass County; fips: 38017; patient_zip: 58078; lat: 46.893402133270634; lon: -96.86213906026983; healthcare_expenses: 157011.5; healthcare_coverage: 1055670.27; income: 29501; age_at_medication: 48; medication_code: 308136; medication_description: amLODIPine 2.5 MG Oral Tablet; medication_base_cost: 1.79; payer_coverage: 0.0; dispenses: 2; medication_total_cost: 3.58; medication_reason_code: 59621000; medication_reason_description: Essential hypertension (disorder); encounter_class: ambulatory; encoun