In [None]:
import os
import pandas as pd
import requests
import json
from tqdm import tqdm
import csv

# Endpoint that generate_clinical_note() POSTs prompts to (a local Ollama server).
OLLAMA_API = "http://localhost:11434/api/generate"
# Model name sent in the "model" field of every generation request.
OLLAMA_MODEL = "gpt-oss:20b"

def format_row_to_prompt(row):
    """Build the LLM prompt for one structured record.

    Args:
        row: Any mapping-like object exposing ``.items()`` that yields
            ``(column_name, value)`` pairs (e.g. a pandas Series or a dict).

    Returns:
        The full prompt string: fixed instructions and an example note,
        followed by the record rendered as ``"col: value, col: value"``.
        Columns whose value is missing are left out of the rendered record.
    """
    prompt_parts = []
    for col, val in row.items():
        val_str = str(val).strip()
        # A missing cell shows up either as NaN (stringified to "nan") or as
        # an empty string when the caller blanked NaNs beforehand. Skip both,
        # otherwise the prompt contains dangling "col: " entries.
        if val_str and val_str != "nan":
            prompt_parts.append(f"{col}: {val_str}")
    prompt_text = ", ".join(prompt_parts)

    return f"""
You are a clinical documentation specialist. Given the following structured data, generate a realistic clinical note.
Make the note concise, within 3-4 sentences, and ensure it includes all relevant personal and medical information naturally in the narrative.
Do not redact or sanitize any data. Include all personal and medical information naturally in the note.
Format like a medical intern’s verbose narrative. 
Here's an example of the formatted note:
Patient: Luis Garcia, born on 03/14/1982 (SSN: 442-19-6735), presented to St. Mary’s Cardiology Clinic on 09/12/2025 with sharp chest pain radiating to the left arm.  
Dr. Eleanor Kim prescribed nitroglycerin 0.4 mg sublingually and ordered an ECG; his home address is 312 Maple Avenue, Riverdale, IL 60621, and his phone is (312) 555‑0198.  
Follow‑up was scheduled for 09/19/2025 and the lab results will be reviewed in the next visit.

Structured Data:
{prompt_text}

Clinical Note:
"""

def generate_clinical_note(prompt, timeout=600):
    """Ask the Ollama server to generate a clinical note for ``prompt``.

    Args:
        prompt: The complete prompt text to send.
        timeout: Seconds to wait for the server (passed to ``requests.post``).
            The request is non-streaming, so the whole note is produced
            before any response arrives; the default is deliberately
            generous. Pass ``None`` to wait indefinitely.

    Returns:
        The value of the ``"response"`` field in the server's JSON reply, or
        an empty string if the field is absent or the request fails.
    """
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False
    }
    try:
        # Without a timeout, requests waits forever on a stalled server and
        # the whole batch hangs with no error.
        response = requests.post(
            OLLAMA_API, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json().get("response", "")
    except Exception as e:
        # Best-effort by design: one failed row is reported and yields an
        # empty note so the remaining rows are still processed.
        print(f"❌ Error generating note: {e}")
        return ""

def process_csv_file(file_path, target_id="1329b83e-ea69-d184-b4af-0d2a8e07896e", max_rows=10):
    """Generate clinical notes for one patient's rows in a CSV file.

    Reads ``file_path``, keeps at most ``max_rows`` rows whose ``patient_id``
    equals ``target_id``, generates one note per row, and writes them to a
    sibling file named ``<stem>_notes.csv`` with the columns ``patient_id``
    and ``clinical_note``.

    Args:
        file_path: Path of the input CSV.
        target_id: The ``patient_id`` value to select.
        max_rows: Maximum number of matching rows to process.

    Returns:
        None. Nothing is written when the file has no ``patient_id`` column
        or no row matches ``target_id``.
    """
    print(f"\n📄 Processing: {file_path}")
    # NaN cells are intentionally left as NaN: they stringify to "nan", which
    # the prompt formatter skips. Blanking them with "" would instead put
    # empty "col: " entries into the prompt.
    df = pd.read_csv(file_path)

    # Not every CSV under the data folder necessarily has this column; skip
    # such files instead of aborting the whole run with a KeyError.
    if "patient_id" not in df.columns:
        print(f"⚠️ No patient_id column in {file_path}; skipping")
        return

    df = df[df["patient_id"] == target_id].head(max_rows)

    if df.empty:
        print(f"⚠️ No matching patient_id found in {file_path}")
        return

    # Change only the final extension. str.replace would rewrite every ".csv"
    # occurrence in the path and, for a path without ".csv", would produce an
    # output path equal to the input path and overwrite it.
    stem, _ext = os.path.splitext(file_path)
    output_csv_path = f"{stem}_notes.csv"
    with open(output_csv_path, mode='w', newline='', encoding='utf-8') as f_out:
        writer = csv.DictWriter(f_out, fieldnames=["patient_id", "clinical_note"])
        writer.writeheader()

        for i, row in tqdm(df.iterrows(), total=len(df), desc=f"Generating notes for {target_id[:6]}..."):
            prompt = format_row_to_prompt(row)
            note = generate_clinical_note(prompt)
            writer.writerow({
                "patient_id": row.get("patient_id", f"row_{i}"),
                "clinical_note": note
            })

    print(f"✅ Saved clinical notes to {output_csv_path}")

def main(root_folder="data"):
    """Walk ``root_folder`` recursively and process every input CSV found.

    Args:
        root_folder: Directory to scan; defaults to ``"data"``.

    Files ending in ``_notes.csv`` are skipped: that is the name pattern
    ``process_csv_file`` uses for its output, written next to the input, so
    a second run would otherwise treat earlier results as new inputs.
    """
    for dirpath, _, filenames in os.walk(root_folder):
        for file in filenames:
            if not file.endswith(".csv"):
                continue
            if file.endswith("_notes.csv"):
                # Output of a previous run, not source data.
                continue
            full_path = os.path.join(dirpath, file)
            process_csv_file(full_path)

# Start the batch run only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

  df.fillna("", inplace=True)



📄 Processing: data/medications.csv


Generating note for patient 1329b8...:   6%|▌         | 4/71 [02:52<48:03, 43.03s/it]


KeyboardInterrupt: 