In [1]:
import os
import re

import numpy as np
import ollama
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sqlalchemy import create_engine

# Database connection. The URL can be overridden through the OMOP_PG_URL
# environment variable so credentials do not have to live in the notebook;
# the literal below is kept only as a backward-compatible local default.
# NOTE(review): the default embeds a password — rotate it and drop the
# fallback once every environment sets OMOP_PG_URL.
PG_URL = os.environ.get(
    "OMOP_PG_URL",
    "postgresql+psycopg2://postgres:4030@localhost:5432/omop_sandbox",
)
engine = create_engine(PG_URL)


In [4]:
# ============================================================================
# STEP 1 — LOAD ALL PATIENTS WITH NOTES + ICD INSOMNIA LABEL
# ============================================================================

# One row per patient that has at least one note longer than 50 characters.
# icd_insomnia is 1 when the patient appears in insomnia_cohort with
# insomnia_flag = 1, and 0 otherwise.
patients_query = """
    WITH all_patients_with_notes AS (
        SELECT DISTINCT subject_id
        FROM mimic_omop.notes_norm
        WHERE text IS NOT NULL AND LENGTH(text) > 50
    ),
    icd_positive AS (
        SELECT DISTINCT subject_id
        FROM mimic_omop.insomnia_cohort
        WHERE insomnia_flag = 1
    )
    SELECT 
        a.subject_id,
        CASE WHEN p.subject_id IS NOT NULL THEN 1 ELSE 0 END AS icd_insomnia
    FROM all_patients_with_notes a
    LEFT JOIN icd_positive p ON a.subject_id = p.subject_id
    ORDER BY a.subject_id;
"""

# Load and make sure the label is a plain integer column.
patients = pd.read_sql(patients_query, engine).astype({"icd_insomnia": int})

print("Patients loaded:", len(patients))
print(patients["icd_insomnia"].value_counts())

Patients loaded: 145914
icd_insomnia
0    123830
1     22084
Name: count, dtype: int64


In [24]:
# Load the gold-standard ICD cohort: per-rule flags plus the overall insomnia flag

gold_query = """
    SELECT subject_id, rule_a, rule_b, rule_c, insomnia_flag
    FROM mimic_omop.insomnia_cohort;
"""
gold = pd.read_sql(gold_query, engine)

# Cast all four flag columns to integers in a single pass.
gold = gold.astype(
    {flag: int for flag in ("rule_a", "rule_b", "rule_c", "insomnia_flag")}
)

print("\nGold cohort size:", len(gold))



Gold cohort size: 26785


In [6]:
gold_flag_cols = ["rule_a", "rule_b", "rule_c", "insomnia_flag"]

# Left-join the gold flags onto every patient with notes. Patients that are
# absent from the gold cohort come back as NaN, which is replaced by 0 before
# the flags are cast to integers.
full = (
    patients
    .merge(gold, on="subject_id", how="left")
    .fillna(dict.fromkeys(gold_flag_cols, 0))
    .astype(dict.fromkeys(gold_flag_cols, int))
)

print("\nMerged dataset:", full.shape)
print(full.head())




Merged dataset: (145914, 6)
   subject_id  icd_insomnia  rule_a  rule_b  rule_c  insomnia_flag
0    10000032             1       0       1       0              1
1    10000084             0       0       0       0              0
2    10000117             0       0       0       0              0
3    10000248             0       0       0       0              0
4    10000560             0       0       0       0              0


In [8]:
# Partition the merged table by ICD label (1 = ICD+, 0 = ICD-)
icd_label = full["icd_insomnia"]
icd_pos = full.loc[icd_label.eq(1)]
icd_neg = full.loc[icd_label.eq(0)]

print("\nICD+ count:", len(icd_pos))
print("ICD- count:", len(icd_neg))


# ------------------------
# Helper to sample up to n patients safely
# ------------------------
def safe_sample(df, n):
    """Return n rows sampled with a fixed seed, or df itself if it has fewer than n rows."""
    if len(df) < n:
        # Not enough rows to draw from: hand back everything available.
        return df
    return df.sample(n, random_state=42)


# ------------------------
# ICD-POSITIVE: Rule A/B/C
# ------------------------
N_PER_RULE = 10  # ICD+ patients drawn from each rule pool
N_ICD_NEG = 30   # ICD- patients drawn as controls

ruleA_pool = icd_pos[icd_pos["rule_a"] == 1]
ruleB_pool = icd_pos[icd_pos["rule_b"] == 1]
ruleC_pool = icd_pos[icd_pos["rule_c"] == 1]

print("\nAvailable ICD+ by rule:")
print("Rule A:", len(ruleA_pool))
print("Rule B:", len(ruleB_pool))
print("Rule C:", len(ruleC_pool))

samp_A = safe_sample(ruleA_pool, N_PER_RULE)
samp_B = safe_sample(ruleB_pool, N_PER_RULE)
samp_C = safe_sample(ruleC_pool, N_PER_RULE)

# The rule pools overlap (a patient may satisfy several rules), so the same
# row can be drawn more than once; drop_duplicates keeps it a single time.
# The result may therefore hold fewer than 3 * N_PER_RULE patients.
icd_pos_selected = pd.concat([samp_A, samp_B, samp_C]).drop_duplicates()

print("\nSelected ICD+ patients:", len(icd_pos_selected))

# ------------------------
# ICD-NEGATIVE: Sample 30
# ------------------------
# Use the same guarded sampler as the ICD+ pools so that a small ICD- pool
# returns everything available instead of raising inside DataFrame.sample.
icd_neg_selected = safe_sample(icd_neg, N_ICD_NEG)

print("Selected ICD- patients:", len(icd_neg_selected))


ICD+ count: 22084
ICD- count: 123830

Available ICD+ by rule:
Rule A: 2971
Rule B: 12983
Rule C: 10185

Selected ICD+ patients: 30
Selected ICD- patients: 30


In [9]:
# ------------------------
# FINAL BALANCED COHORT
# ------------------------
# Stack the ICD+ and ICD- selections, then shuffle the rows with a fixed seed.
selected_patients = pd.concat([icd_pos_selected, icd_neg_selected])
balanced = selected_patients.sample(frac=1, random_state=42)

print("\nFinal balanced cohort size:", len(balanced))
print(balanced["icd_insomnia"].value_counts())

# Plain Python list of ids, used to build the notes query.
balanced_subjects = balanced["subject_id"].tolist()


Final balanced cohort size: 60
icd_insomnia
1    30
0    30
Name: count, dtype: int64


In [14]:
# Pull every usable note (> 50 characters) for the sampled patients.
# An empty cohort fails here with a clear message instead of an "IN ()"
# syntax error from the database, and each id is coerced to int so only
# numeric literals are ever interpolated into the SQL text.
if not balanced_subjects:
    raise ValueError("balanced_subjects is empty; no notes can be queried")

subject_id_list = ",".join(str(int(sid)) for sid in balanced_subjects)

notes = pd.read_sql(f"""
    SELECT subject_id, hadm_id, text AS note_text
    FROM mimic_omop.notes_norm
    WHERE subject_id IN ({subject_id_list})
      AND text IS NOT NULL AND LENGTH(text) > 50;
""", engine)

print("\nNotes extracted:", len(notes))
num_patients_with_notes = notes["subject_id"].nunique()
print("Patients with usable notes:", num_patients_with_notes)



Notes extracted: 212
Patients with usable notes: 60


In [15]:
import re, json

# Lower-case phrases that is_candidate() looks for as plain substrings of the
# lower-cased sentence; a hit on any list keeps the sentence for classification.

# Sleep-difficulty wording.
SLEEP_TERMS = [
    "insomnia", "sleep onset", "sleep maintenance", "early awakening",
    "trouble sleeping", "difficulty sleeping", "can't sleep", "cant sleep",
    "sleep latency", "sleeplessness", "not sleeping", "poor sleep",
    "restless sleep", "hard to fall asleep", "sleep problem"
]

# Daytime-impairment wording.
# NOTE(review): matching is by substring, so generic entries such as
# "attention", "memory", "errors" or "tired" (which also matches "retired")
# will fire on unrelated sentences — confirm this over-inclusion is intended.
IMPAIR_TERMS = [
    "fatigue", "tired", "daytime sleepiness", "somnolence", "malaise",
    "irritable", "irritability", "poor concentration", "attention",
    "memory", "impaired performance", "decreased motivation",
    "errors", "accidents", "dissatisfaction with sleep",
    "low energy", "hard to concentrate", "sleepy", "tiredness"
]

# Generic names of the primary insomnia medications (brand names are not listed).
PRIMARY_MED_TERMS = [
    "zolpidem", "zaleplon", "eszopiclone", "temazepam",
    "triazolam", "ramelteon", "suvorexant", "lemborexant"
]

# Generic names of medications sometimes used for insomnia.
# NOTE(review): SYSTEM_PROMPT also names gabapentin, quetiapine, olanzapine,
# clonazepam, lorazepam and diazepam as secondary medications; they are not
# listed here, so a sentence mentioning only one of those is dropped by
# is_candidate() unless another trigger matches — confirm this is intended.
SECONDARY_MED_TERMS = [
    "trazodone", "mirtazapine", "melatonin",
    "hydroxyzine", "doxepin"
]

def split_sentences(t):
    """Split text into sentences at whitespace that follows '.', '!' or '?'.

    Pieces of 5 characters or fewer are discarded; every kept piece is
    stripped and capped at 1000 characters.
    """
    sentences = []
    for piece in re.split(r'(?<=[.!?])\s+', t.strip()):
        if len(piece) <= 5:
            continue
        sentences.append(piece.strip()[:1000])
    return sentences
    

def is_candidate(sent):
    """Return True if the sentence contains any sleep, rest, night-time,
    impairment or medication trigger (case-insensitive substring match)."""
    lowered = sent.lower()

    # Generic triggers local to this filter; they are checked before the
    # module-level term lists.
    local_triggers = [
        "sleep", "awake", "awakening", "insomnia", "sleepless",
        "restless", "unable to rest", "difficulty resting",
        "at night", "during the night", "night time", "nighttime",
    ]
    triggers = (
        local_triggers
        + SLEEP_TERMS
        + IMPAIR_TERMS
        + PRIMARY_MED_TERMS
        + SECONDARY_MED_TERMS
    )

    for term in triggers:
        if term in lowered:
            return True
    return False


In [16]:
# Give every note a unique row id (its position after the index reset) so
# that each candidate sentence can be traced back to its source note.
notes = notes.reset_index(drop=True)
notes["note_rowid"] = notes.index

CAND_COLUMNS = ["subject_id", "hadm_id", "note_rowid", "sent_id", "text_span"]

rows = []

for _, r in notes.iterrows():
    # sent_id is the position of the sentence among those returned by
    # split_sentences for this note.
    for i, s in enumerate(split_sentences(r["note_text"])):
        if is_candidate(s):
            rows.append({
                "subject_id": r["subject_id"],
                "hadm_id": r["hadm_id"],
                "note_rowid": r["note_rowid"],
                "sent_id": i,
                "text_span": s
            })

# Passing the columns explicitly keeps the expected schema even when no
# sentence matched, so later cells get an empty frame with the right columns
# rather than a column-less one.
cands = pd.DataFrame(rows, columns=CAND_COLUMNS)
print(f"Extracted {len(cands)} candidate sentences")


Extracted 505 candidate sentences


In [17]:
# Instruction text for the sentence classifier. classify_sentence_ollama()
# prepends it to each sentence and sends the result as a single user-role
# message; the reply is expected to be one JSON object with the six keys
# listed under "STRICT JSON RESPONSE".
# NOTE(review): the secondary-medication list below is longer than
# SECONDARY_MED_TERMS, which is used for candidate filtering — confirm the two
# are meant to differ.
SYSTEM_PROMPT = """
You are a clinical NLP assistant identifying evidence of insomnia in clinical notes. 
Your task is to classify a SINGLE SENTENCE. Do NOT infer anything not explicitly stated.

---------------------------------------
INSOMNIA CRITERIA DEFINITIONS
---------------------------------------

1. Sleep Difficulty (asserts_sleep_difficulty = true):
   Indicates difficulty initiating, maintaining, or restoring sleep, OR dissatisfaction with sleep.
   Examples include:
     - "trouble sleeping", "difficulty falling asleep", "early awakening"
     - "poor sleep", "restless sleep", "can't sleep", "insomnia"

2. Daytime Impairment (asserts_daytime_impairment = true):
   Evidence that poor sleep causes daytime consequences:
     - fatigue, tiredness, sleepiness, impaired concentration, irritability, low energy
   Only mark true if the impairment is PRESENT in the sentence.

3. Primary Insomnia Medications (asserts_primary_med = true):
   Medications prescribed PRIMARILY for insomnia:
     zolpidem (Ambien), zaleplon (Sonata), eszopiclone (Lunesta),
     temazepam (Restoril), triazolam (Halcion), suvorexant (Belsomra),
     lemborexant (Dayvigo), ramelteon (Rozerem).

4. Secondary Medications (asserts_secondary_med = true):
   Medications SOMETIMES used for insomnia, **even if prescribed for another condition**:
     trazodone, mirtazapine, melatonin, hydroxyzine,
     doxepin, gabapentin, quetiapine (Seroquel),
     olanzapine (Zyprexa), clonazepam (Klonopin),
     lorazepam (Ativan), diazepam (Valium).

---------------------------------------
NEGATION HANDLING
---------------------------------------
negated = true if the sentence explicitly states the absence of sleep difficulty,
impairment, or medication usage.
Examples:
  - "denies insomnia", "no difficulty sleeping", "not taking Ambien"
Otherwise negated = false.

---------------------------------------
TEMPORALITY
---------------------------------------
Choose ONE:
  - "current": symptoms/medication clearly present now
  - "historical": symptoms/medication were in the past
  - "uncertain": unclear timing or general statements

Be inclusive: when uncertain, lean toward "current".

---------------------------------------
STRICT JSON RESPONSE
---------------------------------------
Respond ONLY with valid JSON in this exact format:

{
  "asserts_sleep_difficulty": bool,
  "asserts_daytime_impairment": bool,
  "asserts_primary_med": bool,
  "asserts_secondary_med": bool,
  "negated": bool,
  "temporality": "current" | "historical" | "uncertain"
}

---------------------------------------
EXAMPLE
---------------------------------------
Sentence: "Patient reports difficulty falling asleep and feels tired during the day."

JSON:
{
  "asserts_sleep_difficulty": true,
  "asserts_daytime_impairment": true,
  "asserts_primary_med": false,
  "asserts_secondary_med": false,
  "negated": false,
  "temporality": "current"
}

"""

def extract_json(text):
    """Extract the first JSON object embedded in a model reply.

    Parameters
    ----------
    text : str
        Raw reply text, possibly with prose before or after the JSON.

    Returns
    -------
    dict
        The decoded object. If the text contains no "{...}" span the result is
        {"error": "no JSON"}; if a span exists but no object can be decoded
        from it the result is {"error": "bad JSON", "raw": <span>}.
    """
    m = re.search(r"\{.*\}", text, re.DOTALL)
    if not m:
        return {"error": "no JSON"}

    span = m.group(0)
    decoder = json.JSONDecoder()

    # The greedy span runs from the first "{" to the last "}", so it can carry
    # trailing prose or stray braces. Decode from each opening brace in turn
    # and return the first complete object instead of requiring the whole
    # span to be valid JSON.
    start = span.find("{")
    while start != -1:
        try:
            obj, _ = decoder.raw_decode(span, start)
        except json.JSONDecodeError:
            start = span.find("{", start + 1)
            continue
        return obj

    return {"error": "bad JSON", "raw": span}

def classify_sentence_ollama(text, model="llama3:8b", options=None):
    """Classify one sentence with an Ollama chat model and return the parsed labels.

    Parameters
    ----------
    text : str
        The sentence to classify; it is appended to SYSTEM_PROMPT and sent as
        a single user-role message.
    model : str, optional
        Ollama model name. Defaults to "llama3:8b".
    options : dict or None, optional
        Generation options forwarded to ollama.chat only when given, e.g.
        {"temperature": 0} for repeatable output.
        NOTE(review): confirm the accepted keys against the installed client.

    Returns
    -------
    dict
        Whatever extract_json() produced from the reply (labels, or an
        "error" entry), always carrying a "temporality" key that defaults to
        "uncertain".
    """
    request = {
        "model": model,
        "messages": [
            {"role": "user", "content": SYSTEM_PROMPT + f'\nSentence: "{text}"'}
        ],
    }
    if options is not None:
        request["options"] = options

    resp = ollama.chat(**request)
    parsed = extract_json(resp["message"]["content"])

    # Downstream code always reads temporality, including on parse failures.
    parsed.setdefault("temporality", "uncertain")
    return parsed


# Run LLM classification
# Each record is the candidate row's fields overlaid with the model's labels.
out = [
    {**cand, **classify_sentence_ollama(cand["text_span"])}
    for _, cand in cands.iterrows()
]

ev = pd.DataFrame(out)
print("Classified", len(ev), "sentences")


Classified 505 sentences


In [18]:
# -----------------------------------------------------------
# 1. Ensure all LLM columns exist and are boolean
# -----------------------------------------------------------
bool_cols = [
    "asserts_sleep_difficulty",
    "asserts_daytime_impairment",
    "asserts_primary_med",
    "asserts_secondary_med",
    "negated"
]


def _llm_flag_to_bool(value):
    """Convert one raw LLM flag value to a Python bool.

    Missing values (None, NaN, pd.NA) become False. Strings are compared
    case-insensitively against "true", "yes" and "1", so a textual "false" is
    not mistaken for a truthy non-empty string. Anything else uses bool().
    """
    if value is None or value is pd.NA:
        return False
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return False
    if isinstance(value, str):
        return value.strip().lower() in ("true", "yes", "1")
    return bool(value)


for col in bool_cols:
    if col not in ev.columns:
        ev[col] = False
    # Element-wise conversion avoids fillna() on an object column (which
    # triggers a pandas downcasting FutureWarning) and handles string flags.
    ev[col] = ev[col].map(_llm_flag_to_bool).astype(bool)

# Temporality (string column, safe)
if "temporality" not in ev.columns:
    ev["temporality"] = "uncertain"
ev["temporality"] = ev["temporality"].fillna("uncertain").astype(str)


# -----------------------------------------------------------
# 2. RAW FEATURES — DIRECT OUTPUT FROM LLM (NO FILTERING)
# -----------------------------------------------------------
# (raw column, negation-filtered column, source LLM flag)
feature_specs = [
    ("sleep_raw", "is_sleep", "asserts_sleep_difficulty"),
    ("impair_raw", "is_impair", "asserts_daytime_impairment"),
    ("primary_raw", "is_primary", "asserts_primary_med"),
    ("secondary_raw", "is_secondary", "asserts_secondary_med"),
]

for raw_col, _, llm_col in feature_specs:
    ev[raw_col] = ev[llm_col].astype(bool)


# -----------------------------------------------------------
# 3. NEGATION-FILTERED FEATURES
# -----------------------------------------------------------
# A flag only counts when the sentence was not marked as negated.
# Temporality is not consulted here.
not_negated = ~ev["negated"]

for _, filtered_col, llm_col in feature_specs:
    ev[filtered_col] = ev[llm_col] & not_negated


# -----------------------------------------------------------
# 4. Patient-level aggregation
# -----------------------------------------------------------
# A patient is positive for a feature when any of their sentences is ("max"
# over booleans). Symptoms use the raw flags, medications the negation-filtered
# ones.
# NOTE(review): is_sleep / is_impair are computed but not aggregated here —
# confirm that symptoms are meant to ignore negation.
agg = (
    ev.groupby("subject_id")
    .agg(
        sleep_raw=("sleep_raw", "max"),
        impair_raw=("impair_raw", "max"),
        primary=("is_primary", "max"),
        secondary=("is_secondary", "max"),
    )
    .reset_index()
)


# -----------------------------------------------------------
# 5. Rule Definitions (FINAL INSOMNIA RULESET)
# -----------------------------------------------------------

# Rule A — Symptoms: sleep difficulty AND daytime impairment
both_symptoms = agg["sleep_raw"] & agg["impair_raw"]
agg["rule_a_text"] = both_symptoms.astype(int)

# Rule B — Primary insomnia medications
agg["rule_b_text"] = agg["primary"].astype(int)

# Rule C — Secondary medication together with at least one symptom
either_symptom = agg["sleep_raw"] | agg["impair_raw"]
agg["rule_c_text"] = (agg["secondary"] & either_symptom).astype(int)

# Any rule — positive when at least one of the three rule columns is 1
text_rule_flags = agg[["rule_a_text", "rule_b_text", "rule_c_text"]]
agg["any_text"] = text_rule_flags.any(axis=1).astype(int)


print("\n=== AGGREGATION COMPLETED ===")
preview_cols = ["subject_id", "sleep_raw", "impair_raw", "rule_a_text",
                "rule_b_text", "rule_c_text", "any_text"]
print(agg[preview_cols].head())



=== AGGREGATION COMPLETED ===
   subject_id  sleep_raw  impair_raw  rule_a_text  rule_b_text  rule_c_text  \
0    10087943      False        True            0            0            0   
1    10267773       True        True            1            1            1   
2    10520918      False       False            0            0            0   
3    10581221       True        True            1            0            1   
4    10659371       True        True            1            1            1   

   any_text  
0         0  
1         1  
2         0  
3         1  
4         1  


  ev[col] = ev[col].fillna(False).astype(bool)
  ev[col] = ev[col].fillna(False).astype(bool)
  ev[col] = ev[col].fillna(False).astype(bool)
  ev[col] = ev[col].fillna(False).astype(bool)
  ev[col] = ev[col].fillna(False).astype(bool)


In [25]:
# ============================================================================
# STEP 7: Load gold standard and merge
# ============================================================================

# Comma-separated subject ids of the sampled patients.
# Iterating a DataFrame yields its column labels, so the ids have to be taken
# from the subject_id column explicitly.
ids_str = ",".join(map(str, balanced["subject_id"]))

gold_label_cols = ["rule_a_gold", "rule_b_gold", "rule_c_gold", "any_gold"]
text_rule_cols = ["rule_a_text", "rule_b_text", "rule_c_text", "any_text"]

# Rename rule columns so they do not collide with the un-suffixed rule columns
# that `balanced` already carries from the earlier merge.
gold = gold.rename(columns={
    "rule_a": "rule_a_gold",
    "rule_b": "rule_b_gold",
    "rule_c": "rule_c_gold",
    "insomnia_flag": "any_gold"
})

# Convert TRUE/FALSE or 0/1 to int
for col in gold_label_cols:
    gold[col] = gold[col].fillna(0).astype(int)

# Merge sampled patients with gold labels
gold_with_icd = balanced.merge(gold, on="subject_id", how="left")

# Missing entries = non-insomnia cases → set to 0
gold_with_icd[gold_label_cols] = (
    gold_with_icd[gold_label_cols].fillna(0).astype(int)
)

# Merge LLM predictions; patients without any classified sentence have no row
# in agg and get 0 for every prediction column.
df = gold_with_icd.merge(agg, on="subject_id", how="left").fillna(0)

# Convert predictions to int
for col in text_rule_cols:
    df[col] = df[col].astype(int)

print(f"Final merged dataframe shape: {df.shape}")


Final merged dataframe shape: (60, 18)


In [26]:
# ============================================================================
# STEP 8: Evaluation
# ============================================================================

# Section banner framed by two 70-character rules.
banner_rule = "=" * 70
print("\n" + banner_rule)
print("EVALUATION RESULTS - SENTENCE-BASED APPROACH")
print(banner_rule)

def evaluate(true, pred, label):
    """Print and return binary classification metrics for one rule.

    Parameters
    ----------
    true : array-like of 0/1
        Reference labels.
    pred : array-like of 0/1
        Predicted labels.
    label : str
        Heading printed above the metrics.

    Returns
    -------
    dict
        Keys "precision", "recall", "f1" (floats; 0 when undefined) and "cm"
        (the confusion matrix).
    """
    print(f"\n=== {label} ===")
    # Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]),
    # even when one class is absent from both true and pred.
    cm = confusion_matrix(true, pred, labels=[0, 1])
    print("Confusion Matrix:")
    print(cm)

    prec = precision_score(true, pred, zero_division=0)
    rec = recall_score(true, pred, zero_division=0)
    f1 = f1_score(true, pred, zero_division=0)

    print(f"Precision: {prec:.3f}")
    print(f"Recall:    {rec:.3f}")
    print(f"F1 Score:  {f1:.3f}")

    return {"precision": prec, "recall": rec, "f1": f1, "cm": cm}

# (result key, gold column, prediction column, printed heading)
evaluation_specs = [
    ("Rule A", "rule_a_gold", "rule_a_text", "Rule A (Symptoms)"),
    ("Rule B", "rule_b_gold", "rule_b_text", "Rule B (Primary Meds)"),
    ("Rule C", "rule_c_gold", "rule_c_text", "Rule C (Secondary Meds)"),
    ("Any Rule", "any_gold", "any_text", "Any Rule (Insomnia)"),
]

results = {}
for result_key, gold_col, text_col, heading in evaluation_specs:
    results[result_key] = evaluate(df[gold_col], df[text_col], heading)



EVALUATION RESULTS - SENTENCE-BASED APPROACH

=== Rule A (Symptoms) ===
Confusion Matrix:
[[38  9]
 [ 4  9]]
Precision: 0.500
Recall:    0.692
F1 Score:  0.581

=== Rule B (Primary Meds) ===
Confusion Matrix:
[[35 10]
 [ 6  9]]
Precision: 0.474
Recall:    0.600
F1 Score:  0.529

=== Rule C (Secondary Meds) ===
Confusion Matrix:
[[36  6]
 [ 5 13]]
Precision: 0.684
Recall:    0.722
F1 Score:  0.703

=== Any Rule (Insomnia) ===
Confusion Matrix:
[[27  3]
 [ 6 24]]
Precision: 0.889
Recall:    0.800
F1 Score:  0.842


In [27]:
# ============================================================================
# STEP 9: Summary Comparison Table
# ============================================================================

# (row label, gold column, prediction column)
comparison_specs = [
    ("A: Symptoms", "rule_a_gold", "rule_a_text"),
    ("B: Primary Meds", "rule_b_gold", "rule_b_text"),
    ("C: Secondary Meds", "rule_c_gold", "rule_c_text"),
    ("Any Rule", "any_gold", "any_text"),
]


def _safe_ratio(num, den):
    """Element-wise num / den, with 0 wherever den is 0.

    Mirrors the zero_division=0 convention used by evaluate(), so both
    reports agree when a rule has no positives.
    """
    return (num / den.where(den != 0)).fillna(0.0)


# One record per rule: positives on each side plus the four confusion counts.
comparison_rows = []
for rule_name, gold_col, text_col in comparison_specs:
    gold_flags = df[gold_col]
    text_flags = df[text_col]
    comparison_rows.append({
        "Rule": rule_name,
        "Gold +": gold_flags.sum(),
        "LLM +": text_flags.sum(),
        "TP": ((gold_flags == 1) & (text_flags == 1)).sum(),
        "FN": ((gold_flags == 1) & (text_flags == 0)).sum(),
        "FP": ((gold_flags == 0) & (text_flags == 1)).sum(),
        "TN": ((gold_flags == 0) & (text_flags == 0)).sum(),
    })

comparison = pd.DataFrame(
    comparison_rows,
    columns=["Rule", "Gold +", "LLM +", "TP", "FN", "FP", "TN"],
)

comparison["Precision"] = _safe_ratio(comparison["TP"], comparison["TP"] + comparison["FP"])
comparison["Recall"] = _safe_ratio(comparison["TP"], comparison["TP"] + comparison["FN"])
comparison["F1"] = _safe_ratio(
    2 * (comparison["Precision"] * comparison["Recall"]),
    comparison["Precision"] + comparison["Recall"],
)
comparison["Accuracy"] = (comparison["TP"] + comparison["TN"]) / len(df)

print("\nSUMMARY TABLE\n")
print(comparison.round(3).to_string(index=False))



SUMMARY TABLE

             Rule  Gold +  LLM +  TP  FN  FP  TN  Precision  Recall    F1  Accuracy
      A: Symptoms      13     18   9   4   9  38      0.500   0.692 0.581     0.783
  B: Primary Meds      15     19   9   6  10  35      0.474   0.600 0.529     0.733
C: Secondary Meds      18     19  13   5   6  36      0.684   0.722 0.703     0.817
         Any Rule      30     27  24   6   3  27      0.889   0.800 0.842     0.850


In [29]:
# ============================================================================
# STEP 12: Save Results (fix all types before parquet)
# ============================================================================

print("\n" + "="*70)
print("CLEANING DATA TYPES BEFORE SAVING")
print("="*70)


def _stringify_object_columns(frame):
    """Cast every object-dtype column of frame to str, modifying frame in place."""
    for name in frame.columns:
        if frame[name].dtype == "object":
            frame[name] = frame[name].astype(str)


# ---------------------------------------------------------------------------
# 1) Clean EV dataframe
# ---------------------------------------------------------------------------
ev = ev.copy()

bool_like_cols_ev = [
    "is_sleep", "is_impair", "is_primary", "is_secondary",
    "asserts_sleep_difficulty", "asserts_daytime_impairment",
    "asserts_primary_med", "asserts_secondary_med",
    "negated"
]

# Store the boolean-like columns that are present as compact 0/1 integers.
for col in [name for name in bool_like_cols_ev if name in ev.columns]:
    ev[col] = ev[col].astype(bool).astype("int8")

# Convert temporality to string (Parquet requires consistent dtype)
if "temporality" in ev.columns:
    ev["temporality"] = ev["temporality"].astype(str)

# Convert remaining objects to string
_stringify_object_columns(ev)


# ---------------------------------------------------------------------------
# 2) Clean AGG dataframe
# ---------------------------------------------------------------------------
agg = agg.copy()

int_cols_agg = ["rule_a_text", "rule_b_text", "rule_c_text", "any_text",
                "is_sleep", "is_impair", "is_primary", "is_secondary"]

# Only the listed columns that actually exist in agg are cast.
for col in [name for name in int_cols_agg if name in agg.columns]:
    agg[col] = agg[col].astype(int)

# Convert remaining objects to string
_stringify_object_columns(agg)


# ---------------------------------------------------------------------------
# 3) Clean DF dataframe (main evaluation table)
# ---------------------------------------------------------------------------
df = df.copy()

rule_cols = [
    "rule_a_text", "rule_b_text", "rule_c_text", "any_text",
    "rule_a_gold", "rule_b_gold", "rule_c_gold", "any_gold",
    "icd_insomnia"
]

for col in [name for name in rule_cols if name in df.columns]:
    df[col] = df[col].astype(int)

# Convert any remaining objects into strings
_stringify_object_columns(df)


# ---------------------------------------------------------------------------
# SAVE OUTPUT FILES
# ---------------------------------------------------------------------------

# Parquet outputs, written in this order with the fastparquet engine.
parquet_outputs = [
    ("notes_sample_balanced.parquet", notes),
    ("ev_sentence_level_balanced.parquet", ev),
    ("agg_patient_level_balanced.parquet", agg),
    ("df_evaluation_balanced.parquet", df),
]
for parquet_name, frame in parquet_outputs:
    frame.to_parquet(parquet_name, engine="fastparquet", index=False)

# Patient-level errors of the "any rule" prediction against the gold label.
gold_positive = df["any_gold"] == 1
text_positive = df["any_text"] == 1
false_neg = df[gold_positive & (df["any_text"] == 0)]
false_pos = df[(df["any_gold"] == 0) & text_positive]

csv_outputs = [
    ("false_negatives_balanced.csv", false_neg),
    ("false_positives_balanced.csv", false_pos),
]
for csv_name, frame in csv_outputs:
    frame.to_csv(csv_name, index=False)

print("\nAll results saved successfully!")
print("Files created:")
for written_name, _ in parquet_outputs + csv_outputs:
    print(f"  - {written_name}")



CLEANING DATA TYPES BEFORE SAVING

All results saved successfully!
Files created:
  - notes_sample_balanced.parquet
  - ev_sentence_level_balanced.parquet
  - agg_patient_level_balanced.parquet
  - df_evaluation_balanced.parquet
  - false_negatives_balanced.csv
  - false_positives_balanced.csv
