In [1]:
from sklearn.metrics import classification_report, confusion_matrix
import json
from pathlib import Path
from typing import Dict, Any, List
import pandas as pd

In [2]:
def load_gold_omap_xlsx(path: str) -> pd.DataFrame:
    df = pd.read_excel(path, dtype=str).fillna("")

    # --- Split OMOP entity + attribute ---
    df[["entity", "attribute"]] = df["omop"].str.split("-", n=1, expand=True)

    # --- Split MIMIC table + column ---
    df[["gold_table", "gold_column"]] = df["table"].str.split("-", n=1, expand=True)

    # Keep only needed columns
    df = df[["entity", "attribute", "gold_table", "gold_column", "label"]]

    return df


In [3]:
def evaluate_mapping(mapping_json_path, gold_file_path):
    if not gold_file_path:
        print("No gold file provided — skipping evaluation.")
        return

    print(f"\n=== Running Evaluation using GOLD dataset: {gold_file_path} ===")

    # ---- Load predictions ----
    with open(mapping_json_path, "r", encoding="utf-8") as f:
        pred = json.load(f)

    pred_entities = pred["mapping"]

    # ---- Load gold ----
    file_ext = gold_file_path.split(".")[-1].lower()

    if file_ext == "xlsx":
        raw = pd.read_excel(gold_file_path, dtype=str).fillna("")
        gold = load_gold_omap_xlsx(gold_file_path)
    elif file_ext == "csv":
        gold = pd.read_csv(gold_file_path, dtype=str)
    else:
        raise ValueError(f"Unsupported gold file type: {file_ext}")
    
    gold["entity"] = gold["entity"].str.lower()
    gold["attribute"] = gold["attribute"].str.lower()
    gold["gold_table"] = gold["gold_table"].str.lower()
    gold["gold_column"] = gold["gold_column"].str.lower()

    # ---- Build prediction DF ----
    rows = []
    for ent in pred_entities:
        e = ent["entity"].lower()
        matched_table = ent.get("matched_table") or ""

        # Table-level prediction
        rows.append({
            "entity": e,
            "attribute": None,
            "pred_table": matched_table,
            "pred_column": None
        })

        for attr in ent["attributes"]:
            rows.append({
                "entity": e,
                "attribute": attr["name"].lower(),
                "pred_table": matched_table,
                "pred_column": (attr.get("target_column") or "")
            })

    pred_df = pd.DataFrame(rows)

    # ---- Table-level join ----
    gold_tables = gold[["entity", "gold_table"]].drop_duplicates()
    table_eval = gold_tables.merge(
        pred_df[pred_df["attribute"].isna()],
        on="entity", how="left"
    )
    table_eval["y_true"] = table_eval["gold_table"]
    table_eval["y_pred"] = table_eval["pred_table"]

    # ---- Column-level join ----
    gold_cols = gold.dropna(subset=["attribute"])
    col_eval = gold_cols.merge(
        pred_df[pred_df["attribute"].notna()],
        on=["entity", "attribute"], how="left"
    )
    col_eval["y_true"] = col_eval["gold_column"]
    col_eval["y_pred"] = col_eval["pred_column"]

    # ---- Metrics ----
    print("\n>>> TABLE-LEVEL METRICS")
    table_report = classification_report(table_eval["y_true"], table_eval["y_pred"], zero_division=0)
    print("\n" + table_report)
    table_cm = confusion_matrix(table_eval["y_true"], table_eval["y_pred"])
    print(f"Table-level Confusion Matrix:\n{table_cm}")

    print("\n>>> COLUMN-LEVEL METRICS")
    col_report = classification_report(col_eval["y_true"], col_eval["y_pred"], zero_division=0)
    print("\n" + col_report)
    col_cm = confusion_matrix(col_eval["y_true"], col_eval["y_pred"])
    print(f"Column-level Confusion Matrix:\n{col_cm}")

    # ---- Save evaluation artifacts ----
    eval_dir = Path("artifacts/evaluation")
    eval_dir.mkdir(parents=True, exist_ok=True)

    table_eval.to_csv(eval_dir/"table_eval.csv", index=False)
    col_eval.to_csv(eval_dir/"column_eval.csv", index=False)

    with open(eval_dir/"report.json", "w", encoding="utf-8") as f:
        json.dump({
            "table_classification_report": table_report,
            "table_confusion_matrix": table_cm.tolist(),
            "column_classification_report": col_report,
            "column_confusion_matrix": col_cm.tolist(),
        }, f, indent=2)

    print(f"\nSaved evaluation results in: {eval_dir}")


In [4]:
evaluate_mapping("./mapping_report.json", "./omop_mimic_data.xlsx")


=== Running Evaluation using GOLD dataset: ./omop_mimic_data.xlsx ===

>>> TABLE-LEVEL METRICS

                    precision    recall  f1-score   support

                         0.00      0.00      0.00         0
        admissions       0.04      0.05      0.04        21
           callout       0.00      0.00      0.00        21
        caregivers       0.00      0.00      0.00        21
       chartevents       0.04      0.05      0.04        21
         cptevents       0.04      0.05      0.04        21
             d_cpt       0.00      0.00      0.00        21
   d_icd_diagnoses       0.00      0.00      0.00        21
  d_icd_procedures       0.00      0.00      0.00        21
           d_items       0.00      0.00      0.00        21
        d_labitems       0.00      0.00      0.00        21
    datetimeevents       0.00      0.00      0.00        21
     diagnoses_icd       0.00      0.00      0.00        21
          drgcodes       0.00      0.00      0.00        21
  

In [17]:
def evaluate_mapping(mapping_json_path, gold_file_path):
    if not gold_file_path:
        print("No gold file provided — skipping evaluation.")
        return

    print(f"\n=== Running Evaluation using GOLD dataset: {gold_file_path} ===")

    # ---- Load predictions ----
    with open(mapping_json_path, "r", encoding="utf-8") as f:
        pred = json.load(f)

    pred_entities = pred["mapping"]

    # ---- Load gold ----
    file_ext = gold_file_path.split(".")[-1].lower()

    if file_ext == "xlsx":
        gold = load_gold_omap_xlsx(gold_file_path)
    elif file_ext == "csv":
        gold = pd.read_csv(gold_file_path, dtype=str)
    else:
        raise ValueError(f"Unsupported gold file type: {file_ext}")

    # Normalize fields
    gold["entity"] = gold["entity"].str.lower()
    gold["attribute"] = gold["attribute"].str.lower()
    gold["gold_table"] = gold["gold_table"].str.lower()
    gold["gold_column"] = gold["gold_column"].str.lower()

    # Convert label to int if needed
    gold["label"] = gold["label"].astype(int)

    # ---- Build prediction DF ----
    rows = []
    for ent in pred_entities:
        e = ent["entity"].lower()
        matched_table = (ent.get("matched_table") or "").lower()

        # Table-level prediction row
        rows.append({
            "entity": e,
            "attribute": None,
            "pred_table": matched_table,
            "pred_column": None
        })

        # Attribute-level prediction rows
        for attr in ent["attributes"]:
            rows.append({
                "entity": e,
                "attribute": attr["name"].lower(),
                "pred_table": matched_table,
                "pred_column": (attr.get("target_column") or "").lower()
            })

    pred_df = pd.DataFrame(rows)

    # ============================================================
    # TABLE-LEVEL BINARY EVALUATION
    # ============================================================

    gold_tables = gold[["entity", "gold_table", "label"]].drop_duplicates()

    table_eval = gold_tables.merge(
        pred_df[pred_df["attribute"].isna()],
        on="entity",
        how="left"
    )

    # y_true = gold label
    table_eval["y_true"] = table_eval["label"]

    # y_pred = 1 if predicted_table == gold_table else 0
    table_eval["y_pred"] = (table_eval["pred_table"] == table_eval["gold_table"]).astype(int)

    print("\n>>> TABLE-LEVEL METRICS (Binary)")
    table_report = classification_report(
        table_eval["y_true"], table_eval["y_pred"],
        digits=4, zero_division=0
    )
    print(table_report)

    table_cm = confusion_matrix(table_eval["y_true"], table_eval["y_pred"])
    print("Table Confusion Matrix:\n", table_cm)


    # ============================================================
    # COLUMN-LEVEL BINARY EVALUATION
    # ============================================================

    gold_cols = gold.dropna(subset=["attribute"])

    col_eval = gold_cols.merge(
        pred_df[pred_df["attribute"].notna()],
        on=["entity", "attribute"],
        how="left"
    )

    # y_true = gold label
    col_eval["y_true"] = col_eval["label"]

    # y_pred = 1 if predicted_column == gold_column else 0
    col_eval["y_pred"] = (col_eval["pred_column"] == col_eval["gold_column"]).astype(int)

    print("\n>>> COLUMN-LEVEL METRICS (Binary)")
    col_report = classification_report(
        col_eval["y_true"], col_eval["y_pred"],
        digits=4, zero_division=0
    )
    print(col_report)

    col_cm = confusion_matrix(col_eval["y_true"], col_eval["y_pred"])
    print("Column Confusion Matrix:\n", col_cm)


    # ============================================================
    # SAVE ARTIFACTS
    # ============================================================

    eval_dir = Path("artifacts/evaluation")
    eval_dir.mkdir(parents=True, exist_ok=True)

    table_eval.to_csv(eval_dir / "table_eval_binary.csv", index=False)
    col_eval.to_csv(eval_dir / "column_eval_binary.csv", index=False)

    with open(eval_dir / "binary_report.json", "w", encoding="utf-8") as f:
        json.dump({
            "table_report": table_report,
            "table_confusion_matrix": table_cm.tolist(),
            "column_report": col_report,
            "column_confusion_matrix": col_cm.tolist(),
        }, f, indent=2)

    print(f"\nSaved evaluation results to: {eval_dir}")
evaluate_mapping("./mapping_imputed.json", "./omop_mimic_data.xlsx")


=== Running Evaluation using GOLD dataset: ./omop_mimic_data.xlsx ===

>>> TABLE-LEVEL METRICS (Binary)
              precision    recall  f1-score   support

           0     0.9274    0.9733    0.9498       525
           1     0.3000    0.1304    0.1818        46

    accuracy                         0.9054       571
   macro avg     0.6137    0.5519    0.5658       571
weighted avg     0.8769    0.9054    0.8879       571

Table Confusion Matrix:
 [[511  14]
 [ 40   6]]

>>> COLUMN-LEVEL METRICS (Binary)
              precision    recall  f1-score   support

           0     0.9981    0.9972    0.9977     63951
           1     0.0378    0.0543    0.0446       129

    accuracy                         0.9953     64080
   macro avg     0.5180    0.5257    0.5211     64080
weighted avg     0.9962    0.9953    0.9957     64080

Column Confusion Matrix:
 [[63773   178]
 [  122     7]]

Saved evaluation results to: artifacts\evaluation


In [10]:
def evaluate_mapping(mapping_json_path, gold_file_path, threshold=0.90):
    if not gold_file_path:
        print("No gold file provided — skipping evaluation.")
        return

    print(f"\n=== Running Evaluation using GOLD dataset: {gold_file_path} ===")

    # ---- Load predictions ----
    with open(mapping_json_path, "r", encoding="utf-8") as f:
        pred = json.load(f)

    pred_entities = pred["mapping"]

    # ---- Load gold ----
    file_ext = gold_file_path.split(".")[-1].lower()

    if file_ext == "xlsx":
        gold = load_gold_omap_xlsx(gold_file_path)
    elif file_ext == "csv":
        gold = pd.read_csv(gold_file_path, dtype=str)
    else:
        raise ValueError(f"Unsupported gold file type: {file_ext}")

    # Normalize fields
    gold["entity"] = gold["entity"].str.lower()
    gold["attribute"] = gold["attribute"].str.lower()
    gold["gold_table"] = gold["gold_table"].str.lower()
    gold["gold_column"] = gold["gold_column"].str.lower()

    # Convert label to int if needed
    gold["label"] = gold["label"].astype(int)

    # ---- Build prediction DF ----
    rows = []
    for ent in pred_entities:
        e = ent["entity"].lower()
        matched_table = (ent.get("matched_table") or "").lower()

        # Table-level prediction row
        rows.append({
            "entity": e,
            "attribute": None,
            "pred_table": matched_table,
            "pred_column": None
        })

        # Attribute-level prediction rows
        for attr in ent["attributes"]:
            rows.append({
                "entity": e,
                "attribute": attr["name"].lower(),
                "pred_table": matched_table,
                "pred_column": (attr.get("target_column") or "").lower()
            })

    pred_df = pd.DataFrame(rows)

    # ============================================================
    # TABLE-LEVEL BINARY EVALUATION
    # ============================================================

    gold_tables = gold[["entity", "gold_table", "label"]].drop_duplicates()

    table_eval = gold_tables.merge(
        pred_df[pred_df["attribute"].isna()],
        on="entity",
        how="left"
    )
    conf_map = {e["entity"].lower(): e.get("table_confidence", 0.0)
            for e in pred_entities}

    table_eval["table_confidence"] = table_eval["entity"].map(conf_map)

    # y_true = gold label
    table_eval["y_true"] = table_eval["label"]

    table_eval["y_pred"] = (
            (table_eval["pred_table"] == table_eval["gold_table"]) &
            (table_eval["table_confidence"] >= threshold)
        ).astype(int)

    print("\n>>> TABLE-LEVEL METRICS (Binary)")
    table_report = classification_report(
        table_eval["y_true"], table_eval["y_pred"],
        digits=4, zero_division=0
    )
    print(table_report)

    table_cm = confusion_matrix(table_eval["y_true"], table_eval["y_pred"])
    print("Table Confusion Matrix:\n", table_cm)


    # ============================================================
    # COLUMN-LEVEL BINARY EVALUATION
    # ============================================================

    gold_cols = gold.dropna(subset=["attribute"])

    col_eval = gold_cols.merge(
        pred_df[pred_df["attribute"].notna()],
        on=["entity", "attribute"],
        how="left"
    )

    # y_true = gold label
    col_eval["y_true"] = col_eval["label"]

    # y_pred = 1 if predicted_column == gold_column else 0
    col_eval["y_pred"] = (col_eval["pred_column"] == col_eval["gold_column"]).astype(int)

    print("\n>>> COLUMN-LEVEL METRICS (Binary)")
    col_report = classification_report(
        col_eval["y_true"], col_eval["y_pred"],
        digits=4, zero_division=0
    )
    print(col_report)

    col_cm = confusion_matrix(col_eval["y_true"], col_eval["y_pred"])
    print("Column Confusion Matrix:\n", col_cm)


    # ============================================================
    # SAVE ARTIFACTS
    # ============================================================

    eval_dir = Path("artifacts/evaluation")
    eval_dir.mkdir(parents=True, exist_ok=True)

    table_eval.to_csv(eval_dir / "table_eval_binary.csv", index=False)
    col_eval.to_csv(eval_dir / "column_eval_binary.csv", index=False)

    with open(eval_dir / "binary_report.json", "w", encoding="utf-8") as f:
        json.dump({
            "table_report": table_report,
            "table_confusion_matrix": table_cm.tolist(),
            "column_report": col_report,
            "column_confusion_matrix": col_cm.tolist(),
        }, f, indent=2)

    print(f"\nSaved evaluation results to: {eval_dir}")
evaluate_mapping("./mapping_report.json", "./omop_mimic_data.xlsx")


=== Running Evaluation using GOLD dataset: ./omop_mimic_data.xlsx ===

>>> TABLE-LEVEL METRICS (Binary)
              precision    recall  f1-score   support

           0     0.9299    0.9848    0.9565       525
           1     0.4667    0.1522    0.2295        46

    accuracy                         0.9177       571
   macro avg     0.6983    0.5685    0.5930       571
weighted avg     0.8925    0.9177    0.8980       571

Table Confusion Matrix:
 [[517   8]
 [ 39   7]]

>>> COLUMN-LEVEL METRICS (Binary)
              precision    recall  f1-score   support

           0     0.9983    0.9994    0.9989     63951
           1     0.3710    0.1783    0.2408       129

    accuracy                         0.9977     64080
   macro avg     0.6847    0.5888    0.6199     64080
weighted avg     0.9971    0.9977    0.9973     64080

Column Confusion Matrix:
 [[63912    39]
 [  106    23]]

Saved evaluation results to: artifacts\evaluation


In [16]:
def evaluate_mapping(mapping_json_path, gold_file_path, threshold=0.25):
    if not gold_file_path:
        print("No gold file provided — skipping evaluation.")
        return

    print(f"\n=== Running Evaluation using GOLD dataset: {gold_file_path} ===")

    # ---- Load predictions ----
    with open(mapping_json_path, "r", encoding="utf-8") as f:
        pred = json.load(f)

    pred_entities = pred["mapping"]

    # ---- Load gold ----
    file_ext = gold_file_path.split(".")[-1].lower()

    if file_ext == "xlsx":
        gold = load_gold_omap_xlsx(gold_file_path)
    elif file_ext == "csv":
        gold = pd.read_csv(gold_file_path, dtype=str)
    else:
        raise ValueError(f"Unsupported gold file type: {file_ext}")

    # Normalize fields
    gold["entity"] = gold["entity"].str.lower()
    gold["attribute"] = gold["attribute"].str.lower()
    gold["gold_table"] = gold["gold_table"].str.lower()
    gold["gold_column"] = gold["gold_column"].str.lower()

    # Convert label to int if needed
    gold["label"] = gold["label"].astype(int)

    # ---- Build prediction DF ----
    rows = []
    for ent in pred_entities:
        e = ent["entity"].lower()
        matched_table = (ent.get("matched_table") or "").lower()

        # Table-level prediction row
        rows.append({
            "entity": e,
            "attribute": None,
            "pred_table": matched_table,
            "pred_column": None
        })

        # Attribute-level prediction rows
        for attr in ent["attributes"]:
            rows.append({
                "entity": e,
                "attribute": attr["name"].lower(),
                "pred_table": matched_table,
                "pred_column": (attr.get("target_column") or "").lower()
            })

    pred_df = pd.DataFrame(rows)

    # ============================================================
    # TABLE-LEVEL BINARY EVALUATION
    # ============================================================

    gold_tables = gold[["entity", "gold_table", "label"]].drop_duplicates()

    table_eval = gold_tables.merge(
        pred_df[pred_df["attribute"].isna()],
        on="entity",
        how="left"
    )
    conf_map = {e["entity"].lower(): e.get("table_confidence", 0.0)
            for e in pred_entities}

    table_eval["table_confidence"] = table_eval["entity"].map(conf_map)

    # y_true = gold label
    table_eval["y_true"] = table_eval["label"]

    table_eval["y_pred"] = (
            (table_eval["pred_table"] == table_eval["gold_table"]) &
            (table_eval["table_confidence"] >= threshold)
        ).astype(int)

    print("\n>>> TABLE-LEVEL METRICS (Binary)")
    table_report = classification_report(
        table_eval["y_true"], table_eval["y_pred"],
        digits=4, zero_division=0
    )
    print(table_report)

    table_cm = confusion_matrix(table_eval["y_true"], table_eval["y_pred"])
    print("Table Confusion Matrix:\n", table_cm)


    # ============================================================
    # COLUMN-LEVEL BINARY EVALUATION
    # ============================================================

    # ============================================================
# COLUMN-LEVEL BINARY EVALUATION (WITH CONFIDENCE)
# ============================================================

    gold_cols = gold.dropna(subset=["attribute"])

    # Build lookup for column confidence
    col_conf_map = {}
    for ent in pred_entities:
        e = ent["entity"].lower()
        for attr in ent["attributes"]:
            a = attr["name"].lower()
            col_conf_map[(e, a)] = attr.get("confidence", 0.0)

    col_eval = gold_cols.merge(
        pred_df[pred_df["attribute"].notna()],
        on=["entity", "attribute"],
        how="left"
    )

    # y_true = gold label
    col_eval["y_true"] = col_eval["label"]

    # Inject column confidence
    col_eval["column_confidence"] = col_eval.apply(
        lambda row: col_conf_map.get((row["entity"], row["attribute"]), 0.0),
        axis=1
    )

    # Apply threshold:
    col_threshold = threshold  # you can use config["column_threshold"] if needed

    col_eval["y_pred"] = (
        (col_eval["pred_column"] == col_eval["gold_column"]) &
        (col_eval["column_confidence"] >= col_threshold)
    ).astype(int)

    print("\n>>> COLUMN-LEVEL METRICS (Binary)")
    col_report = classification_report(
        col_eval["y_true"], col_eval["y_pred"],
        digits=4, zero_division=0
    )
    print(col_report)

    col_cm = confusion_matrix(col_eval["y_true"], col_eval["y_pred"])
    print("Column Confusion Matrix:\n", col_cm)


    # ============================================================
    # SAVE ARTIFACTS
    # ============================================================

    eval_dir = Path("artifacts/evaluation")
    eval_dir.mkdir(parents=True, exist_ok=True)

    table_eval.to_csv(eval_dir / "table_eval_binary.csv", index=False)
    col_eval.to_csv(eval_dir / "column_eval_binary.csv", index=False)

    with open(eval_dir / "binary_report.json", "w", encoding="utf-8") as f:
        json.dump({
            "table_report": table_report,
            "table_confusion_matrix": table_cm.tolist(),
            "column_report": col_report,
            "column_confusion_matrix": col_cm.tolist(),
        }, f, indent=2)

    print(f"\nSaved evaluation results to: {eval_dir}")
evaluate_mapping("./mapping_imputed.json", "./omop_mimic_data.xlsx")


=== Running Evaluation using GOLD dataset: ./omop_mimic_data.xlsx ===

>>> TABLE-LEVEL METRICS (Binary)
              precision    recall  f1-score   support

           0     0.9194    1.0000    0.9580       525
           1     0.0000    0.0000    0.0000        46

    accuracy                         0.9194       571
   macro avg     0.4597    0.5000    0.4790       571
weighted avg     0.8454    0.9194    0.8808       571

Table Confusion Matrix:
 [[525   0]
 [ 46   0]]

>>> COLUMN-LEVEL METRICS (Binary)
              precision    recall  f1-score   support

           0     0.9980    0.9988    0.9984     63951
           1     0.0256    0.0155    0.0193       129

    accuracy                         0.9968     64080
   macro avg     0.5118    0.5072    0.5089     64080
weighted avg     0.9961    0.9968    0.9964     64080

Column Confusion Matrix:
 [[63875    76]
 [  127     2]]

Saved evaluation results to: artifacts\evaluation
