In [6]:
# final_table_builder.py
import itertools
import pandas as pd

# -----------------------------
# 1) Configure methods & tasks
# -----------------------------
# General-purpose vision-language models.
GENERAL_MODELS = [
    "GPT",
    "Gemini",
    "Claude",
    "Llava",
    "Qwen",
    "Pixtral",
]

# Domain-specific models; the trailing notes summarize each model's restriction.
SPECIALIZED_MODELS = [
    "Llama-Surgery",  # CholecT50 only
    "PeskaVLP",  # image-text matching, no segmentation
    "RASO",  # image-text matching, no segmentation
    "CholeNet",  # CholecOrgans presence only
    "GoNoGoNet",  # CholecGoNoGo presence only
]

# Table row order: general models first, then the specialized ones.
ALL_METHODS = [*GENERAL_MODELS, *SPECIALIZED_MODELS]

# Tasks and which metrics they have in the main table
# You can rename the metrics for T50/SSG-VQA once decided
TASKS = {
    "CholecOrgans": ["Presence", "IoU"],
    "CholecGoNoGo": ["Presence", "IoU"],
    "CholecSeg8k": ["Presence", "IoU"],
    "CholecT50": ["Metric"],  # placeholder, resolved below
    "SSG-VQA": ["Metric"],  # placeholder, resolved below
}

# Concrete metric names for the single-metric tasks
# (e.g. "Acc", "F1", "AUROC", "BLEU"); edit these once decided.
T50_METRIC_NAME = "Acc"
SSGVQA_METRIC_NAME = "Acc"

# Resolve the placeholders: Presence/IoU tasks keep their two columns,
# CholecT50 takes T50_METRIC_NAME, and every remaining task takes
# SSGVQA_METRIC_NAME.
TASKS = {
    task: (
        list(metrics)
        if metrics == ["Presence", "IoU"]
        else [T50_METRIC_NAME if task == "CholecT50" else SSGVQA_METRIC_NAME]
    )
    for task, metrics in TASKS.items()
}

# -----------------------------
# 2) Model capability matrix
# -----------------------------
# Define which (method, task, metric) cells are valid (“applicable”) and which are N/A.
# Rules from your description:

# - General models (GPT, Gemini, Claude, Llava, Qwen, Pixtral): can do all tasks.
# - Llama-Surgery: only CholecT50 (unknown if it has presence/IoU notions there; use single metric)
# - PeskaVLP, RASO: image-text matching → can do "Presence" metrics but *cannot segment* (no IoU).
#   For tasks with Presence+IoU, Presence is applicable, IoU is N/A.
#   For single-metric tasks (T50, SSG-VQA), treat the single metric as applicable (classification/QA).
# - CholeNet: only CholecOrgans; Presence applicable, IoU depends on your pipeline; if CholeNet is classification-only, set IoU to N/A.
# - GoNoGoNet: only CholecGoNoGo; Presence applicable, IoU N/A if classification-only.

def is_applicable(method: str, task: str, metric: str) -> bool:
    """Return True when the (method, task, metric) cell can hold a result.

    Rules:
      * general models: every task and metric;
      * "Llama-Surgery": any metric, but only on "CholecT50";
      * "PeskaVLP" / "RASO": every metric on "CholecT50" and "SSG-VQA",
        otherwise only "Presence" (no segmentation, hence no IoU);
      * "CholeNet": only "Presence" on "CholecOrgans";
      * "GoNoGoNet": only "Presence" on "CholecGoNoGo";
      * any other method name: never applicable.
    """
    if method in GENERAL_MODELS:
        return True

    if method in ("PeskaVLP", "RASO"):
        # Matching models answer the single-metric tasks and can report presence.
        return task in ("CholecT50", "SSG-VQA") or metric == "Presence"

    if method == "Llama-Surgery":
        return task == "CholecT50"

    # Classification-only models: presence on their single home task.
    # Widen the metric check if IoU is ever computed for them.
    home_task = {
        "CholeNet": "CholecOrgans",
        "GoNoGoNet": "CholecGoNoGo",
    }.get(method)
    if home_task is None:
        return False
    return task == home_task and metric == "Presence"

# -----------------------------
# 3) Ablation: MedSAM + presence-capable methods
# -----------------------------
# “MedSAM + any method that can detect the presence of something.”
# Define presence-capable per task:
def presence_capable(method: str, task: str) -> bool:
    """Return True when `method` can provide a presence prediction on `task`.

    General models and the matching models ("PeskaVLP", "RASO") qualify on
    every task; the remaining specialized models qualify only on their one
    task. Unknown method names are never presence-capable.
    """
    if method in GENERAL_MODELS or method in ("PeskaVLP", "RASO"):
        return True

    # Specialized models tied to exactly one task.
    sole_task = {
        "Llama-Surgery": "CholecT50",  # if it predicts the label for T50
        "CholeNet": "CholecOrgans",
        "GoNoGoNet": "CholecGoNoGo",
    }
    return method in sole_task and task == sole_task[method]

# Which ablations to include:
# When True, the "MedSAM + <method>" rows are generated and appended after the
# base-method rows; when False, the table contains the base methods only.
INCLUDE_ABLATIONS = True

def ablation_rows():
    """Return the "MedSAM + <method>" row labels, in ALL_METHODS order.

    A base method gets an ablation row as soon as it is presence-capable on
    at least one task in TASKS; methods with no such task are left out.
    """
    return [
        f"MedSAM + {base}"
        for base in ALL_METHODS
        if any(presence_capable(base, task) for task in TASKS)
    ]

# Ablation rows follow the base rows, or are omitted entirely when disabled.
if INCLUDE_ABLATIONS:
    ABLATION_METHODS = ablation_rows()
else:
    ABLATION_METHODS = []
ALL_ROWS = [*ALL_METHODS, *ABLATION_METHODS]

# -----------------------------
# 4) Build the table skeleton
# -----------------------------
# MultiIndex columns: (Task, Metric)
columns = pd.MultiIndex.from_tuples(
    [(task, metric) for task in TASKS for metric in TASKS[task]],
    names=["Task", "Metric"],
)

# Every cell starts as an empty string, to be overwritten with results later.
df = pd.DataFrame("", index=ALL_ROWS, columns=columns)

# Marker written into cells that do not apply to a method.
NA_STR = "—"  # em-dash (U+2014), emitted verbatim into the LaTeX output

def mark_na_cells(frame: pd.DataFrame):
    """Write NA_STR into every cell of `frame` that does not apply to its row.

    `frame` is modified in place; nothing is returned. Rows are method names
    and columns are (task, metric) pairs.

    Base-method rows follow `is_applicable` directly.

    Ablation rows ("MedSAM + <base>") take presence from the base method and
    masks from MedSAM, so:
      * "Presence" is applicable iff the base method is presence-capable on
        the task;
      * "IoU" is applicable under the same condition, because without a
        presence prediction from the base method there is nothing for MedSAM
        to segment on that task;
      * any other (single-metric) column is unaffected by the ablation and
        follows `is_applicable` for the base method.
    """
    ablation_prefix = "MedSAM + "

    for method in frame.index:
        is_ablation = method.startswith(ablation_prefix)
        # Slice off only the leading prefix; str.replace would also rewrite
        # any later occurrence inside the base method's name.
        base_name = method[len(ablation_prefix):] if is_ablation else method

        for (task, metric) in frame.columns:
            if is_ablation and metric in ("Presence", "IoU"):
                applicable = presence_capable(base_name, task)
            else:
                applicable = is_applicable(base_name, task, metric)

            if not applicable:
                frame.loc[method, (task, metric)] = NA_STR

# Apply the applicability rules to the freshly built table.
mark_na_cells(df)

# -----------------------------
# 5) (Optional) Pre-fill demo numbers
# -----------------------------
# Set to True to stamp a mock "0.00" into every applicable cell and preview
# the layout; otherwise those cells stay "" until real results are filled in.
PREFILL_WITH_PLACEHOLDERS = False
if PREFILL_WITH_PLACEHOLDERS:
    for row_label, col_key in itertools.product(df.index, df.columns):
        if df.loc[row_label, col_key] != NA_STR:
            df.loc[row_label, col_key] = "0.00"

# -----------------------------
# 6) Export LaTeX
# -----------------------------
# We’ll emit a nice LaTeX table with booktabs and multi-row header.
# Tip: Wrap in \resizebox{\textwidth}{!}{...} if it’s too wide for two-column format.
# Reference label and caption text placed inside the emitted table environment.
LABEL = "tab:main_results"
CAPTION = " ".join(
    (
        "Comparison across models and tasks.",
        "General-domain LVLMs can run on all tasks; specialized models are limited as noted.",
        "Ablations use MedSAM for IoU while preserving presence detection from the base method.",
    )
)

# Build top header manually (Task spanning)
def latex_multicol_header(columns: pd.MultiIndex) -> str:
    """Build the two-row header for a table with (task, metric) columns.

    The header is derived from `columns` itself, so it always matches the
    frame being rendered rather than a module-level task table.

    Args:
        columns: Two-level column index whose entries are (task, metric)
            pairs. Consecutive entries sharing a task are merged under one
            spanning cell.

    Returns:
        Four newline-joined lines: ``\\toprule``, the task row (a
        ``\\multirow`` "Method" cell followed by one bold ``\\multicolumn``
        per task), the metric row, and ``\\midrule``. The emitted macros
        need the LaTeX ``booktabs`` and ``multirow`` packages.
    """
    # Group consecutive columns by task, preserving column order.
    spans = []  # entries are [task, [metric, ...]]
    for task, metric in columns:
        if spans and spans[-1][0] == task:
            spans[-1][1].append(metric)
        else:
            spans.append([task, [metric]])

    # First header row: bold task names, each spanning its metric columns.
    first_row = ["\\multirow{2}{*}{Method}"]
    first_row.extend(
        f"\\multicolumn{{{len(metrics)}}}{{c}}{{\\textbf{{{task}}}}}"
        for task, metrics in spans
    )

    # Second header row: metric names; the leading " & " skips the Method cell.
    second_row = [str(metric) for _, metrics in spans for metric in metrics]

    return "\n".join(
        [
            "\\toprule",
            " & ".join(first_row) + " \\\\",
            " & " + " & ".join(second_row) + " \\\\",
            "\\midrule",
        ]
    )


def dataframe_to_latex_rows(frame: pd.DataFrame) -> str:
    """Render each row of `frame` as one LaTeX tabular line.

    Cells are taken from the frame's own columns, in column order, so the
    body always has exactly ``frame.shape[1]`` value cells per row and stays
    aligned with a column spec computed from the same frame.

    Args:
        frame: Table whose index holds the row labels; every value is
            converted with ``str``.

    Returns:
        One ``label & cell & ... \\\\`` line per row, joined with newlines
        (an empty string for a frame without rows).
    """
    lines = []
    for method, row in frame.iterrows():
        cells = [str(method)]
        cells.extend(str(value) for value in row)
        lines.append(" & ".join(cells) + " \\\\")
    return "\n".join(lines)

def full_table_latex(frame: pd.DataFrame) -> str:
    """Assemble the complete ``table*`` environment for `frame`.

    The result starts directly with ``\\begin{table*}`` and ends with
    ``\\end{table*}``, without surrounding blank lines. (Calling ``.strip()``
    on a chain of ``+``-joined literals only strips the last literal, which
    left a leading newline in the output; joining lines avoids that.)

    Args:
        frame: Table to render; its column count sets the number of centered
            columns after the left-aligned method column.

    Returns:
        The LaTeX source: table preamble, header, body, ``\\bottomrule``,
        then the caption (CAPTION) and label (LABEL).
    """
    header = latex_multicol_header(frame.columns)
    body = dataframe_to_latex_rows(frame)
    column_spec = "l" + "c" * frame.shape[1]
    return "\n".join(
        [
            "\\begin{table*}[t]",
            "\\centering",
            "\\small",
            "\\setlength{\\tabcolsep}{6pt}",
            f"\\begin{{tabular}}{{{column_spec}}}",
            header,
            body,
            "\\bottomrule",
            "\\end{tabular}",
            f"\\caption{{{CAPTION}}}",
            f"\\label{{{LABEL}}}",
            "\\end{table*}",
        ]
    )


# Render the table once; the same string is both saved and echoed.
latex_code = full_table_latex(df)

# Persist the rendered table (path is relative to the current working directory).
OUT_PATH = "final_main_table.tex"
with open(OUT_PATH, mode="w", encoding="utf-8") as tex_file:
    tex_file.write(latex_code)

# Echo to stdout for quick copy-paste.
print(latex_code)

# -----------------------------
# 7) Notes for usage
# -----------------------------
# - Fill numeric results directly into `df.loc[method, (task, metric)]`.
# - Once you have decided on the T50/SSG-VQA metric names, set T50_METRIC_NAME / SSGVQA_METRIC_NAME above.
# - If some specialized model actually supports IoU via your evaluation pipeline (e.g., by leveraging
#   predicted masks from somewhere), flip the applicability logic in `is_applicable` accordingly.
# - If the table is too wide in a two-column doc, wrap the tabular with:
#     \resizebox{\linewidth}{!}{
#       <tabular...>
#     }
#   or switch to `table*` in a two-column layout, as already used here.



\begin{table*}[t]
\centering
\small
\setlength{\tabcolsep}{6pt}
\begin{tabular}{lcccccccc}
\toprule
\multirow{2}{*}{Method} & \multicolumn{2}{c}{\textbf{CholecOrgans}} & \multicolumn{2}{c}{\textbf{CholecGoNoGo}} & \multicolumn{2}{c}{\textbf{CholecSeg8k}} & \multicolumn{1}{c}{\textbf{CholecT50}} & \multicolumn{1}{c}{\textbf{SSG-VQA}} \\
 & Presence & IoU & Presence & IoU & Presence & IoU & Acc & Acc \\
\midrule
GPT &  &  &  &  &  &  &  &  \\
Gemini &  &  &  &  &  &  &  &  \\
Claude &  &  &  &  &  &  &  &  \\
Llava &  &  &  &  &  &  &  &  \\
Qwen &  &  &  &  &  &  &  &  \\
Pixtral &  &  &  &  &  &  &  &  \\
Llama-Surgery & — & — & — & — & — & — &  & — \\
PeskaVLP &  & — &  & — &  & — &  &  \\
RASO &  & — &  & — &  & — &  &  \\
CholeNet &  & — & — & — & — & — & — & — \\
GoNoGoNet & — & — &  & — & — & — & — & — \\
MedSAM + GPT &  &  &  &  &  &  &  &  \\
MedSAM + Gemini &  &  &  &  &  &  &  &  \\
MedSAM + Claude &  &  &  &  &  &  &  &  \\
MedSAM + Llava &  &  &  &  &  &  &  &  \\
MedSAM + 