
# MedCLIP_KaggleRunner_MedVLMPhase1
This notebook reproduces the workflow we used on Kaggle for the IU Chest X-ray projection benchmark with **MedCLIP**.

> **Note:** Kaggle restarts the kernel whenever the GPU/accelerator or session changes. After a restart, re-run the notebook from the top (clone + installs).
---

In [None]:
# 🧰 0) Setup: clone repo + set paths
# Move to a directory that always exists first. Once this cell has run, the kernel's
# working directory is the repo itself, so re-running would otherwise delete the
# current directory and then try to clone relative to it.
%cd /kaggle/working
!rm -rf /kaggle/working/MedVLMBenchPhase1
# Clone into an explicit destination so the result does not depend on the working directory.
!git clone https://github.com/dimplek0424/MedVLMBenchPhase1.git /kaggle/working/MedVLMBenchPhase1
%cd /kaggle/working/MedVLMBenchPhase1

In [None]:
import os

# Prerequisite: the Kaggle dataset "chest-xrays-indiana-university" must be attached
# to this notebook before running the cell.
# These paths are exported as environment variables so the `!` shell cells
# further down can read them as $DATA_DIR and $OUTPUT_DIR.
os.environ.update(
    DATA_DIR="/kaggle/input/chest-xrays-indiana-university",
    OUTPUT_DIR="/kaggle/working/outputs",
)

# Echo both values back as a visual confirmation.
for _var in ("DATA_DIR", "OUTPUT_DIR"):
    print(f"{_var} =", os.environ[_var])

In [None]:
# Quick sanity check: confirm the cloned repo exposes a configs/ folder.
# Shows at most the first 80 lines of the long-format listing.
!ls -alh configs | head -n 80

In [None]:
# Cell 1 — install GPU-compatible Torch (P100) + HF stack
#
# Kaggle's default Torch is too new for P100 (sm_60). Pin a CUDA 11.8 build that supports it.
# %pip is used instead of !pip so the packages are installed into the environment of
# the running kernel. If torch was already imported in this session, restart the
# kernel after this cell so the pinned build is the one that gets loaded.

# Remove incompatible torch if present (stdout silenced; errors still show)
%pip uninstall -y torch torchvision torchaudio >/dev/null

# Install CUDA 11.8 wheels (support sm_60 / P100)
%pip install -q --index-url https://download.pytorch.org/whl/cu118 "torch==2.1.2" "torchvision==0.16.2" "torchaudio==2.1.2"

# HF stack compatible with torch 2.1.x
%pip install -q "transformers==4.41.2" "huggingface-hub==0.25.2"

In [None]:

# Sanity check: report which Torch build the kernel picked up and whether
# it can see a CUDA device.
import torch

_torch_version = torch.__version__
_cuda_ok = torch.cuda.is_available()
print("Torch:", _torch_version, "| CUDA available:", _cuda_ok)

In [None]:
# 🚀 3) Run: MedCLIP projection benchmark
# $OUTPUT_DIR is expanded by the shell from the environment variable set earlier in
# the notebook; it is quoted so the path is always passed as a single argument.
# The output folder is created up front (no-op if it already exists) in case the
# script does not create it itself.
!mkdir -p "$OUTPUT_DIR/projection"
!python scripts/projection_medclip.py \
  --config configs/dataset_iu_v03_full.kaggle.yaml \
  --out    "$OUTPUT_DIR/projection/iu_v03_medclip.csv"

In [None]:
# Quick peek: first 5 lines of the projection CSV.
# The leading "!" is required: a bare `head ...` line is not valid Python.
# The path is built from $OUTPUT_DIR so it stays in sync with the run cell.
!head -n 5 "$OUTPUT_DIR/projection/iu_v03_medclip.csv"

In [None]:
# 📊 4) Evaluation + save figures
# Reads the projection CSV produced by the previous step and writes into eval_medclip/.
# $OUTPUT_DIR and $DATA_DIR are expanded by the shell from the environment variables
# set earlier in the notebook; they are quoted so each path is passed as one argument.
!python scripts/evaluate_views.py \
  --csv      "$OUTPUT_DIR/projection/iu_v03_medclip.csv" \
  --data_dir "$DATA_DIR" \
  --outdir   "$OUTPUT_DIR/eval_medclip"

In [None]:
# List artifacts written to the evaluation output folder.
# Explicit "!" so the cell does not depend on IPython's automagic/alias handling;
# the path is built from $OUTPUT_DIR to match the --outdir used by the evaluation cell.
!ls -lah "$OUTPUT_DIR/eval_medclip"

In [None]:
# Run scripts/zeroshot_disease_medclip.py and write its results CSV under $OUTPUT_DIR/disease.
# If you created /kaggle/working/disease_prompts.json, pass it via --classes_json.
# Otherwise, leave it "" to use defaults.
# The output folder is created up front (no-op if it already exists) in case the
# script does not create it itself; $OUTPUT_DIR is quoted so it is passed as one argument.
!mkdir -p "$OUTPUT_DIR/disease"
!python scripts/zeroshot_disease_medclip.py \
  --config configs/dataset_iu_v03_full.kaggle.yaml \
  --out    "$OUTPUT_DIR/disease/iu_v03_medclip_zeroshot.csv" \
  --classes_json ""

In [None]:
# Quick sanity peek (inline) at the zero-shot results CSV
import os

import pandas as pd

# Fall back to the default Kaggle output folder when OUTPUT_DIR is not set,
# the same default the ZIP cell uses, so both cells resolve the same file.
csv = os.path.join(
    os.environ.get("OUTPUT_DIR", "/kaggle/working/outputs"),
    "disease/iu_v03_medclip_zeroshot.csv",
)
df = pd.read_csv(csv)
display(df.head(3))

# Every column other than image/top1/top1_prob is reported as a class; show at most 8.
class_cols = [c for c in df.columns if c not in ("image", "top1", "top1_prob")]
print("rows:", len(df), "| classes:", class_cols[:8], "…")
print("Top-1 preview:", df["top1"].value_counts().head(5))

In [None]:
# ✅ 3) (Inline Python cell) → create ZIP
# Packs the zero-shot results CSV into a single ZIP inside the output folder.
import os
import zipfile

OUT = os.environ.get("OUTPUT_DIR", "/kaggle/working/outputs")
csv_path = f"{OUT}/disease/iu_v03_medclip_zeroshot.csv"
zip_path = f"{OUT}/zeroshot_results.zip"

# Check before opening the archive: ZipFile(..., "w") creates the file immediately,
# so a missing CSV would otherwise leave an empty ZIP behind.
if not os.path.isfile(csv_path):
    raise FileNotFoundError(f"Zero-shot CSV not found: {csv_path} (run the zero-shot cell first)")

with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
    # arcname stores the CSV at the archive root instead of under its full path.
    z.write(csv_path, arcname="iu_v03_medclip_zeroshot.csv")

print("✅ wrote:", zip_path)
