
# MedCLIP_KaggleRunner_MedVLMPhase1
This notebook reproduces the workflow we used on Kaggle for the IU Chest X-ray projection benchmark with **MedCLIP**.

> **Note:** Kaggle restarts the kernel when the GPU setting or the session changes. After a restart, re-run the notebook from the top (clone + installs).
---


In [None]:
# üß∞ 1) Setup: clone repo, set paths, install deps, add repo to path
!rm -rf /kaggle/working/MedVLMPhase1-KagglePublic
!git clone https://github.com/dimplek0424/MedVLMPhase1-KagglePublic.git
%cd /kaggle/working/MedVLMPhase1-KagglePublic

# MedCLIP + helpers (pinning versions compatible with Kaggle image)
!pip install --quiet "medclip==0.0.3" "textaugment>=1.3.4" "wget"

import os, sys, glob
os.environ["DATA_DIR"]   = "/kaggle/input/chest-xrays-indiana-university"
os.environ["OUTPUT_DIR"] = "/kaggle/working/outputs"

print("python:", sys.executable)
print("REPO_DIR:", os.getcwd())
print("DATA_DIR:", os.getenv("DATA_DIR"))
print("OUTPUT_DIR:", os.getenv("OUTPUT_DIR"))

# quick peek to confirm config files present
!ls -alh configs


In [None]:
# install medclip (from Git) + small deps used by medclip
!pip install --no-input --no-deps "git+https://github.com/RyanWangZf/MedCLIP.git@main" textaugment==1.3.4 wget

In [None]:

# ✅ 2) Sanity: check critical imports
# Diagnostic cell: every check prints its result instead of raising, so one
# missing piece does not hide the status of the others.
import os, sys, pathlib, yaml, torch

print("python:", sys.executable)
print("cwd:", os.getcwd())
p = pathlib.Path("scripts/projection_medclip.py")
print("proj file exists:", p.exists(), "size:", p.stat().st_size if p.exists() else "NA")

# Import failures are reported, not raised, so the remaining checks still run.
try:
    from medclip import MedCLIPModel
    print("✅ medclip import OK")
except Exception as e:
    print("❌ medclip import failed:", repr(e))

cfg_path = pathlib.Path("configs/dataset_iu_v03_full.yaml")
print("cfg exists:", cfg_path.exists())
# A missing file is already reported by the line above; an empty YAML file
# parses to None. Normalise both cases to an empty dict so the lookups below
# cannot fail.
cfg = (yaml.safe_load(cfg_path.read_text()) if cfg_path.exists() else None) or {}
dataset_cfg = cfg.get("dataset") or {}
print("dataset stanza:", dataset_cfg)

# Only probe the image directory when the config actually names a base_dir.
base_dir = dataset_cfg.get("base_dir")
if base_dir is None:
    print("images dir exists:", "NA")
else:
    print("images dir exists:",
          pathlib.Path(base_dir, "images/images_normalized").exists())
print("torch:", torch.__version__, "cuda available:", torch.cuda.is_available())


In [None]:
# 🚀 3) Run: MedCLIP projection benchmark
# Runs scripts/projection_medclip.py as a child process of the kernel's own
# interpreter; a non-zero exit status raises subprocess.CalledProcessError.
import os, shlex, subprocess, sys

cfg  = "configs/dataset_iu_v03_full.yaml"
task = "configs/task_projection_v01.yaml"
out  = "results/projection/iu_v03_full_medclip.csv"

# Make sure output parents exist inside Kaggle's writable area.
# Both directories are derived from their single source of truth (the
# OUTPUT_DIR environment variable and `out`) rather than repeated as literals.
os.makedirs(os.getenv("OUTPUT_DIR", "/kaggle/working/outputs"), exist_ok=True)
os.makedirs(os.path.dirname(out), exist_ok=True)

cmd = [sys.executable, "scripts/projection_medclip.py",
       "--config", cfg, "--task", task, "--out", out]

# shlex.join quotes arguments where needed, so the echoed line can be pasted
# into a shell verbatim.
print("Running:", shlex.join(cmd))
subprocess.check_call(cmd)

print("\n✅ Done. See", out)


In [None]:
# Optional smoke test: confirm the projection CSV was written, then show its
# first rows and the total row count.
import os
import pandas as pd

csv_path = "results/projection/iu_v03_full_medclip.csv"

# Stop here with a readable message if the run cell produced no output file.
csv_found = os.path.exists(csv_path)
assert csv_found, "Output CSV not found."

df = pd.read_csv(csv_path)
preview = df.head(3)
print(preview)

row_count = len(df)
print("\nRows:", row_count)

In [None]:
# üìä 4) Evaluation: confusion matrix, cosine-similarity, t-SNE fallback

%cd /kaggle/working/MedVLMPhase1-KagglePublic

PRED_CSV = "results/projection/iu_v03_full_medclip.csv"

!python scripts/evaluate_views.py \
  --csv {PRED_CSV} \
  --col_image image \
  --col_p1 p_frontal \
  --col_p2 p_lateral \
  --col_pred pred \
  --outdir results/eval_medclip

# sanity check + zip for download
!ls -lh results/eval_medclip
!zip -r /kaggle/working/eval_medclip_artifacts.zip results/eval_medclip
# Kaggle automatically shows a "Download" button in Outputs