
# MedCLIP_KaggleRunner_MedVLMPhase1
This notebook reproduces the workflow we used on Kaggle for the IU Chest X‑ray projection benchmark with **MedCLIP**.

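> **Note:** Kaggle restarts the session whenever the accelerator (GPU) setting or the session itself changes. After any restart, re-run the notebook from the top, starting with the setup cell (clone + installs).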
---


In [None]:
# 🧰 1) Setup: clone repo, set paths, install deps, add repo to path
!git clone https://github.com/dimplek0424/MedVLMPhase1-KagglePublic.git
%cd /kaggle/working/MedVLMPhase1-KagglePublic

# MedCLIP + helpers (pinning versions compatible with Kaggle image)
!pip install --quiet "medclip==0.0.3" "textaugment>=1.3.4" "wget"

import os, sys, glob
os.environ["DATA_DIR"]   = "/kaggle/input/chest-xrays-indiana-university"
os.environ["OUTPUT_DIR"] = "/kaggle/working/outputs"

print("python:", sys.executable)
print("REPO_DIR:", os.getcwd())
print("DATA_DIR:", os.getenv("DATA_DIR"))
print("OUTPUT_DIR:", os.getenv("OUTPUT_DIR"))

# quick peek to confirm config files present
!ls -alh configs


In [None]:

# ✅ 2) Sanity: check critical imports
import torch, torchvision
from PIL import Image as _PIL_Image  # loads Pillow
print("torch:", torch.__version__, "| torchvision:", torchvision.__version__)

import medclip
from transformers import AutoTokenizer, CLIPProcessor
print("✅ medclip import ok")
print("✅ transformers AutoTokenizer ok")
print("✅ transformers CLIPProcessor ok")

!ls -alh configs


In [None]:

# 🚀 3) Run: MedCLIP projection benchmark
# Runs scripts/projection_medclip.py as a child process of the kernel's own
# interpreter. All paths are relative to the current working directory.
import os, sys, subprocess

script = "scripts/projection_medclip.py"
cfg  = "configs/dataset_iu_v03_full.yaml"
task = "configs/task_projection_v01.yaml"
out  = "results/projection/iu_v03_full_medclip.csv"

# Fail early with a readable message when the script or a config is missing
# (e.g. the clone failed or the working directory is not the repo), instead
# of a bare CalledProcessError from the child process.
for required in (script, cfg, task):
    if not os.path.isfile(required):
        raise FileNotFoundError(
            f"{required} not found (cwd={os.getcwd()}); re-run the setup cell first."
        )

# Derive the directory from `out` so the two can never drift apart.
os.makedirs(os.path.dirname(out), exist_ok=True)

cmd = [sys.executable, script,
       "--config", cfg, "--task", task, "--out", out]
print("Running:", " ".join(cmd))
# check_call raises CalledProcessError on a non-zero exit, which stops "Run All".
subprocess.check_call(cmd)
print("\n✅ Done. See", out)


In [None]:
# Optional: Quick smoke-check of results
# Confirms the benchmark CSV exists, then shows its first rows and row count.
import os
import pandas as pd

csv_path = "results/projection/iu_v03_full_medclip.csv"
csv_found = os.path.exists(csv_path)
assert csv_found, "Output CSV not found."

df = pd.read_csv(csv_path)
preview = df.head(3)
row_count = len(df)
print(preview)
print("\nRows:", row_count)

In [None]:
# Optional: save results to download later.
# Recursively zips the `results` directory (relative to the current working
# directory) into /kaggle/working/medclip_results.zip.
!zip -r /kaggle/working/medclip_results.zip results

# You can then download medclip_results.zip from the right file panel.

In [None]:
# 📊 4) Evaluation: confusion matrix, cosine-similarity, t-SNE fallback
import subprocess, sys

# ---- EVALUATION: MedCLIP ----
# Launched with sys.executable + check_call, like the benchmark step, so the
# script runs under the kernel's interpreter (and its installed packages) and
# a non-zero exit raises CalledProcessError instead of being silently ignored,
# as happens with a `!python ...` shell line.
eval_cmd = [
    sys.executable, "scripts/evaluate_views.py",
    "--csv", "results/projection/iu_v03_full_medclip.csv",
    "--outdir", "results/eval/medclip",
    "--col_image", "image",
    "--col_p1", "p_frontal",
    "--col_p2", "p_lateral",
    "--col_pred", "pred",
    "--label1", "frontal",
    "--label2", "lateral",
]
print("Running:", " ".join(eval_cmd))
subprocess.check_call(eval_cmd)

In [None]:
# Compress all evaluation artifacts
!zip -r medclip_eval_results.zip results/eval/medclip

# Kaggle automatically shows a "Download" button in Outputs