# VLM Cross-Modal Deep Hashing — Colab Launcher

Train a cross-modal hashing model (SigLIP2 → 1-bit binary codes) with a real-time monitoring dashboard.

In [1]:
# Cell 1: GPU Check + Google Drive Mount
import torch

assert torch.cuda.is_available(), "No GPU detected — switch to a GPU runtime."
gpu_name = torch.cuda.get_device_name(0)
vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f"GPU: {gpu_name} ({vram:.1f} GB)")

from google.colab import drive
drive.mount("/content/drive")

!mkdir -p /content/drive/MyDrive/vlm_quantization/checkpoints

  import pynvml  # type: ignore[import]


GPU: Tesla T4 (14.7 GB)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Cell 2: Clone Repo + Install Dependencies + Load .env from Google Drive
!git clone https://github.com/hyunlord/vlm_quantization.git /content/vlm_quantization
%cd /content/vlm_quantization
!pip install -q -r requirements.txt
!pip install -q pyngrok

import os

env_path = "/content/drive/MyDrive/vlm_quantization/.env"


def parse_env_line(line):
    """Parse one line of a .env file into a ``(key, value)`` pair.

    Args:
        line: A raw line of text from the .env file.

    Returns:
        ``(key, value)`` with surrounding whitespace removed, or ``None`` for
        blank lines, ``#`` comments, lines without ``=``, and lines whose key
        is empty. A leading ``export `` on the key is dropped, and one pair of
        matching single or double quotes around the value is removed.
    """
    line = line.strip()
    if not line or line.startswith("#") or "=" not in line:
        return None
    key, val = line.split("=", 1)
    key = key.strip()
    # Accept shell-style files: `export KEY=value`.
    if key.startswith("export "):
        key = key[len("export "):].strip()
    if not key:
        return None
    val = val.strip()
    # KEY="value" / KEY='value' — keep the content, not the quotes.
    if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
        val = val[1:-1]
    return key, val


if os.path.exists(env_path):
    # utf-8-sig tolerates a byte-order mark that would otherwise end up in the first key.
    with open(env_path, encoding="utf-8-sig") as f:
        for line in f:
            parsed = parse_env_line(line)
            if parsed is not None:
                key, val = parsed
                os.environ[key] = val
    print(".env loaded from Google Drive")
else:
    print(f"No .env found at {env_path} — create one with NGROK_AUTH_TOKEN if needed")

fatal: destination path '/content/vlm_quantization' already exists and is not an empty directory.
/content/vlm_quantization
.env loaded from Google Drive


In [None]:
%%time
# Cell 3: Load COCO Captions + Karpathy Split (zip cached on Drive, extracted to local SSD)
# Strategy: Drive FUSE can't handle 82K small files reliably.
# → Cache zips on Drive (single large files = safe)
# → Extract to local /content/data/coco each session (fast SSD)
import os, shutil

DRIVE_CACHE = "/content/drive/MyDrive/data/coco_zips"  # zip cache on Drive
LOCAL_COCO  = "/content/data/coco"                      # training reads from here

SOURCES = {
    "train2014": {
        "url": "http://images.cocodataset.org/zips/train2014.zip",
        "zip": "train2014.zip",
        "folder": "train2014",
    },
    "val2014": {
        "url": "http://images.cocodataset.org/zips/val2014.zip",
        "zip": "val2014.zip",
        "folder": "val2014",
    },
    "annotations": {
        "url": "http://images.cocodataset.org/annotations/annotations_trainval2014.zip",
        "zip": "annotations_trainval2014.zip",
        "folder": "annotations",
    },
}

os.makedirs(DRIVE_CACHE, exist_ok=True)
os.makedirs(LOCAL_COCO, exist_ok=True)

for i, (name, src) in enumerate(SOURCES.items(), 1):
    local_dir = f"{LOCAL_COCO}/{src['folder']}"
    drive_zip = f"{DRIVE_CACHE}/{src['zip']}"
    tmp_zip   = f"/tmp/{src['zip']}"

    if os.path.isdir(local_dir):
        print(f"  [{i}/3] {name} — already extracted locally, skipping")
        continue

    # Try Drive cache first, else download from web
    if os.path.isfile(drive_zip):
        print(f"  [{i}/3] {name} — copying cached zip from Drive...")
        shutil.copy2(drive_zip, tmp_zip)
    else:
        print(f"  [{i}/3] {name} — downloading...")
        !wget -q --show-progress {src['url']} -O {tmp_zip}
        # Cache zip to Drive for next session
        print(f"         caching zip to Drive...")
        shutil.copy2(tmp_zip, drive_zip)

    # Extract to local SSD (fast, no FUSE issues)
    print(f"         extracting to local disk...")
    !unzip -q {tmp_zip} -d {LOCAL_COCO}/
    os.remove(tmp_zip)

# Karpathy split JSON (dataset_coco.json)
KARPATHY_JSON = f"{LOCAL_COCO}/dataset_coco.json"
if not os.path.isfile(KARPATHY_JSON):
    KARPATHY_URL = "https://cs.stanford.edu/people/karpathy/deepimagesent/caption_datasets.zip"
    karpathy_zip = "/tmp/caption_datasets.zip"
    drive_karpathy = f"{DRIVE_CACHE}/caption_datasets.zip"

    if os.path.isfile(drive_karpathy):
        print("  [K] Karpathy JSON — copying cached zip from Drive...")
        shutil.copy2(drive_karpathy, karpathy_zip)
    else:
        print("  [K] Karpathy JSON — downloading...")
        !wget -q --show-progress {KARPATHY_URL} -O {karpathy_zip}
        print("         caching zip to Drive...")
        shutil.copy2(karpathy_zip, drive_karpathy)

    print("         extracting dataset_coco.json...")
    !unzip -q -j {karpathy_zip} "dataset_coco.json" -d {LOCAL_COCO}/
    os.remove(karpathy_zip)
else:
    print("  [K] Karpathy JSON — already present, skipping")

# Verify: every expected folder and the Karpathy JSON must be on local disk
import json

for required_dir in ("train2014", "val2014", "annotations"):
    if not os.path.isdir(os.path.join(LOCAL_COCO, required_dir)):
        raise AssertionError(f"{required_dir} missing!")
if not os.path.isfile(KARPATHY_JSON):
    raise AssertionError("dataset_coco.json missing!")

# Count images per Karpathy split, then drop the parsed JSON to free memory.
with open(KARPATHY_JSON) as karpathy_file:
    kdata = json.load(karpathy_file)
splits = {}
for image_entry in kdata["images"]:
    split_name = image_entry["split"]
    splits.setdefault(split_name, 0)
    splits[split_name] += 1
del kdata

print(f"\nCOCO ready: {LOCAL_COCO}")
train_count = len(os.listdir(f"{LOCAL_COCO}/train2014"))
print(f"  train2014: {train_count:,} images")
val_count = len(os.listdir(f"{LOCAL_COCO}/val2014"))
print(f"  val2014:   {val_count:,} images")
print(f"  Karpathy splits: {splits}")

In [None]:
# Cell 4: Pull Latest Code + Build Frontend (static export)
# git pull first so we build from the latest code (Cell 2 cloned a snapshot)
!cd /content/vlm_quantization && git pull

# Next.js 16 requires Node.js >= 18.18.0; Colab's default is too old
!curl -fsSL https://deb.nodesource.com/setup_20.x | bash - > /dev/null 2>&1
!apt-get -qq install -y nodejs > /dev/null 2>&1
!node --version
!cd /content/vlm_quantization/monitor/frontend && npm install --silent && npm run build
print("Frontend built → monitor/frontend/out/")

In [3]:
# Cell 5: Start Monitoring Server + ngrok Tunnel
# Safe to re-run: an already-running server is reused and tunnels left over
# from an earlier run are closed before a new one is opened.
import os
import socket
import threading
import time

import uvicorn
from pyngrok import ngrok

MONITOR_PORT = 8000
SERVER_START_TIMEOUT_S = 30


def port_is_open(port, host="127.0.0.1"):
    """Return True when something accepts TCP connections on host:port."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(0.5)
        return probe.connect_ex((host, port)) == 0


def run_server():
    """Serve the monitoring app; blocks, so it is run in a daemon thread."""
    uvicorn.run("monitor.server.app:app", host="0.0.0.0", port=MONITOR_PORT, log_level="warning")


token = os.environ.get("NGROK_AUTH_TOKEN", "")
if token:
    ngrok.set_auth_token(token)

if port_is_open(MONITOR_PORT):
    # A second uvicorn on the same port would only fail to bind.
    print(f"Monitoring server already listening on port {MONITOR_PORT} — reusing it")
else:
    threading.Thread(target=run_server, daemon=True).start()
    # Wait until the server accepts connections instead of sleeping a fixed time.
    deadline = time.monotonic() + SERVER_START_TIMEOUT_S
    while not port_is_open(MONITOR_PORT):
        if time.monotonic() > deadline:
            raise RuntimeError(
                f"Monitoring server did not start on port {MONITOR_PORT} "
                f"within {SERVER_START_TIMEOUT_S}s — check the server output above"
            )
        time.sleep(0.25)

# Close tunnels from an earlier run of this cell so they do not accumulate.
for stale_tunnel in ngrok.get_tunnels():
    ngrok.disconnect(stale_tunnel.public_url)

tunnel = ngrok.connect(MONITOR_PORT)
print(f"\n Dashboard: {tunnel.public_url}\n")


 Dashboard: https://absorbed-efren-rubbly.ngrok-free.dev



In [None]:
# Cell 6: Train (checkpoints → Google Drive)
# git pull already done in Cell 4 before frontend build
# Runs train.py from the repo root with configs/colab.yaml; PYTHONPATH is set
# to the repo root for that process only.
# NOTE(review): the checkpoint destination is presumably defined in
# configs/colab.yaml — confirm it points at the Drive folder created in Cell 1.
!cd /content/vlm_quantization && PYTHONPATH=/content/vlm_quantization python train.py --config configs/colab.yaml

In [None]:
# Cell 7: (Optional) Optuna Hyperparameter Search
# Runs 50 trials with 5 epochs each — takes several hours
# NOTE(review): the trial count comes from --n-trials below; the epochs-per-trial
# figure is not set in this command — presumably it lives in optuna_search.py
# or the config, confirm there.
# Results DB + best config saved to Google Drive for persistence
# The command first pulls the latest code, then writes the SQLite study DB and
# the exported best config under OPTUNA_DIR. Because OPTUNA_DIR starts with "/",
# the storage URL expands to sqlite:////content/drive/... (four slashes).
OPTUNA_DIR = "/content/drive/MyDrive/vlm_quantization/optuna"
!mkdir -p {OPTUNA_DIR}
!cd /content/vlm_quantization && git pull && PYTHONPATH=/content/vlm_quantization python optuna_search.py \
    --config configs/colab.yaml \
    --n-trials 50 \
    --storage sqlite:///{OPTUNA_DIR}/optuna_results.db \
    --export-config {OPTUNA_DIR}/best_config.yaml

In [None]:
# Cell 8: Retrain with Best Optuna Config
# Uses the best hyperparameters found by Optuna for full training
OPTUNA_DIR = "/content/drive/MyDrive/vlm_quantization/optuna"
BEST_CONFIG = f"{OPTUNA_DIR}/best_config.yaml"

import os
assert os.path.exists(BEST_CONFIG), f"Best config not found: {BEST_CONFIG}\nRun Cell 7 (Optuna search) first."

print("Best config contents:")
!cat {BEST_CONFIG}
print("\n--- Starting full training with best hyperparameters ---\n")
!cd /content/vlm_quantization && PYTHONPATH=/content/vlm_quantization python train.py --config {BEST_CONFIG}