# VLM Cross-Modal Deep Hashing — Colab Launcher

Train a cross-modal hashing model (SigLIP2 → 1-bit binary codes) with a real-time monitoring dashboard.

In [1]:
# Cell 1: GPU Check + Google Drive Mount
import torch

assert torch.cuda.is_available(), "No GPU detected — switch to a GPU runtime."
gpu_name = torch.cuda.get_device_name(0)
vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f"GPU: {gpu_name} ({vram:.1f} GB)")

from google.colab import drive
drive.mount("/content/drive")

!mkdir -p /content/drive/MyDrive/vlm_quantization/checkpoints

  import pynvml  # type: ignore[import]


GPU: Tesla T4 (14.7 GB)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Cell 2: Clone Repo + Install Dependencies + Load .env from Google Drive
!git clone https://github.com/hyunlord/vlm_quantization.git /content/vlm_quantization
%cd /content/vlm_quantization
!pip install -q -r requirements.txt
!pip install -q pyngrok

import os

env_path = "/content/drive/MyDrive/vlm_quantization/.env"
if os.path.exists(env_path):
    with open(env_path) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                key, val = line.split("=", 1)
                os.environ[key.strip()] = val.strip()
    print(f".env loaded from Google Drive")
else:
    print(f"No .env found at {env_path} — create one with NGROK_AUTH_TOKEN if needed")

fatal: destination path '/content/vlm_quantization' already exists and is not an empty directory.
/content/vlm_quantization
.env loaded from Google Drive


In [None]:
# Cell 3: Load COCO Captions (cached on Google Drive)
import os

DRIVE_COCO = "/content/drive/MyDrive/data/coco"

if os.path.isdir(f"{DRIVE_COCO}/train2014") and os.path.isdir(f"{DRIVE_COCO}/annotations"):
    print(f"COCO ready: {DRIVE_COCO}")
else:
    print("First run: downloading COCO to Google Drive (one-time)...")
    !mkdir -p {DRIVE_COCO}
    !wget -q --show-progress http://images.cocodataset.org/zips/train2014.zip -O /tmp/train2014.zip
    !wget -q --show-progress http://images.cocodataset.org/zips/val2014.zip -O /tmp/val2014.zip
    !wget -q --show-progress http://images.cocodataset.org/annotations/annotations_trainval2014.zip -O /tmp/ann.zip
    !unzip -q /tmp/train2014.zip -d {DRIVE_COCO}/
    !unzip -q /tmp/val2014.zip -d {DRIVE_COCO}/
    !unzip -q /tmp/ann.zip -d {DRIVE_COCO}/
    !rm /tmp/train2014.zip /tmp/val2014.zip /tmp/ann.zip
    print("COCO saved to Google Drive")

In [7]:
# Cell 4: Build Frontend (static export)
# Next.js 16 requires Node.js >= 18.18.0; Colab's default is too old
!curl -fsSL https://deb.nodesource.com/setup_20.x | bash - > /dev/null 2>&1
!apt-get -qq install -y nodejs > /dev/null 2>&1
!node --version
!cd /content/vlm_quantization/monitor/frontend && npm install --silent && npm run build
print("Frontend built → monitor/frontend/out/")

v20.20.0

> frontend@0.1.0 build
> next build

[1G[0K[35m[1mAttention[22m[39m: Next.js now collects completely anonymous telemetry regarding usage.
This information is used to shape Next.js' roadmap and prioritize features.
You can learn more, including how to opt-out if you'd not like to participate in this anonymous program, by visiting the following URL:
[36mhttps://nextjs.org/telemetry[39m

[1m[38;2;173;127;168m▲ Next.js 16.1.6[39m[22m (Turbopack)

[37m[1m [22m[39m Creating an optimized production build ...
[32m[1m✓[22m[39m Compiled successfully in 12.5s
[32m[1m✓[22m[39m Finished TypeScript in 6.5s 36m.[39m[2K[1G[37m[1m [22m[39m Running TypeScript  [36m..[39m[2K[1G[37m[1m [22m[39m Running TypeScript  [36m...[39m[2K[1G[37m[1m [22m[39m Running TypeScript  [36m.[39m[2K[1G[37m[1m [22m[39m Running TypeScript  [36m..[39m[2K[1G[37m[1m [22m[39m Running TypeScript  [36m...[39m[2K[1G[37m[1m [22m[39m Running TypeScript  [

In [3]:
# Cell 5: Start Monitoring Server + ngrok Tunnel
import os
import socket
import threading
import time

import uvicorn
from pyngrok import ngrok

SERVER_PORT = 8000
STARTUP_TIMEOUT_S = 30  # how long to wait for the server to start listening


def port_is_listening(port, host="127.0.0.1"):
    """Return True if a TCP connection to host:port is accepted, else False."""
    try:
        with socket.create_connection((host, port), timeout=0.5):
            return True
    except OSError:
        return False


def run_server():
    """Run the monitoring app with uvicorn (blocking; meant for a background thread)."""
    uvicorn.run("monitor.server.app:app", host="0.0.0.0", port=SERVER_PORT, log_level="warning")


# The auth token is optional; it is read from the environment when present.
token = os.environ.get("NGROK_AUTH_TOKEN", "")
if token:
    ngrok.set_auth_token(token)

if port_is_listening(SERVER_PORT):
    # Re-running the cell: a server is already bound to the port, so do not
    # start a second one (it would fail with "address already in use").
    print(f"Port {SERVER_PORT} is already serving — reusing the running server")
else:
    threading.Thread(target=run_server, daemon=True).start()
    # Poll until the server accepts connections instead of sleeping a fixed
    # time, so a slow or failed startup is detected before the URL is published.
    deadline = time.monotonic() + STARTUP_TIMEOUT_S
    while not port_is_listening(SERVER_PORT):
        if time.monotonic() > deadline:
            raise RuntimeError(
                f"Monitoring server did not start listening on port {SERVER_PORT} "
                f"within {STARTUP_TIMEOUT_S}s"
            )
        time.sleep(0.25)

tunnel = ngrok.connect(SERVER_PORT)
print(f"\n Dashboard: {tunnel.public_url}\n")


 Dashboard: https://absorbed-efren-rubbly.ngrok-free.dev



In [None]:
# Cell 6: Train (checkpoints → Google Drive)
# Pulls the latest repo code, then runs train.py with configs/colab.yaml.
# The steps are chained with `&&`, so training starts only if `git pull` succeeds.
# PYTHONPATH is set to the repo root for the training process.
# NOTE(review): the "checkpoints → Google Drive" destination is presumably set in
# configs/colab.yaml — confirm there.
!cd /content/vlm_quantization && git pull && PYTHONPATH=/content/vlm_quantization python train.py --config configs/colab.yaml

In [None]:
# Cell 7: (Optional) Manual Checkpoint Backup
import os
import shutil

LOCAL_CKPT_DIR = "/content/vlm_quantization/checkpoints"
DRIVE_CKPT_DIR = "/content/drive/MyDrive/vlm_quantization/checkpoints"


def backup_checkpoints(src_dir=LOCAL_CKPT_DIR, dst_dir=DRIVE_CKPT_DIR):
    """Copy the contents of src_dir into the existing directory dst_dir.

    Existing files in dst_dir are overwritten and existing sub-directories are
    merged, so the backup can be repeated.

    Args:
        src_dir: Directory holding the local checkpoints.
        dst_dir: Backup directory; it must already exist.

    Returns:
        The number of top-level entries in src_dir that were copied. 0 means
        there was nothing to do: src_dir is missing, is empty, or resolves to
        the same location as dst_dir.

    Raises:
        FileNotFoundError: dst_dir does not exist (e.g. Drive is not mounted).
        shutil.Error / OSError: a copy failed. These propagate on purpose so a
            real failure is not mistaken for "nothing to back up".
    """
    if not os.path.isdir(src_dir):
        return 0
    # Guard against copying a directory onto itself (e.g. a symlinked folder).
    if os.path.realpath(src_dir) == os.path.realpath(dst_dir):
        return 0
    entries = os.listdir(src_dir)
    if not entries:
        return 0
    if not os.path.isdir(dst_dir):
        raise FileNotFoundError(
            f"Backup destination {dst_dir} does not exist — is Google Drive mounted?"
        )
    shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
    return len(entries)


copied = backup_checkpoints()
if copied:
    print(f"Backed up {copied} checkpoint item(s) to {DRIVE_CKPT_DIR}")
else:
    print("No local checkpoints to back up (already saving to Drive).")

In [None]:
# Cell 8: (Optional) Optuna Hyperparameter Search
# Runs optuna_search.py from the repo root with configs/colab.yaml and --n-trials 50.
# PYTHONPATH=. resolves to the repo root because of the preceding `cd`.
# Per the original note: 5 epochs per trial, several hours in total, and results
# stored in optuna_results.db (SQLite). These details come from the script/config,
# which are not shown here — confirm before relying on them.
!cd /content/vlm_quantization && PYTHONPATH=. python optuna_search.py --config configs/colab.yaml --n-trials 50