# Zero-Shot Road Hazard Detection - Colab Training

This notebook trains cross-attention layers on BDD100K data for zero-shot object detection.


In [None]:
# Install dependencies
%pip install torch torchvision transformers matplotlib pillow numpy

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Clone repository
!git clone https://github.com/cykurd/open-vocab-detection.git /content/open-vocab-detection
%cd /content/open-vocab-detection


In [None]:
# Verify data structure
# Prints a summary of the dataset folder on Drive: top-level entries, the number
# of .jpg files per split under images/, and the .json files under labels/.
import os

data_path = '/content/drive/MyDrive/10k_clean'
data_found = os.path.exists(data_path)
print(f"Data path: {data_path}")
print(f"Exists: {data_found}")

if not data_found:
    print("❌ Data directory not found! Please check the path.")
else:
    # Top-level listing; sub-directories are shown with their entry count
    print("\nDirectory contents:")
    for entry in os.listdir(data_path):
        entry_path = os.path.join(data_path, entry)
        if not os.path.isdir(entry_path):
            print(f"  {entry}")
            continue
        n_children = len(os.listdir(entry_path))
        print(f"  {entry}/ ({n_children} items)")

    # Check for images and labels
    images_path = os.path.join(data_path, 'images')
    labels_path = os.path.join(data_path, 'labels')

    if os.path.exists(images_path):
        print(f"\nImages directory found: {images_path}")
        for split in ('train', 'val', 'test'):
            split_path = os.path.join(images_path, split)
            if not os.path.exists(split_path):
                # Missing splits are skipped silently
                continue
            n_images = sum(1 for name in os.listdir(split_path) if name.endswith('.jpg'))
            print(f"  {split}: {n_images} images")

    if os.path.exists(labels_path):
        print(f"\nLabels directory found: {labels_path}")
        json_names = (name for name in os.listdir(labels_path) if name.endswith('.json'))
        for label_name in json_names:
            print(f"  {label_name}")


In [None]:
# Quick smoke test (2 samples, 2 steps): a minimal run of train_bdd100k.py on
# the train split to check that the script, the Drive data path, and the CUDA
# device all work before starting a longer run.
# The script is referenced by relative path, so the working directory must be
# the repository root.
!python train_bdd100k.py --data_dir /content/drive/MyDrive/10k_clean --split train --use_10k --max_samples 2 --steps 2 --device cuda


In [None]:
# Full training run on the train split: --max_samples 1000, 100 steps,
# batch size 1, learning rate 1e-5, on the CUDA device.
# NOTE(review): the exact effect of --use_10k is defined in train_bdd100k.py,
# which is not shown here; presumably it selects the 10k subset layout — confirm.
!python train_bdd100k.py --data_dir /content/drive/MyDrive/10k_clean --split train --use_10k --max_samples 1000 --steps 100 --batch_size 1 --lr 1e-5 --device cuda


In [None]:
# Validation run: invokes the same train_bdd100k.py script on the val split
# (--max_samples 100, 10 steps, batch size 1, learning rate 1e-5, CUDA).
# NOTE(review): this passes training flags (--steps, --lr) and no checkpoint
# path, so it may be training on val data rather than evaluating the model
# produced by the previous cell — confirm against train_bdd100k.py.
!python train_bdd100k.py --data_dir /content/drive/MyDrive/10k_clean --split val --use_10k --max_samples 100 --steps 10 --batch_size 1 --lr 1e-5 --device cuda


In [None]:
# Copy any training artifacts found in the current working directory to Drive.
# Only paths that actually exist are copied, and the final message reports
# what was (or was not) saved instead of always claiming success.
import shutil
import os

# Destination folder in Drive (created if it does not exist yet)
results_dir = '/content/drive/MyDrive/training_results'
os.makedirs(results_dir, exist_ok=True)

# Candidate artifact paths, relative to the current working directory.
# NOTE(review): whether train_bdd100k.py writes to any of these names is not
# visible from this notebook — confirm against the script.
local_results = ['checkpoints', 'logs', 'results']
saved_items = []
for item in local_results:
    if not os.path.exists(item):
        continue
    dest = os.path.join(results_dir, item)
    if os.path.isdir(item):
        # dirs_exist_ok=True merges into a copy left by an earlier run instead of raising
        shutil.copytree(item, dest, dirs_exist_ok=True)
    else:
        shutil.copy2(item, dest)
    saved_items.append(item)
    print(f"Saved {item} to Drive")

if saved_items:
    print(f"\nResults saved to: {results_dir}")
    print("Training completed!")
else:
    # Nothing matched: say so rather than reporting a successful save
    print(f"\n⚠️ None of {local_results} were found in {os.getcwd()}; nothing was copied to Drive.")
