# Zero-Shot Road Hazard Detection - Colab Training

This notebook trains cross-attention layers on BDD100K data for zero-shot object detection.


In [None]:
# Install dependencies
%pip install torch torchvision transformers matplotlib pillow numpy

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Clone repository
!git clone https://github.com/cykurd/open-vocab-detection.git /content/open-vocab-detection
%cd /content/open-vocab-detection


In [None]:
# Find your data directory
import os
from pathlib import Path

def find_10k_clean(base_paths=None):
    """Search for directories named '10k_clean' under a set of base locations.

    Args:
        base_paths: Optional iterable of directories to walk recursively.
            When omitted, the notebook's usual Google Drive locations are used.

    Returns:
        list[str]: Every '10k_clean' directory found, in discovery order,
        with each distinct path listed only once. Empty when nothing is found.
    """
    if base_paths is None:
        base_paths = [
            '/content/drive/MyDrive',
            '/content/drive/MyDrive/data',
            '/content/drive/MyDrive/bdd100k',
            '/content/drive/MyDrive/BDD100K',
        ]

    # The default bases overlap (the first one contains the others), so the
    # same directory can be reached from more than one walk. Keying a dict on
    # the normalized path drops the repeats while preserving first-seen order.
    found_paths = {}
    for base in base_paths:
        if not os.path.exists(base):
            continue
        for root, dirs, _files in os.walk(base):
            if '10k_clean' in dirs:
                path = os.path.join(root, '10k_clean')
                found_paths.setdefault(os.path.normpath(path), path)

    return list(found_paths.values())

# Honor an explicit DATA_DIR from the environment first: the failure message
# below tells the user to set it and rerun this cell, so a rerun must pick it
# up instead of searching (and failing) again.
env_data_dir = os.environ.get('DATA_DIR')

if env_data_dir and os.path.isdir(env_data_dir):
    found = [env_data_dir]
    DATA_DIR = env_data_dir
    print(f"Using DATA_DIR from environment: {DATA_DIR}")
else:
    if env_data_dir:
        # An override that does not point at a directory is ignored, not trusted.
        print(f"DATA_DIR is set to '{env_data_dir}' but that is not a directory; searching instead.")

    print("Searching for 10k_clean directory...")
    found = find_10k_clean()

    if found:
        print(f"Found {len(found)} potential 10k_clean directories:")
        for i, path in enumerate(found):
            print(f"  {i+1}. {path}")

        # Use the first one found
        DATA_DIR = found[0]
        print(f"\nUsing: {DATA_DIR}")
    else:
        print("❌ No 10k_clean directory found!")
        print("\nPlease check:")
        print("1. Is your folder named exactly '10k_clean'?")
        print("2. Is it uploaded to Google Drive?")
        print("3. Try uploading it to /content/drive/MyDrive/10k_clean")

        # Let user manually set the path
        print("\nIf you know the exact path, run this cell:")
        print("import os")
        print("os.environ['DATA_DIR'] = '/content/drive/MyDrive/your/exact/path/10k_clean'")
        print("Then rerun this cell.")
        # Stop here so later cells do not run without a dataset location.
        raise FileNotFoundError("Could not find 10k_clean directory")

# Set environment variable for shell commands
os.environ['DATA_DIR'] = DATA_DIR

# Report what is on disk under DATA_DIR so layout problems show up before training
print(f"\nVerifying structure at: {DATA_DIR}")
print(f"Exists: {os.path.exists(DATA_DIR)}")

if os.path.exists(DATA_DIR):
    print("\nDirectory contents:")
    try:
        for entry in os.listdir(DATA_DIR):
            entry_path = os.path.join(DATA_DIR, entry)
            if not os.path.isdir(entry_path):
                print(f"  {entry}")
                continue
            # Sub-directories are shown with their child count when it can be read.
            try:
                n_children = len(os.listdir(entry_path))
            except Exception:
                n_children = 'N/A'
            print(f"  {entry}/ ({n_children} items)")
    except Exception as err:
        print(f"Error listing directory: {err}")

    # Locations the expected layout places directly under DATA_DIR
    images_path = os.path.join(DATA_DIR, 'images')
    labels_path = os.path.join(DATA_DIR, 'labels')

    print(f"\nChecking for images directory: {images_path}")
    if not os.path.exists(images_path):
        print("❌ Images directory not found")
    else:
        print("✓ Images directory found")
        for split_name in ('train', 'val', 'test'):
            split_dir = os.path.join(images_path, split_name)
            if not os.path.exists(split_dir):
                print(f"  {split_name}: NOT FOUND")
                continue
            # Only file names ending in '.jpg' are counted as images.
            n_jpgs = sum(1 for fname in os.listdir(split_dir) if fname.endswith('.jpg'))
            print(f"  {split_name}: {n_jpgs} images")

    print(f"\nChecking for labels directory: {labels_path}")
    if not os.path.exists(labels_path):
        print("❌ Labels directory not found")
        print("Expected structure:")
        expected_layout = (
            "  10k_clean/",
            "    ├── images/",
            "    │   ├── train/",
            "    │   ├── val/",
            "    │   └── test/",
            "    └── labels/",
            "        ├── bdd100k_labels_train.json",
            "        ├── bdd100k_labels_val.json",
            "        └── bdd100k_labels_test.json",
        )
        for layout_line in expected_layout:
            print(layout_line)
    else:
        print("✓ Labels directory found")
        label_names = os.listdir(labels_path)
        for label_name in label_names:
            if label_name.endswith('.json'):
                print(f"  {label_name}")


In [None]:
# If the above didn't find your data, manually set the path here:
# Uncomment and modify the line below with your actual path
# import os
# os.environ['DATA_DIR'] = '/content/drive/MyDrive/your/actual/path/10k_clean'
# print(f"Manually set DATA_DIR to: {os.environ['DATA_DIR']}")

# Or if you need to explore your Drive structure:
# !ls -la /content/drive/MyDrive/
# !find /content/drive/MyDrive -name "*10k*" -type d


In [None]:
# Quick smoke test (2 samples, 2 steps)
!python train_bdd100k.py --data_dir $DATA_DIR --split train --use_10k --max_samples 2 --steps 2 --device cuda


In [None]:
# Full training run
!python train_bdd100k.py --data_dir $DATA_DIR --split train --use_10k --max_samples 1000 --steps 100 --batch_size 1 --lr 1e-5 --device cuda


In [None]:
# Validation run
!python train_bdd100k.py --data_dir $DATA_DIR --split val --use_10k --max_samples 100 --steps 10 --batch_size 1 --lr 1e-5 --device cuda


In [None]:
# Save any generated checkpoints or results to Drive
import shutil
import os

# Create results directory in Drive
results_dir = '/content/drive/MyDrive/training_results'
os.makedirs(results_dir, exist_ok=True)

# Look for any generated files (paths are relative to the current working directory)
local_results = ['checkpoints', 'logs', 'results']
saved_items = []  # what was actually copied, so the summary below is truthful
for item in local_results:
    if not os.path.exists(item):
        continue
    dest = os.path.join(results_dir, item)
    if os.path.isdir(item):
        # dirs_exist_ok lets a re-run merge into a copy made by an earlier run.
        shutil.copytree(item, dest, dirs_exist_ok=True)
    else:
        shutil.copy2(item, dest)
    saved_items.append(item)
    print(f"Saved {item} to Drive")

if saved_items:
    print(f"\nResults saved to: {results_dir}")
else:
    # Do not claim success when there was nothing to copy.
    print(f"\n⚠️ None of {local_results} were found in {os.getcwd()}; nothing was copied to {results_dir}")
print("Training completed!")
