In [10]:
import numpy as np
import pandas as pd
import os
import json
import random
from pathlib import Path
from tqdm import tqdm
import shutil

In [11]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle

In [12]:
print("\n📦 Installing Roboflow...")
!pip install -q roboflow


📦 Installing Roboflow...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m81.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m117.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [13]:
from getpass import getpass
# Ask for the Roboflow API key interactively instead of hard-coding it in the
# notebook; getpass does not echo what is typed. The value is kept in `api_key`
# for the cell that builds the Roboflow client.
api_key = getpass("Paste your Roboflow API key here: ")


Paste your Roboflow API key here: ··········


In [14]:
# Status banner for the download step (informational only, no side effects
# other than printing).
for banner_line in (
    "\n📥 Downloading BDD100K from Roboflow...",
    "⚡ This will be in YOLO format - no conversion needed!",
):
    print(banner_line)


📥 Downloading BDD100K from Roboflow...
⚡ This will be in YOLO format - no conversion needed!


In [15]:
from roboflow import Roboflow

# Initialize the Roboflow client with the key captured in `api_key` by the
# getpass prompt; `rf` is the handle the download cell uses to reach the
# workspace and project.
rf = Roboflow(api_key=api_key)

In [20]:
# Dataset coordinates on Roboflow Universe. Edit them HERE (one place) to
# switch datasets; the status messages below are derived from these values so
# they cannot drift out of sync with what is actually downloaded.
WORKSPACE_NAME = "pedro-azevedo-3c9ol"
PROJECT_NAME = "bdd100k-3zgda"
DATASET_VERSION = 5                      # pinned so re-runs fetch the same export
EXPORT_FORMAT = "yolov9"                 # format identifier passed to download()
DATASET_LOCATION = "/content/bdd100k"    # where the export is unpacked

try:
    # Access Pedro Azevedo's BDD100K dataset (9900 images, cars + pedestrians)
    project = rf.workspace(WORKSPACE_NAME).project(PROJECT_NAME)

    print("✓ Found BDD100K dataset!")
    print(f"  Workspace: {WORKSPACE_NAME}")
    print(f"  Project: {PROJECT_NAME}")
    print("  Images: ~9,900 (cars + pedestrians)")

    # Select the pinned dataset version
    version = project.version(DATASET_VERSION)

    # Download in YOLOv9 format
    print("\n📥 Downloading dataset...")
    print("   Format: YOLOv9")
    print(f"   Location: {DATASET_LOCATION}")

    dataset = version.download(EXPORT_FORMAT, location=DATASET_LOCATION)

    print("\n✓ Download complete!")

except Exception as e:
    # Deliberately broad: any failure (bad key, missing project, network error)
    # is reported together with instructions for picking another dataset,
    # rather than surfacing a raw traceback. The error text itself is printed
    # so the cause is not hidden.
    print("\n⚠️  Could not access that specific dataset.")
    print(f"Error: {e}")
    print("\n📝 Alternative: Search for BDD100K on Roboflow Universe")
    print("   Go to: https://universe.roboflow.com/")
    print("   Search: 'BDD100K' or 'Berkeley Deep Drive'")
    print("   Choose a dataset and use its workspace/project names")
    print("\n   Then update the code:")
    print("   project = rf.workspace('WORKSPACE_NAME').project('PROJECT_NAME')")

loading Roboflow workspace...
loading Roboflow project...
✓ Found BDD100K dataset!
  Workspace: pedro-azevedo-3c9ol
  Project: bdd100k-3zgda
  Images: ~9,900 (cars + pedestrians)

📥 Downloading dataset...
   Format: YOLOv9
   Location: /content/bdd100k


Downloading Dataset Version Zip in /content/bdd100k to yolov9:: 100%|██████████| 419570/419570 [00:24<00:00, 17158.55it/s]





Extracting Dataset Version Zip to /content/bdd100k in yolov9:: 100%|██████████| 18570/18570 [00:02<00:00, 7636.13it/s]


✓ Download complete!





In [21]:
def count_images(image_dir, extensions=('.jpg', '.png')):
    """Count the image files directly inside ``image_dir``.

    Args:
        image_dir: Directory to scan (not recursive).
        extensions: Filename suffixes that count as images.

    Returns:
        Number of entries whose name ends with one of ``extensions``;
        0 when ``image_dir`` does not exist or is not a directory.
    """
    if not os.path.isdir(image_dir):
        return 0
    suffixes = tuple(extensions)
    return sum(1 for name in os.listdir(image_dir) if name.endswith(suffixes))


# Defined here so the cell works on a fresh kernel (Restart & Run All); it was
# previously not assigned in any visible cell. Must match the `location`
# passed to `version.download(...)` in the download cell.
dataset_path = "/content/bdd100k"

if os.path.exists(dataset_path):
    print(f"\n📁 Dataset location: {dataset_path}")

    # Show structure: every directory, with at most five sample files each.
    for root, dirs, files in os.walk(dataset_path):
        # Depth relative to the dataset root (root itself is level 0).
        rel = os.path.relpath(root, dataset_path)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = '  ' * level
        print(f'{indent}📁 {os.path.basename(root)}/')
        subindent = '  ' * (level + 1)
        for file in files[:5]:
            print(f'{subindent}📄 {file}')
        if len(files) > 5:
            print(f'{subindent}... and {len(files)-5} more files')
        if level > 2:
            # Prune this subtree only. A `break` here would abort the whole
            # walk and silently hide every directory not yet visited.
            dirs[:] = []

    # Find data.yaml (any .yaml/.yml file anywhere under the dataset root)
    yaml_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(dataset_path)
        for file in files
        if file.endswith(('.yaml', '.yml'))
    ]

    if yaml_files:
        # The first match wins; os.walk is top-down, so files in the dataset
        # root are seen before files in subdirectories.
        yaml_path = yaml_files[0]
        print(f"\n✓ Found config file: {yaml_path}")
        print("\nConfig contents:")
        with open(yaml_path, 'r') as f:
            print(f.read())
    else:
        print("\n⚠️  No YAML config found")
        yaml_path = None

    # Count images in the train / valid splits (0 if a split is missing)
    train_imgs = count_images(f"{dataset_path}/train/images")
    val_imgs = count_images(f"{dataset_path}/valid/images")

    print("\n📊 Dataset Statistics:")
    print(f"  Training images: {train_imgs}")
    print(f"  Validation images: {val_imgs}")
    print(f"  Total: {train_imgs + val_imgs}")

else:
    print(f"\n⚠️  Dataset not found at {dataset_path}")
    # Later cells test `yaml_path` to decide whether setup succeeded.
    yaml_path = None


📁 Dataset location: /content/bdd100k
📁 bdd100k/
  📄 README.roboflow.txt
  📄 README.dataset.txt
  📄 data.yaml
  📁 valid/
    📁 images/
      📄 c6580b8a-368c5647_jpg.rf.ea08a22b0420d30ca234c6e437fda52c.jpg
      📄 c376fce7-54f5721d_jpg.rf.73dfd9ced4c0219520fe5124116e6c68.jpg
      📄 b5dd8a5f-ef468c83_jpg.rf.bce953e39cfe60551e26f5b96af4ef6e.jpg
      📄 c65628b7-78e929a2_jpg.rf.1d06ca6ff57d096a7ff6c38b2ae3dced.jpg
      📄 c08eacf9-8054501c_jpg.rf.cfed03ba928e50631b651f33e64e8d04.jpg
      ... and 2003 more files
    📁 labels/
      📄 b4301c7b-da5261f0_jpg.rf.20bc5076070624a175393c5d2354813a.txt
      📄 c91d0862-74c58649_jpg.rf.0572ae7df0c16dd5155a815043ab115c.txt
      📄 c01c4d85-a68b0ac8_jpg.rf.1e1a6f232fd461af6ef59f57a0a2d848.txt
      📄 bf3ce441-1f58c6c7_jpg.rf.59cc72c6f3991fe815b57092225882c2.txt
      📄 b83e94b9-fc9ee1c7_jpg.rf.7a3c6096a4ff12e6b4eb1f2ba07084ab.txt
      ... and 2003 more files
  📁 train/
    📁 images/
      📄 b88a00df-e8484d02_jpg.rf.6ab9d8b93302e65a46172baeba478fd0.

In [23]:
print("\n📥 Cloning YOLOv9 repository...")
!git clone https://github.com/WongKinYiu/yolov9.git
%cd yolov9

# Install requirements
print("\n📦 Installing requirements...")
!pip install -q -r requirements.txt

# Download pre-trained weights
print("\n⬇️  Downloading pre-trained weights...")
!wget -q https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c.pt

if os.path.exists('yolov9-c.pt'):
    print("✓ Pre-trained weights downloaded: yolov9-c.pt")
else:
    print("⚠️  Failed to download weights")


📥 Cloning YOLOv9 repository...
Cloning into 'yolov9'...
remote: Enumerating objects: 781, done.[K
remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1)[K
Receiving objects: 100% (781/781), 3.27 MiB | 3.56 MiB/s, done.
Resolving deltas: 100% (330/330), done.
/content/yolov9

📦 Installing requirements...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m65.4 MB/s[0m eta [36m0:00:00[0m
[?25h
⬇️  Downloading pre-trained weights...
✓ Pre-trained weights downloaded: yolov9-c.pt


In [24]:
# Readiness gate: a dataset config must have been located (`yaml_path` is
# truthy) and the pre-trained checkpoint must exist in the working directory.
setup_ok = bool(yaml_path) and os.path.exists('yolov9-c.pt')

if not setup_ok:
    print("\n⚠️  Setup incomplete. Please check the errors above.")
else:
    print("\n✅ Everything is ready!")
    print("\n📝 Training command:")
    # The command is only displayed here, not executed.
    print(f"python train.py --batch 16 --epochs 100 --img 640 --data {yaml_path} --weights yolov9-c.pt --device 0 --name bdd100k_pedestrian_vehicle")

    print("\n💡 Tips:")
    for tip in (
        "  • Adjust --batch based on GPU memory (16/8/4)",
        "  • Increase --img to 1280 for better accuracy",
        "  • Use --epochs 50 for quick testing",
        "  • Monitor training with TensorBoard",
    ):
        print(tip)

    print("\n🎯 For distance estimation, we'll add that after training!")


✅ Everything is ready!

📝 Training command:
python train.py --batch 16 --epochs 100 --img 640 --data /content/bdd100k/data.yaml --weights yolov9-c.pt --device 0 --name bdd100k_pedestrian_vehicle

💡 Tips:
  • Adjust --batch based on GPU memory (16/8/4)
  • Increase --img to 1280 for better accuracy
  • Use --epochs 50 for quick testing
  • Monitor training with TensorBoard

🎯 For distance estimation, we'll add that after training!


In [27]:
%cd /content/yolov9

# Use ultralytics YOLO instead (simpler, more reliable)
!pip install -q ultralytics

from ultralytics import YOLO

# Load pretrained model
model = YOLO('yolov9c.pt')

# Train
results = model.train(
    data='/content/bdd100k/data.yaml',
    epochs=30,
    imgsz=416,
    batch=16,
    device=0,
    project='runs/train',
    name='bdd10k_fast',
    cache=True,
    patience=10
)

/content/yolov9
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m71.4 MB/s[0m eta [36m0:00:00[0m
[?25hCreating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov9c.pt to 'yolov9c.pt': 100% ━━━━━━━━━━━━ 49.4MB 22.3MB/s 2.2s
Ultralytics 8.3.214 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/bdd100k/data.yaml, degrees=0