In [4]:
##The code helps you to download images from Open images 

In [8]:
# Complete notebook-ready downloader with:
# - prompts for image classes (comma-separated)
# - caps images per class to MAX_IMAGES (50)
# - downloads into SB_SelfDrivingCar_ND/data (auto-detect project root)
# - if a label isn't found, suggests close matches and lets you pick interactively

import os
import difflib
from pathlib import Path

from openimages.download import download_dataset


MAX_IMAGES = 50
CSV_DIR = "openimages_csv"   # where the OpenImages label CSV will be cached
BOOTSTRAP_CLASS = "Dog"      # used to force-download label CSVs (usually valid)


def find_project_root(start: Path) -> Path:
    """
    Walk upwards looking for a likely project root.
    Heuristics:
      - folder named 'SB_SelfDrivingCar_ND'
      - or contains '.git'
      - or contains common project files
    """
    markers = {".git", "pyproject.toml", "requirements.txt", "README.md", "setup.py"}
    cur = start.resolve()

    for _ in range(10):  # don't walk forever
        if cur.name == "SB_SelfDrivingCar_ND":
            return cur
        if (cur / ".git").exists():
            return cur
        if any((cur / m).exists() for m in markers):
            return cur
        if cur.parent == cur:
            break
        cur = cur.parent

    return start.resolve()


def ensure_csv_available(csv_dir: str = CSV_DIR) -> Path:
    """
    Ensures 'class-descriptions-boxable.csv' exists in csv_dir.
    Uses a small bootstrap download to make the openimages package populate CSVs.
    """
    csv_path = Path(csv_dir) / "class-descriptions-boxable.csv"
    Path(csv_dir).mkdir(parents=True, exist_ok=True)

    if csv_path.exists():
        return csv_path

    # Bootstrap: trigger openimages to fetch the CSVs
    try:
        # Use a temp dest folder; we only care about CSVs landing in csv_dir
        tmp_dest = Path("__oi_bootstrap__")
        tmp_dest.mkdir(exist_ok=True)
        download_dataset(dest_dir=str(tmp_dest), class_labels=[BOOTSTRAP_CLASS], csv_dir=csv_dir, limit=1)
    except Exception:
        # Even if download fails, CSV might still have been fetched
        pass

    if not csv_path.exists():
        raise FileNotFoundError(
            f"Could not find '{csv_path}'. "
            f"Try running one successful download with a known valid class (e.g., Dog) "
            f"and ensure your environment can reach Open Images."
        )

    return csv_path


def load_labels(csv_dir: str = CSV_DIR):
    """
    Loads label list from class-descriptions-boxable.csv.
    File format: <MID>,<Label>
    """
    csv_path = ensure_csv_available(csv_dir)
    labels = []
    with open(csv_path, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip("\n").split(",", 1)
            if len(parts) == 2:
                labels.append(parts[1])
    return labels


def suggest_labels(user_label: str, labels, n=8):
    """Return close matches for a label (case-insensitive)."""
    # Try exact-ish suggestions first
    matches = difflib.get_close_matches(user_label, labels, n=n, cutoff=0.55)
    if matches:
        return matches

    # Case-insensitive fallback
    lower_map = {l.lower(): l for l in labels}
    matches_lower = difflib.get_close_matches(user_label.lower(), list(lower_map.keys()), n=n, cutoff=0.55)
    return [lower_map[m] for m in matches_lower]


def download_one_label(dest_dir: Path, label: str, limit: int, csv_dir: str = CSV_DIR):
    """
    Download a single label, handling the openimages package's IndexError when label isn't found.
    Returns True if downloaded, False if skipped.
    """
    try:
        download_dataset(dest_dir=str(dest_dir), class_labels=[label], csv_dir=csv_dir, limit=limit)
        print(f"✅ Downloaded: {label}")
        return True
    except IndexError:
        # This is the "label not found in CSV" crash you hit
        return False


def interactive_download(dest_dir: Path, requested_labels, limit: int, csv_dir: str = CSV_DIR):
    labels_db = load_labels(csv_dir=csv_dir)

    for raw_label in requested_labels:
        label = raw_label.strip()
        if not label:
            continue

        print(f"\n--- Processing: '{label}' ---")
        ok = download_one_label(dest_dir, label, limit, csv_dir=csv_dir)
        if ok:
            continue

        print(f"❌ '{label}' not found.")
        suggestions = suggest_labels(label, labels_db, n=8)
        if not suggestions:
            print("No close matches found. Try a simpler term (e.g., Car, Bus, Truck, Person).")
            continue

        print("Did you mean one of these?")
        for i, s in enumerate(suggestions, 1):
            print(f"  {i}) {s}")

        choice = input("Pick a number (or press Enter to skip): ").strip()
        if not choice:
            print("Skipping.")
            continue

        if not choice.isdigit() or not (1 <= int(choice) <= len(suggestions)):
            print("Invalid choice. Skipping.")
            continue

        chosen = suggestions[int(choice) - 1]
        print(f"➡️ Using: {chosen}")
        ok2 = download_one_label(dest_dir, chosen, limit, csv_dir=csv_dir)
        if not ok2:
            print("Still couldn’t download (unexpected). Skipping.")


# -------------------------
# MAIN (run in notebook)
# -------------------------

# 1) Set working directory to project root so data goes where you expect
project_root = find_project_root(Path.cwd())
os.chdir(project_root)
print("Working directory set to:", Path.cwd())

# 2) Ask user inputs
user_input = input("Enter image classes (comma separated, e.g. Car, Dog, Bus): ").strip()
requested = [c.strip() for c in user_input.split(",") if c.strip()]

if not requested:
    raise ValueError("No valid image classes entered.")

limit_input = input(f"How many images per class? (max {MAX_IMAGES}, default {MAX_IMAGES}): ").strip()
if limit_input:
    try:
        limit = int(limit_input)
    except ValueError:
        print("Invalid number. Using default.")
        limit = MAX_IMAGES
else:
    limit = MAX_IMAGES

if limit > MAX_IMAGES:
    print(f"Limit too high. Capping to {MAX_IMAGES}.")
    limit = MAX_IMAGES
if limit < 1:
    print("Limit too low. Using 1.")
    limit = 1

# 3) Set destination under SB_SelfDrivingCar_ND/data
dest_dir = Path.cwd() / "data"
dest_dir.mkdir(parents=True, exist_ok=True)

print(f"\nDownloading up to {limit} images per class into: {dest_dir}\n")

# 4) Download with suggestions on failure
interactive_download(dest_dir=dest_dir, requested_labels=requested, limit=limit, csv_dir=CSV_DIR)

print("\nDone. Check:", dest_dir)


Working directory set to: C:\Users\bsund\Udacity Projects\SB_SelfDrivingCar_ND


Enter image classes (comma separated, e.g. Car, Dog, Bus):  traffic lights
How many images per class? (max 50, default 50):  20



Downloading up to 20 images per class into: C:\Users\bsund\Udacity Projects\SB_SelfDrivingCar_ND\data



2026-02-04  23:00:40 INFO Downloading 1 train images for class 'dog'
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.46s/it]



--- Processing: 'traffic lights' ---
❌ 'traffic lights' not found.
Did you mean one of these?
  1) Traffic light
  2) Traffic sign
  3) Street light
  4) Office supplies


Pick a number (or press Enter to skip):  1


➡️ Using: Traffic light


2026-02-04  23:01:33 INFO Downloading 20 train images for class 'traffic light'
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:07<00:00,  2.51it/s]


✅ Downloaded: Traffic light

Done. Check: C:\Users\bsund\Udacity Projects\SB_SelfDrivingCar_ND\data


In [5]:
import os
from pathlib import Path

print("Current working directory:", os.getcwd())
print("Does ./data exist?", Path("data").exists())
print("Absolute path to ./data:", Path("data").resolve())

Current working directory: C:\Users\bsund\Udacity Projects\SB_SelfDrivingCar_ND\scripts
Does ./data exist? True
Absolute path to ./data: C:\Users\bsund\Udacity Projects\SB_SelfDrivingCar_ND\scripts\data
