In [None]:
import os
import requests
from ddgs import DDGS
from time import sleep
from tqdm import tqdm
import random
import kagglehub
import shutil
# Root output directory for the dataset. add_non_bmw() creates the
# 'non_bmw_cars' and 'non_cars' subfolders inside it; the path is relative
# to the current working directory.
dataset_folder = "ddg_custom_bmw_dataset" #output downloaded imgs

# Search phrases used to collect "not a car at all" negative examples.
_NON_CAR_QUERIES = (
    "forest landscape",
    "city street people",
    "plate of food",
    "living room interior",
    "cat and dog",
    "mountain scenery",
    "beach sunset",
    "computer on desk",
    "bird in tree",
    "ocean waves",
    "snowy mountain",
    "desert landscape",
    "rainy city street",
    "tropical island",
    "night sky stars",
    "busy marketplace",
    "ancient ruins",
    "colorful hot air balloons",
    "wild animals safari",
    "house exterior",
    "kitchen appliances",
    "musical instruments",
    "sports equipment",
    "children playing",
    "office workspace",
    "gardening tools",
    "underwater coral reef",
    "festival celebration",
    "historical monument",
    "fashion accessories",
)

# How many times a single query is attempted before it is skipped.
_MAX_QUERY_ATTEMPTS = 3


def _count_prefixed(directory: str, prefix: str) -> int:
    """Return how many entries in *directory* have a name starting with *prefix*."""
    return sum(1 for name in os.listdir(directory) if name.startswith(prefix))


def _copy_non_bmw_cars(stanford_src: str, destination_directory: str, target_count: int) -> int:
    """Fill ``other_car_0.jpg`` .. ``other_car_{target_count - 1}.jpg`` with random non-BMW images.

    Only the slots that do not exist yet are filled, so the function can be
    re-run to resume an interrupted copy. A class folder is picked uniformly at
    random and an unused image is taken from it, so one call never copies the
    same source image twice.

    Args:
        stanford_src: Directory that contains one sub-folder per car class.
        destination_directory: Folder receiving the copies (created if missing).
        target_count: Number of ``other_car_*.jpg`` files wanted in total.

    Returns:
        The number of slots that are filled once the function returns. This is
        lower than ``target_count`` only when the source runs out of images.
    """
    os.makedirs(destination_directory, exist_ok=True)

    missing = [
        index
        for index in range(target_count)
        if not os.path.exists(os.path.join(destination_directory, f"other_car_{index}.jpg"))
    ]
    if not missing:
        return target_count

    # Per-class pools of not-yet-used images, shuffled so pop() is a random draw.
    pools = {}
    for folder in os.listdir(stanford_src):
        if "bmw" in folder.lower():
            continue
        folder_path = os.path.join(stanford_src, folder)
        if not os.path.isdir(folder_path):
            continue  # stray files next to the class folders
        files = os.listdir(folder_path)
        if files:
            random.shuffle(files)
            pools[folder] = files
    folders = list(pools)

    filled = target_count - len(missing)
    for index in missing:
        if not folders:
            break  # source exhausted: stop instead of looping forever
        folder = random.choice(folders)
        image_name = pools[folder].pop()
        if not pools[folder]:
            folders.remove(folder)

        src = os.path.join(stanford_src, folder, image_name)
        dst = os.path.join(destination_directory, f"other_car_{index}.jpg")
        shutil.copyfile(src, dst)
        filled += 1
    return filled


def _download_query_images(query: str, prefix: str, destination_directory: str, needed: int) -> int:
    """Run one image search for *query* and save up to *needed* results.

    Files are written as ``{prefix}_{NNN}.jpg`` using the lowest indices that
    are still free, so existing files are never overwritten.

    Args:
        query: Search phrase passed to the image search.
        prefix: File-name prefix for the saved images.
        destination_directory: Existing folder the images are written to.
        needed: Maximum number of images to save.

    Returns:
        The number of images saved by this call.

    Raises:
        Exception: Whatever the search client raises, and ``OSError`` when a
            file cannot be written. The caller treats both as a failed attempt.
    """
    saved = 0
    index = 0
    with DDGS() as ddgs:
        results = ddgs.images(
            query,
            region="wt-wt",
            safesearch="off",
            # Many result links are dead or not real images, so ask for far
            # more links than are needed.
            max_results=100,
        )
        # progress bar for the individual query
        with tqdm(total=needed, desc=f"--> {query}", unit="img", leave=False) as pbar:
            for r in results:
                if saved >= needed:
                    break

                try:
                    response = requests.get(r["image"], timeout=3)
                except (KeyError, requests.RequestException):
                    continue  # dead link / malformed result: try the next one

                if response.status_code != 200 or not response.content:
                    continue
                # Skip bodies the server explicitly labels as something other
                # than an image (e.g. an HTML error page served with a 200).
                content_type = response.headers.get("Content-Type", "")
                if content_type and not content_type.lower().startswith("image/"):
                    continue

                # Advance to the next unused file name.
                target = os.path.join(destination_directory, f"{prefix}_{index:03d}.jpg")
                while os.path.exists(target):
                    index += 1
                    target = os.path.join(destination_directory, f"{prefix}_{index:03d}.jpg")

                with open(target, "wb") as f:
                    f.write(response.content)

                index += 1
                saved += 1
                pbar.update(1)
    return saved


def add_non_bmw(num_other_cars: int = 500, limit_per_query: int = 35) -> None:
    """Populate the negative classes of the dataset under ``dataset_folder``.

    Two folders are filled:

    * ``non_bmw_cars``: random non-BMW images copied from the Stanford Cars
      dataset, which is downloaded through ``kagglehub``.
    * ``non_cars``: images of unrelated subjects downloaded from an image
      search, ``limit_per_query`` per search phrase.

    Both steps skip work that is already on disk, so the function can be
    called again to resume an interrupted run.

    Args:
        num_other_cars: Number of non-BMW car images wanted in ``non_bmw_cars``.
        limit_per_query: Number of images wanted per non-car search phrase.
    """
    # first using stanford cars dataset to add non bmw cars
    print("grabbing stanford cars dataset by jkrause / jutrera")
    path = kagglehub.dataset_download("jutrera/stanford-car-dataset-by-classes-folder")
    stanford_src = os.path.join(path, 'car_data', 'car_data', 'train')

    filled = _copy_non_bmw_cars(
        stanford_src,
        os.path.join(dataset_folder, 'non_bmw_cars'),
        num_other_cars,
    )
    print(f"{filled} non bmw cars added to non_bmw_cars folder")

    # next using ddgs to add non cars
    destination_directory = os.path.join(dataset_folder, 'non_cars')
    os.makedirs(destination_directory, exist_ok=True)

    for query in tqdm(_NON_CAR_QUERIES, desc="Non-Car Queries"):
        prefix = query.replace(" ", "_")
        initial = _count_prefixed(destination_directory, prefix)
        if initial >= limit_per_query:
            continue

        for attempt in range(1, _MAX_QUERY_ATTEMPTS + 1):
            # Re-count on every attempt so a retry continues after the files a
            # failed attempt already saved instead of overwriting them.
            needed = limit_per_query - _count_prefixed(destination_directory, prefix)
            try:
                if needed > 0:
                    _download_query_images(query, prefix, destination_directory, needed)
            except Exception as e:  # retry boundary: report, then retry or give up
                tqdm.write(f" error on {query}: {e}")
                if attempt < _MAX_QUERY_ATTEMPTS:
                    tqdm.write("   waiting 10 seconds before retrying...")
                    sleep(10)  # retry
                else:
                    tqdm.write(f"   giving up on {query} after {_MAX_QUERY_ATTEMPTS} attempts")
            else:
                # finished without crashing, stop retrying
                new_images = _count_prefixed(destination_directory, prefix) - initial
                tqdm.write(f" Finished!! {query} (+{new_images} new images)")
                break

if __name__ == "__main__":
    # Script entry point: announce the run, then build the negative-class
    # folders.
    print(" starting/resuming download")
    add_non_bmw()
    

  from .autonotebook import tqdm as notebook_tqdm


 starting/resuming download
grabbing stanford cars dataset by jkrause / jutrera
500 non bmw cars added to non_bmw_cars folder


Non-Car Queries:   3%|▎         | 1/30 [00:42<20:45, 42.95s/it]

 Finished!! forest landscape (+35 new images)


Non-Car Queries:   7%|▋         | 2/30 [00:56<12:05, 25.91s/it]

 Finished!! city street people (+35 new images)


Non-Car Queries:  10%|█         | 3/30 [01:21<11:26, 25.43s/it]

 Finished!! plate of food (+35 new images)


Non-Car Queries:  13%|█▎        | 4/30 [01:44<10:31, 24.28s/it]

 Finished!! living room interior (+35 new images)


Non-Car Queries:  17%|█▋        | 5/30 [02:03<09:20, 22.42s/it]

 Finished!! cat and dog (+35 new images)


Non-Car Queries:  20%|██        | 6/30 [02:22<08:33, 21.40s/it]

 Finished!! mountain scenery (+35 new images)


Non-Car Queries:  23%|██▎       | 7/30 [02:42<07:57, 20.76s/it]

 Finished!! beach sunset (+35 new images)


Non-Car Queries:  27%|██▋       | 8/30 [02:56<06:51, 18.70s/it]

 Finished!! computer on desk (+35 new images)


Non-Car Queries:  30%|███       | 9/30 [03:21<07:11, 20.56s/it]

 Finished!! bird in tree (+35 new images)


Non-Car Queries:  33%|███▎      | 10/30 [03:38<06:31, 19.55s/it]

 Finished!! ocean waves (+35 new images)


Non-Car Queries:  37%|███▋      | 11/30 [03:55<05:57, 18.80s/it]

 Finished!! snowy mountain (+35 new images)


Non-Car Queries:  40%|████      | 12/30 [04:20<06:11, 20.66s/it]

 Finished!! desert landscape (+35 new images)


Non-Car Queries:  43%|████▎     | 13/30 [04:35<05:20, 18.86s/it]

 Finished!! rainy city street (+35 new images)


Non-Car Queries:  47%|████▋     | 14/30 [05:01<05:35, 20.94s/it]

 Finished!! tropical island (+35 new images)


Non-Car Queries:  50%|█████     | 15/30 [05:24<05:27, 21.84s/it]

 Finished!! night sky stars (+35 new images)


Non-Car Queries:  53%|█████▎    | 16/30 [05:47<05:08, 22.02s/it]

 Finished!! busy marketplace (+35 new images)


Non-Car Queries:  57%|█████▋    | 17/30 [06:05<04:32, 20.98s/it]

 Finished!! ancient ruins (+35 new images)


Non-Car Queries:  60%|██████    | 18/30 [06:23<04:00, 20.00s/it]

 Finished!! colorful hot air balloons (+35 new images)


Non-Car Queries:  63%|██████▎   | 19/30 [06:48<03:55, 21.39s/it]

 Finished!! wild animals safari (+35 new images)


Non-Car Queries:  67%|██████▋   | 20/30 [07:09<03:33, 21.37s/it]

 Finished!! house exterior (+35 new images)


Non-Car Queries:  70%|███████   | 21/30 [07:35<03:25, 22.87s/it]

 Finished!! kitchen appliances (+35 new images)


Non-Car Queries:  73%|███████▎  | 22/30 [08:04<03:17, 24.67s/it]

 Finished!! musical instruments (+35 new images)


Non-Car Queries:  77%|███████▋  | 23/30 [08:22<02:37, 22.56s/it]

 Finished!! sports equipment (+35 new images)


Non-Car Queries:  80%|████████  | 24/30 [08:47<02:19, 23.29s/it]

 Finished!! children playing (+35 new images)


Non-Car Queries:  83%|████████▎ | 25/30 [09:26<02:19, 27.91s/it]

 Finished!! office workspace (+35 new images)


Non-Car Queries:  87%|████████▋ | 26/30 [09:46<01:43, 25.77s/it]

 Finished!! gardening tools (+35 new images)


Non-Car Queries:  90%|█████████ | 27/30 [10:01<01:07, 22.37s/it]

 Finished!! underwater coral reef (+35 new images)


Non-Car Queries:  93%|█████████▎| 28/30 [10:35<00:51, 25.99s/it]

 Finished!! festival celebration (+35 new images)


Non-Car Queries:  97%|█████████▋| 29/30 [11:19<00:31, 31.38s/it]

 Finished!! historical monument (+35 new images)


Non-Car Queries: 100%|██████████| 30/30 [11:43<00:00, 23.43s/it]

 Finished!! fashion accessories (+35 new images)



