In [1]:
! pip freeze > requirements.txt

In [2]:
import fiftyone as fo
import fiftyone.zoo as foz
import os
import shutil

Some utility functions to sort datatset images

In [3]:
# make directory with 'dir_name'
def make_dir(path, dir_name):
    if not path.endswith("/"):
        path+="/"
    if not (os.path.isdir(path+dir_name)):
        os.makedirs(path+dir_name)

In [4]:
# move all jpg images from 'src' to 'dest'
def move_imgs(src, dest):
    if not src.endswith("/"):
        src+="/"
    if not dest.endswith("/"):
        dest+="/"
    img_files = [x for x in os.listdir(src) if x.endswith(".jpg")]
    for img in img_files:
        shutil.move(src+img, dest+img)

Initializing variables

In [15]:
class_names = ['Airplane', 'Ambulance', 'Bicycle', 'Bus', 'Car', 'Fire hydrant', 'Helicopter', 
               'Motorcycle', 'Parking meter', 'Stop sign', 'Taxi', 'Traffic sign', 'Truck']

# Change below directories to those of yours
train_dir = "C:/Users/Everage/fiftyone/open-images-v6/train/data" # default path for downloaded images
validation_dir = "C:/Users/Everage/fiftyone/open-images-v6/validation/data" 
test_dir = "C:/Users/Everage/fiftyone/open-images-v6/test/data"

Downloading Open Images V6 dataset

In [46]:
dataset = foz.load_zoo_dataset(
    "open-images-v6",
    split="train", # ['train', 'val', 'test']
    label_types=['classifications'],
    classes=class_names[0],
    max_samples=1000,
    dataset_name="Open_images_train"
)
make_dir(train_dir, class_names[0])
move_imgs(train_dir, train_dir+"/"+class_names[0])

Downloading split 'train' to 'C:\Users\Everage\fiftyone\open-images-v6\train' if necessary
Downloading 1000 images
 100% |█████████████████| 1000/1000 [1.1m elapsed, 0s remaining, 8.7 files/s]       
Dataset info written to 'C:\Users\Everage\fiftyone\open-images-v6\info.json'
Loading 'open-images-v6' split 'train'
 100% |███████████████| 1000/1000 [2.7s elapsed, 0s remaining, 493.4 samples/s]      
Dataset 'Open_images_train' created


In [34]:
for name in class_names[1:]:
    make_dir(train_dir, name)
    sub_set = foz.load_zoo_dataset(
        "open-images-v6",
        split="train", # ['train', 'validation', 'test']
        label_types=['classifications'],
        classes=name,
        max_samples=1000,
        dataset_name="Open_images_train_sub"
    )
    move_imgs(train_dir, train_dir+f"/{name}")
    merged = dataset.merge_samples(sub_set)
    sub_set.delete()
    print(f"Downloaded Class: {name}")
    print()

Downloading split 'validation' to 'C:\Users\Everage\fiftyone\open-images-v6\validation' if necessary
Only found 33 (<500) samples matching your requirements
Necessary images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'Open_images_validation_sub'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use
Downloaded Class: Ambulance

Downloading split 'validation' to 'C:\Users\Everage\fiftyone\open-images-v6\validation' if necessary
Only found 381 (<500) samples matching your requirements
Downloading 381 images
 100% |███████████████████| 381/381 [24.1s elapsed, 0s remaining, 9.9 files/s]       
Dataset info written to 'C:\Users\Everage\fiftyone\open-images-v6\info.json'
Loading 'open-images-v6' split 'validation'
 100% |█████████████████| 381/381 [952.7ms elapsed, 0s remaining, 399.9 samples/s]      
Dataset 'Open_images_validation_sub' created
Downloaded Class: Bicycle

Downloading split 'val

In [36]:
dataset = foz.load_zoo_dataset(
    "open-images-v6",
    split="test", # ['train', 'val', 'test']
    label_types=['classifications'],
    classes=[class_names[0]],
    max_samples=200,
    dataset_name="Open_images_validation"
)
make_dir(test_dir, class_names[0])
move_imgs(test_dir, test_dir+"/"+class_names[0])

Downloading split 'test' to 'C:\Users\Everage\fiftyone\open-images-v6\test' if necessary
Downloading 'https://storage.googleapis.com/openimages/2018_04/test/test-images-with-rotation.csv' to 'C:\Users\Everage\fiftyone\open-images-v6\test\metadata\image_ids.csv'
Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to 'C:\Users\Everage\fiftyone\open-images-v6\test\metadata\classes.csv'
Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to 'C:\Users\Everage\AppData\Local\Temp\tmp86_ighuc\metadata\hierarchy.json'
Downloading 'https://storage.googleapis.com/openimages/v5/test-annotations-human-imagelabels-boxable.csv' to 'C:\Users\Everage\fiftyone\open-images-v6\test\labels\classifications.csv'
Downloading 200 images
 100% |███████████████████| 200/200 [12.9s elapsed, 0s remaining, 13.2 files/s]      
Dataset info written to 'C:\Users\Everage\fiftyone\open-images-v6\info.json'
Loading existing dataset 'Open_ima

In [39]:
for name in class_names[1:]:
    make_dir(test_dir, name)
    sub_set = foz.load_zoo_dataset(
        "open-images-v6",
        split="test", # ['train', 'validation', 'test']
        label_types=['classifications'],
        classes=name,
        max_samples=500,
        dataset_name="Open_images_validation_sub"
    )
    move_imgs(test_dir, test_dir+f"/{name}")
    merged = dataset.merge_samples(sub_set)
    sub_set.delete()
    print(f"Downloaded Class: {name}")
    print()

Downloading split 'test' to 'C:\Users\Everage\fiftyone\open-images-v6\test' if necessary
Only found 93 (<500) samples matching your requirements
Downloading 93 images
 100% |█████████████████████| 93/93 [9.0s elapsed, 0s remaining, 6.8 files/s]       
Dataset info written to 'C:\Users\Everage\fiftyone\open-images-v6\info.json'
Loading 'open-images-v6' split 'test'
 100% |███████████████████| 93/93 [199.2ms elapsed, 0s remaining, 467.0 samples/s]     
Dataset 'Open_images_validation_sub' created
Downloaded Class: Ambulance

Downloading split 'test' to 'C:\Users\Everage\fiftyone\open-images-v6\test' if necessary
Downloading 500 images
 100% |███████████████████| 500/500 [35.0s elapsed, 0s remaining, 7.5 files/s]       
Dataset info written to 'C:\Users\Everage\fiftyone\open-images-v6\info.json'
Loading 'open-images-v6' split 'test'
 100% |█████████████████| 500/500 [1.2s elapsed, 0s remaining, 409.8 samples/s]         
Dataset 'Open_images_validation_sub' created
Downloaded Class: Bicycl