# Init

In [1]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F
import fiftyone.brain as fob

In [7]:
# ---- Export configuration: edit these values before running the export cells ----

# Label classes used for filtering and passed as `classes` on export
CLASSES_OF_INTEREST = ["Motorcycle", "Car", "Truck"]

# Sample field that holds the labels to export; required for labeled
# dataset formats (e.g., classification or detection)
LABEL_FIELD = "ground_truth"

# On-disk format of the export (YOLOv5).
# COCO alternative: fo.types.COCODetectionDataset
EXPORT_DATASET_TYPE = fo.types.YOLOv5Dataset

# Directory that receives the exported dataset
EXPORT_DIR = "test_export2"

In [2]:
# Overview of what is available: zoo catalogue, zoo datasets already on disk,
# and the datasets registered in the local FiftyOne database.

# Zoo catalogue (banner, list, blank separator line)
available_zoo = foz.list_zoo_datasets()
print('######### Available zoo dataset #########', available_zoo, '', sep='\n')

# Zoo datasets already downloaded, pretty-printed
downloaded_zoo = foz.list_downloaded_zoo_datasets()
print('######### Donwloaded zoo dataset #########')
fo.pprint(downloaded_zoo)
print()

# Datasets in the database
print('######### Dataset list #########', fo.list_datasets(), sep='\n')

# Last expression: also shown as the cell's output value
foz.list_downloaded_zoo_datasets()

######### Available zoo dataset #########
['bdd100k', 'caltech101', 'caltech256', 'cifar10', 'cifar100', 'cityscapes', 'coco-2014', 'coco-2017', 'fashion-mnist', 'hmdb51', 'imagenet-2012', 'imagenet-sample', 'kitti', 'lfw', 'mnist', 'open-images-v6', 'quickstart', 'quickstart-geo', 'quickstart-video', 'ucf101', 'voc-2007', 'voc-2012']

######### Donwloaded zoo dataset #########
{
    'open-images-v6': (
        '/home/dhp/fiftyone/open-images-v6',
        <fiftyone.zoo.datasets.ZooDatasetInfo object at 0x7f88601cd1d0>,
    ),
}

######### Dataset list #########
['open-image-vehicle_aug', 'open-images-v6', 'open-images-v6-train-test-validation', 'open-images-v6_mod']


{'open-images-v6': ('/home/dhp/fiftyone/open-images-v6',
  <fiftyone.zoo.datasets.ZooDatasetInfo at 0x7f88601d2490>)}

In [4]:
# `clear_output` is imported here and reused by later cells, so this cell has
# to run before them.
from IPython.display import clear_output
# Launch the FiftyOne App with no dataset argument; `session` is the handle
# that later cells use to change what the App displays.
session = fo.launch_app()
# session.open_tab()
# Wipe whatever the launch printed into this cell's output area.
clear_output()

# Delete dataset

In [4]:
# Delete a dataset from the FiftyOne database.
#
# This is destructive, and the "Load dataset" section loads a dataset with the
# same name, so a plain "Run All" must not delete it by accident.
# Deletion is therefore opt-in: set CONFIRM_DELETE to True, run this cell,
# then set it back to False.
DATASET_TO_DELETE = 'open-image-vehicle_aug'
CONFIRM_DELETE = False

if not CONFIRM_DELETE:
    print(f"Skipping delete of '{DATASET_TO_DELETE}' (CONFIRM_DELETE is False)")
elif DATASET_TO_DELETE not in fo.list_datasets():
    # Nothing to do; avoids an error when the cell is re-run after a delete
    print(f"Dataset '{DATASET_TO_DELETE}' does not exist; nothing to delete")
else:
    fo.delete_dataset(name=DATASET_TO_DELETE)
    print(f"Deleted dataset '{DATASET_TO_DELETE}'")

# Load dataset

In [None]:
# # # load a dataset from the zoo and save to database

# dataset = foz.load_zoo_dataset(
#     "open-images-v6",
#     splits=["test", 'validation'],
#     dataset_name="open-images-v6",
#     download_if_necessary=False,   
# )
# dataset_or_view = dataset
# dataset_or_view

In [5]:
# # Load existing dataset from the database
# The dataset must already be registered under this name (it will not be if
# it has just been deleted).

dataset = fo.load_dataset(name="open-image-vehicle_aug")
# `dataset_or_view` is the "current selection" that the view/export cells
# operate on; it starts out as the whole dataset.
dataset_or_view = dataset
# Last expression: displays the dataset summary as the cell output.
dataset_or_view

Name:        open-image-vehicle_aug
Media type:  image
Num samples: 58653
Persistent:  True
Tags:        ['train', 'train_aug', 'validation', 'validation_aug']
Sample fields:
    id:                   fiftyone.core.fields.ObjectIdField
    filepath:             fiftyone.core.fields.StringField
    tags:                 fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:             fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    uniqueness:           fiftyone.core.fields.FloatField
    lightness:            fiftyone.core.fields.FloatField
    mistakenness_eval_tp: fiftyone.core.fields.IntField
    mistakenness_eval_fp: fiftyone.core.fields.IntField
    mistakenness_eval_fn: fiftyone.core.fields.IntField

In [None]:
# # # Load dataset from the directories

# name = "open-image-vehicle_aug"
# dataset_dir = "test_export"

# # The splits to load
# splits = ["train", "validation","train_aug", "validation_aug"]

# # Load the dataset, using tags to mark the samples in each split
# dataset = fo.Dataset(name)
# for split in splits:
#     dataset.add_dir(
#         dataset_dir=dataset_dir,
#         dataset_type=fo.types.YOLOv5Dataset,
#         split=split,
#         tags=split,
# )

# # View summary info about the dataset
# dataset_or_view = dataset
# dataset

In [None]:
# dataset.default_classes

In [None]:
# dataset.save()

In [None]:
# make dataset persistent
# (the rename on the next line is intentionally left disabled here)
# dataset.name = 'open-images-v6'
dataset.persistent = True

In [None]:
# Build a view in which the LABEL_FIELD labels are filtered to those whose
# "label" value is one of CLASSES_OF_INTEREST.
view = dataset.filter_labels(LABEL_FIELD, F("label").is_in(CLASSES_OF_INTEREST))
# view = view.match_tags('validation')
# Make the filtered view the current selection for the cells that follow.
dataset_or_view = view

In [7]:
# Show the current selection in the running App session.
# NOTE(review): `.view()` is presumably called so that this works whether
# `dataset_or_view` holds a dataset or a view - confirm.
session.view = dataset_or_view.view()
# `clear_output` comes from the import in the App-launch cell.
clear_output()

# Modify dataset

In [None]:
# Rename the dataset.
# NOTE(review): the dataset list printed earlier in this notebook already
# contains 'open-images-v6'; presumably the rename fails if that name is
# still taken - confirm before running.
dataset.name = 'open-images-v6'

# Export data and labels

In [None]:
# Export the current selection (dataset or view) using the settings from the
# configuration cell.
export_args = {
    "export_dir": EXPORT_DIR,
    "dataset_type": EXPORT_DATASET_TYPE,
    "label_field": LABEL_FIELD,
    "classes": CLASSES_OF_INTEREST,
}
dataset_or_view.export(**export_args)

In [8]:
# Show how many samples carry each sample tag (one entry per tag).
dataset.count_sample_tags()

{'train_aug': 28092,
 'validation_aug': 9380,
 'train': 15882,
 'validation': 5299}

In [8]:
# Export each tagged split into EXPORT_DIR, one export call per split.

# Start again from the whole dataset rather than a previously selected view
dataset_or_view = dataset

# Tags to export; 'test' is deliberately left out
splits_to_export = ('train', 'validation', 'train_aug', 'validation_aug')

# Arguments that are identical for every split
shared_export_args = dict(
    export_dir=EXPORT_DIR,
    dataset_type=EXPORT_DATASET_TYPE,
    label_field=LABEL_FIELD,
    classes=CLASSES_OF_INTEREST,
)

for split in splits_to_export:
    # The samples carrying this tag form the split
    split_view = dataset_or_view.match_tags(split)
    split_view.export(split=split, **shared_export_args)



 100% |█████████████| 15882/15882 [2.0m elapsed, 0s remaining, 249.3 samples/s]      
Directory 'test_export2' already exists; export will be merged with existing files
 100% |███████████████| 5299/5299 [40.5s elapsed, 0s remaining, 212.2 samples/s]      
Directory 'test_export2' already exists; export will be merged with existing files
 100% |█████████████| 28092/28092 [3.3m elapsed, 0s remaining, 225.3 samples/s]      
Directory 'test_export2' already exists; export will be merged with existing files
 100% |███████████████| 9380/9380 [1.3m elapsed, 0s remaining, 223.9 samples/s]       


# Modify dataset and save to database

## Modify sample tags
See more: https://voxel51.com/docs/fiftyone/user_guide/using_views.html#tagging-contents

In [None]:
# remove pre-existing tags
# Called on the dataset itself (not on a view), with the "test" tag.
dataset.untag_samples("test") 

In [None]:
# Replace the "test" tag with "train": add the new tag first, then drop the
# old one, both applied to the samples currently tagged "test".
test_view = dataset.match_tags("test")
test_view.tag_samples("train")
test_view.untag_samples("test")

In [None]:
# Last expression: displays the dataset summary as the cell output.
dataset

In [None]:
# Perform a random 90/10 train/test split: 10% of the samples are tagged
# "test", every remaining sample is tagged "train".

# Fixed seed so that re-running the notebook reproduces the same split
SPLIT_SEED = 51

# The sample count must be a whole number; 0.1 * len(dataset) is a float and
# is fractional for most dataset sizes, so truncate it explicitly.
num_test = int(0.1 * len(dataset))

dataset.take(num_test, seed=SPLIT_SEED).tag_samples("test")
dataset.match_tags("test", bool=False).tag_samples("train")

print(dataset.count_sample_tags())
# e.g. for a 200-sample dataset: {'train': 180, 'test': 20}

## Modify classes

In [None]:
# lower all classes
# dataset.default_classes = [i.lower() for i in dataset.default_classes]
# dataset.default_classes[:3]


# TODO: add a step that modifies the labels stored in the samples.

In [None]:
# Last expression: displays the dataset summary as the cell output.
dataset

In [None]:
# save to database
# Run this after the modification cells above so their changes are stored.
dataset.save()

In [None]:
# Launch the App on the dataset; this rebinds `session` to the new handle.
session = fo.launch_app(dataset)
# `clear_output` comes from the import in the first App-launch cell.
clear_output()

# Delete by tag

In [None]:
## ONLY RUN THIS RIGHT AFTER EXPORT

In [None]:
! pip install tqdm

In [None]:
from tqdm import tqdm

# Count the samples whose tag list is exactly ['delete'].
# Read-only check: nothing is modified here; the actual deletion happens in
# the "Sample Tag" cells below.
cnt = sum(
    1
    # `total` lets tqdm show progress against the dataset size
    for sample in tqdm(dataset.iter_samples(), total=len(dataset))
    if sample.tags == ['delete']
)
print(cnt)

## Sample Tag

In [None]:
# Select the samples tagged "delete"; `v1` is consumed by the next cell.
v1 = dataset.match_tags("delete")

In [None]:
# Destructive: delete the samples selected in `v1` from the dataset.
# `v1` must come from the cell directly above.
dataset.delete_samples(v1)

In [None]:
# Save the dataset, then display its summary as the cell output.
dataset.save()
dataset

## Label Tag

In [None]:
# Keep only the 'ground_truth' labels whose tag list equals ['tag_delete'].
# NOTE(review): this is an exact list comparison, so labels carrying extra
# tags presumably do not match - confirm that is intended.
# Reuses the name `v1` from the "Sample Tag" section.
v1 = dataset.filter_labels('ground_truth',F('tags')==['tag_delete'])

In [None]:
# Destructive: delete the samples selected in `v1` from the dataset.
# NOTE(review): this removes whole samples, not just the tagged labels; if
# only the labels should go, a label-level delete is probably what is wanted
# - confirm.
dataset.delete_samples(v1)

In [None]:
# Save the dataset, then display its summary as the cell output.
dataset.save()
dataset