# Dataset

In [2]:
import fiftyone as fo

### Create the dataset
https://voxel51.com/docs/fiftyone/user_guide/dataset_creation/datasets.html

In [3]:
# Name under which the FiftyOne dataset is created, loaded and deleted in the cells below
name = "traffic_light"

In [6]:
# Root directory of the on-disk dataset and the splits to import from it
dataset_dir = "dataset/"
splits = ["train", "val", "test"]

dataset = fo.Dataset(name)

# Arguments that are identical for every split
import_options = dict(
    dataset_dir=dataset_dir,
    dataset_type=fo.types.YOLOv5Dataset,
)

# Import one split at a time, tagging its samples with the split name
for split in splits:
    dataset.add_dir(split=split, tags=split, **import_options)



 100% |█████████████████| 981/981 [1.1s elapsed, 0s remaining, 863.4 samples/s]         
 100% |█████████████████| 200/200 [306.1ms elapsed, 0s remaining, 655.8 samples/s]      
 100% |█████████████████| 100/100 [151.9ms elapsed, 0s remaining, 663.8 samples/s]    


### Load the dataset

In [29]:
# Look up the already-created dataset by name rather than importing the files again
dataset = fo.load_dataset(name)

### Make the dataset persistent
https://voxel51.com/docs/fiftyone/user_guide/using_datasets.html#dataset-persistence

In [27]:
# Mark the dataset as persistent so it is kept in FiftyOne's database between sessions
dataset.persistent = True

### Summary of the dataset

In [None]:
# Print the dataset's summary representation
print(dataset)

In [None]:
# Number of samples in the dataset
len(dataset)

### Launch the dataset

In [7]:
# Open the FiftyOne App on the full dataset; the returned session handle is
# what the later cells that reference `session` operate on
session = fo.launch_app(dataset)

### List the datasets

In [None]:
# Names of the datasets currently available in FiftyOne
fo.list_datasets()

### Delete the dataset

In [4]:
# Delete the dataset registered under `name`; run only when it is no longer needed
fo.delete_dataset(name)

### Clear Session

In [5]:
# Needs a live `session` created by one of the fo.launch_app(...) cells;
# running this cell before any of them raises the NameError shown in the
# output below.
session.clear_dataset()
session.close()

NameError: name 'session' is not defined

# Make changes to the dataset

### Dataset view

In [None]:
# A view over the whole dataset, with no filtering stages applied yet
view = dataset.view()

In [None]:
# Launch the App on the view; this rebinds `session` to the new session
session = fo.launch_app(view)

### Selected image IDs

In [None]:
# IDs of the samples currently selected in the App
selected = session.selected

### Iterate over samples

In [31]:
# File paths of the samples tagged 'delete' and 'edit'; both start empty and
# are filled by the tag-scanning cell
delete_files, edit_files = [], []

In [32]:
# sample tags
# Collect the file paths of samples carrying the 'delete' or 'edit' sample tag.
# One pass over the dataset fills both lists (a sample tagged with both ends up
# in both), instead of iterating the whole dataset once per tag.
for sample in dataset:
    sample_tags = sample['tags']
    if 'delete' in sample_tags:
        delete_files.append(sample['filepath'])
    if 'edit' in sample_tags:
        edit_files.append(sample['filepath'])
        

In [None]:
# label tags
# Collect the file path of every sample that has at least one detection whose
# label tags include 'add'. Each file is listed once, even when several of its
# detections are tagged.
add_files = []  # initialised here because no other cell creates this list
for sample in dataset:
    ground_truth = sample['ground_truth']
    if ground_truth is None:
        # NOTE(review): presumably samples without annotations have no
        # ground_truth value; they are skipped instead of failing on None.
        # Confirm against the imported data.
        continue
    # Membership test, matching how the sample-tag cell checks for its tags
    if any('add' in detection['tags'] for detection in ground_truth['detections']):
        add_files.append(sample['filepath'])

In [None]:
# How many file paths were collected for deletion
len(delete_files)

In [33]:
import numpy as np

# Write each collected list of file paths to its own text file, one entry per row
for out_name, paths in (('delete.txt', delete_files), ('edit.txt', edit_files)):
    np.savetxt(out_name, paths, fmt='%s')

### Delete them from dataset by ID

In [None]:
# NOTE(review): `delete_ids` is not defined in any visible cell, so the
# original loop raised NameError. It is derived here from the 'delete' sample
# tag, the same criterion used to build `delete_files`. If the IDs were meant
# to come from the App selection, assign `delete_ids = selected` instead.
delete_ids = dataset.match_tags('delete').values('id')

# Remove all of those samples from the dataset in a single call
dataset.delete_samples(delete_ids)

# Brain

In [None]:
import fiftyone.brain as fob

### Find unique samples

In [None]:
# Build a similarity index over the dataset (brain key "img_sim"), then query
# it for a subset of 100 samples that are maximally unique with respect to
# each other
results = fob.compute_similarity(dataset, brain_key="img_sim")
results.find_unique(100)

In [None]:
# Compute a low-dimensional embedding visualization of the dataset, stored
# under the "img_vis" brain key
vis_results = fob.compute_visualization(dataset, brain_key="img_vis")

In [None]:
# Scatterplot of the embedding visualization showing the outcome of the
# find_unique() query on the similarity results
plot = results.visualize_unique(visualization=vis_results)
plot.show()

In [None]:
# Attach the plot to the App session under the name "unique", then display
# the session; requires a `session` from an earlier fo.launch_app(...) cell
session.plots.attach(plot, name="unique")
session.show()

### Visual similarity

In [None]:
# Build a similarity index over the images, stored under the "image_sim" brain key
fob.compute_similarity(dataset, brain_key="image_sim")

# Launch the App, select an image, then use the "sort by similarity" button
# with the "image_sim" key to rank the other images by similarity to it
session = fo.launch_app(dataset)

In [None]:
# Choose a random image from the dataset to act as the similarity query
query_id = dataset.take(1).first().id

# Programmatically construct a view containing the 15 images most similar to
# the query, using the "image_sim" index
view = dataset.sort_by_similarity(query_id, k=15, brain_key="image_sim")

# Show that view in the already-open App session
session.view = view

# Export the new dataset

In [None]:
import fiftyone as fo

# Destination directory and the label field the exported annotations come from
export_dir = "yolov5-dataset/"
label_field = "ground_truth"  # for example

# Splits to export, and the single class list shared by every split
splits = ["train", "val", "test"]
classes = ["traffic_sign"]

# The dataset or view to export; its sample tags are assumed to encode the
# split each sample belongs to
dataset_or_view = dataset

# Export each split: take the samples tagged with the split name and write
# them out in YOLOv5 format under that split
for split in splits:
    dataset_or_view.match_tags(split).export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=label_field,
        split=split,
        classes=classes,
    )