In [1]:
import glob
import os
from datetime import date

import fiftyone as fo
from fiftyone import ViewField as F

### Load dataset

In [2]:
# Open the existing FiftyOne dataset by name and display its summary.
DATASET_NAME = 'D2S'
ds = fo.load_dataset(DATASET_NAME)
ds

Name:           D2S
Media type:     image
Num samples:    24098
Persistent:     True
Info:           {}
Tags:           ['aug', 'byte', 'test', 'train', 'val', 'val_clutter', 'val_wo_occ', 'water']
Sample fields:
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    prediction:   fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)

### Set categories

In [3]:
# Labels to keep; detections with any other label are filtered out below.
CATEGORIES = [
    'banana_single',
    'banana_bundle',
    'coca_cola_05',
    'orange_single',
    'vine_tomatoes',
]

### Filter dataset

In [4]:
# Keep only ground-truth detections whose label is one of CATEGORIES
# (only_matches=True drops samples left with no matching detection), then
# restrict the view to samples tagged train, val or test.
label_is_wanted = F("label").is_in(CATEGORIES)
filtered = (
    ds
    .filter_labels("ground_truth", label_is_wanted, only_matches=True)
    .match_tags(['train', 'val', 'test'])
)
filtered

Dataset:        D2S
Media type:     image
Num samples:    1110
Tags:           ['byte', 'test', 'train', 'val']
Sample fields:
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    prediction:   fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
Pipeline stages:
    1. FilterLabels(field='ground_truth', filter={'$in': ['$$this.label', [...]]}, only_matches=True)
    2. MatchTags(tags=['train', 'val', 'test'])

### Export in COCO format (JSON)

In [21]:
# Export the filtered view as a COCO detection dataset.
# If EXPORT_DIR already exists, FiftyOne merges the export into it.
EXPORT_DIR = r'G:\My Drive\Food_Recognition\Datasets\YOLO_dataset2'
DATASET_TYPE = fo.types.COCODetectionDataset
filtered.export(
    export_dir=EXPORT_DIR,
    dataset_type=DATASET_TYPE,
    # The dataset carries two Detections fields (ground_truth and prediction);
    # name the one to export instead of relying on the implicit default choice.
    label_field="ground_truth",
)

Directory 'G:\My Drive\Food_Recognition\Datasets\YOLO_dataset2' already exists; export will be merged with existing files
   8% |/--------|  5/60 [4.6s elapsed, 50.1s remaining, 1.3 samples/s]   


KeyboardInterrupt: 

### Export in YOLO

In [6]:
# Export the filtered view as a YOLO dataset.
# If EXPORT_DIR already exists, FiftyOne merges the export into it.
EXPORT_DIR = r'G:\My Drive\Food_Recognition\Datasets\YOLO_dataset\common'
DATASET_TYPE = fo.types.YOLODataset
filtered.export(
    export_dir=EXPORT_DIR,
    dataset_type=DATASET_TYPE,
    # The dataset carries two Detections fields (ground_truth and prediction);
    # name the one to export instead of relying on the implicit default choice.
    label_field="ground_truth",
)

Directory 'G:\My Drive\Food_Recognition\Datasets\YOLO_dataset\common' already exists; export will be merged with existing files
 100% |█████| 1110/1110 [1.9m elapsed, 0s remaining, 9.0 samples/s]       


### Export TFRecords

In [None]:
# Export the filtered view as a TF Object Detection (TFRecords) dataset.
EXPORT_DIR = r'G:\My Drive\Food_Recognition\Datasets & Images\export_dir'
DATASET_TYPE = fo.types.TFObjectDetectionDataset
filtered.export(
    export_dir=EXPORT_DIR,
    dataset_type=DATASET_TYPE,
    # The dataset carries two Detections fields (ground_truth and prediction);
    # name the one to export instead of relying on the implicit default choice.
    label_field="ground_truth",
)

### Check labels and images (remove images without a label file)

In [15]:
# Folders holding the training images and their label files.
IMG_PATH = r'G:\My Drive\Food_Recognition\Datasets\YOLO_dataset\images\train'
LAB_PATH = r'G:\My Drive\Food_Recognition\Datasets\YOLO_dataset\labels\train'

# Base names (no directory, no extension) found in each folder; filled below.
images, labels = [], []

In [23]:
# Collect the base name (directory and extension stripped) of every file in
# IMG_PATH. The list is rebuilt rather than appended to, so re-running this
# cell cannot accumulate duplicate entries, and os.path.splitext copes with
# extensions of any length (e.g. ".jpeg") instead of assuming four characters.
images = [
    os.path.splitext(os.path.basename(filepath))[0]
    for filepath in glob.glob(os.path.join(IMG_PATH, '*'))
]

In [24]:
# Collect the base name (directory and extension stripped) of every file in
# LAB_PATH. The list is rebuilt rather than appended to, so re-running this
# cell cannot accumulate duplicate entries, and os.path.splitext copes with
# extensions of any length instead of assuming four characters.
labels = [
    os.path.splitext(os.path.basename(filepath))[0]
    for filepath in glob.glob(os.path.join(LAB_PATH, '*'))
]

In [30]:
# Delete every image in IMG_PATH that has no label file with the same base name.
if not labels:
    # An empty label list would make every image look unlabelled and wipe the
    # whole folder; this most likely means the label-collection cell was not
    # run (or LAB_PATH is wrong), so stop instead of deleting anything.
    raise ValueError('labels is empty; refusing to delete images from ' + IMG_PATH)

label_names = set(labels)  # set lookup instead of scanning the list per image

# dict.fromkeys drops duplicate names (keeping order) so no file is removed twice.
for name in dict.fromkeys(images):
    if name in label_names:
        continue
    # Find the file by base name with whatever extension it has on disk,
    # rather than assuming ".jpg".
    pattern = os.path.join(IMG_PATH, glob.escape(name) + '.*')
    for candidate in glob.glob(pattern):
        # "a.*" also matches "a.b.jpg"; only delete exact base-name matches.
        if os.path.splitext(os.path.basename(candidate))[0] == name:
            os.remove(candidate)