### Load Data

In [6]:
import fiftyone as fo
from fiftyone import ViewField as F

# Open Images class names to download and export.
my_classes = [
    "Burrito",
    "Hot dog",
    "Muffin",
]

# Target directory for the exported classification patches.
export_dir = "data"

In [7]:
# Download (if necessary) and load a shuffled subset of the Open Images V7
# training split, restricted to samples containing the requested classes.
dataset = fo.zoo.load_zoo_dataset(
    "open-images-v7",
    split="train",
    label_types=["detections"],
    classes=my_classes,
    max_samples=1500,
    shuffle=True,
    # Fixed seed so the shuffled subset is the same on every run; without it
    # each fresh download can select a different set of 1500 samples.
    seed=51,
)

Downloading split 'train' to '/Users/constantin/fiftyone/open-images-v7/train' if necessary
Found 100 images, downloading the remaining 1400
 100% |█████████████████| 1400/1400 [6.0m elapsed, 0s remaining, 5.3 files/s]       
Dataset info written to '/Users/constantin/fiftyone/open-images-v7/info.json'
Loading 'open-images-v7' split 'train'
 100% |███████████████| 1500/1500 [4.2s elapsed, 0s remaining, 368.5 samples/s]      
Dataset 'open-images-v7-train-1500' created


In [8]:
# Drop every detection whose label is not one of the target classes, then
# keep only the samples that still carry at least one detection.
dataset = (
    dataset
    .filter_labels("ground_truth", F("label").is_in(my_classes))
    .match(F("ground_truth.detections").length() > 0)
)

In [9]:
# Turn each ground-truth detection into its own patch and export the cropped
# patches as a directory tree with one folder per class label.
patches = dataset.to_patches("ground_truth")
patches.export(
    export_dir=export_dir,
    dataset_type=fo.types.ImageClassificationDirectoryTree,
    label_field="ground_truth",
    # Start from a clean directory: by default a re-run merges into the
    # existing export, leaving stale patches that inflate the class counts.
    overwrite=True,
)

Detected an image classification exporter and a label field 'ground_truth' of type <class 'fiftyone.core.labels.Detection'>. Exporting image patches...
 100% |███████████████| 4294/4294 [29.0s elapsed, 0s remaining, 149.0 samples/s]      


### Data Exploration

In [2]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Root of the class-per-folder image tree explored below.
data_dir = "data"

In [3]:
# Classes and number of images per class.
# Only sub-directories of data_dir are treated as classes, and only regular,
# non-hidden files are counted, so stray entries such as OS metadata files
# neither crash the listing nor inflate the counts. Sorting keeps the
# output order stable across runs and platforms.
class_counts = {}
for class_name in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_name)
    if class_name.startswith(".") or not os.path.isdir(class_path):
        continue
    class_counts[class_name] = sum(
        1
        for file_name in os.listdir(class_path)
        if not file_name.startswith(".")
        and os.path.isfile(os.path.join(class_path, file_name))
    )

for cls, count in class_counts.items():
    print(f"{cls}: {count} Bilder")

Burrito: 204 Bilder
Muffin: 3705 Bilder
Hot dog: 388 Bilder


In [None]:
import random
from PIL import Image

def show_random_images(data_dir, classes, n=3, seed=None):
    """Plot a grid of randomly chosen example images, one row per class.

    Args:
        data_dir: Root directory containing one sub-directory per class.
        classes: Iterable of class names; each is looked up as a
            sub-directory of ``data_dir``.
        n: Maximum number of images drawn per class (number of grid columns).
        seed: Optional seed for a reproducible selection. When ``None`` the
            module-level ``random`` state is used, matching the previous
            behaviour.
    """
    classes = list(classes)
    if not classes or n < 1:
        # Nothing to draw: an empty grid has no rows or no columns.
        return

    rng = random if seed is None else random.Random(seed)

    fig, axes = plt.subplots(
        len(classes), n, figsize=(n * 3, len(classes) * 3), squeeze=False
    )
    # Hide every cell up front so slots left unused by small classes stay blank.
    for ax in axes.flat:
        ax.axis("off")

    for row, class_name in enumerate(classes):
        class_path = os.path.join(data_dir, class_name)
        # Sorted, regular, non-hidden files only: keeps seeded sampling stable
        # and avoids trying to open metadata files or sub-directories.
        images = sorted(
            name
            for name in os.listdir(class_path)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(class_path, name))
        )
        samples = rng.sample(images, min(n, len(images)))
        for ax, img_file in zip(axes[row], samples):
            # Close the file handle as soon as the pixels have been handed
            # to matplotlib instead of leaving it open.
            with Image.open(os.path.join(class_path, img_file)) as img:
                ax.imshow(img)
            ax.set_title(class_name)

    fig.tight_layout()
    plt.show()

# Preview three random images for every class found in class_counts.
show_random_images(data_dir, list(class_counts.keys()), n=3)