In [1]:
"""
This file downloads the train, test and validation datasets used for training the CNN.
We have used FiftyOne's zoo library to download them from Open Images V7.

The entire Open Images V7 database is 561 GB in size. It contains ~9 million images, of which
around 2 million are annotated (Classification, Detection, Segmentation, etc.).
The zoo library supports partial downloads of the dataset, which helps a lot with
our purpose.

* All images have been rescaled so that their largest dimension is at most 1024 pixels.
"""
import fiftyone as fo
import fiftyone.zoo as foz
from pprint import pprint
import matplotlib.pyplot as plt
import matplotlib.image as mpimg # Necessary for readig an image
import matplotlib.patches as patches # Necessary for drawing bounding boxes

In [52]:
def _readYoloDetections(labelFile):
    """
    Reads a label text file holding one detection per line.

    Each non-blank line must contain five whitespace-separated numbers:
    [labelNo. boxCenterX boxCenterY boxWidth boxHeight]. Blank lines are skipped.

    Returns: list: One (labelNo, [topLeftX, topLeftY, width, height]) tuple per detection
    """
    detections = []
    with open(labelFile) as file:
        for line in file:
            # split() without arguments tolerates repeated spaces and the trailing newline
            values = [float(x) for x in line.split()]
            if not values:
                continue
            # The text file stores the box center; convert it to the top-left corner
            bBox = [values[1] - values[3]/2, values[2] - values[4]/2, values[3], values[4]]
            detections.append((int(values[0]), bBox))
    return detections


def dispBBox(picDir, picName, labelDetail, labelsNames):
    """
    Displays the detection bounding boxes and their label text on an image.

    Args:
        picDir: str: The directory where the image is saved
        picName: str: The name of the picture without its extension. The file is assumed
            to be "<picName>.jpg"
        labelDetail: str or dict: The source of the detections.
            * str: The location of the text file containing the bounding boxes and labels.
              Each detection should be written on a separate line containing 5 numbers:
              [labelNo. boxCenterX boxCenterY boxWidth boxHeight]. Blank lines are ignored.
            * dict: The deserialized labels JSON, where labelDetail["labels"][picName] is a
              list of detections, each with a "label" index and a "bounding_box" formatted
              as [<top-left-x>, <top-left-y>, <width>, <height>].
            In both cases the coordinates are relative values in [0, 1] x [0, 1].
        labelsNames: list: The label names, indexed by the label number of a detection

    Returns: None
    """

    # Show the image
    _, ax = plt.subplots()
    img = mpimg.imread(f"{picDir}/{picName}.jpg")
    ax.imshow(img)

    # Pixel size of the image, used to scale the relative box coordinates
    height, width = img.shape[:2]

    # Note that the bounding box parameters when importing from a text file (box center) are
    # different from those of the JSON file (top-left corner); both are normalized here to
    # (labelNo, [topLeftX, topLeftY, width, height]).
    if isinstance(labelDetail, str):
        detections = _readYoloDetections(labelDetail)
    else:
        # `or []` tolerates an entry without detections stored as null
        detections = [
            (detection["label"], detection["bounding_box"])
            for detection in (labelDetail["labels"][picName] or [])
        ]

    # Display the bounding boxes
    for labelNo, bBox in detections:
        # The rectangle is drawn exactly at the annotated position (no pixel offset), so the
        # plot can be used to verify the labels.
        ax.add_patch(patches.Rectangle(
            (bBox[0]*width, bBox[1]*height), width*bBox[2], height*bBox[3],
            fill = None, color = "red"
        ))
        # Anchor the bottom of the text at the top edge of the box so the label sits above it
        ax.text(
            bBox[0]*width, bBox[1]*height, labelsNames[labelNo],
            bbox = dict(facecolor='white', edgecolor='red', pad = 1), size = 7, backgroundcolor = "red",
            verticalalignment = "bottom"
        )

# dispBBox("./data/images/train", "000802121329b103", js, ["Person"])
# dispBBox("./data/images/train", "000802121329b103", "./data/labels/train/000802121329b103.txt", ["Person"])

In [10]:
# Preview the deserialized train labels. `js` is created by the cell that converts
# labels.json to YOLO text files, so that cell has to be run before this one.
# Only the class list and the detections of the first image are printed; dumping the
# labels of every image would flood the cell output.
firstPic = next(iter(js["labels"]), None)
pprint({
    "classes": js["classes"],
    "labels": {} if firstPic is None else {firstPic: js["labels"][firstPic]},
})

{'classes': ['Person'],
 'labels': {'000002b66c9c498e': [{'attributes': {'IsDepiction': False,
                                                 'IsGroupOf': False,
                                                 'IsInside': False,
                                                 'IsOccluded': False,
                                                 'IsTruncated': True},
                                  'bounding_box': [0.0125,
                                                   0.148438,
                                                   0.182812,
                                                   0.43906200000000006],
                                  'label': 0},
                                 {'attributes': {'IsDepiction': False,
                                                 'IsGroupOf': False,
                                                 'IsInside': False,
                                                 'IsOccluded': False,
                                                

In [2]:
# Download the images and annotations used for training. The FiftyOne dataset is stored
# under the name "open-images-v7-object-detection-DS-train".
dsName = "open-images-v7-object-detection-DS-train"
dsClasses = ["Person"]
dsSplit = "train"
dsLblTypes = ["detections", "classifications"]
nSamples = 1000

if not fo.dataset_exists(dsName):
    # Bind the freshly downloaded dataset to `datasetTrain` as well: the export cell uses
    # that name, so it must be defined on a first run and not only when the dataset
    # already exists.
    datasetTrain = foz.load_zoo_dataset(
        "open-images-v7",
        split = dsSplit,
        label_types = dsLblTypes,
        classes = dsClasses,
        max_samples = nSamples,
        seed = 1,
        shuffle = True,
        dataset_name = dsName,
    )
else:
    datasetTrain = fo.load_dataset(dsName)
    print("Dataset already loaded.")

Dataset already loaded.


In [3]:
# Download the images and annotations used for testing. The FiftyOne dataset is stored
# under the name "open-images-v7-object-detection-DS-test".
dsName = "open-images-v7-object-detection-DS-test"
dsClasses = ["Person"]
dsSplit = "test"
dsLblTypes = ["detections", "classifications"]
nSamples = 1000

if not fo.dataset_exists(dsName):
    # Bind the freshly downloaded dataset to `datasetTest` as well: the export cell uses
    # that name, so it must be defined on a first run and not only when the dataset
    # already exists.
    datasetTest = foz.load_zoo_dataset(
        "open-images-v7",
        split = dsSplit,
        label_types = dsLblTypes,
        classes = dsClasses,
        max_samples = nSamples,
        seed = 2,
        shuffle = True,
        dataset_name = dsName,
    )
else:
    datasetTest = fo.load_dataset(dsName)
    print("Dataset already loaded.")

Dataset already loaded.


In [168]:
# Export the downloaded datasets to disk. For each split the images are written to
# ./data/images/<split> and the detection labels to ./data/labels/<split>/labels.json.
# Options shared by both exports:
exportOptions = dict(
    dataset_type = fo.types.FiftyOneImageDetectionDataset,
    classes = dsClasses,
    include_confidence = False,
)

# Train data
datasetTrain.export(
    data_path = "./data/images/train",
    labels_path = "./data/labels/train/labels.json",
    **exportOptions
)

# Test data
datasetTest.export(
    data_path = "./data/images/test",
    labels_path = "./data/labels/test/labels.json",
    **exportOptions
)

Directory './data/images/train' already exists; export will be merged with existing files
 100% |███████████████| 1000/1000 [3.5s elapsed, 0s remaining, 292.9 samples/s]      
Directory './data/images/test' already exists; export will be merged with existing files
 100% |███████████████| 1000/1000 [5.5s elapsed, 0s remaining, 202.7 samples/s]      


In [26]:
# Deserialize the json file and convert it to text files to make it compatible with yolov8
import json


def jsonLabelsToYolo(labelsDir):
    """
    Converts the "labels.json" file found in a directory into one text file per image.

    For every image key in the JSON's "labels" mapping, "<labelsDir>/<key>.txt" is written
    with one line per detection: "<label> <centerX> <centerY> <width> <height>". The JSON
    stores each bounding box as [<top-left-x>, <top-left-y>, <width>, <height>], so the
    top-left corner is shifted by half the box size to obtain the center.

    Args:
        labelsDir: str: The directory containing "labels.json"; the text files are written
            to the same directory

    Returns: dict: The deserialized content of "labels.json"
    """
    # The context manager closes the JSON file as soon as it has been parsed
    with open(f"{labelsDir}/labels.json") as jsonFile:
        labels = json.load(jsonFile)

    for picName, detections in labels["labels"].items():
        lines = []
        # `or []` tolerates an entry without detections stored as null; an empty text
        # file is written for it
        for detection in (detections or []):
            boxWidth = detection["bounding_box"][2]
            boxHeight = detection["bounding_box"][3]
            centerX = detection["bounding_box"][0] + boxWidth/2
            centerY = detection["bounding_box"][1] + boxHeight/2
            lines.append(f"{detection['label']} {centerX} {centerY} {boxWidth} {boxHeight}\n")

        # No explicit close() is needed: leaving the with-block closes the file
        with open(f"{labelsDir}/{picName}.txt", 'w') as txtFile:
            txtFile.writelines(lines)

    return labels


# `js` keeps the deserialized train labels available to the other cells. The same helper can
# be called with another labels directory (e.g. the test split) when needed.
js = jsonLabelsToYolo("./data/labels/train")
    

In [4]:
# List the names of the datasets FiftyOne currently has available.
# The returned list is displayed as the cell output.
fo.list_datasets()

['open-images-v7-object-detection-DS-test',
 'open-images-v7-object-detection-DS-train']

In [79]:
# Open the FiftyOne app on the test dataset to browse its samples
testDsName = "open-images-v7-object-detection-DS-test"
aa = fo.load_dataset(testDsName)
session = fo.launch_app(aa.view())