In [21]:
datasetName = "be560875-c319-4c7e-a40c-87557783a96c"

syntheticDataPath = "synthetic_dataset/temp"

imageWidth = 640
imageHeight = 640

In [22]:
import os

datasetPath = f"synthetic_dataset/{datasetName}"

metadataName = next(filter(lambda x: x.startswith("Dataset"), os.listdir(datasetPath)))
metadataPath = f"{datasetPath}/{metadataName}"

captureFiles = list(filter(lambda x: x.startswith("captures_"), os.listdir(metadataPath)))

In [23]:
import json
import shutil
from uuid import uuid4

class_count = {
    "Black": 0,
    "Brown": 0,
    "Gray": 0,
    "White": 0,
}

shutil.rmtree(syntheticDataPath, ignore_errors=True, onerror=None)
os.makedirs(syntheticDataPath)

for captureFile in captureFiles:
    with open(f"{metadataPath}/{captureFile}") as f:
        data = json.load(f)
    
    for capture in data["captures"]:
        fileName = f"{datasetPath}/{capture['filename']}"
        annotations = capture["annotations"][0]["values"]
        if len(annotations) < 1:
            continue
        
        image_name = uuid4().hex
        
        shutil.copyfile(fileName, f"{syntheticDataPath}/{image_name}.png")

        with open(f"{syntheticDataPath}/{image_name}.txt", "w+") as f:
            for label in annotations:
                class_name = label["label_name"]
                class_id = label["label_id"]
                class_count[class_name] += 1

                x = label["x"] / imageWidth
                y = label["y"] / imageHeight
                w = label["width"] / imageWidth
                h = label["height"] / imageHeight

                xc = x + w / 2
                yc = y + h / 2

                if w > 2e-2 and h > 2e-2:
                    f.write(f"{class_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n")


In [24]:
class_count

{'Black': 1557, 'Brown': 2081, 'Gray': 1043, 'White': 503}

# Remove empty images

In [26]:
import os

syntheticDataPath = "synthetic_dataset/temp"

images = list(filter(lambda x: x.endswith(".png"), os.listdir(syntheticDataPath)))
labels = list(filter(lambda x: x.endswith(".txt"), os.listdir(syntheticDataPath)))

for image in images:
    name = image[:-4]
    if f"{name}.txt" not in labels:
        os.remove(f"{syntheticDataPath}/{image}")

for label in labels:
    with open(f"{syntheticDataPath}/{label}", "r") as f:
        if f.readline() == "":
            name = label[:-4]
            os.remove(f"{syntheticDataPath}/{label}")
            os.remove(f"{syntheticDataPath}/{name}.png")

# Move labels

In [31]:
import os
import shutil

syntheticDataPath = "synthetic_dataset/temp"
syntheticImagePath = "grid_dataset/synthetic/images"
syntheticLabelPath = "grid_dataset/synthetic/labels"

images = list(filter(lambda x: x.endswith(".png"), os.listdir(syntheticDataPath)))
labels = list(filter(lambda x: x.endswith(".txt"), os.listdir(syntheticDataPath)))

for label in labels:
    shutil.move(f"{syntheticDataPath}/{label}", f"{syntheticLabelPath}/{label}")

for image in images:
    shutil.move(f"{syntheticDataPath}/{image}", f"{syntheticImagePath}/{image}")

# Stats

In [32]:
import os

syntheticLabelPath = "grid_dataset/synthetic/labels"

classes = ["Black", "Brown", "Gray", "White"]

class_count = {
    "Black": 0,
    "Brown": 0,
    "Gray": 0,
    "White": 0,
}

labels = list(filter(lambda x: x.endswith(".txt"), os.listdir(syntheticLabelPath)))

for label in labels:
    with open(f"{syntheticLabelPath}/{label}", "r") as f:
        while (line := f.readline()) != "":
            clas = int(line[0])
            class_count[classes[clas]] += 1

In [33]:
class_count

{'Black': 2401, 'Brown': 3183, 'Gray': 1644, 'White': 851}

In [34]:
class_count["Brown"] + class_count["Black"] + class_count["Gray"] + class_count["White"]

8079

In [35]:
len(labels)

3867