This Python notebook cycles through parts of the Visual Wake Words (VWW) and Wake Vision datasets to estimate the number of label errors in each dataset.

First we define a few variables that control how this notebook works.

In [None]:
# Upper bound on how many samples are taken from each dataset for manual
# review (passed to `.take(...)` when the labeller items are built).
samples_to_check = 500

Next we import necessary libraries.

In [None]:
import wake_vision_loader
import vww_loader
import qsl
import cv2
import tensorflow as tf
import experiment_config

Change the default configuration to get higher quality images.

In [None]:
# Start from the named experiment configuration, then override the input
# shape so the images shown for review are higher quality than the default.
# NOTE(review): (448, 448, 3) is presumably (height, width, channels) — confirm.
cfg = experiment_config.get_cfg("estimate_label_errors_cfg")
cfg.INPUT_SHAPE= (448, 448, 3)

Next we load the two datasets.

In [None]:
# Each loader returns three values; only the third is kept here.
# NOTE(review): going by the variable names this is presumably the test
# split — confirm against the loader modules.
_,_,vww_test = vww_loader.get_vww(cfg)
_,_,wv_test = wake_vision_loader.get_wake_vision(cfg)

Unbatch the datasets to later fetch one image at a time.

In [None]:
# Undo batching so that iterating the datasets yields one sample at a time,
# which is what the labeller item lists are built from.
vww_test = vww_test.unbatch()
wv_test = wv_test.unbatch()

Then set up a qsl `MediaLabeler` to label the VWW set.

In [None]:
# Keyword arguments for the VWW labelling widget:
#   * "config" declares a single required image-level field, "Correct Label",
#     with the choices "Person" / "No Person".
#   * "items" holds one entry per reviewed sample. The image (element 0 of the
#     sample) is converted to uint8 and passed through cv2.COLOR_RGB2BGR; the
#     dataset's own label (element 1) is stored under metadata["orig_label"]
#     so it can be compared with the reviewer's answer afterwards.
#   * "maxCanvasSize" is forwarded unchanged to the labeller.
vww_params = {
    "config": {
        "image": [
            {
                "name": "Correct Label",
                "options": [{"name": "Person"}, {"name": "No Person"}],
                "required": True,
            },
        ],
    },
    "items": [
        {
            "target": cv2.cvtColor(
                tf.image.convert_image_dtype(example[0], tf.uint8).numpy(),
                cv2.COLOR_RGB2BGR,
            ),
            "metadata": {"orig_label": example[1].numpy()},
            "image_num": index,
        }
        for index, example in enumerate(vww_test.take(samples_to_check))
    ],
    "maxCanvasSize": 224,
}

vww_labeller = qsl.MediaLabeler(**vww_params)

Display the labeller.

In [None]:
# Show the VWW labeller in the notebook output so the samples can be reviewed.
display(vww_labeller)

Get the number of errors from the labelling process.

In [None]:
# For every labeller item, pair the reviewer's "Correct Label" answer with the
# label the dataset originally carried (stored in the item's metadata).
vww_output = [
    dict(
        correct_label=labelled["labels"]["image"]["Correct Label"],
        original_label=labelled["metadata"]["orig_label"],
    )
    for labelled in vww_labeller.items
]

In [None]:
# Replace the reviewer's answer with an integer so it can be compared with
# `original_label`: ["Person"] -> 1, ["No Person"] -> 0. Any other value is
# left untouched.
for record in vww_output:
    choice = record["correct_label"]
    if choice == ["Person"]:
        record["correct_label"] = 1
    elif choice == ["No Person"]:
        record["correct_label"] = 0

In [None]:
# Count the reviewed samples whose corrected label differs from the label the
# dataset originally carried.
vww_errors = sum(
    1
    for record in vww_output
    if record["correct_label"] != record["original_label"]
)

In [None]:
# Report how many reviewed VWW samples disagreed with their original label.
print(f"VWW Errors: {vww_errors}")

Next, do the same thing for the Wake Vision dataset.

In [None]:
# Keyword arguments for the Wake Vision labelling widget:
#   * "config" declares two image-level fields: the required "Correct Label"
#     ("Person" / "No Person") and an optional "Depiction" field
#     ("Depiction" / "No Depiction") with keyboard shortcuts "æ" and "ø".
#   * "items" holds one entry per reviewed sample. The image (element 0 of the
#     sample) is converted to uint8 and passed through cv2.COLOR_RGB2BGR; the
#     dataset's own label (element 1) is stored under metadata["orig_label"]
#     so it can be compared with the reviewer's answer afterwards.
#   * "maxCanvasSize" is forwarded unchanged to the labeller.
wv_params = {
    "config": {
        "image": [
            {
                "name": "Correct Label",
                "options": [{"name": "Person"}, {"name": "No Person"}],
                "required": True,
            },
            {
                "name": "Depiction",
                "options": [
                    {"name": "Depiction", "shortcut": "æ"},
                    {"name": "No Depiction", "shortcut": "ø"},
                ],
            },
        ],
    },
    "items": [
        {
            "target": cv2.cvtColor(
                tf.image.convert_image_dtype(example[0], tf.uint8).numpy(),
                cv2.COLOR_RGB2BGR,
            ),
            "metadata": {"orig_label": example[1].numpy()},
            "image_num": index,
        }
        for index, example in enumerate(wv_test.take(samples_to_check))
    ],
    "maxCanvasSize": 224,
}

wv_labeller = qsl.MediaLabeler(**wv_params)

In [None]:
# Show the Wake Vision labeller in the notebook output so the samples can be reviewed.
display(wv_labeller)

In [None]:
# For every labeller item, collect the reviewer's "Correct Label" and
# "Depiction" answers together with the label the dataset originally carried
# (stored in the item's metadata).
wv_output = [
    dict(
        correct_label=labelled["labels"]["image"]["Correct Label"],
        depiction=labelled["labels"]["image"]["Depiction"],
        original_label=labelled["metadata"]["orig_label"],
    )
    for labelled in wv_labeller.items
]

In [None]:
# Replace the reviewer's answer with an integer so it can be compared with
# `original_label`: ["Person"] -> 1, ["No Person"] -> 0. Any other value is
# left untouched.
for record in wv_output:
    choice = record["correct_label"]
    if choice == ["Person"]:
        record["correct_label"] = 1
    elif choice == ["No Person"]:
        record["correct_label"] = 0

In [None]:
# Count the reviewed samples whose corrected label differs from the original
# label, split by the reviewer's "Depiction" answer.
# NOTE: a mismatching sample whose depiction answer is neither
# ["No Depiction"] nor ["Depiction"] is counted in neither total.
wv_no_depiction_errors = 0
wv_depiction_errors = 0
for record in wv_output:
    if record["correct_label"] != record["original_label"]:
        if record["depiction"] == ["No Depiction"]:
            wv_no_depiction_errors += 1
        elif record["depiction"] == ["Depiction"]:
            wv_depiction_errors += 1

In [None]:
# Report the Wake Vision label-error counts, split by whether the reviewer
# marked the sample as a depiction.
print(f"Wake Vision Non-Depiction Errors: {wv_no_depiction_errors}")
print(f"Wake Vision Depiction Errors: {wv_depiction_errors}")