# Load Dataset With Names

In [186]:
import tensorflow_datasets as tfds
from PIL import Image
import tensorflow as tf
from statistics import mean
import numpy as np


ds_office = tfds.load("OfficeRumlangValidationLabeled", split="OFFICE", as_supervised = False)
ds_rumlang = tfds.load("OfficeRumlangValidationLabeled", split="RUMLANG", as_supervised = False)

In [None]:
def evaluateLabels(pseudoLabelSets, gtSet, maxTimeDiff = 0.3):
    """
     Evaluate pseudo labels to groundtruth labels.
        gtSet: groundtruth labels from the hive
        pseudoLabelSets: list with datasets containing pseudolabels that should be matched to the groundtruth set
    """

    # Gather all filenames
    available_timestamps = []
    for set in pseudoLabelSets:
        for entry in set:
            filename = entry['filename'].numpy().decode("utf-8")
            available_timestamps.append(filename)

    cam_timestamp_to_label_gt = {}

    # For each groundtruth image find closest pseudo label file
    for entry in gtSet:
        name = entry['name'].numpy().decode("utf-8").replace("semseg_", "").replace(".png","")
        # swap cam to the back
        name = "_".join(reversed(name.split("_")))
        ts = int(name.split("_")[0])

        # Find closest timestamp from the same camera
        closest_ts = min(available_timestamps, key = lambda x : abs(int(x.split("_")[0]) - ts)\
                   + 10**14 * (x.split("_")[1] != name.split("_")[1])) # Additional term to punish wrong camera
        # Time difference
        diff = abs(int(closest_ts.split("_")[0]) - ts) / 10**9

        if diff < maxTimeDiff:
            cam_timestamp_to_label_gt[closest_ts] = entry['label']
        else:
            print(f"could not find pseudo label for gt file {name}. Closest time difference {diff}")

    scores = []
    # Iterate through pseudo labels again to now calculate mIoU scores of cloesest images.
    for set in pseudoLabelSets:
        for entry in set:
            filename = entry['filename'].numpy().decode("utf-8")
            if filename in cam_timestamp_to_label_gt.keys():
                l_gt = cam_timestamp_to_label_gt[filename]
                l_pseudo = np.asarray(Image.fromarray(np.squeeze(entry['label'].numpy().astype(np.uint8))).resize((l_gt.shape[1], l_gt.shape[0]), Image.NEAREST).convert("L"))

                mask = tf.constant(l_pseudo != 2)
                prediction_masked = tf.boolean_mask(tf.constant(l_pseudo), mask)
                gt_masked = tf.boolean_mask(tf.squeeze(l_gt), mask)

                mIoU = tf.keras.metrics.MeanIoU(num_classes=2)
                mIoU.update_state(prediction_masked, gt_masked)
                mIoUValue = mIoU.result().numpy()
                print(filename, "mIoU:", mIoUValue)
                scores.append(mIoUValue)
    print("valid files: ", len(scores))
    print("Average mIoU", mean(scores))
    return mean(scores)



In [190]:
import tensorflow_datasets as tfds
labels = tfds.load('meshdist_pseudolabels')

In [189]:
evaluateLabels([labels['rumlang2'], labels['rumlang3']],ds_rumlang, maxTimeDiff=0.5)

could not find pseudo label for gt file 1612787555066908000_cam1. Closest time difference 0.549865
could not find pseudo label for gt file 1612787570463007000_cam0. Closest time difference 1.29972
could not find pseudo label for gt file 1612787467893841000_cam0. Closest time difference 0.599738
could not find pseudo label for gt file 1612784110462165000_cam0. Closest time difference 379.581513
could not find pseudo label for gt file 1612783962992338000_cam0. Closest time difference 527.05134
could not find pseudo label for gt file 1612784144856845000_cam1. Closest time difference 345.137025
could not find pseudo label for gt file 1612787543769879000_cam2. Closest time difference 1.049582
could not find pseudo label for gt file 1612784002234186000_cam0. Closest time difference 487.809492
could not find pseudo label for gt file 1612784161204314000_cam1. Closest time difference 328.789556
could not find pseudo label for gt file 1612784075968755000_cam1. Closest time difference 414.025115


0.49461767

In [192]:
evaluateLabels([labels['office4'], labels['office5']],ds_office, maxTimeDiff=0.5)


could not find pseudo label for gt file 1611927755639513969_cam1. Closest time difference 1715846.703069031
could not find pseudo label for gt file 1611926874658593893_cam0. Closest time difference 1716727.683798107
could not find pseudo label for gt file 1611926840900478601_cam0. Closest time difference 1716761.441913399
could not find pseudo label for gt file 1611928312359371901_cam0. Closest time difference 1715289.983020099
could not find pseudo label for gt file 1611927703679764986_cam1. Closest time difference 1715898.662818014
could not find pseudo label for gt file 1611926816061948299_cam1. Closest time difference 1716786.280634701
could not find pseudo label for gt file 1611928372396341562_cam1. Closest time difference 1715229.946241438
could not find pseudo label for gt file 1611928359277365685_cam0. Closest time difference 1715243.065026315
could not find pseudo label for gt file 1611928402153963089_cam0. Closest time difference 1715200.188428911
could not find pseudo label 

StatisticsError: mean requires at least one data point