# Parse_TFRecord
This file parses bounding boxes (bboxes) and confidence scores from the tfrecord files generated by the storefront detector model on the UCF dataset.
It then uses the bbox information to generate streetview datasets for the MixMatch SSL model.

There are several versions of streetview dataset that can be generated through this file:
- streetview_v1: This dataset mixes the TC11 and UCF images. For UCF, only the highest-confidence bbox over the threshold is cropped and added to the dataset. TC11 images are added as positive examples to both the train and test sets.
- streetview_v2: This is a mixture of the TC11 and UCF datasets. Compared to streetview_v1, ALL bboxes over the threshold are cropped and added to the dataset, instead of only the highest-confidence bbox in an image. Also, starting with this version, two UCF views are removed: marked-view and sky-view images are no longer included in the dataset.
- streetview_v3: Similar to streetview_v2. The differences are:
    1. The train set is unbiased, meaning positive and negative examples are equal in quantity.
    2. The train set only contains data from UCF.
    3. The test set combines TC11 images as positive examples with handpicked UCF images with confidence < 0.2 as negative examples.

## Dependencies, global variables, and general functions
To generate any version of the streetview dataset, run this part of the code first.

In [20]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import IPython.display as display
import itertools
import os  # used for directory operations
import io
from PIL import Image  # used to read images from directory

# Eager execution is required by the code below: the parsing helpers call
# `.numpy()` on tensors and the writer loops iterate over datasets directly.
tf.enable_eager_execution()

In [None]:
# Global constants
# Feature keys read from the input tfrecord files
SOURCE_ID = 'image/source_id'
BBOX_CONFIDENCE = 'image/object/bbox/confidence'
BBOX_XMIN = 'image/object/bbox/xmin'
BBOX_YMIN = 'image/object/bbox/ymin'
BBOX_XMAX = 'image/object/bbox/xmax'
BBOX_YMAX = 'image/object/bbox/ymax'

# Confidence thresholds used to label detections: scores below 'neg' are treated
# as negative examples and scores above 'pos' as positive examples.
CONF_THRESHOLD = {'neg': 0.1, 'pos': 0.9}
# Size that every output image is resized to before it is written.
OUTPUT_IMAGE_SIZE = (64, 64)
# Number of input tfrecord shards; used as the upper bound of the shard index loops.
INPUT_RECORD_CNT = 1000

# Locations of the detection tfrecords, the UCF and TC11 images, and the output tfrecords.
INPUT_RECORD_PATH = './streetlearn-detections/'
INPUT_UCF_IMG_DIR = './UCF_Streetview_Dataset/raw/'
INPUT_TC11_IMG_DIR = './TC11/svt1'
OUTPUT_RECORD_PATH = '../ML_DATA/'

In [21]:
# Open a tfrecord file and decode every record into the fields needed for the new dataset.
def read_tfrecord(file_path):
    """Return a dataset of parsed detection records read from `file_path`.

    Each element is a dict keyed by the feature names declared below: the
    source id as a scalar string, and the box confidences and coordinates as
    variable-length float features.
    """
    # The source id is a single string; every bbox field is a variable-length float list.
    feature_spec = {SOURCE_ID: tf.io.FixedLenFeature([], tf.string)}
    for bbox_key in (BBOX_CONFIDENCE, BBOX_XMIN, BBOX_YMIN, BBOX_XMAX, BBOX_YMAX):
        feature_spec[bbox_key] = tf.io.VarLenFeature(tf.float32)

    # Decode each serialized tf.Example with the spec above.
    return tf.data.TFRecordDataset(file_path).map(
        lambda serialized: tf.io.parse_single_example(serialized, feature_spec))

# Parse and clean up the labels into a more straightforward format.
def parse_labels(image_features):
    """Extract the image file name, box confidences and box coordinates of one record.

    Args:
        image_features: one element of the dataset returned by `read_tfrecord`.

    Returns:
        A tuple `(img_name, confidence, bbox)` where
        img_name is the text after the last '/' of the record's source id,
        confidence is an array with one score per detected box, and
        bbox stacks the rows xmin, ymin, xmax, ymax (left, top, right, bottom),
        so column i describes box i.
    """
    # The source id has the form 'cns/path/to/image_file_name.jpg'. Decode the
    # raw bytes rather than slicing their repr, so the name stays correct when
    # the id has no '/' or contains characters that repr would escape.
    source_id = image_features[SOURCE_ID].numpy()
    if isinstance(source_id, bytes):
        source_id = source_id.decode('utf-8', errors='replace')
    img_name = source_id.split('/')[-1]

    confidence = tf.sparse_tensor_to_dense(image_features[BBOX_CONFIDENCE], default_value=0).numpy()
    xmin = tf.sparse_tensor_to_dense(image_features[BBOX_XMIN], default_value=0).numpy()
    ymin = tf.sparse_tensor_to_dense(image_features[BBOX_YMIN], default_value=0).numpy()
    xmax = tf.sparse_tensor_to_dense(image_features[BBOX_XMAX], default_value=0).numpy()
    ymax = tf.sparse_tensor_to_dense(image_features[BBOX_YMAX], default_value=0).numpy()

    bbox = np.vstack((xmin, ymin, xmax, ymax))  # Left, Top, Right, Bottom

    return img_name, confidence, bbox

# Encode an image as JPEG and wrap it, together with its label, in a tf.train.Example.
def img2example(img, label):
    """Build a `tf.train.Example` with the JPEG bytes of `img` and the integer `label`.

    The example has two features: "image" (bytes) and "label" (int64).
    """
    buffer = io.BytesIO()
    img.save(buffer, format='JPEG')
    jpeg_bytes = buffer.getvalue()

    image_feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[jpeg_bytes]))
    label_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
    features = tf.train.Features(feature={"image": image_feature, "label": label_feature})

    return tf.train.Example(features=features)

# Write every usable image of a folder to the given TFRecord writer with a fixed label.
def write_path2tfrecord(folder_path, label, writer):
    """Resize each 3-band image found in `folder_path` and write it with `label`.

    Entries that cannot be opened as images are reported and skipped; images
    that do not have exactly three bands are skipped silently.
    """
    for entry in os.listdir(folder_path):
        img_path = os.path.join(folder_path, entry)

        try:
            img = Image.open(img_path, "r")
        except Exception as e:
            print(e)
            print(img_path + " is not valid")
            continue

        # Only images with exactly three bands are kept.
        if len(img.getbands()) == 3:
            resized = img.resize(OUTPUT_IMAGE_SIZE)
            writer.write(img2example(resized, label).SerializeToString())


## streetview_v1
Run this part of the code to build the streetview_v1 dataset.

In [22]:
# Write one example per UCF image: the best bbox crop as a positive, or the whole image as a negative.
def write_tfrecord_ucf_v1(parsed_image_dataset, folder_path, writer):
    """Write streetview_v1 examples for every record of `parsed_image_dataset`.

    An image whose highest confidence exceeds CONF_THRESHOLD['pos'] is cropped
    to that box and written with label 1. An image with no boxes, or whose
    highest confidence is below CONF_THRESHOLD['neg'], is written uncropped
    with label 0. Every other image is skipped, as are images that cannot be
    opened or that do not have exactly three bands.
    """
    for record in parsed_image_dataset:
        img_name, confidence, bbox = parse_labels(record)
        if not img_name:
            continue

        has_boxes = confidence.size > 0
        best_score = max(confidence) if has_boxes else None

        if has_boxes and best_score > CONF_THRESHOLD['pos']:
            crop_box = bbox[:, np.argmax(confidence)]
        elif not has_boxes or best_score < CONF_THRESHOLD['neg']:
            crop_box = None
        else:
            # Neither clearly positive nor clearly negative.
            continue

        img_path = os.path.join(folder_path, img_name)
        try:
            img = Image.open(img_path, "r")
        except Exception as e:
            print(e)
            print(img_path + " is not valid")
            continue

        # Only images with exactly three bands are kept.
        if len(img.getbands()) != 3:
            continue

        if crop_box is not None:
            label = 1
            img = img.crop(crop_box)
        else:
            label = 0

        resized = img.resize(OUTPUT_IMAGE_SIZE)
        writer.write(img2example(resized, label).SerializeToString())
        

In [23]:
# Names of the streetview_v1 output tfrecord files
OUTPUT_TRAIN_RECORD_FILENAME = "streetview-train.tfrecord"
OUTPUT_TEST_RECORD_FILENAME = "streetview-test.tfrecord"

# tfrecord file writers. The paths are joined with os.path.join so they stay
# valid even if OUTPUT_RECORD_PATH is written without a trailing slash.
train_writer = tf.io.TFRecordWriter(os.path.join(OUTPUT_RECORD_PATH, OUTPUT_TRAIN_RECORD_FILENAME))
test_writer = tf.io.TFRecordWriter(os.path.join(OUTPUT_RECORD_PATH, OUTPUT_TEST_RECORD_FILENAME))

In [None]:
# Add the TC11 train and test folders to the matching writers; every TC11 image gets label 1.
train_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'train')
test_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'test')

write_path2tfrecord(folder_path=train_image_path, label=1, writer=train_writer)
write_path2tfrecord(folder_path=test_image_path, label=1, writer=test_writer)

In [18]:
# Write train and test tfrecords for the UCF dataset.
# The first 90% of the detection shards go to the train set and the rest to the test set.
# range() only accepts integers, so the split index is computed with integer
# arithmetic instead of `0.9 * INPUT_RECORD_CNT`, which is a float.
TRAIN_RECORD_CNT = INPUT_RECORD_CNT * 9 // 10

for i in range(INPUT_RECORD_CNT):
    file_name = "./streetlearn_detections_tfexample-" + str(i).zfill(5) + "-of-01000.tfrecord"
    parsed_image_dataset = read_tfrecord(os.path.join(INPUT_RECORD_PATH, file_name))
    ucf_writer = train_writer if i < TRAIN_RECORD_CNT else test_writer
    write_tfrecord_ucf_v1(parsed_image_dataset, INPUT_UCF_IMG_DIR, ucf_writer)

In [24]:
# Close both writers now that all streetview_v1 train and test examples have been written.
train_writer.close()
test_writer.close()

## streetview_v2
This is a mixture of the TC11 and UCF datasets. ALL bboxes over the threshold are cropped and added to the dataset.
Run this part of the code to create the dataset.

In [None]:
# Crop every bbox with a decisive confidence (above 'pos' or below 'neg') and write it to the TFRecord file.
def write_tfrecord_ucf_v2(parsed_image_dataset, folder_path, writer):
    """Write streetview_v2 examples for every record of `parsed_image_dataset`.

    For each image, every box whose confidence is above CONF_THRESHOLD['pos']
    is cropped and written with label 1, and every box whose confidence is
    below CONF_THRESHOLD['neg'] is cropped and written with label 0. Images
    of view '0' or '5', images without any qualifying box, images that cannot
    be opened, and images that do not have exactly three bands are skipped.

    Args:
        parsed_image_dataset: dataset returned by `read_tfrecord`.
        folder_path: directory containing the UCF images.
        writer: object whose `write` method receives the serialized examples.
    """
    for image_features in parsed_image_dataset:
        img_name, confidence, bbox = parse_labels(image_features)
        # Check the name before indexing into it: an empty name has no last character.
        if not img_name:
            continue

        # The image name has the form XXXXXX_Y.jpg, where Y identifies the view.
        # Slicing with [-1:] yields '' instead of raising when the stem is empty.
        view = img_name.split('.')[0][-1:]
        if view in ('0', '5'):
            continue

        # The label follows the threshold that was passed, so it stays right
        # even if the thresholds are moved away from 0.1 / 0.9.
        targets = [
            (1 if score > CONF_THRESHOLD['pos'] else 0, bbox[:, i])
            for i, score in enumerate(confidence)
            if score > CONF_THRESHOLD['pos'] or score < CONF_THRESHOLD['neg']
        ]
        # Nothing to crop: do not bother opening the image.
        if not targets:
            continue

        img_path = os.path.join(folder_path, img_name)
        try:
            img = Image.open(img_path, "r")
        except Exception as e:
            print(e)
            print(img_path + " is not valid")
            continue

        # Exclude all images that do not have exactly three bands
        if len(img.getbands()) != 3:
            continue

        # NOTE(review): the boxes are passed to crop() as-is, which assumes they
        # are in pixel coordinates - confirm against the detector output.
        for label, crop_box in targets:
            crop_img = img.crop(crop_box).resize(OUTPUT_IMAGE_SIZE)
            writer.write(img2example(crop_img, label).SerializeToString())


In [None]:
# Names of the streetview_v2 output tfrecord files
# NOTE(review): the names say 512 while OUTPUT_IMAGE_SIZE is (64, 64) - confirm which size is intended.
OUTPUT_TRAIN_RECORD_FILENAME = "streetview_v2_512-train.tfrecord"
OUTPUT_TEST_RECORD_FILENAME = "streetview_v2_512-test.tfrecord"

# tfrecord file writers. The paths are joined with os.path.join so they stay
# valid even if OUTPUT_RECORD_PATH is written without a trailing slash.
train_writer = tf.io.TFRecordWriter(os.path.join(OUTPUT_RECORD_PATH, OUTPUT_TRAIN_RECORD_FILENAME))
test_writer = tf.io.TFRecordWriter(os.path.join(OUTPUT_RECORD_PATH, OUTPUT_TEST_RECORD_FILENAME))

In [None]:
# Write train and test tfrecords for the TC11 dataset; every TC11 image gets label 1.
# The constant is spelled INPUT_TC11_IMG_DIR; the lower-case variant used
# here before does not exist and raised a NameError.
train_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'train')
test_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'test')

write_path2tfrecord(train_image_path, 1, train_writer)
write_path2tfrecord(test_image_path, 1, test_writer)

In [None]:
# Write train and test tfrecords for the UCF dataset.
# The first 90% of the detection shards go to the train set and the rest to the test set.
# range() only accepts integers, so the split index is computed with integer
# arithmetic instead of `0.9 * INPUT_RECORD_CNT`, which is a float.
TRAIN_RECORD_CNT = INPUT_RECORD_CNT * 9 // 10

for i in range(INPUT_RECORD_CNT):
    file_name = "./streetlearn_detections_tfexample-" + str(i).zfill(5) + "-of-01000.tfrecord"
    parsed_image_dataset = read_tfrecord(os.path.join(INPUT_RECORD_PATH, file_name))
    ucf_writer = train_writer if i < TRAIN_RECORD_CNT else test_writer
    write_tfrecord_ucf_v2(parsed_image_dataset, INPUT_UCF_IMG_DIR, ucf_writer)

In [None]:
# Close both writers now that all streetview_v2 train and test examples have been written.
train_writer.close()
test_writer.close()

## streetview_v3

In [None]:
from shutil import copyfile

# Folder whose images are written as negative (label 0) examples of the v3 test set.
INPUT_UCF_FILTER_IMG_DIR = './UCF_Streetview_Dataset/test/'
# Folder that the low-confidence UCF candidates are copied into for handpicking.
OUTPUT_UCF_IMG_DIR = './UCF_Streetview_Dataset/crop/'
# Names of the streetview_v3 output tfrecord files.
OUTPUT_TEST_RECORD_FILENAME = 'streetview_v3_64-test.tfrecord'
OUTPUT_TRAIN_RECORD_FILENAME = 'streetview_v3_64-train.tfrecord'

### streetview_v3-test
This test set uses all TC11 images as positive cases and handpicked UCF images with confidence < 0.2 as negative cases.

In [None]:
# Collect the names of the images whose detections all score below the threshold.
# These images are handpicked afterwards to serve as negative examples in the test set.
def filter_image(parsed_image_dataset, folder_path, threshold):
    """Return the names of images with no box, or whose best confidence is below `threshold`.

    `folder_path` is accepted but not used by this function.
    """
    low_confidence_names = []

    for record in parsed_image_dataset:
        img_name, confidence, _ = parse_labels(record)
        if not img_name:
            continue

        if confidence.size == 0 or max(confidence) < threshold:
            low_confidence_names.append(img_name)

    return low_confidence_names


In [None]:
# Filter UCF images from the last 10% of the shards and copy them for further handpicking of the test set.
res = []
# range() only accepts integers, so the start index is computed with integer
# arithmetic instead of `0.9 * INPUT_RECORD_CNT`, which is a float.
for i in range(INPUT_RECORD_CNT * 9 // 10, INPUT_RECORD_CNT):
    file_name = "./streetlearn_detections_tfexample-" + str(i).zfill(5) + "-of-01000.tfrecord"
    parsed_image_dataset = read_tfrecord(os.path.join(INPUT_RECORD_PATH, file_name))
    res.extend(filter_image(parsed_image_dataset, INPUT_UCF_IMG_DIR, 0.2))

# Make sure the destination folder exists before copying into it.
os.makedirs(OUTPUT_UCF_IMG_DIR, exist_ok=True)

for img_name in res:
    # The image name has the form XXXXXX_Y.jpg, where Y identifies the view.
    view = img_name.split('.')[0][-1:]
    # Skip the views '0' and '5' (this includes the sky images).
    if view != '5' and view != '0':
        copyfile(os.path.join(INPUT_UCF_IMG_DIR, img_name), os.path.join(OUTPUT_UCF_IMG_DIR, img_name))

In [None]:
# Build streetview_v3_64-test: TC11 images as positives (label 1) and the filtered UCF images as negatives (label 0).
test_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'img')

# The with-block closes the writer even if writing one of the folders fails.
with tf.io.TFRecordWriter(os.path.join(OUTPUT_RECORD_PATH, OUTPUT_TEST_RECORD_FILENAME)) as test_writer:
    write_path2tfrecord(test_image_path, 1, test_writer)
    write_path2tfrecord(INPUT_UCF_FILTER_IMG_DIR, 0, test_writer)

### streetview_v3-train (Unbiased UCF training set)

In [None]:
# Number of examples written so far for each label. It lives at module level so
# that the balance limit applies across all calls, not per input file.
cnt = {0: 0, 1: 0}


def write_tfrecord_ucf_v3(parsed_image_dataset, folder_path, writer, balance_threshold):
    """Write balanced streetview_v3 training examples for every record of the dataset.

    Every box with a confidence above CONF_THRESHOLD['pos'] (label 1) or below
    CONF_THRESHOLD['neg'] (label 0) is cropped, resized and written, unless
    the module-level counter `cnt` for that label already exceeds
    `balance_threshold`. Images of view '0' or '5', images without any
    qualifying box, images that cannot be opened, and images that do not have
    exactly three bands are skipped.

    Args:
        parsed_image_dataset: dataset returned by `read_tfrecord`.
        folder_path: directory containing the UCF images.
        writer: object whose `write` method receives the serialized examples.
        balance_threshold: per-label limit; a falsy value disables balancing.

    Returns:
        The number of examples written by this call.
    """
    written = 0

    for image_features in parsed_image_dataset:
        img_name, confidence, bbox = parse_labels(image_features)
        # Check the name before indexing into it: an empty name has no last character.
        if not img_name:
            continue

        # The image name has the form XXXXXX_Y.jpg, where Y identifies the view.
        view = img_name.split('.')[0][-1:]
        if view in ('0', '5'):
            continue

        targets = [
            (1 if score > CONF_THRESHOLD['pos'] else 0, bbox[:, i])
            for i, score in enumerate(confidence)
            if score > CONF_THRESHOLD['pos'] or score < CONF_THRESHOLD['neg']
        ]
        if not targets:
            continue

        img_path = os.path.join(folder_path, img_name)
        try:
            img = Image.open(img_path, "r")
        except Exception as e:
            print(e)
            print(img_path + " is not valid")
            # Without this, the loop would go on with an undefined image or
            # with the image of the previous record.
            continue

        # Exclude all images that do not have exactly three bands
        if len(img.getbands()) != 3:
            continue

        for label, crop_box in targets:
            # Skip labels that already exceed the limit, to keep the set balanced.
            if balance_threshold and cnt[label] > balance_threshold:
                continue

            cnt[label] += 1

            crop_img = img.crop(crop_box).resize(OUTPUT_IMAGE_SIZE)
            writer.write(img2example(crop_img, label).SerializeToString())
            written += 1

    return written

In [None]:
# Build streetview_v3_64-train from the first 90% of the detection shards.
# write_tfrecord_ucf_v3 keeps its per-label balance counters in the module-level
# dict `cnt`; reset it so each build starts counting from zero.
cnt = {0: 0, 1: 0}

# The with-block closes the writer even if one of the shards fails.
with tf.io.TFRecordWriter(os.path.join(OUTPUT_RECORD_PATH, OUTPUT_TRAIN_RECORD_FILENAME)) as train_writer:
    # range() only accepts integers, so the bound is computed with integer
    # arithmetic instead of `0.9 * INPUT_RECORD_CNT`, which is a float.
    for i in range(INPUT_RECORD_CNT * 9 // 10):
        file_name = "./streetlearn_detections_tfexample-" + str(i).zfill(5) + "-of-01000.tfrecord"
        parsed_image_dataset = read_tfrecord(os.path.join(INPUT_RECORD_PATH, file_name))
        # The function's parameter is named balance_threshold, not threshold.
        write_tfrecord_ucf_v3(parsed_image_dataset, INPUT_UCF_IMG_DIR, train_writer, balance_threshold=19887)

# Total number of UCF training examples written, taken from the per-label counters.
UCF_train_cnt = sum(cnt.values())