# Parse_TFRecord
This file parses the bounding boxes (bbox) and confidence scores from the tfrecord files generated by the storefront detector model on the UCF dataset.
It then uses the bbox information to generate streetview datasets for the MixMatch SSL model.

There are several versions of streetview dataset that can be generated through this file:
- streetview_v1: This dataset mixes the TC11 and UCF images. For UCF, only the highest-confidence bbox over the threshold in each image is cropped and added to the dataset. The TC11 images are added as positive examples to both the train and test sets.
- streetview_v2: This is a mixture of the TC11 and UCF datasets. Compared to streetview_v1, ALL bboxes over the threshold are cropped and added to the dataset, instead of only the highest-confidence bbox in an image. Also, starting from this version, two UCF views are removed: marked-view and sky-view images are no longer included in the dataset.
- streetview_v3: It is similar to streetview_v2. The differences are:
    1. The train set is balanced, which means positive and negative examples are equal in quantity.
    2. The train set only contains data from UCF.
    3. The test set is a combination of TC11 images as positive examples and hand-picked UCF images with confidence < 0.2 as negative examples.

## Dependencies and global variables
To generate all versions of streetview dataset, this part of the code should be run beforehand.

In [None]:
from parse_data_to_tfrecord_lib import read_tfrecord, parse_labels, img_to_example, write_tfrecord_from_images, write_tfrecord_from_detection, filter_image, batch_read_write_tfrecords
import numpy as np
import tensorflow as tf
import os  # used for directory operations
from shutil import copyfile

# Turn on eager execution before any other TensorFlow call in this notebook.
# NOTE(review): `tf.enable_eager_execution` is the TF 1.x spelling of this
# call -- confirm the TensorFlow version this notebook is meant to run on.
tf.enable_eager_execution()

In [None]:
# Global constants shared by every dataset version built in this notebook.

# Detection tfrecord inputs. INPUT_RECORD_CNT is used to derive the
# train/test file-index ranges (first 90% / last 10%).
INPUT_RECORD_DIR = './streetlearn-detections/'
INPUT_RECORD_CNT = 1000

# Image inputs. The UCF directory is concatenated with file names using `+`,
# so it keeps its trailing slash; the TC11 directory is only ever combined
# through os.path.join, so it does not need one.
INPUT_UCF_IMG_DIR = './UCF_Streetview_Dataset/raw/'
INPUT_TC11_IMG_DIR = './TC11/svt1'

# Destination of the generated tfrecords. It is concatenated with the output
# file name using `+`, so the trailing slash is required.
OUTPUT_RECORD_DIR = '../ML_DATA/'

## streetview_v1
Run this part of code to build streetview_v1 dataset

In [None]:
# File names of the streetview_v1 train / test tfrecords.
OUTPUT_TRAIN_RECORD_FILENAME = "streetview-train.tfrecord"
OUTPUT_TEST_RECORD_FILENAME = "streetview-test.tfrecord"

# One writer per split. Both stay open across the following cells and are
# closed explicitly once the TC11 and UCF examples have been written.
train_writer = tf.io.TFRecordWriter(
    "{}{}".format(OUTPUT_RECORD_DIR, OUTPUT_TRAIN_RECORD_FILENAME)
)
test_writer = tf.io.TFRecordWriter(
    "{}{}".format(OUTPUT_RECORD_DIR, OUTPUT_TEST_RECORD_FILENAME)
)

In [None]:
# Append the TC11 images to the train and test tfrecords.
# The second argument (1) is presumably the positive-class label -- the
# notebook describes TC11 as positive examples; confirm in the library.
train_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'train')
write_tfrecord_from_images(train_image_path, 1, train_writer)

test_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'test')
write_tfrecord_from_images(test_image_path, 1, test_writer)

In [None]:
# Append the UCF detections to the train and test tfrecords.
# Options handed to batch_read_write_tfrecords for streetview_v1.
detection_property = dict(
    include_top_camera=True,
    only_keep_top_confidence=True,
    balance_threshold=False,
)

# The first 90% of the detection record files feed the train set; the
# remaining 10% feed the test set.
train_batch_range = [0, int(0.9 * INPUT_RECORD_CNT)]
test_batch_range = [train_batch_range[1], INPUT_RECORD_CNT]

batch_read_write_tfrecords(
    train_batch_range, INPUT_RECORD_DIR, INPUT_UCF_IMG_DIR, train_writer, detection_property
)
batch_read_write_tfrecords(
    test_batch_range, INPUT_RECORD_DIR, INPUT_UCF_IMG_DIR, test_writer, detection_property
)

In [None]:
# Close both streetview_v1 writers, train first and then test.
for _writer in (train_writer, test_writer):
    _writer.close()

## streetview_v2
Run this part of code to build streetview_v2 dataset

In [None]:
# File names of the streetview_v2 train / test tfrecords.
OUTPUT_TRAIN_RECORD_FILENAME = "streetview_v2_512-train.tfrecord"
OUTPUT_TEST_RECORD_FILENAME = "streetview_v2_512-test.tfrecord"

# One writer per split. Both stay open across the following cells and are
# closed explicitly once the TC11 and UCF examples have been written.
train_writer = tf.io.TFRecordWriter(
    "{}{}".format(OUTPUT_RECORD_DIR, OUTPUT_TRAIN_RECORD_FILENAME)
)
test_writer = tf.io.TFRecordWriter(
    "{}{}".format(OUTPUT_RECORD_DIR, OUTPUT_TEST_RECORD_FILENAME)
)

In [None]:
# Write train and test tfrecords for the TC11 dataset.
# The directory constant is INPUT_TC11_IMG_DIR (all upper case); the
# mixed-case spelling used here previously is not defined anywhere and
# raised a NameError.
train_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'train')
test_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'test')

# The second argument (1) is presumably the positive-class label -- the
# notebook describes TC11 as positive examples; confirm in the library.
write_tfrecord_from_images(train_image_path, 1, train_writer)
write_tfrecord_from_images(test_image_path, 1, test_writer)

In [None]:
# Append the UCF detections to the train and test tfrecords.
# Options handed to batch_read_write_tfrecords for streetview_v2.
detection_property = dict(
    include_top_camera=False,
    only_keep_top_confidence=False,
    balance_threshold=False,
)

# The first 90% of the detection record files feed the train set; the
# remaining 10% feed the test set.
train_batch_range = [0, int(0.9 * INPUT_RECORD_CNT)]
test_batch_range = [train_batch_range[1], INPUT_RECORD_CNT]

batch_read_write_tfrecords(
    train_batch_range, INPUT_RECORD_DIR, INPUT_UCF_IMG_DIR, train_writer, detection_property
)
batch_read_write_tfrecords(
    test_batch_range, INPUT_RECORD_DIR, INPUT_UCF_IMG_DIR, test_writer, detection_property
)

In [None]:
# Close both streetview_v2 writers, train first and then test.
for _writer in (train_writer, test_writer):
    _writer.close()

## streetview_v3
Run this part of code to build streetview_v3 dataset

In [None]:
# Input / output locations used only by the streetview_v3 build.

# Filtered UCF crops are copied into OUTPUT_UCF_IMG_DIR first.
OUTPUT_UCF_IMG_DIR = './UCF_Streetview_Dataset/crop/'
# NOTE: The images within INPUT_UCF_FILTER_IMG_DIR will be handpicked from
# the outputs of OUTPUT_UCF_IMG_DIR.
INPUT_UCF_FILTER_IMG_DIR = './UCF_Streetview_Dataset/test/'

# File names of the streetview_v3 train / test tfrecords.
OUTPUT_TRAIN_RECORD_FILENAME = 'streetview_v3_64-train.tfrecord'
OUTPUT_TEST_RECORD_FILENAME = 'streetview_v3_64-test.tfrecord'

### streetview_v3-test
This test set uses all TC11 images as positive cases, and hand-picked UCF images with confidence < 0.2 as negative cases.

In [None]:
# Filter UCF images and copy the candidates for further hand-picking of the
# test set. Only the last 10% of the detection record files are scanned,
# i.e. the same index range the earlier versions use for their test split.
res = []
for i in range(int(0.9 * INPUT_RECORD_CNT), INPUT_RECORD_CNT):
    file_name = "./streetlearn_detections_tfexample-" + str(i).zfill(5) + "-of-01000.tfrecord"
    # The reader is imported as `read_tfrecord`; the mixed-case spelling used
    # here previously raised a NameError.
    parsed_image_dataset = read_tfrecord(os.path.join(INPUT_RECORD_DIR, file_name))
    # 0.2 matches the "confidence < 0.2" rule for negative test examples.
    # Presumably filter_image returns image file names relative to
    # INPUT_UCF_IMG_DIR (they are used that way below) -- confirm in the library.
    res.extend(filter_image(parsed_image_dataset, INPUT_UCF_IMG_DIR, 0.2))

for img_name in res:
    # The format of the image name is XXXXXX_Y.jpg, where Y is the view of
    # the image.
    view = img_name.split('.')[0][-1]
    # Skip views '5' and '0' (the sky view and, presumably, the marked view
    # that the later dataset versions leave out).
    if view not in ('5', '0'):
        # copyfile does not create directories: OUTPUT_UCF_IMG_DIR must
        # already exist.
        copyfile(INPUT_UCF_IMG_DIR + img_name, OUTPUT_UCF_IMG_DIR + img_name)

In [None]:
# Build streetview_v3_64-test: write the test tfrecord from the TC11 images
# and the hand-picked UCF images.
test_image_path = os.path.join(INPUT_TC11_IMG_DIR, 'img')

# The writer is used as a context manager so the output file is closed even
# if one of the writes raises.
with tf.io.TFRecordWriter(OUTPUT_RECORD_DIR + OUTPUT_TEST_RECORD_FILENAME) as test_writer:
    # TC11 images are written with 1 and the hand-picked UCF images with 0;
    # per the notebook text these are the positive and negative cases.
    write_tfrecord_from_images(test_image_path, 1, test_writer)
    write_tfrecord_from_images(INPUT_UCF_FILTER_IMG_DIR, 0, test_writer)

### streetview_v3-train (Balanced UCF training set)

In [None]:
# Build the streetview_v3 train set: write the train tfrecord from the UCF
# detections only (no test data is written in this cell).
# NOTE(review): balance_threshold=19887 is presumably the cap used to balance
# positive and negative examples -- confirm in batch_read_write_tfrecords.
detection_property = {'include_top_camera':False, 'only_keep_top_confidence':False, 'balance_threshold':19887}
# The first 90% of the detection record files form the train split.
train_batch_range = [0, int(0.9 * INPUT_RECORD_CNT)]

# The writer is used as a context manager so the output file is closed even
# if the batch write raises.
with tf.io.TFRecordWriter(OUTPUT_RECORD_DIR + OUTPUT_TRAIN_RECORD_FILENAME) as train_writer:
    batch_read_write_tfrecords(train_batch_range, INPUT_RECORD_DIR, INPUT_UCF_IMG_DIR, train_writer, detection_property)