# TFRecord Builder

- num_tfrecords: the number of TFRecord files to write
- Any leftover images (when the total is not a multiple of IMAGE_PER_TFRECORD) are accounted for separately with an if statement
- Each TFRecord file stores up to 100 images and their labels, selected using idx and IMAGE_PER_TFRECORD
- The image_example function (see the "TFRecord functions" section) defines the storage type of the images and labels

In [10]:
import glob
import os
import tensorflow as tf
import cv2

## Paths and Hyperparameters

In [12]:
# Root directories that are walked recursively for '.png' images.
# NOTE: despite the "_PATTERN" suffix these are plain directory paths, not
# glob patterns; they are passed to get_file_list() as the directory to walk.
# Images under OK get label 0, images under FAIL get label 1.
DATASET_OK_PATTERN = '/Users/shim/dl-python-ImageDetection/dataset/3/OK/'
DATASET_FAIL_PATTERN = '/Users/shim/dl-python-ImageDetection/dataset/3/FAIL/'

# Output directory for the generated '*.tfrecords' shard files.
TFRECORD_PATH = '/Users/shim/dl-python-ImageDetection/dataset/tfrecords/'
# Number of (image, label) examples stored in each TFRecord shard.
IMAGE_PER_TFRECORD = 100

## Import data

In [13]:
def get_file_list(directory, extension):
    """Recursively collect files below ``directory`` ending in ``extension``.

    Args:
        directory: Root directory to walk (sub-directories are included).
        extension: Filename suffix to keep, e.g. ``'.png'``. Matching is a
            plain, case-sensitive ``str.endswith`` check.

    Returns:
        A list of paths (each ``os.path.join(root, filename)``) in the order
        ``os.walk`` yields them. Empty if nothing matches or the directory
        does not exist.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(extension)
    ]

In [14]:
# Collect every '.png' under the OK and FAIL dataset directories.
ok_list = get_file_list(DATASET_OK_PATTERN, '.png')
fail_list = get_file_list(DATASET_FAIL_PATTERN, '.png')

# Counts taken BEFORE oversampling; num_fail keeps the original FAIL count.
num_ok = len(ok_list)
num_fail = len(fail_list)

# Oversampling: resize the FAIL list to exactly num_ok entries by repeating it
# whole `repeats` times and then appending its first `remainder` items.
# (If there are more FAIL than OK images, repeats is 0 and the FAIL list is
# truncated to its first num_ok items instead.)
if num_fail == 0:
    # Without this guard the division below raises a bare ZeroDivisionError
    # that gives no hint about the actual problem.
    raise ValueError(
        "No '.png' files found under %r; cannot oversample the FAIL class."
        % DATASET_FAIL_PATTERN
    )
repeats, remainder = divmod(num_ok, num_fail)
fail_list_new = fail_list * repeats + fail_list[:remainder]
fail_list = fail_list_new

# Labels: 0 for OK images, 1 for FAIL images (after oversampling).
ok_label = [0] * len(ok_list)
fail_label = [1] * len(fail_list)

# Parallel lists: file_list[i] is the image path, label_list[i] its label.
# All OK entries come first, followed by all FAIL entries (no shuffling here).
file_list = ok_list + fail_list
label_list = ok_label + fail_label

## TFRecord functions

In [15]:
def _bytes_feature(value):
    """Wrap a single string / bytes value in a ``tf.train.Feature``.

    Args:
        value: The bytes (or string) payload, or an eager tensor holding it.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` contains ``value`` as its
        only element.
    """
    eager_tensor_type = type(tf.constant(0))
    if isinstance(value, eager_tensor_type):
        # BytesList cannot unpack a string from an EagerTensor, so pull out
        # the underlying value first.
        value = value.numpy()
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)

def _int64_feature(value):
    """Wrap a single bool / enum / int / uint value in a ``tf.train.Feature``.

    Args:
        value: The integer-like scalar to store.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` contains ``value`` as its
        only element.
    """
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)

def image_example(image_string, label):
    """Build a ``tf.train.Example`` for one encoded image and its label.

    The image is decoded only to read its shape; the stored payload is the
    original encoded bytes.

    Args:
        image_string: Encoded image file contents (as read from disk).
        label: Integer class label for the image.

    Returns:
        A ``tf.train.Example`` with int64 features ``height``, ``width``,
        ``depth`` and ``label``, and a bytes feature ``image_raw``.
    """
    dims = tf.image.decode_image(image_string).shape

    # Integer-valued fields: the first three decoded dimensions plus the label.
    int_fields = {
        'height': dims[0],
        'width': dims[1],
        'depth': dims[2],
        'label': label,
    }
    feature = {key: _int64_feature(val) for key, val in int_fields.items()}
    feature['image_raw'] = _bytes_feature(image_string)

    features = tf.train.Features(feature=feature)
    return tf.train.Example(features=features)

## Write TFRecords

In [16]:
# Create the output directory, including any missing parents; exist_ok makes
# this a no-op when the directory is already there.
os.makedirs(TFRECORD_PATH, exist_ok=True)

# Number of shards = ceil(len(file_list) / IMAGE_PER_TFRECORD): count the full
# shards, then add one more only if some images are left over.
num_tfrecords = len(file_list) // IMAGE_PER_TFRECORD
if len(file_list) % IMAGE_PER_TFRECORD != 0:
    num_tfrecords += 1

for idx in range(num_tfrecords):
    # Shard `idx` holds the half-open slice [idx0, idx1) of the dataset, so
    # consecutive shards are disjoint and together cover every example once.
    # The last slice is simply shorter when the total is not a multiple of
    # IMAGE_PER_TFRECORD.
    idx0 = idx * IMAGE_PER_TFRECORD
    idx1 = idx0 + IMAGE_PER_TFRECORD
    record_file = os.path.join(TFRECORD_PATH, '%05d.tfrecords' % idx)
    with tf.io.TFRecordWriter(record_file) as writer:
        for filename, label in zip(file_list[idx0:idx1],
                                   label_list[idx0:idx1]):
            # Read the encoded image bytes; the context manager closes the
            # file handle promptly instead of leaking it.
            with open(filename, 'rb') as image_file:
                image_string = image_file.read()
            tf_example = image_example(image_string, label)
            writer.write(tf_example.SerializeToString())