## Template for Preparing Data as TFRecords
TFRecord (*.tfrecord) is the recommended format for loading data into TensorFlow models.

This template can be used to save data as TFRecord files.

Relevant TensorFlow Docs
<ul>
    <li><a href='https://www.tensorflow.org/api_docs/python/tf/train/Int64List'>Int64List</a> and <a href='https://www.tensorflow.org/api_docs/python/tf/train/BytesList'>BytesList</a> and <a href='https://www.tensorflow.org/api_docs/python/tf/train/FloatList'>FloatList</a></li>
    <li><a href='https://www.tensorflow.org/versions/master/api_docs/python/tf/train/Feature'>Feature</a></li>
    <li><a href='https://www.tensorflow.org/versions/master/api_docs/python/tf/train/Example'>Example</a></li>
    <li><a href='https://www.tensorflow.org/api_docs/python/tf/python_io/TFRecordWriter'>TFRecordWriter</a></li>
    
</ul>

In [None]:
import tensorflow as tf
import numpy as np
import glob, os

def int64_feature(value):
    """Wrap a single value in a tf.train.Feature holding an Int64List.

    The value is placed in a one-element list, so pass a scalar rather
    than a sequence.
    """
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def bytes_feature(value):
    """Wrap a single value in a tf.train.Feature holding a BytesList.

    The value is placed in a one-element list, so pass one item rather
    than a sequence of items.
    """
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

def float_feature(value):
    """Wrap a single value in a tf.train.Feature holding a FloatList.

    The value is placed in a one-element list, so pass a scalar rather
    than a sequence.
    """
    # A FloatList belongs in the `float_list` field of Feature; passing it
    # as `bytes_list` hands the constructor the wrong message type.
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

In [None]:
def create_tfrecord(filename, examples, parse_fn):
    """Create a TFRecords file.

    inputs:
        filename - name of TFRecord file, <filename>.tfrecord
        examples - python list of examples to write to <filename>.tfrecord
                 - each element of the list is a dictionary of attributes,
                   {'image': ..., 'label': ...}
        parse_fn - function that parses the example and returns its
                   dictionary of features ready to be passed to
                   tf.train.Features(feature=...)
    """
    # Using the writer as a context manager closes it when the block exits,
    # including when parse_fn or a write raises part-way through.
    with tf.python_io.TFRecordWriter(filename) as writer:
        # Write each data exemplar to the TFRecord file
        for count, raw_example in enumerate(examples, start=1):
            # Parse this example
            features = parse_fn(raw_example)

            # Create Example out of Features
            example = tf.train.Example(features=tf.train.Features(feature=features))

            # Write Example to TFRecord file
            writer.write(example.SerializeToString())

            # Monitor progress; `count` is the number of records written so far
            if count % 1000 == 0:
                print(str(count) + ' examples written to ' + str(filename) + '.')

In [None]:
def parse(example):
    """Parse the example and return its dictionary of features,
    ready to be passed to tf.train.Features(feature=...).

    inputs:
        example - dictionary of features

    outputs:
        features - dictionary of tf.train.Feature objects

    This is a template stub: as written it ignores `example` and returns
    an empty dictionary.
    """
    # Fill in one entry per feature, mapping a feature name to a
    # tf.train.Feature object, e.g. (key names here are illustrative only):
    #     feature_map['label'] = int64_feature(example['label'])
    feature_map = dict()
    return feature_map


In [None]:
# Gather Data
# Each entry directly under images/ is globbed for the PNG files inside it.
data_dirs = glob.glob('images/*')
data_files = [
    png_path
    for data_dir in data_dirs
    for png_path in glob.glob(data_dir + '/*.png')
]


In [None]:
# Any testing of dataset
#    Pre-computing mean/std of data
#    Count of class imbalance
#    Visualization of some data (i.e. plt.imshow(...))
#    etc.


In [None]:
# Some split into train and test as lists where each index of the
#    list is a dictionary that maps feature names to values.
# Example:
#    train_data[0] == {'feature_name' : ..., 'label' : ...}
train_data = []
valid_data = []
# ...

# Output directory and list of examples for each mode.
base = {'train' : 'train_tfrecords/', 'valid' : 'valid_tfrecords/'}
files = {'train' : train_data, 'valid' : valid_data}

# Make these base directories if they don't already exist.
# os.makedirs creates any missing parent directories itself, and unlike
# splitting the path on '/' by hand it also copes with absolute paths
# (whose leading component would be an empty string).
for out_dir in base.values():
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# Each mode's examples are written in consecutive chunks of at most
# `records_per_file`; a file is named after the index of its first example.
records_per_file = 50
for mode in ['train', 'valid']:
    print('Creating ' + mode + ' TFRecords...')
    for r in range(0, len(files[mode]), records_per_file):
        file_name = base[mode] + str(r) + '.tfrecord'
        record_files = files[mode][r:r+records_per_file]
        create_tfrecord(file_name, record_files, parse)