In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import keras
import PIL
from PIL import Image, ImageFilter
import numpy as np
import os
import pathlib
import glob
import time

# JPG to TFRECORD Converter

This notebook takes a directory of JPG files, assumed to be consecutive frames from a video, and converts them into a .tfrecord file for use with frame prediction models. <br>

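It is recommended to name the frames with a zero-padded index, e.g. frame0052.jpg (frame%04d), using a tool like ffmpeg or OpenCV. The filenames are sorted as plain strings, so zero-padding is what keeps them in frame order.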

In [2]:
# Helper Functions
def _int64_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
    
def _bytes_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

def image_feature(value):
    """JPEG-encode an image and wrap the encoded bytes in a ``tf.train.Feature``.

    ``value`` is passed to ``tf.io.encode_jpeg``; the result is converted with
    ``.numpy()`` and stored as the only entry of a ``BytesList``.
    """
    jpeg_bytes = tf.io.encode_jpeg(value).numpy()
    payload = tf.train.BytesList(value=[jpeg_bytes])
    return tf.train.Feature(bytes_list=payload)

def make_square(im, min_size=512, fill_color=(0, 0, 0, 0)):
    """Return a new square RGB image with ``im`` pasted in its center.

    The side of the square is the largest of ``min_size`` and the width and
    height of ``im``. The canvas is filled with ``fill_color`` before ``im``
    is pasted; the paste offsets are half of the leftover space on each axis,
    truncated to an integer.
    """
    width, height = im.size
    side = max(min_size, width, height)
    canvas = Image.new('RGB', (side, side), fill_color)
    left = int((side - width) / 2)
    top = int((side - height) / 2)
    canvas.paste(im, (left, top))
    return canvas

In [3]:
# Here, take a directory of frames and prep for prediction
frame_dir = '/PATH/TO/DIR/OF/FRAMES/*'

# Plain string sort of every path matched by the glob pattern.
frames = sorted(glob.glob(frame_dir))

# Everything except the last five paths.
x_filenames = frames[:-5]

rule = "-"*20
print(rule)
print("X Filenames Length: ", len(x_filenames))
print(rule)

--------------------
X Filenames Length:  29323
--------------------


In [4]:
# Turns image data into tensorflow Example
def create_example(x, y):
    """Pack five input images and one target image into a ``tf.train.Example``.

    Args:
        x: indexable holding at least five images; ``x[0]`` .. ``x[4]`` are
            stored under the keys ``"image1"`` .. ``"image5"``.
        y: image stored under the key ``"y"``.

    Every image is converted with ``image_feature`` before being stored.
    """
    input_keys = ("image1", "image2", "image3", "image4", "image5")

    feature = {}
    for position, key in enumerate(input_keys):
        feature[key] = image_feature(x[position])
    feature["y"] = image_feature(y)

    features = tf.train.Features(feature=feature)
    return tf.train.Example(features=features)

In [9]:
# Create tfrecord:
outs = '/PATH/TO/DATA/OUTPUT/training_data.tfrecord'

# Number of examples to write, computed once instead of on every iteration.
# Each example reads x_filenames[i] .. x_filenames[i+5], so with this bound the
# highest index touched is len(x_filenames) - 6 and the loop never runs past
# the end of the list.
n_examples = len(x_filenames) - 10

with tf.io.TFRecordWriter(outs) as writer:
    for i in range(n_examples):
        if i % 100 == 0:
            print("writing", i, "entry of", n_examples)

        # Read 5 frames
        x = []
        for j in range(5):
            if i == 0:
                # Visual sanity check: show the input frames of the first example.
                Image.open(x_filenames[i+j]).show()
            x.append(tf.io.decode_jpeg(tf.io.read_file(x_filenames[i+j])))

        # Read 6th frame
        y = tf.io.decode_jpeg(tf.io.read_file(x_filenames[i+5]))
        if i == 0:
            # Pause briefly, then show the target frame of the first example.
            time.sleep(1)
            Image.open(x_filenames[i+5]).show()

        # Write to tfrecord
        example = create_example(x, y)
        writer.write(example.SerializeToString())

writing 0 entry of 9341
writing 100 entry of 9341
writing 200 entry of 9341
writing 300 entry of 9341
writing 400 entry of 9341
writing 500 entry of 9341
writing 600 entry of 9341
writing 700 entry of 9341
writing 800 entry of 9341
writing 900 entry of 9341
writing 1000 entry of 9341
writing 1100 entry of 9341
writing 1200 entry of 9341
writing 1300 entry of 9341
writing 1400 entry of 9341
writing 1500 entry of 9341
writing 1600 entry of 9341
writing 1700 entry of 9341
writing 1800 entry of 9341
writing 1900 entry of 9341
writing 2000 entry of 9341
writing 2100 entry of 9341
writing 2200 entry of 9341
writing 2300 entry of 9341
writing 2400 entry of 9341
writing 2500 entry of 9341
writing 2600 entry of 9341
writing 2700 entry of 9341
writing 2800 entry of 9341
writing 2900 entry of 9341
writing 3000 entry of 9341
writing 3100 entry of 9341
writing 3200 entry of 9341
writing 3300 entry of 9341
writing 3400 entry of 9341
writing 3500 entry of 9341
writing 3600 entry of 9341
writing 3700 

## Validation code

In [6]:
# READING TFRECORD
def parse_tfrecord_fn(example):
    """Parse one serialized example and JPEG-decode all six of its images.

    The record is expected to hold scalar string features under the keys
    ``"image1"`` .. ``"image5"`` and ``"y"``. Each is decoded with
    ``tf.io.decode_jpeg(..., channels=3)``.

    Returns:
        dict mapping each of the six keys to its decoded image.
    """
    image_keys = ("image1", "image2", "image3", "image4", "image5", "y")

    feature_description = {
        key: tf.io.FixedLenFeature([], tf.string) for key in image_keys
    }
    parsed = tf.io.parse_single_example(example, feature_description)

    for key in image_keys:
        parsed[key] = tf.io.decode_jpeg(parsed[key], channels=3)

    return parsed

In [7]:
# Open the TFRecord file at `outs` as a dataset of serialized records and map
# parse_tfrecord_fn over it to get dictionaries of decoded images.
raw_dataset = tf.data.TFRecordDataset(filenames=outs)
parsed_dataset = raw_dataset.map(map_func=parse_tfrecord_fn)

In [8]:
# Take the first parsed example, print the shape of every image in it and
# open each one in the system image viewer.
for features in parsed_dataset.take(1):
    for key, tensor in features.items():
        frame = tensor.numpy()
        print(frame.shape)
        Image.fromarray(frame).show()

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
