# Setup:

Select a GPU before starting this ipython server. As an example, to use /gpu:4 on the server, use the following command:

*$ export CUDA_VISIBLE_DEVICES=4*

In case this variable is not set or mulitple devices are selected, TensorFlow will allocate memory on **all** devices, but will run only on /gpu:0.

In [5]:
# Force matplotlib to use inline rendering
%matplotlib inline

import os
import sys

# add path to libraries for ipython
sys.path.append(os.path.expanduser("~/libs"))

import time
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensortools as tt

import model.conv_deconv_model as model
# import model.conv_lstm_model as model

In [6]:
# Global config parameters
DATASET_URL = 'http://crcv.ucf.edu/data/UCF11_updated_mpg.rar'
DATASET_DIR = 'tmp'

SERIALIZED_SEQ_LENGTH = 30
INPUT_SEQ_LENGTH = 5

FRAME_SCALE_FACTOR = 1.0
FRAME_WIDTH = int(320 * FRAME_SCALE_FACTOR)
FRAME_HEIGHT = int(240 * FRAME_SCALE_FACTOR)
FRAME_CHANNELS = 1

MIN_FRACTION_EXAMPLES_IN_QUEUE = 0.15
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 1544
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 0

MOVING_AVERAGE_DECAY = 0.9999
NUM_EPOCHS_PER_DECAY = 250.0
LEARNING_RATE_DECAY_FACTOR = 0.2
INITIAL_LEARNING_RATE = 0.001

LAMBDA = 5e-4

BATCH_SIZE = 64

TRAIN_DIR = 'train'
MAX_STEPS = 30000

# Reminder: Uses the GPU that is selected by CUDA_VISIBLE_DEVICES!
GPU_MEMORY_FRACTION = 1.0

In [7]:
# For manual verification of used parameters
print("Frame size: {}x{}".format(FRAME_WIDTH, FRAME_HEIGHT))
print("Learning rate decay every {} steps".format(NUM_EPOCHS_PER_DECAY * NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
                                                  // BATCH_SIZE))

Frame size: 320x240
Learning rate decay every 6031.0 steps


# Input Data

In [8]:
rar_path = tt.utils.data.download(DATASET_URL, DATASET_DIR)
dataset_path = tt.utils.data.extract(rar_path, DATASET_DIR)

File UCF11_updated_mpg.rar has already been downloaded.
File UCF11_updated_mpg.rar has already been extracted.


### Optional Preprocessing of videos

Converting videos to frame bundled frame sequences, to create files that can be read using TensorFlows FixedLengthRecordReader.
In the future, this could be replaced with a custom OpenCvVideoReader, which has to be implemented in C++. This requires to work an the source code of TensorFlow, instead of the pip package.

In [6]:
video_filenames = tt.utils.path.get_filenames('.', '*.mpg')

for i, video_filename in enumerate(video_filenames):
    with tt.utils.video.VideoReader(video_filename) as vr:
        frames = []
        for f in xrange(SERIALIZED_SEQ_LENGTH):
            frame = frame = vr.next_frame(FRAME_SCALE_FACTOR)
            if frame is not None:
                # ensure frame is not too large
                h, w, c = np.shape(frame)
                if h > FRAME_HEIGHT or w > FRAME_WIDTH:
                    frame = frame[:FRAME_HEIGHT, :FRAME_WIDTH, :]
                if not h < FRAME_HEIGHT and not w < FRAME_WIDTH:
                    frame = np.reshape(frame, [FRAME_HEIGHT, FRAME_WIDTH, -1])
                    if FRAME_CHANNELS == 1:
                        frame = tt.utils.image.to_grayscale(frame)
                    frames.append(frame)
                else:
                    print('Warning: Frame bounds too small. Skipping.')
                    break
            else:
                print('Warning: Frame sequence too short. Skipping.')
                break

        if len(frames) == SERIALIZED_SEQ_LENGTH:
            # TODO: seqences from one folder to a single file?
            seq_filepath = os.path.splitext(video_filename)[0] + '.seq'
            tt.utils.image.write_as_binary(seq_filepath, np.asarray(frames))

print('Successfully extracted frame sequences.')

Successfully extracted frame sequences.


In [9]:
def read_record(filename_queue):
    class FrameSeqRecord(object):
        pass
    record = FrameSeqRecord()
    record.height = FRAME_HEIGHT
    record.width = FRAME_WIDTH
    record.depth = FRAME_CHANNELS

    frame_bytes = record.height * record.width * record.depth
    record_bytes = frame_bytes * (INPUT_SEQ_LENGTH + 1)
    total_file_bytes = frame_bytes * SERIALIZED_SEQ_LENGTH

    reader = tf.FixedLengthRecordReader(total_file_bytes)

    record.key, value = reader.read(filename_queue)
    decoded_record_bytes = tf.decode_raw(value, tf.uint8)
    
    record.data = decoded_record_bytes[0:(INPUT_SEQ_LENGTH)]
    record.prediction = decoded_record_bytes[INPUT_SEQ_LENGTH]
    
    decoded_record_bytes = tf.reshape(decoded_record_bytes,
                                      [SERIALIZED_SEQ_LENGTH, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS])
    
    # calculcate tensors [start, 0, 0, 0] and [start + INPUT_SEQ_LENGTH, 0, 0, 0]
    rnd_start_index = tf.to_int32(tf.random_uniform([1], 0, SERIALIZED_SEQ_LENGTH - (INPUT_SEQ_LENGTH + 1), tf.int32))
    seq_start_offset = tf.SparseTensor(indices=[[0]], values=rnd_start_index, shape=[4])
    sequence_start = tf.sparse_tensor_to_dense(seq_start_offset)
    pred_start_offset = tf.SparseTensor(indices=[[0]], values=rnd_start_index + INPUT_SEQ_LENGTH, shape=[4])
    prediction_start = tf.sparse_tensor_to_dense(pred_start_offset)
    
    # take first n-1 frames as input
    record.data = tf.cast(tf.slice(decoded_record_bytes, sequence_start,
                                   [INPUT_SEQ_LENGTH, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS]),
                          tf.float32)
    # take last frame as prediction
    record.prediction = tf.cast(tf.slice(decoded_record_bytes, prediction_start,
                                         [1, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS]),
                                tf.float32)
    record.prediction = tf.squeeze(record.prediction, squeeze_dims=[0])
    return record

In [11]:
def generate_sequence_batch(sequence_in, prediction, min_queue_examples,
                            batch_size, shuffle):
    """Construct a queued batch of images and labels.
    Args:
        sequence: 3-D Tensor array of [[height, width, 3]] of type.float32.
        min_queue_examples: int32, minimum number of samples to retain in
                            the queue that provides of batches of examples.
        batch_size: Number of images per batch.
        shuffle: boolean indicating whether to use a shuffling queue.
    Returns:
        images: Images. 4D tensor of [batch_size, height, width, 3] size.
    """
    # Create a queue that shuffles the examples, and then
    # read 'batch_size' images + labels from the example queue.
    num_preprocess_threads = 8 # NOTE: random number generators get race conditions, when > 2
    # ==> IDEA: apply random contrast/brightness/flip BEFORE slicing the input sequence into frames and prediction
    if shuffle:
        sequence_batch, prediction_batch = tf.train.shuffle_batch(
            [sequence_in, prediction],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
    else:
        sequence_batch, prediction_batch = tf.train.batch(
            [sequence_in, prediction],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size)

    return sequence_batch, prediction_batch

In [12]:
def distort_image(image):
    return image # NO DISTORION RIGHT NOW !!!
    flipped_image = tf.image.random_flip_left_right(image, seed=42)

    # Because these operations are not commutative, consider randomizing
    # the order their operation:
    contrast_image = tf.image.random_contrast(flipped_image, lower=0.2, upper=1.8, seed=43)
    brightness_image = tf.image.random_brightness(contrast_image, max_delta=0.2, seed=44)

    return brightness_image

In [13]:
def inputs(data_dir, batch_size):
    """Construct input using the Reader ops.
    Args:
        data_dir: Path to the data directory.
        batch_size: Number of image sequences per batch.
    Returns:
        images: Images. 4D tensor of [batch_size, FRAME_HEIGHT, FRAME_WIDTH, 3 * INPUT_SEQ_LENGTH] size.
    """
    # if not eval_data:
    # filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
    #             for i in xrange(1, 6)]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
    # else:
    #    filenames = [os.path.join(data_dir, 'test_batch.bin')]
    #    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL#

    # for f in filenames:
    #    if not tf.gfile.Exists(f):
    #        raise ValueError('Failed to find file: ' + f)
    
    seq_filenames = tt.utils.path.get_filenames(data_dir, '*.seq')
    # seq_filenames_overfit = []
    # for i in xrange(BATCH_SIZE):
    #     seq_filenames_overfit.append(seq_filenames[i])
    # seq_filenames = seq_filenames_overfit
    # print('Overfitting on file:', seq_filenames)
    
    print('Found %d frame sequence files' % len(seq_filenames))

    filename_queue = tf.train.string_input_producer(seq_filenames)
    seq_record = read_record(filename_queue)  
    
    reshaped_seq = tf.cast(seq_record.data, tf.float32)

    seq_record.prediction = (seq_record.prediction - 127.5) / 127.5
    reshaped_seq = (reshaped_seq - 127.5) / 127.5
    
    # distort images
    distorted_prediction = distort_image(seq_record.prediction)
    distorted_input = []
    for i in xrange(INPUT_SEQ_LENGTH):
        distorted_input.append(distort_image(reshaped_seq[i,:,:,:]))
    stacked_distorted_input = tf.concat(2, distorted_input)

    # Ensure that the random shuffling has good mixing properties.
    min_queue_examples = int(num_examples_per_epoch *
                             MIN_FRACTION_EXAMPLES_IN_QUEUE)
    print("Try retain min. {} examples in queue".format(min_queue_examples))
    
    # Generate a batch of sequences and labels by building up a queue of examples.
    return generate_sequence_batch(stacked_distorted_input, distorted_prediction, min_queue_examples, 
                                   batch_size, shuffle=True)

# Inference
Has been moved to *model/...* to allow code sharing.

# Loss

Has been moved to *model/...* to allow code sharing.

# Training

In [14]:
def train(total_cost, cost, global_step):
    """Train sequence model.
    Create an optimizer and apply to all trainable variables. Add moving
    average for all trainable variables.
    Args:
        total_cost: Total loss from loss function including regularization terms.
        cost: Raw loss from loss function without regularization terms.
        global_step: Integer Variable counting the number of training steps
                     processed.
    Returns:
        train_op: op for training.
    """
    # Variables that affect learning rate
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / BATCH_SIZE
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries
    cost_averages_op = tt.board.loss_summary([total_cost, cost] +
                                             tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # Compute gradients
    with tf.control_dependencies([cost_averages_op]):
        opt = tf.train.AdamOptimizer(lr)
        grads = opt.compute_gradients(total_cost)

    # Apply gradients
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    tt.board.variables_histogram_summary()

    # Add histograms for gradients
    tt.board.gradients_histogram_summary(grads)

    # Track the moving averages of all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

# TensorFlow Session (Main)

In [None]:
with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    
    # get images batch from dataset
    seq_batch, prediction_batch = inputs('tmp', BATCH_SIZE)

    # build graph and compute predictions from the inference model
    model_output = model.inference(seq_batch, 
                                   FRAME_CHANNELS, 
                                   INPUT_SEQ_LENGTH, 
                                   LAMBDA)

    # calculate loss
    cost_with_reg, cost = model.loss(model_output, prediction_batch)

    # train the model
    train_op = train(cost_with_reg, cost, global_step)

    # Create a saver and merge all summaries
    saver = tf.train.Saver(tf.all_variables())
    summary_op = tf.merge_all_summaries()

    # Create the graph, etc.
    init_op = tf.initialize_all_variables()

    # Create a session for running operations in the Graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMORY_FRACTION)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        # Initialize the variables (like the epoch counter)
        sess.run(init_op)

        # Visualize graph
        tt.visualization.show_graph(sess.graph_def)
        
        # Start input enqueue threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_writer = tf.train.SummaryWriter(TRAIN_DIR, sess.graph)

        try:
            step = 0
            while not coord.should_stop():
                step += 1
                if (step > MAX_STEPS):
                    break

                start_time = time.time()

                _, cost_with_reg_value, cost_value = sess.run([train_op, cost_with_reg, cost])
                duration = time.time() - start_time

                assert not np.isnan(cost_with_reg_value), 'Model diverged with cost = NaN'

                if step % 10 == 0:
                    num_examples_per_step = BATCH_SIZE
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = ('%s: step %d, loss = %.2f, lost w/o reg = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print (format_str % (datetime.now().time(), step, cost_with_reg_value, cost_value,
                                         examples_per_sec, sec_per_batch))

                if step % 100 == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush() 

                # Save the model checkpoint periodically.
                if step % 1000 == 0 or (step + 1) == MAX_STEPS:
                    checkpoint_path = os.path.join(TRAIN_DIR, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # When done, ask the threads to stop
            coord.request_stop()

        # Wait for threads to finish
        coord.join(threads)

        sequences, targets, predictions = sess.run([seq_batch, prediction_batch, model_output])

        # print predictions of a batch
        for idx in xrange(BATCH_SIZE):
            tt.utils.image.write(
                'out/{}-pred.png'.format(idx),
                predictions[idx] * 127.5 + 127.5)

        for idx in xrange(BATCH_SIZE):
            for s in xrange(INPUT_SEQ_LENGTH):
                frames = sequences[:, : , :,(s * FRAME_CHANNELS):((s + 1) * FRAME_CHANNELS)]
                tt.utils.image.write(
                    'out/{}-seq{}.png'.format(idx, s),
                    frames[idx] * 127.5 + 127.5)

        for idx in xrange(BATCH_SIZE):
            tt.utils.image.write(
                'out/{}-target.png'.format(idx),
                targets[idx] * 127.5 + 127.5)


Found 1542 frame sequence files
Try retain min. 231 examples in queue


  def _ipython_display_formatter_default(self):
  def _singleton_printers_default(self):


12:00:42.294844: step 10, loss = 1134.27, lost w/o reg = 1134.13 (22.9 examples/sec; 2.789 sec/batch)
12:01:10.971012: step 20, loss = 1093.09, lost w/o reg = 1092.95 (22.2 examples/sec; 2.882 sec/batch)
12:01:39.305760: step 30, loss = 1162.96, lost w/o reg = 1162.82 (23.0 examples/sec; 2.780 sec/batch)
