In [30]:
import os
import sys
import time
import math

import cv2
from scipy.misc import imresize
import numpy as np
import tensorflow as tf

# import model.conv_deconv_model as model
import model.conv_lstm_model as model

In [31]:
# --- Input video --------------------------------------------------------------
# Path of the source clip handed to open_video(); alternatives are kept commented out.
# VIDEO_FILE = 'tmp/UCF11_updated_mpg/golf_swing/v_golf_10/v_golf_10_02.mpg'
VIDEO_FILE = 'tmp/UCF11_updated_mpg/tennis_swing/v_tennis_01/v_tennis_01_01.mpg'
# VIDEO_FILE = 'tmp/UCF11_updated_mpg/golf_swing/v_golf_14/v_golf_14_03.mpg' # Overfitting file
# Passed to open_video() as `from_time`, which applies it through CAP_PROP_POS_MSEC,
# so despite the name it is interpreted as a position in milliseconds, not a frame index.
VIDEO_START_FRAME = 0 

# --- Checkpoint / output ------------------------------------------------------
# TRAIN_DIR and CHECKPOINT_FILE are joined into the checkpoint path that is restored.
TRAIN_DIR = 'train_30k_lstm_120_160_euc_loss'
CHECKPOINT_FILE = 'model.ckpt-30000'
# Base name of the generated video; get_video_writer() appends '.avi'.
VIDEO_OUTPUT_NAME = 'predicted_video'

# Number of consecutive frames stacked along the channel axis to form one model input.
INPUT_SEQ_LENGTH = 5

# Number of frames generated by feeding predictions back into the model.
PREDICTION_LENGTH = 120
# Number of source frames read and written to the output before the predictions;
# the last INPUT_SEQ_LENGTH of them seed the prediction loop.
GROUND_TRUTH_LENGTH = 30

# Scale factor applied to every decoded frame by read_next_frame().
FRAME_SCALE_FACTOR = 0.2
# Frame geometry after scaling (presumably 320x240 is the source resolution -- TODO confirm).
FRAME_WIDTH = int(320 * FRAME_SCALE_FACTOR)
FRAME_HEIGHT = int(240 * FRAME_SCALE_FACTOR)
FRAME_CHANNELS = 3

# Forwarded to model.inference(); presumably a regularisation weight -- TODO confirm.
LAMBDA = 5e-4

# Forwarded to model.inference().
BATCH_SIZE = 1

# Used as per_process_gpu_memory_fraction for the TensorFlow session.
GPU_MEMORY_FRACTION = 1.0

In [32]:
# TODO: refactor
def open_video(videofile, from_time=0):
    """Open a video file for reading and optionally seek to a start position.

    Args:
        videofile: Path of the video file handed to cv2.VideoCapture.
        from_time: Start position in milliseconds. 0 (the default) leaves the
            capture at the beginning of the stream.

    Returns:
        The cv2.VideoCapture object. The caller is responsible for releasing it.
    """
    vidcap = cv2.VideoCapture(videofile)
    if from_time != 0:
        # OpenCV >= 3 exposes the property id as cv2.CAP_PROP_POS_MSEC, while
        # OpenCV 2.4 (the API family get_video_writer was written against) only
        # has cv2.cv.CV_CAP_PROP_POS_MSEC. Use hasattr rather than a truthiness
        # test because the constant's value is 0.
        if hasattr(cv2, 'CAP_PROP_POS_MSEC'):
            pos_msec = cv2.CAP_PROP_POS_MSEC
        else:
            pos_msec = cv2.cv.CV_CAP_PROP_POS_MSEC
        vidcap.set(pos_msec, from_time)
    return vidcap

# TODO: refactor
def read_next_frame(vidcap):
    """Read one frame from the capture and scale it by FRAME_SCALE_FACTOR.

    Args:
        vidcap: Object with a read() method returning a (success, image) pair,
            such as the capture returned by open_video().

    Returns:
        The frame resized with imresize(), or None when the read was not successful.
    """
    # NOTE(review): scipy.misc.imresize is no longer shipped by recent SciPy
    # releases -- confirm the pinned SciPy version before upgrading.
    ok, raw_frame = vidcap.read()
    if not ok:
        return None
    return imresize(raw_frame, FRAME_SCALE_FACTOR)

In [42]:
def get_video_writer(filename):
    """Create an XVID-encoded AVI writer for the output video.

    Args:
        filename: Output path without extension; '.avi' is appended.

    Returns:
        A cv2.VideoWriter running at 24 fps whose frame size is
        (max(128, FRAME_WIDTH), max(128, FRAME_HEIGHT)). The caller is
        responsible for releasing it.
    """
    # OpenCV >= 3 provides cv2.VideoWriter_fourcc; OpenCV 2.4 only has the
    # legacy cv2.cv.CV_FOURCC. Support both so this cell works with the same
    # OpenCV versions as the rest of the notebook.
    make_fourcc = getattr(cv2, 'VideoWriter_fourcc', None)
    if make_fourcc is None:
        make_fourcc = cv2.cv.CV_FOURCC
    fourcc = make_fourcc(*'XVID')

    # NOTE(review): frames must be written at exactly this size; it is widened
    # to at least 128 per side, presumably to match the padding done by
    # ensure_minimum_framesize -- confirm for frame sizes other than the default.
    frame_size = (max(128, FRAME_WIDTH), max(128, FRAME_HEIGHT))
    return cv2.VideoWriter('{}.avi'.format(filename), fourcc, 24.0, frame_size)

In [43]:
FF_MIN_BUFFER_SIZE = 16384  # from OpenCV C++ code

def ensure_minimum_framesize(frame):
    """Zero-pad small frames so they reach the minimum size for video writing.

    A frame whose element count (height * width * channels) is below
    FF_MIN_BUFFER_SIZE is centred on a black uint8 canvas that measures at
    least sqrt(FF_MIN_BUFFER_SIZE) pixels per side. Any other frame is returned
    unchanged (the very same object).

    Args:
        frame: Array of shape (height, width, channels).

    Returns:
        The original frame, or a new uint8 array containing the padded frame.

    Raises:
        ValueError: If `frame` does not have exactly three dimensions.
    """
    h, w, c = np.shape(frame)
    if h * w * c >= FF_MIN_BUFFER_SIZE:
        return frame

    # math.sqrt returns a float, but array shapes and slice bounds must be
    # integers (current NumPy raises TypeError for floats).
    min_side = int(math.ceil(math.sqrt(FF_MIN_BUFFER_SIZE)))
    # Never make the canvas smaller than the frame itself, so a thin frame
    # with one long side still fits instead of producing negative offsets.
    out_h = max(min_side, h)
    out_w = max(min_side, w)

    outframe = np.zeros([out_h, out_w, c], np.uint8)
    top = (out_h - h) // 2
    left = (out_w - w) // 2
    outframe[top:(top + h), left:(left + w), :] = frame
    return outframe

# MAIN

In [44]:
# Rebuild the inference graph, restore the trained weights, then write a video
# made of GROUND_TRUTH_LENGTH source frames followed by PREDICTION_LENGTH frames
# predicted by repeatedly feeding the model its own output.
with tf.Graph().as_default():
    # One sequence is fed per step: INPUT_SEQ_LENGTH frames stacked along the channel axis.
    seq_batch = tf.placeholder(tf.float32, shape=[1, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS * INPUT_SEQ_LENGTH])
    
    # build graph and compute predictions from the inference model
    model_output = model.inference(seq_batch, BATCH_SIZE, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS,
                                   INPUT_SEQ_LENGTH, LAMBDA)

    # Create a saver over all variables so the checkpoint can be restored
    saver = tf.train.Saver(tf.all_variables())

    # Create a session for running operations in the Graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMORY_FRACTION)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        checkpoint_path = os.path.join(TRAIN_DIR, CHECKPOINT_FILE)
        saver.restore(sess, checkpoint_path)
        
        vidcap = open_video(VIDEO_FILE, VIDEO_START_FRAME)
        vidwriter = get_video_writer(VIDEO_OUTPUT_NAME)
        
        # Release the capture and the writer even if reading or inference fails,
        # so the output file is finalised and the input handle is not leaked.
        try:
            input_frames = []
            
            # Copy the ground-truth frames to the output; the last
            # INPUT_SEQ_LENGTH of them also seed the prediction loop.
            for i in range(GROUND_TRUTH_LENGTH):
                frame = read_next_frame(vidcap)
                
                if frame is None:
                    print('Warning: Error while reading frame.')
                    continue
                
                vidframe = ensure_minimum_framesize(frame)
                vidwriter.write(vidframe)

                if i >= GROUND_TRUTH_LENGTH - INPUT_SEQ_LENGTH:
                    # scale pixel values from [0, 255] to roughly [-1, 1]
                    frame = (frame - 128.0) / 128.0
                    input_frames.append(frame)
            
            # Without a full seed sequence the model input cannot be built;
            # fail with a clear message instead of an IndexError further down.
            if len(input_frames) < INPUT_SEQ_LENGTH:
                raise RuntimeError(
                    'Only {} of the {} seed frames could be read from {}.'.format(
                        len(input_frames), INPUT_SEQ_LENGTH, VIDEO_FILE))
            
            # Optional separator between ground truth and prediction
            # (uncomment the writes to insert empty frames in between):
            black_frame = np.zeros((FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS), dtype=np.uint8)
            black_frame = ensure_minimum_framesize(black_frame)
            # vidwriter.write(black_frame)
            # vidwriter.write(black_frame)
            
            for j in range(PREDICTION_LENGTH):
                # stack the current window along the channel axis and add the batch axis
                seq_input = np.concatenate(input_frames, axis=2)
                seq_input = np.expand_dims(seq_input, axis=0)
                
                predicted_frame = sess.run([model_output], feed_dict={seq_batch: seq_input})
                # keep the prediction inside the value range used for the inputs
                predicted_frame = np.clip(np.squeeze(predicted_frame), -1, 1)
                
                # [-1, 1] maps to [0, 256]; clamp to 255 so that 256 does not
                # wrap around to 0 when converting to uint8
                video_frame = np.clip(predicted_frame * 128.0 + 128.0, 0, 255)
                video_frame = video_frame.astype(np.uint8)
                video_frame = ensure_minimum_framesize(video_frame)
                vidwriter.write(video_frame)
                
                # slide the window: drop the oldest frame, append the newest prediction
                del input_frames[0]
                input_frames.append(predicted_frame)
        finally:
            vidwriter.release()
            vidcap.release()
        
        print('DONE')



(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
(128, 128, 3)
DONE
