In [26]:
import os
import sys
import time

import cv2
import numpy as np
import tensorflow as tf

import model.conv_deconv_model as model

In [27]:
# Source video whose first frames seed the prediction (alternative clip kept for reference).
# VIDEO_FILE = 'tmp/UCF11_updated_mpg/golf_swing/v_golf_10/v_golf_10_02.mpg'
VIDEO_FILE = 'tmp/UCF11_updated_mpg/tennis_swing/v_tennis_01/v_tennis_01_01.mpg'
# NOTE(review): passed to open_video() as `from_time`, which applies it via
# cv2.CAP_PROP_POS_MSEC -- so despite the name it is used as a millisecond
# offset, not a frame index. 0 means "do not seek".
VIDEO_START_FRAME = 0 

# Directory containing the checkpoint ('model.ckpt-10000') restored in the main cell.
TRAIN_DIR = 'train_10k_5seq'

# Number of consecutive frames stacked along the channel axis as model input.
INPUT_SEQ_LENGTH = 5

# Number of frames generated by feeding the model its own predictions.
PREDICTION_LENGTH = 120
# Number of frames read from VIDEO_FILE and copied to the output before predicting;
# the last INPUT_SEQ_LENGTH of them form the initial model input.
GROUND_TRUTH_LENGTH = 30

# Frame geometry used for the input placeholder and the black separator frame.
FRAME_WIDTH = 320
FRAME_HEIGHT = 240
FRAME_CHANNELS = 3

# Forwarded to model.inference(); presumably a regularization weight -- TODO confirm
# against model/conv_deconv_model.py.
LAMBDA = 5e-4

# Forwarded to model.inference(). Note the input placeholder in the main cell
# hard-codes a batch dimension of 1, so other values are not supported as-is.
BATCH_SIZE = 1

# Fraction of GPU memory the TensorFlow session is allowed to allocate
# (tf.GPUOptions.per_process_gpu_memory_fraction).
GPU_MEMORY_FRACTION = 0.5

In [28]:
# TODO: refactor
def open_video(videofile, from_time=0):
    """Open a video file for reading, optionally seeking before the first read.

    Args:
        videofile: path of the video to open.
        from_time: start position handed to cv2.CAP_PROP_POS_MSEC (i.e. a
            millisecond offset). A value of 0 skips seeking entirely.

    Returns:
        The cv2.VideoCapture object created for `videofile`.
    """
    capture = cv2.VideoCapture(videofile)
    if from_time == 0:
        # Nothing to seek to; hand back the capture positioned at its start.
        return capture
    capture.set(cv2.CAP_PROP_POS_MSEC, from_time)
    return capture

# TODO: refactor
def read_next_frame(vidcap):
    """Read one frame from an opened capture.

    Args:
        vidcap: object exposing `read()` that returns a `(success, image)` pair,
            such as a cv2.VideoCapture.

    Returns:
        The decoded image when the read succeeded, otherwise None.
    """
    ok, frame = vidcap.read()
    return frame if ok else None

In [29]:
def get_video_writer(filename, fps=24.0, frame_size=(320, 240)):
    """Create an XVID-encoded AVI writer.

    Args:
        filename: output path without extension; '.avi' is appended.
        fps: frame rate of the written video (defaults to the previous
            hard-coded 24.0).
        frame_size: (width, height) of the frames that will be written
            (defaults to the previous hard-coded 320x240).

    Returns:
        The cv2.VideoWriter created for '<filename>.avi'.
    """
    # OpenCV >= 3 exposes cv2.VideoWriter_fourcc and no longer ships the
    # `cv2.cv` submodule; OpenCV 2.x only has cv2.cv.CV_FOURCC. Support both so
    # the notebook does not fail with AttributeError depending on the install.
    if hasattr(cv2, 'VideoWriter_fourcc'):
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
    else:
        fourcc = cv2.cv.CV_FOURCC(*'XVID')
    return cv2.VideoWriter('{}.avi'.format(filename), fourcc, fps, frame_size)

# MAIN

In [41]:
with tf.Graph().as_default():
    # Input placeholder: a single sequence of INPUT_SEQ_LENGTH frames stacked
    # along the channel axis (the loop below always feeds exactly one sequence).
    seq_batch = tf.placeholder(tf.float32, shape=[1, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS * INPUT_SEQ_LENGTH])

    # Build the graph and compute predictions from the inference model.
    model_output = model.inference(seq_batch, BATCH_SIZE, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS,
                                   INPUT_SEQ_LENGTH, LAMBDA)

    # Saver over all graph variables; used here only to restore the checkpoint.
    saver = tf.train.Saver(tf.all_variables())

    # Create a session for running operations in the Graph.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEMORY_FRACTION)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        checkpoint_path = os.path.join(TRAIN_DIR, 'model.ckpt-10000')
        saver.restore(sess, checkpoint_path)

        vidcap = open_video(VIDEO_FILE, VIDEO_START_FRAME)
        vidwriter = None
        try:
            vidwriter = get_video_writer('predicted_video')

            # --- Stage 1: copy ground-truth frames to the output video and keep
            # the last INPUT_SEQ_LENGTH of them (normalized) as the model input.
            input_frames = []
            first_input_index = GROUND_TRUTH_LENGTH - INPUT_SEQ_LENGTH
            for i in range(GROUND_TRUTH_LENGTH):
                frame = read_next_frame(vidcap)
                if frame is None:
                    print('Warning: Error while reading frame.')
                    continue

                vidwriter.write(frame)
                if i >= first_input_index:
                    # Map pixel values into the range the model output is
                    # clamped to below.
                    input_frames.append((frame - 128.0) / 128.0)

            # Fail with a clear message instead of an IndexError inside the
            # prediction loop when the video is missing, unreadable or too short.
            if len(input_frames) < INPUT_SEQ_LENGTH:
                raise IOError('Could only read {} of the {} input frames required from {}'.format(
                    len(input_frames), INPUT_SEQ_LENGTH, VIDEO_FILE))

            # --- Stage 2: insert two black frames to mark where prediction starts.
            black_frame = np.zeros((FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS), dtype=np.uint8)
            vidwriter.write(black_frame)
            vidwriter.write(black_frame)

            # --- Stage 3: roll the model forward, feeding predictions back in.
            for _ in range(PREDICTION_LENGTH):
                # Stack however many frames INPUT_SEQ_LENGTH dictates along the
                # channel axis, then add the batch dimension.
                seq_input = np.expand_dims(np.concatenate(input_frames, axis=2), axis=0)

                predicted_frame = sess.run([model_output], feed_dict={seq_batch: seq_input})
                # Drop singleton dimensions (assumes the model emits one frame
                # per run -- TODO confirm) and clamp to the normalized range so
                # out-of-range values are not fed back into the model.
                predicted_frame = np.clip(np.squeeze(predicted_frame), -1, 1)

                # De-normalize; +1.0 maps to 256, hence the clip to the uint8 range.
                video_frame = np.clip(predicted_frame * 128.0 + 128.0, 0, 255).astype(np.uint8)
                vidwriter.write(video_frame)

                # Slide the input window: drop the oldest frame, append the newest.
                del input_frames[0]
                input_frames.append(predicted_frame)
        finally:
            # Always release OpenCV handles so the output file is finalized and
            # the capture is closed even if reading or inference fails.
            if vidwriter is not None:
                vidwriter.release()
            vidcap.release()