In [1]:
#-----------------prednet prediction preprocess step -----------------------------#

'''
Things to do:
1. use multiple frames to predict next frame.
2. Test different sample frequencies.
'''

import matplotlib.pyplot as plt
import tensorflow as tf
import gym
import os
import imageio
import sys

import numpy as np
from scipy.misc import imread, imresize
import hickle as hkl
import pandas as pd


ADVERSARIAL_PATH = '../adversarial'
sys.path.append(ADVERSARIAL_PATH)

from component import Model_, Experiment, Optimizer, Component
from parameter import Parameter, params_from_json

# you might want to run this if you don't have ffmpeg installed
# imageio.plugins.ffmpeg.download()

# Shared notebook state: the cells below read from and add to this dict.
context = {}
context['home'] = os.getcwd()
context['fname_input'] = 'test_avi/test.mp4'

# current analysis must be done with 2.0 or 5.0 fps
context['frate'] = 2.0
assert context['frate'] in (2.0, 5.0)

# The Parameter defaults are derived from `context` instead of repeating the
# literals: read_video() reads the Parameters while convert_video_to_hkl() and
# get_accidents() read `context`, so the two must never drift apart.
file_name_input = Parameter('file-name-input', default_value=context['fname_input'])
frame_rate = Parameter('frame-rate', default_value=context['frate'])
assert frame_rate.get_value('frame-rate') in (2.0, 5.0)



# Clip layout and frame size used when sampling the video.
frames_per_clip = Parameter('frames-per-clip', default_value = 10)
max_clip_number = Parameter('max-clip-number', default_value = 1000)
desired_im_sz = Parameter('desired-image-size', default_value = (128, 160))

# Parameter group handed to process_convert_video_hkl.
file_name_rate = Parameter('frames-max-clip', values = [frames_per_clip, max_clip_number, file_name_input, frame_rate, desired_im_sz])

class process_convert_video_hkl(Component):
    '''
    Reads the configured input video and converts its sub-sampled, resized
    frames into hickle (.hkl) files under <home>/test_avi.
    '''

    def __init__(self):
        super(process_convert_video_hkl,self).__init__(file_name_rate)

    def read_video(self):
        '''
        Open the configured input video with imageio's ffmpeg reader.

        Stores the frame rate ('s_hz') and the duration ('s_length') reported
        by the reader's metadata in the global `context`.

        returns: the open reader; the caller is responsible for closing it
        raises: ValueError if the video is shorter than one sampling interval
                (1 / frame-rate seconds)
        '''
        home = context['home']
        file_name_input = self.config_params.get_value('file-name-input').get_value()
        frame_rate = self.config_params.get_value('frame-rate').get_value()

        fname_full = os.path.join(home, file_name_input)
        vid = imageio.get_reader(fname_full, 'ffmpeg')
        try:
            vid_info = vid.get_meta_data()

            context['s_hz'] = vid_info['fps']
            s_length = vid_info['duration']
            context['s_length'] = s_length

            min_duration = 1.0 / frame_rate
            if s_length < min_duration:
                msg = 'The length of this video ({0}s) is smaller than the minimum duration ({1}s).'\
                     .format(s_length, min_duration)
                raise ValueError(msg)
        except Exception:
            # Do not leak the reader when the video is rejected.
            vid.close()
            raise
        return vid

    def process_im(self, im, desired_sz):
        '''
        resize and crop image

        The image is scaled so that axis 0 has desired_sz[0] entries (keeping
        the aspect ratio), then centre-cropped along axis 1 to desired_sz[1].

        params: im - an image array; axis 0 and axis 1 are the two spatial axes
                desired_sz - a tuple giving the output size along axis 0 and axis 1
        returns: a transformed image
        raises: ValueError if the resized image is narrower than desired_sz[1]
        '''
        target_ds = float(desired_sz[0]) / im.shape[0]
        im = imresize(im, (desired_sz[0], int(np.round(target_ds * im.shape[1]))))
        if im.shape[1] < desired_sz[1]:
            raise ValueError('Resized image has {0} columns, fewer than the {1} requested.'
                             .format(im.shape[1], desired_sz[1]))
        # Floor division keeps the crop offset an integer so it can be used as
        # a slice index on Python 3 as well as Python 2.
        d = (im.shape[1] - desired_sz[1]) // 2
        im = im[:, d:d + desired_sz[1]]
        return im

    def convert_video_to_hkl(self, vid, fname_input, des_im_sz):
        '''
        Take the frames of the video and convert them into the .hkl files.

        Every `skip`-th frame of `vid` is resized with process_im() and stored.
        Frames are grouped into clips of 'frames-per-clip' frames, and clips
        are written in parts of at most 'max-clip-number' clips each, as
        X_test_P<parts>_<part>.hkl and sources_test_P<parts>_<part>.hkl in
        <home>/test_avi. Existing .hkl files in that directory are deleted first.

        Reads 'frate', 's_hz', 's_length' and 'home' from the global `context`
        and stores 'frames_per_clip', 's_orig_ms_per_frame', 'skip', 'parts'
        and 'suffix' there for the later cells.

        params: vid - an open reader as returned by read_video()
                fname_input - source name recorded for every sampled frame
                des_im_sz - tuple with the output frame size (see process_im)
        raises: ValueError if the video is too short to fill a single clip
        '''
        # NOTE(review): the sampling rate comes from context['frate'] while
        # read_video() validates against the 'frame-rate' Parameter - keep
        # both set to the same value.
        frate = context['frate']
        s_hz = context['s_hz']
        s_length = context['s_length']

        frames_per_clip = self.config_params.get_value('frames-per-clip').get_value()
        max_clip = self.config_params.get_value('max-clip-number').get_value()  # limit of clips for a part
        context['frames_per_clip'] = frames_per_clip
        split = 'test'

        s_num_frames = int(round(s_hz * s_length))
        s_orig_ms_per_frame = 1000.0 / s_hz
        s_targ_ms_per_frame = 1000.0 / frate
        context['s_orig_ms_per_frame'] = s_orig_ms_per_frame

        # skip: step of the sampling method. Clamped to 1 so that a target
        # rate above the source rate cannot produce a zero step.
        skip = max(1, int(round(s_targ_ms_per_frame / s_orig_ms_per_frame)))
        context['skip'] = skip

        s_total_clips = s_num_frames // (frames_per_clip * skip)
        if s_total_clips < 1:
            raise ValueError('The video has {0} frame(s), too few for one clip of {1} frame(s) '
                             'sampled every {2} frame(s).'.format(s_num_frames, frames_per_clip, skip))

        # parts: each part contains no more than <max_clip> number of clips.
        # Ceiling division avoids an empty trailing part when the clip count
        # is an exact multiple of max_clip.
        parts = (s_total_clips + max_clip - 1) // max_clip
        context['parts'] = parts
        step_im = max_clip * frames_per_clip
        suffix = '_P' + str(parts) + '_'
        context['suffix'] = suffix

        # num_im: total number of images sampled
        num_im = s_total_clips * frames_per_clip

        # initialize the test data
        X = np.zeros((num_im,) + tuple(des_im_sz) + (3,), np.uint8)
        # source in each frame makes sure each clip has frames in the same video
        source_list = [fname_input] * num_im

        # sample the images from vid; the iterator already yields frame i, so
        # there is no need to fetch it a second time by index
        ct = 0
        for i, frame in enumerate(vid):
            if i % skip != 0:
                continue
            X[ct] = self.process_im(frame, des_im_sz)
            ct += 1
            if ct == num_im:
                break

        # store the images and sources into respective hickle file
        out_dir = os.path.join(context['home'], 'test_avi')
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        else:
            # remove the .hkl files left over from a previous run
            for temp in os.listdir(out_dir):
                if temp.endswith('.hkl'):
                    os.remove(os.path.join(out_dir, temp))

        for part in range(1, parts + 1):
            xbeg = (part - 1) * step_im
            xend = num_im if part == parts else part * step_im
            print('Test data has {0} part(s), starts at image {1} and ends at image {2}.'.format(part,xbeg,xend))
            hkl.dump(X[xbeg:xend], os.path.join(out_dir, 'X_' + split + suffix + str(part) + '.hkl'))
            hkl.dump(source_list[xbeg:xend], os.path.join(out_dir, 'sources_' + split + suffix + str(part) + '.hkl'))
    
# Run the preprocessing step: open the video, write the .hkl files, and always
# close the reader afterwards (also when the conversion fails).
vid_object = process_convert_video_hkl()
vid = vid_object.read_video()
try:
    vid_object.convert_video_to_hkl(vid, context['fname_input'], desired_im_sz.get_value('desired-image-size'))
finally:
    vid.close()


Using Theano backend.


Test data has 1 part(s), starts at image 0 and ends at image 210.


In [2]:
#---------------- Evaluate trained PredNet on test video and Calculates mean-squared error------------#

from six.moves import cPickle
from keras import backend as K
from keras.models import Model, model_from_json
from keras.layers import Input, Dense, Flatten
import sys

ADVERSARIAL_PATH = '../adversarial'
sys.path.append(ADVERSARIAL_PATH)

from component import Model_, Experiment, Optimizer, Component
from parameter import Parameter, params_from_json


PREDNET_PATH = '../models/prednet'
sys.path.append(PREDNET_PATH)
from prednet import PredNet
from data_utils import SequenceGenerator


# Locations of the test data, the trained PredNet files and the results.
DATA_DIR = './test_avi/'
WEIGHTS_DIR = os.path.join(PREDNET_PATH, 'model_data')
RESULTS_DIR = './test_results_avi/'

# Trained KITTI model: weights and architecture description.
fpath_weights = Parameter(
    'weights-file-path',
    default_value=os.path.join(WEIGHTS_DIR, 'prednet_kitti_weights.hdf5'))
fpath_json = Parameter(
    'model-file-path',
    default_value=os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json'))

# Both of these repeat a parameter that already exists elsewhere.
nt = Parameter('number-of-frames-per-clip', default_value=10)
batch_size = Parameter('size-of-one-batch', default_value=10)

# Parameter group shared by assemble_pred_net and pred_net_predict.
params_weight_model = Parameter(
    'fpath-weight-model',
    values=[fpath_weights, fpath_json, batch_size, nt])


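# `parts` and `suffix` are normally produced by the preprocessing cell above,
# which stores them in context['parts'] and context['suffix']; that is where
# the code below reads them from. For a video that fits in a single part
# (about 1000 clips at 10 frames/clip, i.e. roughly 10000 sampled frames:
# about 5000 sec at 2Hz or 2000 sec at 5Hz) the values are:
# parts = 1
# suffix = '_P1_'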

class assemble_pred_net(Model_):
    '''Builds a PredNet in prediction mode from the trained model files.'''

    def __init__(self):
        super(assemble_pred_net, self).__init__(params_weight_model)

    def assemble_model(self):
        '''
        Load the trained model and wrap its PredNet layer in a test model.

        Stores 'batch_size', 'nt' and 'dim_ordering' in the global `context`.

        returns: the value of self.configure(inputs, predictions), which is
                 also kept in self.model
        '''
        cfg = self.config_params

        context['batch_size'] = cfg.get_value('size-of-one-batch').get_value()
        # number of frames per clip; prednet used the KITTI dataset with 10
        # frames per clip, which is changeable
        clip_len = cfg.get_value('number-of-frames-per-clip').get_value()
        context['nt'] = clip_len

        weights_path = cfg.get_value('weights-file-path').get_value()
        json_path = cfg.get_value('model-file-path').get_value()

        # Load trained model
        with open(json_path, 'r') as json_file:
            architecture = json_file.read()
        trained_model = model_from_json(architecture, custom_objects={'PredNet': PredNet})
        trained_model.load_weights(weights_path)

        # Reuse the trained layer's configuration, switched to output predictions
        trained_layer = trained_model.layers[1]
        layer_config = trained_layer.get_config()
        layer_config['output_mode'] = 'prediction'
        context['dim_ordering'] = layer_config['dim_ordering']

        # assemble test model (to output predictions)
        # (open question from the author: "what does this means? only using one layer")
        test_prednet = PredNet(weights=trained_layer.get_weights(), **layer_config)

        # Same input shape as the trained model, with the first (per-clip)
        # dimension replaced by the configured number of frames
        clip_shape = list(trained_model.layers[0].batch_input_shape[1:])
        clip_shape[0] = clip_len
        inputs = Input(shape=tuple(clip_shape))

        self.model = self.configure(inputs, test_prednet(inputs))
        return self.model


class pred_net_predict(Experiment):
    '''Computes the per-frame mean-squared error (MSE) of the predicted video.'''

    def __init__(self):
        super(pred_net_predict,self).__init__(params_weight_model)
        self.mse_frame2 = None
        self.X_hat = None
        self.X_test = None

    def predict_and_evaluate(self,test_model):
        '''
        Predict every part of the test data and compute the per-frame MSE.

        For each part listed in context['parts'] the matching hickle files are
        read from DATA_DIR, predicted with `test_model`, and the squared error
        is averaged over axes 2-4 of every frame. The result of each part is
        written to RESULTS_DIR as mse_frame2<suffix><part>.csv.

        params: test_model - the model used for prediction
        returns: 2d array with one row per clip and one column per frame,
                 covering all parts in order (None if there are no parts);
                 also stored in self.mse_frame2. self.X_test and self.X_hat
                 hold the data of the last part only.
        '''
        self.model = test_model

        batch_size = self.config_params.get_value('size-of-one-batch').get_value()
        nt = self.config_params.get_value('number-of-frames-per-clip').get_value()
        dim_ordering = context['dim_ordering']

        if not os.path.exists(RESULTS_DIR):
            os.mkdir(RESULTS_DIR)

        parts = context['parts']
        suffix = context['suffix']
        mse_per_part = []
        for part in range(1, parts+1):
            curr_test = 'X_test' + suffix + str(part) + '.hkl'
            curr_sources = 'sources_test' + suffix + str(part) + '.hkl'
            test_file = os.path.join(DATA_DIR, curr_test)
            test_sources = os.path.join(DATA_DIR, curr_sources)

            # generate inputs from the test hickle file
            test_generator = SequenceGenerator(test_file, test_sources, nt,\
                                           sequence_start_mode='unique', dim_ordering=dim_ordering)

            # e.g. X_test.shape = (clips, 10, 128, 160, 3) after the transpose below
            self.X_test = test_generator.create_all()
            self.X_hat = self.start_predict(self.model, self.X_test, batch_size) #from experiment2

            if dim_ordering == 'th':
                # move axis 2 to the end so both arrays share the layout used
                # for the error computation below
                self.X_test = np.transpose(self.X_test, (0, 1, 3, 4, 2))
                self.X_hat = np.transpose(self.X_hat, (0, 1, 3, 4, 2))

            curr_mse_frame2 = 'mse_frame2' + suffix + str(part) + '.csv'
            mse_frame2_out = os.path.join(RESULTS_DIR, curr_mse_frame2)

            # [2, 3, 4] is the x, y, rgb
            mse_point = (self.X_test - self.X_hat) ** 2
            mse_point[:, :2] = 0  # the errors of first two frames not helpful
            # reshape (instead of squeeze) keeps the result 2d even when a
            # part holds a single clip
            part_mse = np.apply_over_axes(np.mean, mse_point, [2,3,4]).reshape(mse_point.shape[:2])
            np.savetxt(mse_frame2_out, part_mse, delimiter=",")
            mse_per_part.append(part_mse)

            print('mse_frame2')
            print(type(part_mse))
            print(part_mse.shape)
            print('Errors in the first clip:')
            print(part_mse[0, :])

        # Return the errors of ALL parts, in order, so that row i is clip i of
        # the whole video (previously only the last part was returned).
        if mse_per_part:
            self.mse_frame2 = np.concatenate(mse_per_part, axis=0)
        return self.mse_frame2
    
    
# Assemble the prediction-mode PredNet, run it on the test clips and keep the
# resulting per-frame MSE array in `mse_frame2` for the cells below.
model_object = assemble_pred_net()
predict_object = pred_net_predict()
mse_frame2 = predict_object.predict_and_evaluate(model_object.assemble_model())

  mode='max')
  mode='max')
  mode='max')


mse_frame2
<type 'numpy.ndarray'>
(21, 10)
Errors in the first clip:
[ 0.          0.          0.00311169  0.00279602  0.00242331  0.0022822
  0.00215916  0.00227766  0.00208603  0.00218403]


In [3]:
import numpy as np
import cv2
import sys
import imageio

#Computes the Threshold value
#Returns the time interval at which accident happens
def get_accidents(mse_frame2):
    '''
    Find the time intervals in which the prediction error is abnormally high.

    The threshold is mean + 5 * standard deviation of the errors in the first
    int(frate) clips, ignoring the first two frames of every clip. It is
    stored in context['threshold'] and printed. A clip is reported when more
    than two of its frames exceed the threshold.

    params: mse_frame2 - 2d array of per-frame errors, one row per clip
    returns: a list of (start, end) tuples in seconds, one per reported clip
    '''
    frate = context['frate']

    # Baseline for the threshold: the first int(frate) clips, i.e. about as
    # many seconds of video as there are frames in a clip (10 s for 10-frame
    # clips).
    sample = mse_frame2[:int(frate), 2:].reshape(1, -1)
    mean = np.mean(sample)
    stdev = np.std(sample)

    accident_threshold = mean + 5 * stdev
    context['threshold'] = accident_threshold
    print('Using threshold: {}.'.format(accident_threshold))

    accident_frames = mse_frame2 > accident_threshold

    # 1.0 keeps this a true division even if frate is given as an integer.
    second_per_frame = 1.0 / frate
    accidents = []
    accident_in_last_clip = False
    for i, frame_arr in enumerate(accident_frames):
        if np.sum(frame_arr) > 2:
            # An accident that continues from the previous clip starts at the
            # first frame of this clip; otherwise at the first frame above
            # the threshold.
            if accident_in_last_clip:
                start = 0
            else:
                start = int(np.argmax(frame_arr))
            # last frame of the clip above the threshold
            end = len(frame_arr) - 1 - int(np.argmax(frame_arr[::-1]))
            # Frame offset of this clip: use the actual clip length instead
            # of a hard-coded 10 frames per clip.
            clip_offset = i * len(frame_arr)
            accidents.append(((clip_offset + start) * second_per_frame,
                              (clip_offset + end) * second_per_frame))
            accident_in_last_clip = True
        else:
            accident_in_last_clip = False

    return accidents


# Detect the accident intervals from the per-frame errors and report each one.
accidents = get_accidents(mse_frame2)
for accident in accidents:
    print("An accident happened from {0}s to {1}s.".format(*accident))

# Write overlay Text on each frame 
def return_overlay(frame):
    """
    Put the white label 'Accident...' on a video frame.

    The frame itself is handed to cv2.putText and the very same object is
    returned, so the caller's frame carries the label afterwards.

    params: frame - the image to label
    returns: the labelled frame (the object that was passed in)
    """
    label_font = cv2.FONT_HERSHEY_COMPLEX_SMALL
    # arguments: image, text, text origin, font, scale, colour, thickness
    cv2.putText(frame, 'Accident...', (0, 200), label_font, 1, (255, 255, 255), 2)
    return frame

#read video using imageio library
# Copy the video frame by frame, labelling the frames that fall inside a
# detected accident interval, and show a live preview (press 'q' to stop).
reader = imageio.get_reader('test_avi/test.mp4','ffmpeg')
fps = reader.get_meta_data()['fps']

writer = imageio.get_writer('test_results_avi/Video_overlay.mp4', fps=fps)
f = 1  # 1-based number of the current frame
a = 0  # index of the accident interval currently being tracked
try:
    for im in reader:
        t = f / float(fps)
        f = f + 1
        # With no detected accident every frame is copied unchanged
        # (indexing an empty list here used to raise IndexError).
        accident = accidents[a] if accidents else None
        if accident is not None and accident[0] <= t <= accident[1]:
            im = return_overlay(im)
        writer.append_data(im)
        cv2.imshow('video_overlay', im)
        if cv2.waitKey(50) & 0xFF == ord('q'):
            break
        # move on to the next interval once the current one has passed
        if accident is not None and t > accident[1] and a < len(accidents) - 1:
            a = a + 1
finally:
    # Release the preview window and both video files even if a frame fails.
    cv2.destroyAllWindows()
    writer.close()
    reader.close()

Using threshold: 0.00405593650066.
An accident happened from 36.0s to 39.5s.




In [4]:
from IPython.display import HTML
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import animation
from matplotlib import style
# Select matplotlib's 'ggplot' style sheet for the figures created below.
style.use('ggplot')


def update(i, x, y, line):
    """
    Animation callback: show the first `i` points of the error curve.

    params: i - number of points to show
            x, y - the full sequences of x and y values
            line - the line whose data is replaced
    returns: a one-element tuple containing `line`
    """
    shown_x, shown_y = x[:i], y[:i]
    line.set_data(shown_x, shown_y)
    # the axis limits are reset to fixed values on every frame
    axes = line.axes
    axes.axis([0, 106, 0, 0.02])
    return (line,)

#Plots Threshold value and time-error graph
def plot_error(interval=500, output_video=False):
    '''
    Animate the per-frame prediction error over time with the threshold line.

    Reads 'threshold', 's_length' and 'frate' from the global `context` and
    the errors from the global `mse_frame2`.

    params: interval - value passed to FuncAnimation as `interval`
            output_video - when True, the animation is also saved to
                           test_results_avi/test_error.mp4
    returns: the FuncAnimation object
    '''
    threshold = context['threshold']
    s_length = context['s_length']
    # Seconds between two plotted samples, derived from the sampling rate
    # instead of a hard-coded 0.5 s (which is only right at 2 fps).
    step = 1.0 / context['frate']

    fig = plt.figure()  # figsize=(10, 4.5), dpi=80)
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title('Error Over Time (Plotted each {0}s.)'.format(step))
    # update() resets the axis limits on every animation frame, so this limit
    # only applies until the first frame is drawn.
    ax.set_xlim(0, s_length)
    # ax.set_xlabel('Time (s)', ha='right')
    ax.set_ylabel('Error')

    ax.plot([0, s_length], [threshold, threshold], '--c', label='Threshold')
    ax.legend(loc='upper right')

    line, = ax.plot([], [])

    # flatten the (clip, frame) errors into one series and give every sample
    # its time stamp; x always has exactly as many entries as y
    y = mse_frame2.reshape(1, -1)[0]
    x = np.arange(len(y)) * step

    anim = animation.FuncAnimation(fig, update, len(x), fargs=[x, y, line], interval=interval, blit=False)
    if output_video:
        save_path = os.path.join('test_results_avi', 'test_error.mp4')
        anim.save(save_path)  # , writer='imagemagick')
    return anim


# Build the error-over-time animation (interval=100; not saved to disk because
# output_video keeps its default of False).
anim = plot_error(interval=100)

In [5]:
# Show the animation built in the previous cell as an embedded HTML5 video.
%matplotlib inline
HTML(anim.to_html5_video())