In [1]:
# Demo configuration -- edit the two values below to suit your run.

fname_in = 'test2.avi'  # input video file name
frate = 2.0             # sampling rate in frames per second: 2.0 or 5.0

# The analysis is only set up for 2.0 or 5.0 fps, so reject anything else.
assert frate == 2.0 or frate == 5.0

In [2]:
# this reads in the avi

import os
import imageio
import sys
# You might need to run this if you don't have ffmpeg.exe:
# imageio.plugins.ffmpeg.download()

# The input video is looked up in the current working directory.
home = os.getcwd()
fname_full = os.path.join(home, fname_in)

# Open the video with imageio's ffmpeg plugin; the frame rate and the duration
# are taken from the reader's metadata.
vid = imageio.get_reader(fname_full, 'ffmpeg')
vid_info = vid.get_meta_data()
s_hz, s_length = vid_info['fps'], vid_info['duration']

# Refuse videos shorter than one sampling interval (1 / frate seconds).
min_duration = 1.0 / frate
if s_length < min_duration:
    raise ValueError(
        'The length of this video ({0}s) is smaller than the minimum duration ({1}s).'.format(
            s_length, min_duration))

In [3]:
# This takes the frames of the avi and converts them into the .hkl files
# The script is based on process_kitti

import numpy as np
from scipy.misc import imread, imresize
import hickle as hkl
import pandas as pd

# Target frame size as (rows, columns), i.e. (height, width): process_im scales
# a frame to desired_sz[0] rows and then crops it to desired_sz[1] columns.
desired_im_sz = (128, 160)


def process_im(im, desired_sz):
    '''
    Resize an image to the target height, then centre-crop it to the target width.

    params: im - an image as an array whose first two axes are rows and columns
                 (e.g. a height x width x 3 colour frame)
            desired_sz - a tuple of (height, width) in pixels
    returns: the resized and cropped image; it has desired_sz[0] rows and, as
             long as the rescaled image is at least desired_sz[1] pixels wide,
             desired_sz[1] columns
    '''
    # Scale so that the height matches exactly; the width is scaled by the same
    # factor, which keeps the aspect ratio.
    target_ds = float(desired_sz[0]) / im.shape[0]
    # NOTE(review): scipy.misc.imresize is deprecated and has been removed from
    # recent SciPy releases -- confirm the SciPy version this notebook is pinned to.
    im = imresize(im, (desired_sz[0], int(np.round(target_ds * im.shape[1]))))
    # Floor division keeps the crop offset an integer under both Python 2 and
    # Python 3; with "/" the offset becomes a float under true division and the
    # slice below raises a TypeError.
    d = (im.shape[1] - desired_sz[1]) // 2
    im = im[:, d:d + desired_sz[1]]
    return im

split = 'test'
frames_per_clip = 10
max_clip = 1000 # a limit to the batch file size to run on my container

# Timing of the source video versus the target sampling rate.
s_num_frames = int(round(s_hz * s_length))
s_orig_ms_per_frame = 1000.0 / s_hz
s_targ_ms_per_frame = 1000.0 / frate

# skip: step of the sampling method (every skip-th source frame is kept).
# It is forced to an int so that it behaves the same under Python 2 and 3.
skip = int(round(s_targ_ms_per_frame / s_orig_ms_per_frame))
if skip < 1:
    # Without this check the division below fails with an unhelpful
    # ZeroDivisionError when the source frame rate is far below the target rate.
    raise ValueError('The video frame rate ({0} fps) is too low to sample at {1} fps.'
                     .format(s_hz, frate))

s_actual_hz = s_hz / float(skip)
s_actual_ms_per_frame = 1000.0 / s_actual_hz
# Only complete clips are kept; left-over frames at the end are dropped.
s_total_clips = s_num_frames // (frames_per_clip * skip)

# parts: each part contains no more than <max_clip> number of clips.
# Ceiling division, so that a clip count that is an exact multiple of max_clip
# does not produce an extra, empty part; there is always at least one part.
parts = max(1, (s_total_clips + max_clip - 1) // max_clip)
step_im = max_clip * frames_per_clip
suffix = '_P' + str(parts) + '_'

# num_im: total number of images sampled
num_im = s_total_clips * frames_per_clip

# initialize the test data: one uint8 frame of size desired_im_sz x 3 per sampled image
X = np.zeros((num_im,) + desired_im_sz + (3,), np.uint8)
# source in each frame makes sure each clip has frames in the same video
source_list = [fname_in] * num_im

# Sample every skip-th frame of the video into X until num_im frames are stored.
ct = 0
# The guard covers videos shorter than one clip: X then has no rows and
# writing X[0] would raise an IndexError.
if num_im > 0:
    for i, im_out in enumerate(vid):
        if i % skip != 0:
            continue
        # The iterator already yields frame i, so there is no need to fetch it
        # a second time with vid.get_data(i).
        X[ct] = process_im(im_out, desired_im_sz)
        ct += 1
        if ct == num_im:
            break
# NOTE(review): if the video yields fewer than num_im sampled frames, the
# remaining rows of X stay all-zero -- confirm whether that needs a warning.

# Store the images and sources into their respective hickle files.
# Start from an empty test_avi directory: if one is left over from a previous
# run, delete the files in it and the directory itself before re-creating it.
out_dir = home + '/test_avi'
if os.path.isdir(out_dir):
    for stale in os.listdir(out_dir):
        os.remove(out_dir + '/' + stale)
    os.rmdir(out_dir)
os.mkdir(out_dir)
    
# Write X and source_list to disk in `parts` consecutive slices.
for part in range(1, parts + 1):
    # Each part holds step_im images, except the last one, which runs to num_im.
    xbeg = (part - 1) * step_im
    xend = num_im if part == parts else part * step_im
    # Single-argument print(...) gives the same output under Python 2 and 3.
    # NOTE(review): {0} receives the current part index although the wording
    # reads like a total count -- confirm which one is intended.
    print('Test data has {0} part(s), starts at image {1} and ends at image {2}.'.format(part, xbeg, xend))
    part_tag = split + suffix + str(part) + '.hkl'
    hkl.dump(X[xbeg:xend], os.path.join(home, 'test_avi', 'X_' + part_tag))
    hkl.dump(source_list[xbeg:xend], os.path.join(home, 'test_avi', 'sources_' + part_tag))

Test data has 1 part(s), starts at image 0 and ends at image 240.


In [5]:
'''
Evaluate trained PredNet on test video.
Calculates mean-squared error
'''

from six.moves import cPickle
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from keras import backend as K
from keras.models import Model, model_from_json
from keras.layers import Input, Dense, Flatten

# Directory added to the module search path; presumably where the prednet and
# data_utils modules imported just below live -- confirm against the repo layout.
PREDNET_PATH = '../models/prednet'
sys.path.append(PREDNET_PATH)

from prednet import PredNet
from data_utils import SequenceGenerator

# Where the hickle files are read from, where the trained model files are, and
# where the results of this cell are written.
DATA_DIR = './test_avi/'
WEIGHTS_DIR = os.path.join(PREDNET_PATH, 'model_data')
RESULTS_DIR = './test_results_avi/'

# `parts` and `suffix` are set in the preprocessing cell above. To run this
# cell on its own, uncomment the two lines below; that is valid for videos that
# fit in a single part, i.e. fewer than 1000 clips (10000 sampled frames at
# 10 frames/clip). This would be 5000 sec at 2Hz or 2000 sec at 5Hz.
# parts = 1
# suffix = '_P1_'

batch_size = 10  # batch size passed to test_model.predict
nt = 10  # number of frames per clip
# PredNet used the KITTI dataset, which has 10 frames per clip; this is changeable

# Files holding the trained model: weights (HDF5) and architecture (JSON).
fpath_weights = os.path.join(WEIGHTS_DIR, 'prednet_kitti_weights.hdf5')
fpath_json = os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json')

# Load trained model: rebuild the architecture from its JSON description
# (PredNet is registered as a custom layer class), then load the weights.
with open(fpath_json, 'r') as fj:
    json_string = fj.read()
trained_model = model_from_json(json_string, custom_objects = {'PredNet': PredNet})
trained_model.load_weights(fpath_weights)

# Get the config of the trained model's layer 1 and switch its output mode to
# 'prediction'; dim_ordering is kept because the evaluation loop needs it.
layer_config = trained_model.layers[1].get_config()
layer_config['output_mode'] = 'prediction'
dim_ordering = layer_config['dim_ordering']

# Assemble the test model (to output predictions): a new PredNet layer built
# from the trained layer's weights and the modified config.
# NOTE(review): the bare string below is a leftover question, not a docstring.
# layers[1] appears to be the PredNet layer itself (layers[0] being the input
# layer), so this would carry over the whole trained network -- confirm.
'''what does this means? only using one layer'''
test_prednet = PredNet(weights=trained_model.layers[1].get_weights(), **layer_config)
# Reuse the trained model's input shape without the batch dimension, with its
# first entry replaced by nt (presumably the time dimension -- confirm).
input_shape = list(trained_model.layers[0].batch_input_shape[1:])
input_shape[0] = nt
inputs = Input(shape=tuple(input_shape))
predictions = test_prednet(inputs)
test_model = Model(input=inputs, output=predictions)

if not os.path.exists(RESULTS_DIR):
    os.mkdir(RESULTS_DIR)

# Per-part frame-wise MSE arrays. They are joined after the loop so that
# mse_frame2 covers every part instead of only the last one processed.
mse_frame2_parts = []

# The with-block closes the score file even if a part fails half-way through.
with open(os.path.join(RESULTS_DIR, 'prediction_scores.txt'), 'w') as f:
    for part in range(1, parts + 1):

        curr_test = 'X_test' + suffix + str(part) + '.hkl'
        curr_sources = 'sources_test' + suffix + str(part) + '.hkl'
        test_file = os.path.join(DATA_DIR, curr_test)
        test_sources = os.path.join(DATA_DIR, curr_sources)

        # generate inputs from the test hickle file
        test_generator = SequenceGenerator(test_file, test_sources, nt,
                                           sequence_start_mode='unique', dim_ordering=dim_ordering)

        X_test = test_generator.create_all()
        X_hat = test_model.predict(X_test, batch_size)
        if dim_ordering == 'th':
            # Move axis 2 to the end so that both arrays are laid out as
            # (clip, frame, rows, columns, channels),
            # e.g. X_test.shape = (26, 10, 128, 160, 3).
            X_test = np.transpose(X_test, (0, 1, 3, 4, 2))
            X_hat = np.transpose(X_hat, (0, 1, 3, 4, 2))

        curr_mse_frame2 = 'mse_frame2' + suffix + str(part) + '.csv'
        mse_frame2_out = os.path.join(RESULTS_DIR, curr_mse_frame2)

        # Squared error for every frame except the first of each clip.
        mse_point = (X_test[:, 1:] - X_hat[:, 1:]) ** 2
        # Average over axes [2, 3, 4] (rows, columns, rgb) to get one value per
        # frame. The reshape (rather than np.squeeze) keeps the result 2-D,
        # (clips, frames - 1), even when a part holds a single clip.
        mse_frame2 = np.apply_over_axes(np.mean, mse_point, [2, 3, 4]).reshape(mse_point.shape[:2])
        np.savetxt(mse_frame2_out, mse_frame2, delimiter=",")
        mse_frame2_parts.append(mse_frame2)

        print('mse_frame2')
        print(type(mse_frame2))
        print(mse_frame2.shape)

        # Compare overall MSE's, write results to prediction_scores.txt
        mse_model = np.mean(mse_point)  # look at all timesteps except the first
        # error obtained by using the previous frame as the prediction
        mse_prev = np.mean((X_test[:, :-1] - X_test[:, 1:]) ** 2)
        mse_last = np.mean((X_test[:, -1] - X_hat[:, -1]) ** 2)  # look only the last frame
        # Every entry ends with a newline so the scores do not run together.
        f.write("part number: %d\n" % part)
        f.write("Model MSE: %f\n" % mse_model)
        f.write("Last Frame MSE: %f\n" % mse_last)
        f.write("Previous Frame MSE: %f\n" % mse_prev)

# Frame-wise MSE of all clips of all parts, in video order.
mse_frame2 = np.concatenate(mse_frame2_parts, axis=0)


Using Theano backend.


mse_frame2
<type 'numpy.ndarray'>
(24, 9)


In [6]:
# see https://github.com/startupml/video/blob/master/results/README.md for rationale for these thresholds
if frate == 2.0:
    # presumably the MSE level 3 standard deviations away from the mean -- see the README above
    threshold = 0.00777565
else:
    threshold = 0.004869763

# Overall mean of mse per clip, since mse of 2nd and 3rd frames are always higher.
# atleast_2d keeps this working when there is only a single clip.
mse_clip = pd.Series(np.mean(np.atleast_2d(mse_frame2), axis=1))
clip_size = mse_clip.shape[0]

# Clip timing in milliseconds of the source video.
# The test video is 29.97HZ, making the duration of each clip not exactly 5s.
frame_ms = skip * s_orig_ms_per_frame                   # spacing of two sampled frames
clip_ms = frames_per_clip * skip * s_orig_ms_per_frame  # spacing of two clip starts
clip_index = np.arange(clip_size)
t_start = pd.Series(clip_index * clip_ms)
# time of the last sampled frame of the clip: one sampling step before the next clip starts
t_end = pd.Series((clip_index + 1) * clip_ms - frame_ms)

# 1.0 where the clip's mean MSE reaches the threshold, 0.0 otherwise. The cast
# to float64 makes the comparison run at the same precision as a per-element
# comparison against the Python float threshold.
above_threshold = (mse_clip.astype(np.float64) >= threshold).astype(float)

mse_clip_out = pd.concat([mse_clip, t_start, t_end, above_threshold], axis=1)
mse_clip_out.columns = ['MSE_ave_over_clip', 'time_start', 'time_end', 'above_threshold']
mse_clip_out.to_csv(os.path.join(RESULTS_DIR, 'test_avi_mse_clip.csv'))

# If the images are labelled frame by frame, the label should be placed before the point where the MSE rises.

# Report every clip whose above_threshold flag is set, with its start and end
# time as stored in mse_clip_out.
print('Assessing possible accidents')
flagged = np.flatnonzero(mse_clip_out.above_threshold.values == 1)
if len(flagged) == 0:
    print("No possible accidents found")
else:
    for i in flagged:
        # Joining the str() of every item with single spaces reproduces what
        # the Python 2 print statement wrote, and also runs under Python 3.
        print(' '.join(str(item) for item in (
            'Possible accident in clip ', i,
            'starting time = ', mse_clip_out.time_start.iloc[i],
            'ending time = ', mse_clip_out.time_end.iloc[i])))


Assessing possible accidents
Possible accident in clip  1 starting time =  5005.00500501 ending time =  9509.50950951
Possible accident in clip  3 starting time =  15015.015015 ending time =  19519.5195195
Possible accident in clip  7 starting time =  35035.035035 ending time =  39539.5395395
Possible accident in clip  8 starting time =  40040.04004 ending time =  44544.5445445
Possible accident in clip  11 starting time =  55055.0550551 ending time =  59559.5595596
Possible accident in clip  12 starting time =  60060.0600601 ending time =  64564.5645646
Possible accident in clip  13 starting time =  65065.0650651 ending time =  69569.5695696
Possible accident in clip  14 starting time =  70070.0700701 ending time =  74574.5745746
Possible accident in clip  15 starting time =  75075.0750751 ending time =  79579.5795796
Possible accident in clip  16 starting time =  80080.0800801 ending time =  84584.5845846
Possible accident in clip  17 starting time =  85085.0850851 ending time =  895