Use James' code for calculating optical flow from Baseline_Optical_Flow_3D_Descriptor.ipynb

In [2]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from scipy import sqrt, pi, arctan2, cos, sin
from scipy.ndimage import uniform_filter

# Gets the optical flow [<dx,dy>] from two frames
def getOpticalFlow(imPrev, imNew):
    flow = cv2.calcOpticalFlowFarneback(imPrev, imNew, flow=None, pyr_scale=.5, levels=3, winsize=9, iterations=1, poly_n=3, poly_sigma=1.1, flags=cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
    return flow

# Compute the Histogram of Optical Flow (HoF) from the given optical flow
def hof(flow, orientations=9, pixels_per_cell=(10, 10),
        cells_per_block=(4, 3), normalise=False, motion_threshold=1.):
    flow = np.atleast_2d(flow)

    if flow.ndim < 3:
        raise ValueError("Requires dense flow in both directions")

    if normalise:
        flow = sqrt(flow)

    if flow.dtype.kind == 'u':
        flow = flow.astype('float')

    gx = np.zeros(flow.shape[:2])
    gy = np.zeros(flow.shape[:2])

    gx = flow[:,:,1]
    gy = flow[:,:,0]

    magnitude = sqrt(gx**2 + gy**2)
    orientation = arctan2(gy, gx) * (180 / pi) % 180

    sy, sx = flow.shape[:2]
    cx, cy = pixels_per_cell
    bx, by = cells_per_block

    n_cellsx = int(np.floor(sx // cx))
    n_cellsy = int(np.floor(sy // cy))

    orientation_histogram = np.zeros((n_cellsy, n_cellsx, orientations))
    subsample = np.index_exp[cy / 2:cy * n_cellsy:cy, cx / 2:cx * n_cellsx:cx]
    for i in range(orientations-1):
        temp_ori = np.where(orientation < 180 / orientations * (i + 1),
                            orientation, -1)
        temp_ori = np.where(orientation >= 180 / orientations * i,
                            temp_ori, -1)

        cond2 = (temp_ori > -1) * (magnitude > motion_threshold)
        temp_mag = np.where(cond2, magnitude, 0)

        temp_filt = uniform_filter(temp_mag, size=(cy, cx))
        orientation_histogram[:, :, i] = temp_filt[subsample]

    temp_mag = np.where(magnitude <= motion_threshold, magnitude, 0)

    temp_filt = uniform_filter(temp_mag, size=(cy, cx))
    orientation_histogram[:, :, -1] = temp_filt[subsample]

    n_blocksx = (n_cellsx - bx) + 1
    n_blocksy = (n_cellsy - by) + 1
    normalised_blocks = np.zeros((n_blocksy, n_blocksx,
                                  by, bx, orientations))

    for x in range(n_blocksx):
        for y in range(n_blocksy):
            block = orientation_histogram[y:y+by, x:x+bx, :]
            eps = 1e-5
            normalised_blocks[y, x, :] = block / sqrt(block.sum()**2 + eps)

    return normalised_blocks.ravel()

FIXED_WIDTH = 160
FIXED_HEIGHT = 120
def normalizeFrame(frame_original):
    frame_gray = cv2.cvtColor(frame_original,cv2.COLOR_BGR2GRAY)
    frame_gray_resized = cv2.resize(frame_gray, (FIXED_WIDTH, FIXED_HEIGHT))
    return frame_gray_resized

# get the Histogram of Optical Flow from two images
def getHoF(frame1, frame2):
    flow = getOpticalFlow(frame1, frame2)
    return hof(flow, pixels_per_cell=(20,20), cells_per_block=(5,5))

# get the Histogram of Optical Flows of a video grouped sequentially in a 1D array
def getSequentialHoF(video_path):
    hofs = []
    cap = cv2.VideoCapture(video_path)
    ret1, frame1 = cap.read()
    frame1 = normalizeFrame(frame1)
    while(cap.isOpened()):
        ret2, frame2 = cap.read()
        if ret2 == True:
            frame2 = normalizeFrame(frame2)
            hof_array = getHoF(frame1, frame2)
            hofs = np.concatenate((hofs, hof_array),axis=0)
            frame1 = frame2
        else:
            break
    return hofs

This descriptor returns features of different length, depending on the length of the input videos. Padding the HoF feature vector of each video to the length of the largest video does not give great accuracy: another solution to this problem would be to trim all videos to the size of the smallest video in the set

In [3]:
import os
import sys

# Determine the length in frames of the shortest video in the provided dataset
def shortest(data_dir):
    # get list of files in the directory. directory should be flat with only video files in it
    files = os.listdir(data_dir)
    
    # Find the length of the shortest video (in frames)
    shortestLen = sys.maxint
    for i in range(len(files)):
        cap = cv2.VideoCapture(data_dir+'/'+files[i])
        
        # This line tries to get the length of the video from the header.
        # NOTE: how to access the property I use here varies from system to system,
        #  so you may have to play around with it to get it to work on different machines
        length = int(cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
        if length < shortestLen:
            #if length < shortestLen:
            shortestLen = length
            # If it didn't work, we need to count the frames
        #else:
         #   length = 0
          #  # Using grab here as an optimistic estimate (assuming here all grabbed frames can be decoded)
           # while (cap.grab()): length += 1
            #if length < shortestLen: shortestLen = length
        
        #print(length)
        cap.release()
        
    return shortestLen

Try running this on the walking video dataset from http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/

In [4]:
print (len(os.listdir('./hof/walk')))
print (shortest('./hof/walk'))

548
38


Now, use this amount of frames from the middle of each video as a representative sample of that video to calculate HoF feature vector

In [134]:
# get the Histogram of Optical Flows of a video grouped sequentially in a 1D array
# use only the specified amount of frames, from the middle of the video
def getSequentialHoFMiddle(video_path, label, frames):
#     print "Processing", video_path
    seg_hofs = []
    hofs = []
    cap = cv2.VideoCapture(video_path)
    length = int(cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)) # I'm going to assume this works once stuff is fixed
#     startIdx = ((length - 1) - frames)/2
#     if startIdx < 1:
#         startIdx = 0
#     # skip through beginning unneeded frames
#     frameNum = 0
#     while (frameNum < startIdx):
#         cap.grab()
#         frameNum += 1
#     frameNum = 0
    
    # Calculate HoF from necessary frames
    ret1, frame1 = cap.read()
    frame1 = normalizeFrame(frame1)
    while(cap.isOpened()):
        hofs = []
        for i in range(frames-1):
            ret2, frame2 = cap.read()
            if ret2 == True:
                frame2 = normalizeFrame(frame2)
                hof_array = getHoF(frame1, frame2)
                hofs = np.concatenate((hofs, hof_array),axis=0)
                frame1 = frame2
            else:
                cap.release()
                break
        seg_hofs.append(hofs)
    
    print frames, length, len(seg_hofs), [len(x) for x in seg_hofs]
    seg_labels = np.full(len(seg_hofs), label)
    return seg_hofs, seg_labels

Now, create features from trimmed videos

In [135]:
from sklearn import svm
import os

# Collect the file path of all the running and walking videos,
# we will only be using these 2 classes
RUN_DIR = "./hof/run/"
RUN_FILES = os.listdir(RUN_DIR)
RUN_FILES = [RUN_DIR + f for f in RUN_FILES]
WALK_DIR = "./hof/walk/"
WALK_FILES = os.listdir(WALK_DIR)
WALK_FILES = [WALK_DIR + f for f in WALK_FILES]

# Use equal number of data from each class
nc = min(len(RUN_FILES), len(WALK_FILES))
print "nc:", nc
RUN_FILES = RUN_FILES[0:nc]
WALK_FILES = WALK_FILES[0:nc]

RATIO = 0.9
offset = int(np.floor(nc*RATIO))
print "offset:", offset

# Split test and training at a ratio of 1:9
train_files = RUN_FILES[0:offset] + WALK_FILES[0:offset]
test_files = RUN_FILES[offset:nc] + WALK_FILES[offset:nc]

# Put the labels in vectors
train_labels = np.zeros(offset*2, int)
train_labels[0:offset] = 1 #RUN=1
train_labels[offset:offset*2] = 2 #WALK=2

test_len = nc-offset
test_labels = np.zeros(test_len*2, int)
test_labels[0:test_len] = 1 #RUN=1
test_labels[test_len:test_len*2] = 2 #WALK=2

print "train files:", len(train_files)
print "train labels:", len(train_labels)
print "test files:", len(test_files)
print "test labels:", len(test_labels)

numFrames = min(shortest(RUN_DIR), shortest(WALK_DIR))
print "numFrames: ", numFrames

nc: 232
offset: 208
train files: 416
train labels: 416
test files: 48
test labels: 48
numFrames:  22


In [235]:
train_result = [a for t,l in zip(train_files, train_labels) for a in getSequentialHoFMiddle(t, l, numFrames)]
test_result = [a for t,l in zip(test_files, test_labels) for a in getSequentialHoFMiddle(t, l, numFrames)]

22 23 2 [37800, 0]
22 34 2 [37800, 19800]
22 48 3 [37800, 37800, 7200]
22 38 2 [37800, 27000]
22 69 4 [37800, 37800, 37800, 7200]
22 63 3 [37800, 37800, 34200]
22 49 3 [37800, 37800, 9000]
22 42 2 [37800, 34200]
22 34 2 [37800, 19800]
22 59 3 [37800, 37800, 27000]
22 50 3 [37800, 37800, 10800]
22 51 3 [37800, 37800, 12600]
22 80 4 [37800, 37800, 37800, 27000]
22 48 3 [37800, 37800, 7200]
22 105 5 [37800, 37800, 37800, 37800, 34200]
22 88 5 [37800, 37800, 37800, 37800, 3600]
22 65 4 [37800, 37800, 37800, 0]
22 78 4 [37800, 37800, 37800, 23400]
22 79 4 [37800, 37800, 37800, 25200]
22 134 7 [37800, 37800, 37800, 37800, 37800, 37800, 10800]
22 113 6 [37800, 37800, 37800, 37800, 37800, 10800]
22 106 5 [37800, 37800, 37800, 37800, 36000]
22 79 4 [37800, 37800, 37800, 25200]
22 158 8 [37800, 37800, 37800, 37800, 37800, 37800, 37800, 16200]
22 43 2 [37800, 36000]
22 117 6 [37800, 37800, 37800, 37800, 37800, 18000]
22 89 5 [37800, 37800, 37800, 37800, 5400]
22 50 3 [37800, 37800, 10800]
22 46 3

In [283]:
train = [y for x in train_result[::2] for y in x]
new_train_labels = np.concatenate(train_result[1::2])
test = [y for x in test_result[::2] for y in x]
new_test_labels = np.concatenate(test_result[1::2])

In [285]:
threshold = 0.5
print max([len(x) for x in train]), max([len(x) for x in test])
train_threshold = int(max([len(x) for x in train]) * threshold)
test_threshold = int(max([len(x) for x in test]) * threshold)
print train_threshold, test_threshold
train_trimmed = [x for x,y in zip(train,new_train_labels) if len(x) > train_threshold]
train_labels_trimmed = [y for x,y in zip(train,new_train_labels) if len(x) > train_threshold]
test_trimmed = [x for x,y in zip(test,new_test_labels) if len(x) > test_threshold]
test_labels_trimmed = [y for x,y in zip(test,new_test_labels) if len(x) > test_threshold]

37800 37800
18900 18900


In [286]:
max_width = max(np.array([len(i) for i in test_trimmed]).max(),np.array([len(i) for i in test_trimmed]).max())
def numpy_fillna(data, width):
    # Get lengths of each row of data
    lens = np.array([len(i) for i in data])

    # Mask of valid places in each row
    mask = np.arange(width) < lens[:,None]

    # Setup output array and put elements from data into masked positions
    out = np.zeros(mask.shape, dtype=data.dtype)
    out[mask] = np.concatenate(data)
    return out
train_pad = numpy_fillna(np.array(train_trimmed), max_width)
test_pad = numpy_fillna(np.array(test_trimmed), max_width)

In [288]:
from sklearn import svm

clf = svm.SVC()
clf.fit(train_pad, train_labels_trimmed)
predict = clf.predict(test_pad)

from sklearn import metrics
print(metrics.accuracy_score(predict, test_labels_trimmed))

0.525510204082


Create and test decision tree model

In [290]:
from sklearn import tree
tree_clf = tree.DecisionTreeClassifier()
tree_clf.fit(train_pad, train_labels_trimmed)
predict = tree_clf.predict(test_pad)
print(metrics.accuracy_score(predict, test_labels_trimmed))

0.612244897959


Create and test random forest model

In [292]:
from sklearn import ensemble
tree_clf = ensemble.RandomForestClassifier()
tree_clf.fit(train_pad, train_labels_trimmed)
predict = tree_clf.predict(test_pad)
print(metrics.accuracy_score(predict, test_labels_trimmed))

0.642857142857


Create and test logistic regression

In [293]:
from sklearn import linear_model
tree_clf = linear_model.LogisticRegression()
tree_clf.fit(train_pad, train_labels_trimmed)
predict = tree_clf.predict(test_pad)
print(metrics.accuracy_score(predict, test_labels_trimmed))

0.663265306122
