#### Before you run the notebook, make sure the homework files provided are in the same directory.
```
150180705/  
    hw4.ipynb  
    biped_1.avi
    biped_2.avi
    biped_3.avi
```

## PART 1

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import moviepy.editor as mpy
from scipy.signal import convolve2d
import cv2

import os

In [2]:
def get_video_frames(fname):
    video = mpy.VideoFileClip(fname)
    n_frames = video.reader.nframes
    fps = video.fps
    
    frames = []
    for i in range(n_frames):
        frame = video.get_frame(i*1.0/fps)
        frames.append(frame)
        
    return frames, n_frames, fps

In [3]:
def lucas_kanade(image1, image2, points, window_size=5, blur_window_size=None, tau=1e-2, grayscale=False):
    assert type(points) == list, "Param 'points' must be type list"
    assert type(grayscale) == bool, "Param 'grayscale' must be type bool"
    
    if grayscale:
        gray_im1 = image1
        gray_im2 = image2
    else:
        gray_im1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
        gray_im2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
    
    if blur_window_size:
        assert type(blur_window_size) == int, "Param 'blur_window_size' must be type int"
        gray_im1 = cv2.GaussianBlur(gray_im1, (blur_window_size, blur_window_size), 0)
        gray_im2 = cv2.GaussianBlur(gray_im2, (blur_window_size, blur_window_size), 0)

    of_directions = []
    points_next = []
    for point in points:
        x_coord, y_coord = point
        w = window_size // 2

        x_filter = np.array([[-1, 1], [-1, 1]], np.float64) * 1/4
        y_filter = np.array([[-1, -1], [1, 1]], np.float64) * 1/4
        t_filter = np.array([[1, 1], [1, 1]], np.float64) * 1/4

        Ix = convolve2d(gray_im1, x_filter, "same") + convolve2d(gray_im2, x_filter, "same")    # Derivative in X direction
        Iy = convolve2d(gray_im1, y_filter, "same") + convolve2d(gray_im2, y_filter, "same")    # Derivative in Y direction
        It = convolve2d(gray_im1, t_filter, "same") + convolve2d(gray_im2, -t_filter, "same")    # Derivative in XY direction

        # Method applied below is explained here in details
        # http://www.inf.fu-berlin.de/inst/ag-ki/rojas_home/documents/tutorials/Lucas-Kanade2.pdf
        Sx = Ix[y_coord-w:y_coord+w+1, x_coord-w:x_coord+w+1].flatten()
        Sy = Iy[y_coord-w:y_coord+w+1, x_coord-w:x_coord+w+1].flatten()
        St = It[y_coord-w:y_coord+w+1, x_coord-w:x_coord+w+1].flatten()

        S = np.array([[np.sum(np.power(Sx, 2)), np.sum(np.multiply(Sx, Sy))],
                      [np.sum(np.multiply(Sx, Sy)), np.sum(np.power(Sy, 2))]])
        U = np.array([[np.sum(np.multiply(Sx, St))],
                      [np.sum(np.multiply(Sy, St))]])

        # Instead of calculating the eigenvalues matrix by hand, I used pinv for a similar result
        # pinv handles the ill condition for us <3
        of_direction = np.dot(np.linalg.pinv(S), -U)
        
        of_directions.append(np.array([of_direction[0], of_direction[1]]))
        x_next_coord = int(x_coord + np.round(of_direction[0]))
        y_next_coord = int(y_coord + np.round(of_direction[1]))
        points_next.append((x_next_coord, y_next_coord))
    
    of_directions = np.array(of_directions)
    return points_next, of_directions

In [4]:
def draw_arrow(frame, points, of_directions, scale_factor=5, color=(255, 190, 25), thickness=1, tip_length=0.3):
    of_directions = of_directions.reshape((-1, 2))
    points_next = np.round(np.add(points, np.multiply(of_directions, scale_factor)))
    
    start_pos = tuple(np.mean(points, axis=0, dtype=np.int32))
    end_pos = tuple(np.mean(points_next, axis=0, dtype=np.int32))

    # Default value for color is yellow (RGB)
    frame = cv2.arrowedLine(frame, start_pos, end_pos, color, thickness, tipLength=tip_length)
    return frame

In [5]:
def render_video(frames, fname, fps=30):
    clip = mpy.ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(fname, codec='libx264')

Driver code below

In [6]:
video_fname = "biped_1.avi"
frames, n_frames, fps = get_video_frames(video_fname)

points = [(396, 334), (405, 335)]    # Left bot and right bot corners of hand

i = 0
while i < n_frames - 1:
    f = frames[i]
    f_next = frames[i+1]
    points_next, of_directions = lucas_kanade(f, f_next, points, window_size=3, blur_window_size=3)
    
    frames[i] = draw_arrow(f, points, of_directions, scale_factor=10, thickness=2, tip_length=0.2)
    points = points_next
    i += 1
    
render_video(frames, "biped_1_tracked.avi", fps)

t:  31%|███       | 48/154 [00:00<00:00, 406.67it/s, now=None]

Moviepy - Building video biped_1_tracked.avi.
Moviepy - Writing video biped_1_tracked.avi



                                                               

Moviepy - Done !
Moviepy - video ready biped_1_tracked.avi




## PART 2

In [7]:
def background_subtraction(frame, sub_corners, upper_threshold, lower_threshold, grayscale=False):
    assert type(grayscale) == bool, "Param 'grayscale' must be type bool"
    
    if grayscale:
        gray_frame = frame
    else:
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    x1, y1 = sub_corners[0]
    x2, y2 = sub_corners[1]
    
    # Turn whites into blacks
    full_mask = np.zeros(gray_frame.shape, dtype=np.bool)
    full_mask[y1:y2, x1:x2] = np.logical_or(gray_frame[y1:y2, x1:x2] > upper_threshold, gray_frame[y1:y2, x1:x2] < lower_threshold)
    gray_frame[full_mask] = 0
    return gray_frame

In [8]:
video_fname = "biped_2.avi"
frames, n_frames, fps = get_video_frames(video_fname)

sub_corners = [(360, 280), (450, 380)]    # Top left, bottom right corners of the subtractable region

sub_frames = [background_subtraction(frame, sub_corners, upper_threshold=235, lower_threshold=20) for frame in frames]

corner_p1 = [(212, 185)]    # Top left
corner_p2 = [(305, 184)]    # Top right
corner_p3 = [(211, 314)]    # Bottom left
corner_p4 = [(305, 314)]    # Bottom right

corner_p1_ofs = []
corner_p2_ofs = []
corner_p3_ofs = []
corner_p4_ofs = []

hand_p = [(396, 334), (405, 335)]    # Left bot and right bot corners of hand
hand_p_ofs = []

i = 0
while i < n_frames - 2:
    f = sub_frames[i]
    f_next = sub_frames[i+1]
    f_next2 = sub_frames[i+2]
    
    # Use two frames ahead in time
    corner_p1_next, cp1_of = lucas_kanade(f, f_next2, corner_p1, window_size=25, blur_window_size=3, grayscale=True)
    frames[i] = draw_arrow(frames[i], corner_p1, cp1_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p1_ofs.append(cp1_of)
    corner_p2_next, cp2_of = lucas_kanade(f, f_next2, corner_p2, window_size=25, blur_window_size=3, grayscale=True)
    frames[i] = draw_arrow(frames[i], corner_p2, cp2_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p2_ofs.append(cp2_of)
    corner_p3_next, cp3_of = lucas_kanade(f, f_next2, corner_p3, window_size=25, blur_window_size=3, grayscale=True)
    frames[i] = draw_arrow(frames[i], corner_p3, cp3_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p3_ofs.append(cp3_of)
    corner_p4_next, cp4_of = lucas_kanade(f, f_next2, corner_p4, window_size=25, blur_window_size=3, grayscale=True)
    frames[i] = draw_arrow(frames[i], corner_p4, cp4_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p4_ofs.append(cp4_of)
    # Use one frame ahead in time
    hand_p_next, hand_of = lucas_kanade(f, f_next, hand_p, window_size=7, blur_window_size=3, grayscale=True)
    frames[i] = draw_arrow(frames[i], hand_p, hand_of, scale_factor=10, thickness=2, tip_length=0.2)
    hand_p_ofs.append(hand_of)
    hand_p = hand_p_next
    i += 1
    
render_video(frames, "biped_2_tracked.avi", fps)

t:  31%|███       | 48/154 [00:00<00:00, 352.13it/s, now=None]

Moviepy - Building video biped_2_tracked.avi.
Moviepy - Writing video biped_2_tracked.avi



                                                               

Moviepy - Done !
Moviepy - video ready biped_2_tracked.avi


## PART 3

In [9]:
def calculate_mse(of1, of2, verbose=True):
    assert type(of1) == np.ndarray, "Param 'of1' must be of type numpy array"
    assert type(of2) == np.ndarray, "Param 'of2' must be of type numpy array"
    mse = np.mean(np.square(of1 - of2), axis=1).reshape((-1, 2))    # MSE per frame
    mse = np.mean(mse, axis=0)    # Mean of MSEs along x and y axes
    if verbose:
        print("~o~ Mean Squared Error ~o~\nAlong Horizontal Axis: {:.4f}\nAlong Vertical Axis: {:.4f}\nTotal Error: {:.4f}".format(mse[0], mse[1], np.sum(mse)))
    return np.sum(mse)

In [10]:
video_fname = "biped_3.avi"
frames, n_frames, fps = get_video_frames(video_fname)

sub_corners = [(360, 240), (450, 420)]    # Top left, bottom right corners of the subtractable region
sub_frames = [background_subtraction(frame, sub_corners, upper_threshold=220, lower_threshold=130) for frame in frames]

corner_p1 = [(212, 185)]    # Top left
corner_p2 = [(305, 184)]    # Top right
corner_p3 = [(211, 314)]    # Bottom left
corner_p4 = [(305, 314)]    # Bottom right

corner_p1_ofs2 = []
corner_p2_ofs2 = []
corner_p3_ofs2 = []
corner_p4_ofs2 = []

hand_p = [(396, 334), (405, 335)]    # Left bot and right bot corners of hand
hand_p_ofs2 = []

i = 0
while i < n_frames - 2:
    f = sub_frames[i]
    f_next = sub_frames[i+1]
    f_next2 = sub_frames[i+2]
    
    # Use two frames ahead in time
    corner_p1_next, cp1_of = lucas_kanade(f, f_next2, corner_p1, window_size=25, blur_window_size=3, grayscale=True)
#     frames[i] = draw_arrow(frames[i], corner_p1, cp1_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p1_ofs2.append(cp1_of)
    corner_p2_next, cp2_of = lucas_kanade(f, f_next2, corner_p2, window_size=25, blur_window_size=3, grayscale=True)
#     frames[i] = draw_arrow(frames[i], corner_p2, cp2_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p2_ofs2.append(cp2_of)
    corner_p3_next, cp3_of = lucas_kanade(f, f_next2, corner_p3, window_size=25, blur_window_size=3, grayscale=True)
#     frames[i] = draw_arrow(frames[i], corner_p3, cp3_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p3_ofs2.append(cp3_of)
    corner_p4_next, cp4_of = lucas_kanade(f, f_next2, corner_p4, window_size=25, blur_window_size=3, grayscale=True)
#     frames[i] = draw_arrow(frames[i], corner_p4, cp4_of, scale_factor=10, thickness=2, tip_length=0.2)
    corner_p4_ofs2.append(cp4_of)
    # Use one frame ahead in time
    hand_p_next, hand_of = lucas_kanade(f, f_next, hand_p, window_size=13, blur_window_size=3, grayscale=True)
    frames[i] = draw_arrow(frames[i], hand_p, hand_of, scale_factor=10, thickness=2, tip_length=0.2)
    hand_p_ofs2.append(hand_of)
    hand_p = hand_p_next
    i += 1

render_video(frames, "biped_3_tracked.avi", fps)

t:  31%|███       | 48/154 [00:00<00:00, 352.90it/s, now=None]

Moviepy - Building video biped_3_tracked.avi.
Moviepy - Writing video biped_3_tracked.avi



                                                               

Moviepy - Done !
Moviepy - video ready biped_3_tracked.avi


#### Error Before Correction

In [11]:
hand_gt = np.mean(hand_p_ofs, axis=1)
hand_shaky = np.mean(hand_p_ofs2, axis=1)
total_error = calculate_mse(hand_gt, hand_shaky, verbose=True)

~o~ Mean Squared Error ~o~
Along Horizontal Axis: 1.2143
Along Vertical Axis: 1.3136
Total Error: 2.5279


#### Correction

In [12]:
corners_gt = np.concatenate((corner_p1_ofs, corner_p2_ofs, corner_p3_ofs, corner_p4_ofs), axis=1)
corners_shaky = np.concatenate((corner_p1_ofs2, corner_p2_ofs2, corner_p3_ofs2, corner_p4_ofs2), axis=1)
corners_gt_mean = np.mean(corners_gt, axis=1)
corners_shaky_mean = np.mean(corners_shaky, axis=1)

norm_term = -np.power(np.mean(corners_gt_mean) + np.mean(corners_shaky_mean), 3)
correction = np.subtract(corners_shaky_mean, corners_gt_mean)/norm_term
hand_corrected = np.add(hand_shaky, correction)

#### Error After Correction

In [13]:
total_corrected_error = calculate_mse(hand_gt, hand_corrected, verbose=True)

~o~ Mean Squared Error ~o~
Along Horizontal Axis: 1.1903
Along Vertical Axis: 1.2914
Total Error: 2.4816


In [14]:
print("Error Reduction Percentage: {:.4f}%".format(100 - total_corrected_error*100/total_error))

Error Reduction Percentage: 1.8312%


In [15]:
video_fname = "biped_3.avi"
frames, n_frames, fps = get_video_frames(video_fname)

sub_corners = [(360, 240), (450, 420)]    # Top left, bottom right corners of the subtractable region
sub_frames = [background_subtraction(frame, sub_corners, upper_threshold=220, lower_threshold=130) for frame in frames]
hand_p = [(396, 334), (405, 335)]    # Left bot and right bot corners of hand

i = 0
while i < n_frames - 2:
    f = sub_frames[i]
    f_next = sub_frames[i+1]
    hand_p_next, hand_of = lucas_kanade(f, f_next, hand_p, window_size=13, blur_window_size=3, grayscale=True)
    # Draw the tracked arrows in blue
    frames[i] = draw_arrow(frames[i], hand_p, hand_of, scale_factor=10, color=(0, 0, 255), thickness=3, tip_length=0.3)
    # Overlay with corrected arrows in yellow
    frames[i] = draw_arrow(frames[i], hand_p, hand_corrected[i], scale_factor=10, thickness=2, tip_length=0.2)
    hand_p = hand_p_next
    i += 1
    
render_video(frames, "biped_3_corrected.avi", fps)

t:  30%|██▉       | 46/154 [00:00<00:00, 458.75it/s, now=None]

Moviepy - Building video biped_3_corrected.avi.
Moviepy - Writing video biped_3_corrected.avi



                                                               

Moviepy - Done !
Moviepy - video ready biped_3_corrected.avi
