# Computer Vision Assignment 5 - Optical Flow
### Roll number: 20171100
This notebook contains the report for the questions of Computer Vision Assignment 5 - Optical Flow.

## Imports

These are the libraries used for the assignment:

In [None]:
import numpy as np
import os
from scipy import signal
import cv2
from matplotlib import pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure

%matplotlib inline

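The function below is a helper that displays two images side by side. Images flagged as signed (`isNeg`) are drawn in colour, with positive values in the red channel and negative values in the green channel.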

In [None]:
def _signed_to_rgb(img):
    """Render a signed array as RGB: positives in red, negatives in green."""
    img = np.asarray(img, dtype=float)
    # Normalise by the largest magnitude so both channels stay within [0, 1];
    # fall back to 1 for empty, all-zero or NaN input to avoid dividing by 0.
    scale = np.max(np.abs(img)) if img.size else 0.0
    if not scale > 0:
        scale = 1.0
    red = np.clip(img, 0, None) / scale
    green = -np.clip(img, None, 0) / scale
    return np.stack((red, green, np.zeros_like(img)), axis=-1)


def showImage(img1, img2, title=('Output 1', 'Output 2'), shape=(15,15), isNeg=(False, False)):
    """Show two images side by side in one matplotlib figure.

    Parameters
    ----------
    img1, img2 : array-like
        Images drawn in the left and right panel respectively.
    title : sequence of two str
        Panel titles, left then right.
    shape : tuple
        Figure size forwarded to ``plt.figure(figsize=...)``.
    isNeg : sequence of two bool
        Per-panel flag. When true the image is treated as signed and drawn
        in colour (positive values red, negative values green); otherwise
        it is drawn with the gray colormap.
    """
    plt.figure(figsize=shape)
    for idx, img in enumerate((img1, img2)):
        plt.subplot(1, 2, idx + 1)
        if isNeg[idx]:
            plt.imshow(_signed_to_rgb(img))
        else:
            plt.imshow(img, cmap='gray')
        plt.axis('off')
        plt.title(title[idx])
    plt.show()

In this assignment our goal is to apply optical flow to track objects in a moving scene.

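The method works by estimating the movement of pixels between the 2 frames. For each pixel we build a linear system from the image gradients in the x and y directions and the temporal difference over a window around it; the least-squares solution of that system predicts the movement of the pixel.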

The function below calculates the optical flow for a given pair of input images using the Lucas-Kanade algorithm.

In [None]:
def optical_flow(img1, img2, window_size, threshold=1e-2):
    """Estimate dense optical flow between two frames (Lucas-Kanade style).

    For every pixel whose full ``(2*w + 1) x (2*w + 1)`` neighbourhood
    (``w = window_size // 2``) lies inside the image, the filtered spatial
    responses of ``img1`` and the filtered frame difference over that
    neighbourhood form an over-determined linear system, which is solved in
    the least-squares sense for one ``(u, v)`` pair. The pair is stored at
    the centre of the neighbourhood.

    Parameters
    ----------
    img1, img2 : 2-D array-like of identical shape
        Consecutive frames. Both are divided by 255 before filtering
        (presumably 8-bit grayscale input -- confirm against callers).
    window_size : int
        Side length of the neighbourhood; the effective side is
        ``2 * (window_size // 2) + 1``.
    threshold : float, optional
        A pixel is solved only if the smallest eigenvalue magnitude of
        ``A.T @ A`` is at least this value; other pixels keep zero flow.

    Returns
    -------
    (u, v) : tuple of ndarray
        Float arrays with the shape of ``img1``. ``u`` pairs with the
        column-direction kernel and ``v`` with the row-direction kernel.
        Pixels closer than ``w`` to the border are left at zero.

    Raises
    ------
    ValueError
        If the inputs are not 2-D or their shapes differ.
    """
    img1 = np.asarray(img1) / 255
    img2 = np.asarray(img2) / 255
    if img1.ndim != 2 or img1.shape != img2.shape:
        raise ValueError(
            'optical_flow expects two 2-D images of identical shape, got %r and %r'
            % (img1.shape, img2.shape)
        )

    kernel_x = np.array([[-1, 1], [-1, 1]], dtype='float32')
    kernel_y = np.array([[-1, -1], [1, 1]], dtype='float32')
    kernel_t = np.array([[1, 1], [1, 1]], dtype='float32')
    kernel_tn = np.array([[-1, -1], [-1, -1]], dtype='float32')

    # NOTE(review): the kernels are unnormalised (each spatial kernel sums two
    # pixel differences, the temporal pair sums four), so the recovered flow
    # looks scaled by a constant factor -- confirm before relying on absolute
    # magnitudes.
    fx = signal.convolve2d(img1, kernel_x, boundary='symm', mode='same')
    fy = signal.convolve2d(img1, kernel_y, boundary='symm', mode='same')
    ft = (signal.convolve2d(img2, kernel_t, boundary='symm', mode='same')
          + signal.convolve2d(img1, kernel_tn, boundary='symm', mode='same'))

    u = np.zeros_like(img1)
    v = np.zeros_like(img1)

    w = window_size // 2
    side = 2 * w + 1
    rows, cols = img1.shape
    for top in range(rows - 2 * w):
        for left in range(cols - 2 * w):
            window = (slice(top, top + side), slice(left, left + side))
            A = np.column_stack((fx[window].ravel(), fy[window].ravel()))
            b = ft[window].ravel()

            # A.T @ A is symmetric, so eigvalsh applies; skip windows where
            # the system is too ill-conditioned to give a reliable flow.
            if np.min(np.abs(np.linalg.eigvalsh(A.T @ A))) >= threshold:
                nu = np.linalg.pinv(A) @ b
                # Store at the window centre so u[y, x] / v[y, x] line up
                # with pixel (y, x) of the input image.
                u[top + w, left + w] = nu[0]
                v[top + w, left + w] = nu[1]

    return (u, v)

## Examples 

In [None]:
# Basketball sequence: estimate the flow between two consecutive frames and
# visualise its components, magnitude/angle, keypoint arrows and a motion mask.
im1 = cv2.imread('./eval-data-gray/Basketball/frame10.png', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./eval-data-gray/Basketball/frame11.png', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the Basketball frames in ./eval-data-gray/Basketball/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))

In [None]:
# Wooden sequence: estimate the flow between two consecutive frames and
# visualise its components, magnitude/angle, keypoint arrows and a motion mask.
im1 = cv2.imread('./eval-data-gray/Wooden/frame10.png', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./eval-data-gray/Wooden/frame11.png', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the Wooden frames in ./eval-data-gray/Wooden/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))

In [None]:
# Yosemite sequence: estimate the flow between two consecutive frames and
# visualise its components, magnitude/angle, keypoint arrows and a motion mask.
im1 = cv2.imread('./eval-data-gray/Yosemite/frame10.png', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./eval-data-gray/Yosemite/frame11.png', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the Yosemite frames in ./eval-data-gray/Yosemite/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))

In [None]:
# Mequon sequence: estimate the flow between two consecutive frames and
# visualise its components, magnitude/angle, keypoint arrows and a motion mask.
im1 = cv2.imread('./eval-data-gray/Mequon/frame10.png', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./eval-data-gray/Mequon/frame11.png', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the Mequon frames in ./eval-data-gray/Mequon/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))

In [None]:
# Dumptruck sequence: estimate the flow between two consecutive frames and
# visualise its components, magnitude/angle, keypoint arrows and a motion mask.
im1 = cv2.imread('./eval-data-gray/Dumptruck/frame10.png', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./eval-data-gray/Dumptruck/frame11.png', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the Dumptruck frames in ./eval-data-gray/Dumptruck/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))

In [None]:
# Taxi image pair: estimate the flow between the two frames and visualise
# its components, magnitude/angle, keypoint arrows and a motion mask.
im1 = cv2.imread('./input_data/taxi1.jpg', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./input_data/taxi2.jpg', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the taxi frames in ./input_data/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))

### Video Output

In [None]:
def videoToImages(vid, out_dir, temp_fps):
    """Dump every ``temp_fps``-th frame of a video into ``out_dir`` as PNGs.

    ``out_dir`` is created if needed and every regular file already in it is
    deleted first. Kept frames are written as ``0.png``, ``1.png``, ... in
    the order they are read.

    Parameters
    ----------
    vid : str
        Path handed to ``cv2.VideoCapture``.
    out_dir : str
        Directory receiving the extracted frames.
    temp_fps : int
        Sampling step: a frame is saved each time ``temp_fps`` frames have
        been read since the previous save. With a value below 1 the counter
        never matches, so nothing is saved.

    Returns
    -------
    (fps, frame_count) : tuple
        The ``CAP_PROP_FPS`` value reported by the capture and the number of
        frames written.
    """
    out_dir = os.path.normpath(out_dir)

    os.makedirs(out_dir, exist_ok=True)
    for name in os.listdir(out_dir):
        stale = os.path.join(out_dir, name)
        if os.path.isfile(stale):
            os.remove(stale)

    cap = cv2.VideoCapture(vid)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = 0
        frames_since_save = 0

        success, im = cap.read()
        while success:
            frames_since_save += 1
            if frames_since_save == temp_fps:
                frames_since_save = 0
                cv2.imwrite(os.path.join(out_dir, str(frame_count) + '.png'), im)
                frame_count += 1
            success, im = cap.read()
    finally:
        # Release the capture handle even if reading or writing fails.
        cap.release()
    return fps, frame_count

In [None]:
def processFrames(in_dir, out_dir):
    """Write one flow image per pair of consecutive frames.

    Frames are the regular files in ``in_dir`` whose name up to the first
    ``.`` parses as an integer; they are processed in ascending order of
    that integer. For each consecutive pair the flow is computed with
    ``optical_flow(..., 15)`` and ``u**2 + v**2`` is written to
    ``out_dir/<pair index>.png``. ``out_dir`` is created if needed and its
    existing regular files are deleted first.

    Parameters
    ----------
    in_dir : str
        Directory holding the numbered input frames.
    out_dir : str
        Directory receiving the flow images.
    """
    in_dir = os.path.normpath(in_dir)
    out_dir = os.path.normpath(out_dir)

    os.makedirs(out_dir, exist_ok=True)
    for name in os.listdir(out_dir):
        stale = os.path.join(out_dir, name)
        if os.path.isfile(stale):
            os.remove(stale)

    # Filter before sorting so sub-directories and files without a numeric
    # stem are skipped instead of crashing the integer sort key.
    numbered = []
    for name in os.listdir(in_dir):
        path = os.path.join(in_dir, name)
        if not os.path.isfile(path):
            continue
        try:
            index = int(name.split('.')[0])
        except ValueError:
            continue
        numbered.append((index, path))
    numbered.sort(key=lambda item: item[0])
    files = [path for _, path in numbered]

    if not files:
        return

    # Carry the previous frame over so each file is read from disk only once.
    previous = cv2.imread(files[0], cv2.IMREAD_GRAYSCALE)
    for i, path in enumerate(files[1:]):
        current = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        u, v = optical_flow(previous, current, 15)
        # NOTE(review): this is a float array; how cv2.imwrite converts it
        # for PNG output should be confirmed if contrast matters.
        im = u**2 + v**2
        cv2.imwrite(os.path.join(out_dir, str(i) + '.png'), im)
        previous = current

In [None]:
def framesToVideo(inp_dir, frame_rate, out_file):
    """Assemble the numbered frames in ``inp_dir`` into a DIVX video file.

    Frames are the regular files whose name up to the first ``.`` parses as
    an integer, written in ascending order of that integer. The video size
    is taken from the first frame.

    Parameters
    ----------
    inp_dir : str
        Directory holding the numbered frames.
    frame_rate : float
        Frame rate passed to ``cv2.VideoWriter``.
    out_file : str
        Path of the video to create.

    Raises
    ------
    ValueError
        If ``inp_dir`` has no numbered frame or the first frame cannot be
        read.
    """
    inp_dir = os.path.normpath(inp_dir)

    # Filter before sorting so sub-directories and files without a numeric
    # stem are skipped instead of crashing the integer sort key.
    numbered = []
    for name in os.listdir(inp_dir):
        path = os.path.join(inp_dir, name)
        if not os.path.isfile(path):
            continue
        try:
            index = int(name.split('.')[0])
        except ValueError:
            continue
        numbered.append((index, path))
    numbered.sort(key=lambda item: item[0])
    files = [path for _, path in numbered]

    if not files:
        raise ValueError('no numbered frame files found in %r' % (inp_dir,))

    first = cv2.imread(files[0])
    if first is None:
        raise ValueError('could not read frame %r' % (files[0],))
    height, width = first.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    writer = cv2.VideoWriter(out_file, fourcc, frame_rate, (width, height))
    try:
        writer.write(first)
        for f in files[1:]:
            writer.write(cv2.imread(f))
    finally:
        # Finalise the container even if a frame fails to load or write.
        writer.release()

In [None]:
# Full pipeline on the MOT16-09 clip: sample frames, compute flow images,
# then stitch the flow images back into a video.
vid_path = './input_data/MOT16-09.mp4'
in_path = './temp_frames/'
out_path = './temp_frames2/'
# NOTE: the taxi cell writes to this same path, so running both overwrites it.
out_file_path = './output_video.avi'
frame_step = 15  # keep one frame out of every 15
fps, numFrames = videoToImages(vid_path, in_path, frame_step)
print("The FPS of the video is %d and the total number of frames extracted are %d" % (fps, numFrames))
processFrames(in_path, out_path)
# True division keeps the playback speed right when fps is not a multiple of
# the step; floor division would round the output rate down.
framesToVideo(out_path, fps / frame_step, out_file_path)

In [None]:
# Full pipeline on the taxi clip: sample frames, compute flow images,
# then stitch the flow images back into a video.
vid_path = './input_data/taxi.mp4'
in_path = './temp_frames/'
out_path = './temp_frames2/'
# NOTE: the MOT16-09 cell writes to this same path, so running both overwrites it.
out_file_path = './output_video.avi'
frame_step = 2  # keep every second frame
fps, numFrames = videoToImages(vid_path, in_path, frame_step)
print("The FPS of the video is %d and the total number of frames extracted are %d" % (fps, numFrames))
processFrames(in_path, out_path)
# True division keeps the playback speed right when fps is not a multiple of
# the step; floor division would round the output rate down.
framesToVideo(out_path, fps / frame_step, out_file_path)

The output is available on https://drive.google.com/drive/folders/1M8hfj52eYDxQ0VrcO0fVjnSChFTJw2-M?usp=sharing

## When camera is moving?

When the camera is moving, object tracking becomes harder because much more noise is added: there is no single isolated object that is moving; instead, the entire image seems to be moving.

In [None]:
# Moving-camera illustration on the Wooden sequence: estimate the flow between
# two consecutive frames and visualise its components, magnitude/angle,
# keypoint arrows and a motion mask.
im1 = cv2.imread('./eval-data-gray/Wooden/frame10.png', cv2.IMREAD_GRAYSCALE)
im2 = cv2.imread('./eval-data-gray/Wooden/frame11.png', cv2.IMREAD_GRAYSCALE)
if im1 is None or im2 is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('Could not read the Wooden frames in ./eval-data-gray/Wooden/')
u, v = optical_flow(im1, im2, 15)
showImage(im1, im2, ['Input Image 1', 'Input Image 2'], shape=(20,20))
showImage(u, v, ['U', 'V'], shape=(20,20), isNeg=[True, True])
showImage(u**2 + v**2, np.arctan2(v,u), ['Magnitude', 'Angle'], shape=(20,20), isNeg=[False, True])

# Draw the flow at strong corners on an off-screen Agg canvas.
fig = Figure(figsize=(16,16))
canvas = FigureCanvas(fig)
ax = fig.gca()
ax.imshow(im1, cmap='gray')
# blockSize has to be a keyword: the fifth positional slot of
# goodFeaturesToTrack is the `corners` output array, not blockSize.
kp = cv2.goodFeaturesToTrack(im1, 100, 0.01, 10, blockSize=3)
# The detector may return None when no corner passes the quality level.
for arrow_ind in (kp if kp is not None else []):
    x, y = (int(coord) for coord in arrow_ind[0])
    ax.arrow(x, y, 5*u[y,x], 5*v[y,x], width=2, color = (0,0,1))
ax.axis("off")
canvas.draw()
# buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
# deprecate and remove; dropping alpha keeps an (H, W, 3) uint8 image.
arrow_image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
segmented_image = (u*u + v*v>1)

showImage(arrow_image, segmented_image, ['Arrow for keypoints', 'Segmented Image'], shape=(20,20))