# Proj 2 Augmented Reality

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
def get_points(im, num_pts=None):
    """Let the user click points on ``im`` and return them as (row, col) pairs.

    Args:
        im: Image handed to ``plt.imshow`` for display.
        num_pts: Maximum number of clicks to collect. Falsy values
            (``None`` or ``0``) fall back to a cap of 100 clicks.

    Returns:
        Float array of shape ``(k, 2)``, one row per click, with the columns
        swapped from matplotlib's ``(x, y)`` order to ``(y, x)``. ``k`` is 0
        when the selection ends without any click.
    """
    print('Please select points on image.')
    plt.imshow(im)
    max_clicks = num_pts if num_pts else 100
    # timeout=0 waits indefinitely, mouse_pop=None disables click removal and
    # mouse_stop=3 lets a right click end the selection early.
    clicks = plt.ginput(max_clicks, timeout=0, mouse_pop=None, mouse_stop=3)
    plt.close()
    # An empty click list would become a 1-D array and make the axis=1 roll
    # below raise; forcing (-1, 2) keeps the result 2-D in every case.
    pts = np.asarray(clicks, dtype=float).reshape(-1, 2)
    # Swap the two columns: (x, y) -> (y, x).
    return np.roll(pts, shift=1, axis=1)

def draw(img, imgpts):
    """Draw a cube outline on ``img`` from its projected corner points.

    Args:
        img: Image to draw on (colours below are given in BGR order).
        imgpts: Array-like of 2-D image points. The first four are the base
            corners, the remaining ones the top corners; base corner ``i`` is
            joined to top corner ``i`` by a pillar. Float input is accepted
            and truncated to integer pixel coordinates.

    Returns:
        The image returned by the last cv2 drawing call.
    """
    # The cv2 drawing functions only accept integer pixel coordinates, so
    # float projections must be converted before any drawing happens.
    imgpts = np.int32(imgpts).reshape(-1, 2)

    # draw ground floor in green (negative thickness fills the contour)
    img = cv2.drawContours(img, [imgpts[:4]], -1, (0, 255, 0), -3)
    # draw pillars in blue color
    for base_pt, top_pt in zip(imgpts[:4], imgpts[4:8]):
        # Plain Python ints avoid cv2 rejecting NumPy scalar types in tuples.
        start = tuple(int(v) for v in base_pt)
        end = tuple(int(v) for v in top_pt)
        img = cv2.line(img, start, end, (255, 0, 0), 3)
    # draw top layer in red color
    img = cv2.drawContours(img, [imgpts[4:]], -1, (0, 0, 255), 3)
    return img

## Read the first frame of the video and choose the Region of Interest

In [None]:
# read fist frame of the video and choose Region of Interest
%matplotlib qt
video = cv2.VideoCapture("cube2.mov")
# the channel order is in BGR
ok, init_frame = video.read()
pts = get_points(init_frame, num_pts=25)

In [None]:
# get tracked ROI in every frame 
%matplotlib inline
num_ROI = pts.shape[0]
video = cv2.VideoCapture("cube2.mov")
# the channel order is in BGR
ok, frame = video.read()
height, width, channels = frame.shape

bboxes = [(pt[1]-9, pt[0]-9, 17, 17) for pt in pts]

# trackers = [cv2.TrackerGOTURN_create() for i in range(num_ROI)]
trackers = [cv2.TrackerMedianFlow_create() for i in range(num_ROI)]
oks = [tracker.init(frame, bbox) for tracker, bbox in zip(trackers, bboxes)]

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
videowriter = cv2.VideoWriter('test.mp4',fourcc, 60, (width,height))

im_coords_all = []
idxes_all = []
while ok:
    im_coords = []
    idxes = []
    cnt = 0
    for ok, bbox in zip(oks, bboxes):
        if ok:
            p1 = (int(bbox[0]), int(bbox[1]))
            p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
            cv2.rectangle(frame, p1, p2, (0,255,0), 2, 1)
            center = (np.array(p1) + np.array(p2)) // 2
            im_coords.append(center)
            idxes.append(cnt)
        cnt += 1
    assert len(idxes) == len(im_coords)
    im_coords_all.append(im_coords)
    idxes_all.append(idxes)
#     cv2.imshow("Tracking", frame)
#     videowriter.write(frame)
    # Exit if ESC pressed
#     k = cv2.waitKey(1) & 0xff
#     if k == 27 : break
    ok, frame = video.read()
    oks_bboxes = [tracker.update(frame) for tracker in trackers]
    oks = [ok_bbox[0] for ok_bbox in oks_bboxes]
    bboxes = [ok_bbox[1] for ok_bbox in oks_bboxes]
videowriter.release()
video.release()
cv2.destroyAllWindows() 

In [None]:
# Corresponding 3-D coordinates, one row (x, y, z) per tracked point.
# Row i belongs to the i-th point selected in the first frame, so the order
# matters. The table is five runs of five points: each run keeps x and z
# fixed and sweeps y, with the sweep direction alternating between runs.
_y_ascending = [0, 1.5, 3, 4.5, 6]
_y_descending = _y_ascending[::-1]
_runs = [
    # (x, z, y values in visiting order)
    (0, 4, _y_ascending),
    (1.5, 4, _y_descending),
    (3.5, 4, _y_ascending),
    (5, 4, _y_descending),
    (5, 2, _y_ascending),
]
world_coords = np.array([[x, y, z] for x, z, ys in _runs for y in ys])

In [None]:
# Get the 3x4 projection matrix M for every frame.
#
# For each frame, M is the least-squares solution of
#     M @ [X, Y, Z, 1]^T  ~=  [x, y, 1]^T
# over all points tracked in that frame. The three rows of M do not share
# any unknowns, so solving world_h @ M^T ~= targets with three right-hand
# sides is the same problem as stacking one block-diagonal 3n x 12 system.
m_list = []
frame_data = zip(idxes_all, im_coords_all)
for frame_no, (idxes, im_coords) in enumerate(tqdm(frame_data, total=len(idxes_all))):
    if not idxes:
        raise ValueError(
            'frame {}: no tracked points, cannot fit a projection matrix'.format(frame_no))
    ones = np.ones((len(idxes), 1))
    # Homogeneous world points (n x 4) and homogeneous image targets (n x 3).
    world_h = np.hstack([world_coords[idxes], ones])
    targets = np.hstack([np.asarray(im_coords, dtype=float), ones])
    # rcond=None selects NumPy's documented default cut-off explicitly and
    # avoids the FutureWarning raised when the argument is omitted.
    m_transposed = np.linalg.lstsq(world_h, targets, rcond=None)[0]
    m = m_transposed.T
    m_list.append(m)

In [None]:
# Project the 3-D coordinates into each frame using that frame's projection
# matrix M and show them, to see whether they deviate from the tracked
# points too much.
video = cv2.VideoCapture("cube2.mov")
# the channel order is in BGR
ok, frame = video.read()
m0 = m_list[0]  # first frame's matrix; not used in this cell
try:
    for m, idxes in zip(m_list, idxes_all):
        if not ok:
            # No frame available (read failed or video ended early).
            break
        # Only the points that were tracked in this frame are re-projected.
        world_h = np.hstack([world_coords[idxes], np.ones((len(idxes), 1))])
        for im_xy in world_h @ m.T:
            cv2.circle(frame, (int(im_xy[0]), int(im_xy[1])),
                       radius=5, color=(0, 255, 0), thickness=-1)
        cv2.imshow("recovered", frame)
        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break
        ok, frame = video.read()
finally:
    # The capture used to stay open after this cell; always release it.
    video.release()
    cv2.destroyAllWindows()

In [None]:
# Project a cube into each frame of the original video and save the result.
# Corners 0-3 form the base (z = 4) and corners 4-7 the top (z = 6), in the
# order expected by draw().
axis = np.float32([[1.5, 1.5, 4], [1.5, 4.5, 4], [3.5, 4.5, 4], [3.5, 1.5, 4],
                   [1.5, 1.5, 6], [1.5, 4.5, 6], [3.5, 4.5, 6], [3.5, 1.5, 6]])
# Homogeneous corner coordinates, computed once for all frames.
axis_h = np.hstack([axis, np.ones((len(axis), 1))])
video = cv2.VideoCapture("cube2.mov")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
videowriter = cv2.VideoWriter('result.mp4', fourcc, 60, (width, height))
# the channel order is in BGR
ok, frame = video.read()
m0 = m_list[0]  # first frame's matrix; not used in this cell
try:
    for m in m_list:
        if not ok:
            # No frame available (read failed or video ended early).
            break
        axis_xy = (axis_h @ m.T)[:, :2]
        # The cv2 drawing calls inside draw() need integer pixel coordinates.
        axis_xy = np.rint(axis_xy).astype(np.int32)
        frame = draw(frame, axis_xy)
        cv2.imshow("recovered", frame)
        videowriter.write(frame)
        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break
        ok, frame = video.read()
finally:
    # Release the handles even if projection or drawing fails, so that
    # result.mp4 is finalised.
    video.release()
    videowriter.release()
    cv2.destroyAllWindows()