# Assignment 3

Github repo for assignment: https://github.com/brentonjackson/csc-4980/tree/master/Assignment3

I'll be using Python for the assignments in this class, as opposed to Matlab.

**Credit goes to Amani Hunter for assets and code**


## Part I:



1. Capture 10s handheld video footage and pan from left to right or right to left.


2. Pick any image frame from the 10 sec video footage.


3. Pick a region of interest corresponding to an object in the image.


4. Crop this region from the image.


5. Then use this cropped region to compare with randomly picked 10 images in the dataset of 10 sec video frames, to see if there is a match for the object in the scenes from the 10 images. For comparison use sum of squared differences (SSD) or normalized correlation.

Below is the code for determining if there is a match for the object in the images and the output of SSD and correlation functions:

**VideoRecorder.py** records the video footage and lets the user save individual frames by pressing the SPACE bar:

```python
import cv2
import numpy as np
import numba as nb
import depthai as dai

# Names of the camera output streams this script requests.
streams = ['isp']

# Compiled ahead of use with an explicit signature: a contiguous 1-D uint8
# input, a contiguous 1-D uint16 output buffer and a boolean flag; returns the
# uint16 buffer.
# NOTE(review): parallel=True is requested but the loop below iterates over
# np.arange rather than nb.prange -- confirm whether it is actually parallelized.
@nb.njit(nb.uint16[::1] (nb.uint8[::1], nb.uint16[::1], nb.boolean), parallel=True, cache=True)
def unpack_raw10(input, out, expand16bit):
    """Unpack groups of 5 packed bytes into 4 values each, written into *out*.

    For every complete group of 5 input bytes, bytes 0..3 each supply the
    upper bits of one output value (shifted left by 2) and byte 4 supplies
    the two low bits of each of those four values. Trailing bytes that do
    not form a complete group of 5 are ignored.

    Args:
        input: Packed bytes (contiguous 1-D uint8 array).
        out: Destination buffer (contiguous 1-D uint16 array); must hold at
            least ``4 * (input.size // 5)`` elements.
        expand16bit: When true, every unpacked value is additionally shifted
            left by 6 bits.

    Returns:
        The same *out* array that was passed in.
    """
    # Extra left shift applied to each unpacked value (6 bits, or none).
    lShift = 6 if expand16bit else 0

    for i in np.arange(input.size // 5):
        # Fifth byte of the group: holds four 2-bit fields, one per output value.
        b4 = input[i * 5 + 4]
        out[i * 4]     = ((input[i * 5]     << 2) | ( b4       & 0x3)) << lShift
        out[i * 4 + 1] = ((input[i * 5 + 1] << 2) | ((b4 >> 2) & 0x3)) << lShift
        out[i * 4 + 2] = ((input[i * 5 + 2] << 2) | ((b4 >> 4) & 0x3)) << lShift
        out[i * 4 + 3] = ((input[i * 5 + 3] << 2) |  (b4 >> 6)       ) << lShift

    return out

print("depthai version:", dai.__version__)

# Build a pipeline that sends the colour camera's ISP output to the host.
pipeline = dai.Pipeline()

cam = pipeline.createColorCamera()
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)

if 'isp' in streams:
    xout_isp = pipeline.createXLinkOut()
    xout_isp.setStreamName('isp')
    cam.isp.link(xout_isp.input)

# The context manager releases the device when the loop ends, matching how the
# other scripts in this assignment open the camera.
with dai.Device(pipeline) as device:
    device.startPipeline()

    # One blocking output queue and one preview window per requested stream.
    q_list = []
    for s in streams:
        q_list.append(device.getOutputQueue(name=s, maxSize=3, blocking=True))
        cv2.namedWindow(s, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(s, (960, 540))

    # capture_flag is raised by SPACE and consumed by the next pass over the queues.
    capture_flag = False
    image_counter = 0
    while True:
        for q in q_list:
            name = q.getName()
            data = q.get()
            if name != 'isp':
                # Only the ISP stream is decoded here; the packet is still
                # fetched above so a blocking queue cannot fill up.
                continue

            width, height = data.getWidth(), data.getHeight()
            payload = data.getData()

            # The payload is reshaped to (height * 3 / 2, width) and converted
            # from planar YUV (IYUV) to BGR, then to grayscale.
            shape = (height * 3 // 2, width)
            yuv420p = payload.reshape(shape).astype(np.uint8)
            bgr = cv2.cvtColor(yuv420p, cv2.COLOR_YUV2BGR_IYUV)
            grayscale_img = np.ascontiguousarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY))

            if capture_flag:
                filename = f"frame{image_counter}.png"
                print(filename)
                cv2.imwrite(filename, grayscale_img)
            cv2.imshow(name, grayscale_img)

        capture_flag = False
        key = cv2.waitKey(5)
        if key == ord('q'):
            break
        elif key % 256 == 32:
            # SPACE: save the next frame under the next counter value.
            capture_flag = True
            image_counter += 1

cv2.destroyAllWindows()
  
```

**Question1.py**:

```python
import cv2
import numpy as np

def _load_patch(path, size=(87, 87)):
    """Read the image at *path*, convert it to grayscale and resize it to *size*.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), size)


def _ssd(patch1, patch2):
    """Return the sum of squared differences of two equally shaped patches."""
    # The patches are uint8: subtracting or squaring them directly wraps
    # modulo 256, so widen to a signed type before doing any arithmetic.
    diff = patch1.astype(np.int64) - patch2.astype(np.int64)
    return int(np.sum(np.square(diff)))


# frame0 is the reference patch (read from frame10.png); frame1..frame9 are
# the frames it is compared against.
frame0 = _load_patch('frame10.png')
frame1 = _load_patch('frame1.png')
frame2 = _load_patch('frame2.png')
frame3 = _load_patch('frame3.png')
frame4 = _load_patch('frame4.png')
frame5 = _load_patch('frame5.png')
frame6 = _load_patch('frame6.png')
frame7 = _load_patch('frame7.png')
frame8 = _load_patch('frame8.png')
frame9 = _load_patch('frame9.png')

# SSD of the reference against each candidate, in frame order.
ssd0 = [
    _ssd(frame0, candidate)
    for candidate in (frame1, frame2, frame3, frame4, frame5,
                      frame6, frame7, frame8, frame9)
]

print('SSD: ',ssd0)
corr0 = []

def correlation_coefficient(patch1, patch2):
    """Return the normalized correlation of two equally shaped patches.

    The result is the mean of the product of the mean-centred patches divided
    by the product of their standard deviations. When that product of
    standard deviations is zero, 0 is returned instead.
    """
    spread = patch1.std() * patch2.std()
    if spread == 0:
        return 0

    centered1 = patch1 - patch1.mean()
    centered2 = patch2 - patch2.mean()
    covariance = np.mean(centered1 * centered2)
    return covariance / spread

# Correlate the reference patch with each candidate, in frame order.
for candidate in (frame1, frame2, frame3, frame4, frame5,
                  frame6, frame7, frame8, frame9):
    corr0.append(correlation_coefficient(frame0, candidate))

print('Correlation: ',corr0)
```

**Output result:**

SSD:  [790104, 796043, 795369, 813209, 819670, 810515, 804582, 806397, 809074]


Correlation:  [-0.09805182753864389, -0.043593190599718695, -0.03368530968969049, -0.01546628121144833, 0.16660657838017948, 0.1741383019089876, 0.16875910526978377, 0.20127983457759524, 0.3952648668796892]

## Part II:

Implement the motion tracking equation from fundamental principles.

Select any 2 consecutive frames from the set from problem 1 and compute the motion function estimates.

Conduct image registration to realign the frames.

Repeat test for all consecutive pairs of frames in the video.

**Question2.py:**
```python
import numpy as np
from scipy import signal
import cv2
import matplotlib.pyplot as plt

def motion_constraints(Iref, Inext):
    """Compute the spatial and temporal terms (Ix, Iy, It) for a frame pair.

    Both images are scaled by 1/255. ``Ix`` and ``Iy`` are the reference
    image convolved with 2x2 horizontal and vertical difference kernels.
    ``It`` is the next image convolved with a 2x2 averaging kernel plus the
    reference image convolved with the negated averaging kernel.

    All convolutions use symmetric boundaries and 'same' output size, so each
    returned array has the shape of its input image.
    """
    diff_x = np.array([[-1., 1.], [-1., 1.]]) * .25
    diff_y = np.array([[-1., -1.], [1., 1.]]) * .25
    average = np.array([[1., 1.], [1., 1.]]) * .25

    def filtered(image, kernel):
        return signal.convolve2d(image, kernel, boundary='symm', mode='same')

    ref = Iref / 255.
    nxt = Inext / 255.

    Ix = filtered(ref, diff_x)
    Iy = filtered(ref, diff_y)
    It = filtered(nxt, average) + filtered(ref, -average)
    return Ix, Iy, It
def _read_gray_patch(path, size=(87, 87)):
    """Read the image at *path*, convert it to grayscale and resize it to *size*.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (cv2.imread returns
            None in that case instead of raising).
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), size)


# Two consecutive frames captured in Part I.
frameA = "../Part-1/frame1.png"
frameB = "../Part-1/frame2.png"
frame0 = _read_gray_patch(frameA)
frame1 = _read_gray_patch(frameB)

Ix, Iy, It = motion_constraints(frame0, frame1)
print(Ix, Iy, It)
```

## Part III:

For the video (problem 1) you have taken, plot the optical flow vectors on each frame.

   - (i) treating every
     previous frame as a reference frame
   - (ii) treating every 11th frame as a reference frame
   - (iii) treating every 31st frame as a reference frame


**OpticalFlow.py** plots the optical flow vectors on each frame
```python
import cv2 as cv
import numpy as np


# Corner-detection settings passed to cv.goodFeaturesToTrack.
feature_params = dict(maxCorners=300, qualityLevel=0.2, minDistance=2, blockSize=7)

# Settings passed to cv.calcOpticalFlowPyrLK.
lk_params = dict(
    winSize=(15, 15),
    maxLevel=2,
    criteria=(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03),
)
cap = cv.VideoCapture("shibuya.mp4")
# cap = cv.VideoCapture("output.mp4")

# OpenCV draws with BGR colours, so red is (0, 0, 255).
color = (0, 255, 0)
red = (0, 0, 255)

ret, first_frame = cap.read()
if not ret:
    # Missing/unreadable file or empty video: there is nothing to track.
    cap.release()
    raise SystemExit("Could not read the first frame of the video")

# Grayscale is enough for corner detection and tracking.
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)

# Flow lines accumulate on this overlay across frames.
mask = np.zeros_like(first_frame)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        # End of the video (or a read error): stop instead of passing None on.
        break

    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    # Every previous frame is the reference: detect corners in it and track
    # them into the current frame.
    prev = cv.goodFeaturesToTrack(prev_gray, mask=None, **feature_params)
    if prev is not None:
        next_pts, status, error = cv.calcOpticalFlowPyrLK(prev_gray, gray, prev, None, **lk_params)

        # Keep only the points that were tracked successfully.
        good_old = prev[status == 1].astype(int)
        good_new = next_pts[status == 1].astype(int)

        for new, old in zip(good_new, good_old):
            a, b = (int(v) for v in new.ravel())
            c, d = (int(v) for v in old.ravel())
            mask = cv.line(mask, (a, b), (c, d), color, 2)
            frame = cv.circle(frame, (a, b), 3, red, -1)

    output = cv.add(frame, mask)

    prev_gray = gray

    cv.imshow("sparse optical flow", output)

    if cv.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
```

## Part IV:

Implement a feature based object detection application (from scratch) for detecting an object of
   your choice.

   - Test it for at least 2 differently looking objects.

   - Validate your results by testing against
     built-in object detection functions/code in MATLAB/OpenCV.


**FeatureDetector.py** detects a user's hand

```python
import cv2
import depthai as dai
import numpy as np
import time

def nothing(x):
    """Do nothing; used as the (required) trackbar callback."""
    return None


# Function to find angle between two vectors
def Angle(v1, v2):
    """Return the angle between vectors *v1* and *v2* in degrees.

    Args:
        v1: First vector (NumPy array or any 1-D sequence of numbers).
        v2: Second vector, same length as *v1*.

    Returns:
        The angle in degrees, in the range [0, 180]. The result is NaN when
        either vector has zero length, because the angle is undefined then.
    """
    v1 = np.asarray(v1, dtype=float)
    v2 = np.asarray(v2, dtype=float)
    cos_angle = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    # Rounding can push the cosine of (anti)parallel vectors slightly outside
    # [-1, 1], which would make arccos return NaN; clamp it back first.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)
    return np.degrees(np.arccos(cos_angle))


# Function to find distance between two points in a list of lists
def FindDistance(A, B):
    """Return the Euclidean distance between the first points of *A* and *B*.

    Each argument is a list of points (each point a list of coordinates);
    only the x and y of the first point of each list are used.
    """
    first_a = A[0]
    first_b = B[0]
    dx = first_a[0] - first_b[0]
    dy = first_a[1] - first_b[1]
    squared = np.power(dx, 2) + np.power(dy, 2)
    return np.sqrt(squared)

# Window that hosts the HSV track bars
cv2.namedWindow('HSV_TrackBar')

# Start every channel at 100 (original note: prevents an error while masking)
h = s = v = 100

# One track bar per HSV channel, each starting at 0 with its own upper limit
for channel, upper_limit in (('h', 179), ('s', 255), ('v', 255)):
    cv2.createTrackbar(channel, 'HSV_TrackBar', 0, upper_limit, nothing)

# Camera pipeline: colour camera video output -> host stream "feature detector"
pipeline = dai.Pipeline()
camRgb = pipeline.create(dai.node.ColorCamera)
xoutVideo = pipeline.create(dai.node.XLinkOut)

camRgb.setBoardSocket(dai.CameraBoardSocket.RGB)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setVideoSize(1000, 1000)

# Non-blocking input with a queue of one
xoutVideo.setStreamName("feature detector")
xoutVideo.input.setBlocking(False)
xoutVideo.input.setQueueSize(1)

camRgb.video.link(xoutVideo.input)

with dai.Device(pipeline) as device:
    video = device.getOutputQueue(name="feature detector", maxSize=1, blocking=False)

    font = cv2.FONT_HERSHEY_SIMPLEX

    # Kernel matrices for the morphological transformations; they never
    # change, so they are built once instead of on every frame.
    kernel_square = np.ones((11, 11), np.uint8)
    kernel_ellipse_5 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    kernel_ellipse_8 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8))

    while True:
        videoIn = video.get()
        frame = videoIn.getCvFrame()
        blur = cv2.blur(frame, (3, 3))

        # Convert to HSV color space
        hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)

        # Binary image: white where the pixel falls in the skin-colour range
        mask2 = cv2.inRange(hsv, np.array([2, 50, 50]), np.array([15, 255, 255]))

        # Morphological transformations to filter out background noise:
        # dilation grows the skin-coloured area, erosion shrinks it.
        dilation = cv2.dilate(mask2, kernel_ellipse_5, iterations=1)
        erosion = cv2.erode(dilation, kernel_square, iterations=1)
        dilation2 = cv2.dilate(erosion, kernel_ellipse_5, iterations=1)
        filtered = cv2.medianBlur(dilation2, 5)
        dilation2 = cv2.dilate(filtered, kernel_ellipse_8, iterations=1)
        median = cv2.medianBlur(dilation2, 5)
        ret, thresh = cv2.threshold(median, 127, 255, 0)

        # Find contours of the filtered frame
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        if len(contours) > 0:
            # Largest contour with area above 100 (assumes the hand is in the
            # frame); falls back to the first contour when none is that big.
            max_area = 100
            ci = 0
            for i, cnt in enumerate(contours):
                area = cv2.contourArea(cnt)
                if area > max_area:
                    max_area = area
                    ci = i
            cnts = contours[ci]

            # Convex hull as points (for drawing) and as indices (for defects)
            hull = cv2.convexHull(cnts)
            hull2 = cv2.convexHull(cnts, returnPoints=False)

            # convexityDefects returns None when there are no defects and can
            # reject some contours with cv2.error; treat both as "no defects".
            try:
                defects = cv2.convexityDefects(cnts, hull2)
            except cv2.error:
                defects = None

            # Get defect points and draw them in the original image
            FarDefect = []
            if defects is not None:
                for start_idx, end_idx, far_idx, _depth in defects[:, 0]:
                    start = tuple(int(c) for c in cnts[start_idx][0])
                    end = tuple(int(c) for c in cnts[end_idx][0])
                    far = tuple(int(c) for c in cnts[far_idx][0])
                    FarDefect.append(far)
                    cv2.line(frame, start, end, [0, 255, 0], 1)
                    cv2.circle(frame, far, 10, [100, 255, 255], 3)

            # Bounding rectangle of the largest contour
            box_x, box_y, box_w, box_h = cv2.boundingRect(cnts)

            # Centre of mass from the first-order moments; a zero-area contour
            # has m00 == 0, so use the centre of its bounding box instead.
            moments = cv2.moments(cnts)
            if moments['m00'] != 0:
                cx = int(moments['m10'] / moments['m00'])  # cx = M10/M00
                cy = int(moments['m01'] / moments['m00'])  # cy = M01/M00
            else:
                cx, cy = box_x + box_w // 2, box_y + box_h // 2
            centerMass = (cx, cy)

            # Draw center mass
            cv2.circle(frame, centerMass, 7, [100, 0, 255], 2)
            cv2.putText(frame, 'Center of Hand', centerMass, font, 2, (255, 255, 255), 2)

            # Distance from each finger defect (finger webbing) to the center mass
            distanceBetweenDefectsToCenter = [
                np.hypot(fx - cx, fy - cy) for fx, fy in FarDefect
            ]

            # Average of the two shortest webbing-to-centre distances. With no
            # defects there is no baseline, so no finger can count as raised.
            sortedDefectsDistances = sorted(distanceBetweenDefectsToCenter)
            if sortedDefectsDistances:
                AverageDefectDistance = np.mean(sortedDefectsDistances[0:2])
            else:
                AverageDefectDistance = float('inf')

            # Fingertip candidates from the hull: keep a hull point when the
            # next hull point is more than 80 pixels away in x or y (closer
            # points are treated as one group) and it lies above row 500.
            finger = []
            for current, following in zip(hull[:-1], hull[1:]):
                point, next_point = current[0], following[0]
                far_apart = (np.absolute(point[0] - next_point[0]) > 80
                             or np.absolute(point[1] - next_point[1]) > 80)
                if far_apart and point[1] < 500:
                    finger.append(point)

            # The fingertips are the 5 candidates with the smallest y
            # coordinates, i.e. the ones highest up in the image.
            finger = sorted(finger, key=lambda point: point[1])
            fingers = finger[0:5]
            print('Fingers: ', fingers)

            # Distance of each fingertip to the center mass (x against cx,
            # y against cy)
            fingerDistance = [np.hypot(fx - cx, fy - cy) for fx, fy in fingers]

            # A finger is pointed/raised if its fingertip is more than 130
            # pixels farther from the center mass than the average webbing.
            result = sum(
                1 for distance in fingerDistance
                if distance > AverageDefectDistance + 130
            )

            # Print number of pointed fingers
            cv2.putText(frame, str(result), (100, 100), font, 2, (255, 255, 255), 2)

            # Draw bounding rectangle and hull
            cv2.rectangle(frame, (box_x, box_y), (box_x + box_w, box_y + box_h), (0, 255, 0), 2)
            cv2.drawContours(frame, [hull], -1, (255, 255, 255), 2)

        # Show the frame and poll the keyboard on every iteration, including
        # frames without contours, so the window keeps updating and 'q' always
        # works.
        cv2.imshow('Dilation', frame)

        # Close the output video by pressing 'q'
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

cv2.destroyAllWindows()
```

## Part V:

Implement a real-time face tracking application that will detect as many faces as there are within a
   scene, and identify the facial region (draw a bounding box) of the person who is sought by the user
   (you must ask for a person in your application and it should show a bounding box over the person
   of interest).

   - Validate at least 20 times and present the recognition performance metrics (accuracy,
     precision, recall and Intersection over Union (IoU)).

**FaceTracking.py** detects faces from the known faces given to it in real-time.
```python
import cv2
import depthai as dai
import numpy as np
import face_recognition

# Camera pipeline: colour camera video output -> host stream "face detector"
pipeline = dai.Pipeline()

camRgb = pipeline.create(dai.node.ColorCamera)
xoutVideo = pipeline.create(dai.node.XLinkOut)

xoutVideo.setStreamName("face detector")

camRgb.setBoardSocket(dai.CameraBoardSocket.RGB)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setVideoSize(860, 720)

xoutVideo.input.setBlocking(False)
xoutVideo.input.setQueueSize(1)

camRgb.video.link(xoutVideo.input)


def _encode_known_face(image_path):
    """Return the encoding of the first face found in the image at *image_path*.

    Raises:
        ValueError: If no face is detected in the image, instead of the bare
            IndexError that indexing an empty result would give.
    """
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        raise ValueError(f"No face found in reference image: {image_path}")
    return encodings[0]


amani_face_encoding = _encode_known_face("Amani_pic.jpeg")
brent_face_encoding = _encode_known_face("BJackson.jpg")

# known_face_encodings[i] belongs to known_face_names[i].
known_face_encodings = [
    amani_face_encoding,
    brent_face_encoding
]
known_face_names = [
    "Amani Hunter",
    "Brent"
]

# Name of the person the user wants to find; compared against the names above.
val = input("Enter name of person to detect: ")
print(val)

# State carried from one frame to the next by the tracking loop.
face_locations = []
face_encodings = []
face_names = []
top_lip = []
bottom_lip = []
center_points = []
process_this_frame = True

with dai.Device(pipeline) as device:
    video = device.getOutputQueue(name="face detector", maxSize=1, blocking=False)

    # Upper bound on the stored lip-centre history so it cannot grow forever.
    max_center_points = 64

    while True:
        videoIn = video.get()
        frame = videoIn.getCvFrame()

        # Detection runs on a quarter-size frame; results are scaled back by 4.
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
        # BGR -> RGB. The reversed view is not contiguous in memory, which some
        # dlib builds reject, so make a contiguous copy.
        rgb_small_frame = np.ascontiguousarray(small_frame[:, :, ::-1])

        # Faces are re-detected on every other frame; the frames in between
        # reuse the previous locations and names.
        if process_this_frame:
            face_locations = face_recognition.face_locations(rgb_small_frame)
            face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
            # Pass the same locations so landmarks[i] belongs to encodings[i].
            face_landmarks_list = face_recognition.face_landmarks(rgb_small_frame, face_locations)
            face_names = []
            for index, face_encoding in enumerate(face_encodings):
                matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
                name = "Unknown Individual"

                # Closest known face wins, but only if it is also a match.
                face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
                best_match_index = np.argmin(face_distances)
                if matches[best_match_index]:
                    name = known_face_names[best_match_index]

                # Track the lips only for a recognised face that is the person
                # the user asked for.
                if name in known_face_names and val == name:
                    landmarks = face_landmarks_list[index]
                    top_lip = np.array(landmarks['top_lip'], dtype=np.int32) * 4
                    bottom_lip = np.array(landmarks['bottom_lip'], dtype=np.int32) * 4
                    center_top_lip = np.mean(top_lip, axis=0).astype('int')
                    center_points.append(center_top_lip)
                    del center_points[:-max_center_points]
                face_names.append(name)
        process_this_frame = not process_this_frame

        # Draw a labelled box around every detected face.
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            top *= 4
            right *= 4
            bottom *= 4
            left *= 4

            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

        cv2.imshow('Face Detector', frame)
        if cv2.waitKey(1) == ord('q'):
            break
cv2.destroyAllWindows()

```

## Part VI:

Fix a marker on a wall or a flat vertical surface.

   - From a distance D, keeping the camera stationed static
     (not handheld and mounted on a tripod or placed on a flat surface), capture an image such that the marker
     is registered.

   - Then translate the camera by T units along the axis parallel to the ground (horizontal) and
     then capture another image, with the marker being registered.

   - Compute D using disparity based depth
     estimation in stereo-vision theory.

   (Note: you can pick any value for D and T. Keep in mind that T cannot
   be large as the marker may get out of view. Of course this depends on D)

Images captured:

![Input image](Part-6/frame1.png)
![Input image](Part-6/frame2.png)

**Question6.py** detects the marker in these two images, measures its disparity, and uses it to estimate the distance D:
```python
import cv2

aruco = cv2.aruco

def computeDisparity(img, markerSize =4, totalMarkers=50, draw=True):
    """Detect ArUco markers in *img* and return their corner coordinates.

    Despite its name this function only locates the markers; the caller
    derives the disparity from the corners found in two images.

    Args:
        img: BGR image to search.
        markerSize: Side length of the marker grid; selects the predefined
            dictionary ``DICT_{markerSize}X{markerSize}_{totalMarkers}``.
        totalMarkers: Number of markers in that dictionary.
        draw: Unused; kept so existing calls keep working.

    Returns:
        The detected marker corners as returned by OpenCV's detector (empty
        when no marker is found).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    key = getattr(aruco, f'DICT_{markerSize}X{markerSize}_{totalMarkers}')
    if hasattr(aruco, 'ArucoDetector'):
        # OpenCV >= 4.7: Dictionary_get / DetectorParameters_create are gone.
        detector = aruco.ArucoDetector(aruco.getPredefinedDictionary(key),
                                       aruco.DetectorParameters())
        bboxs, ids, rejected = detector.detectMarkers(gray)
    else:
        # Older OpenCV: legacy free-function API.
        arucoDict = aruco.Dictionary_get(key)
        arucoParam = aruco.DetectorParameters_create()
        bboxs, ids, rejected = aruco.detectMarkers(gray, arucoDict, parameters=arucoParam)
    return bboxs

img0 = cv2.imread('frame1.png')
img1 = cv2.imread('frame2.png')
if img0 is None or img1 is None:
    # cv2.imread returns None instead of raising when a file cannot be read.
    raise FileNotFoundError("Could not read 'frame1.png' and/or 'frame2.png'")

# Camera translation between the two shots and the focal length.
# NOTE(review): presumably the focal length is in pixels, so the result is in
# the units of the baseline -- confirm against the calibration.
baseline = 11.5
focal_length = 1.636331765375964e+03
bbox1= computeDisparity(img0)
bbox2 = computeDisparity(img1)
if len(bbox1) == 0 or len(bbox2) == 0:
    raise RuntimeError("The marker was not detected in both images")

# Disparity: horizontal shift of the same corner (index 3) of the first
# detected marker between the two images.
disparity = bbox1[0][0][3][0] - bbox2[0][0][3][0]
if disparity == 0:
    raise RuntimeError("Zero disparity: the marker did not move between the images")

# Stereo depth: D = baseline * focal_length / disparity.
d = (baseline * focal_length)/disparity
print('Depth D: ', d)

```

**Output:**

Disparity:  19.561138567384184