QUESTION 1 TRACKING

In [None]:
import cv2
import numpy as np
import time
import math
import depthai as dai


# Display size as (width, height); the display loop resizes frames to the
# same 720x480 before showing them.
DIM = (720, 480)

# NOTE(review): the three flags below look like leftovers from a stereo-depth
# example; nothing in the visible code reads them. Kept so that any other
# cell relying on them keeps working.
# Closer-in minimum depth, disparity range is doubled (from 95 to 190):
extended_disparity = False
# Better accuracy for longer distance, fractional disparity 32-levels:
subpixel = False
# Better handling for occlusions:
lr_check = True

# Create pipeline: colour camera -> XLink output stream named "rgb".
pipeline = dai.Pipeline()

camRgb = pipeline.create(dai.node.ColorCamera)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
# Use the same generic node factory as for the camera node above.
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
camRgb.video.link(xoutRgb.input)

# Initialize the parameters
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4  # Non-maximum suppression threshold
inpWidth = 256  # Width of network's input image
inpHeight = 256  # Height of network's input image
# Reference timestamp used by the elapsed-time / FPS readouts.
start = time.time()

# Load names of classes (one label per line; trailing newlines are dropped
# so the last entry is not an empty string).
classesFile =r"E:\GSU\CV\Assignment4\coco.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Darknet model definition and weights. Alternatives tried previously:
# modelConfiguration = "tiny-yolov2-trial13.cfg"
# modelWeights = "tiny-yolov2-trial13.weights"

modelConfiguration = r"E:\GSU\CV\Assignment4\tiny.cfg"
modelWeights = r"E:\GSU\CV\Assignment4\yolov3-tiny.weights"

# modelConfiguration = "yolov3.cfg"
# modelWeights = "yolov3.weights"

# Run inference with OpenCV's own DNN backend on the CPU.
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


def region_of_interest(img, vertices):
    """Return a copy of *img* with everything outside *vertices* set to zero.

    Args:
        img: Image array; may be single-channel (H, W) or multi-channel
            (H, W, C).
        vertices: Polygon(s) passed straight to ``cv2.fillPoly``
            (presumably a sequence of int32 point arrays -- confirm
            against the caller).

    Returns:
        An array shaped like *img* that keeps the original pixels inside
        the polygon(s) and is zero elsewhere.
    """
    mask = np.zeros_like(img)
    # A bare scalar colour only fills the first channel, which would wipe
    # the remaining channels of a colour image. Supply one 255 per channel
    # instead (OpenCV scalars hold at most four components).
    channel_count = img.shape[2] if img.ndim > 2 else 1
    match_mask_color = (255,) * min(channel_count, 4)
    cv2.fillPoly(mask, vertices, match_mask_color)
    return cv2.bitwise_and(img, mask)


# Get the names of the output layers
def getOutputsNames(net):
    """Return the names of the network's output layers.

    Output layers are the ones reported by ``net.getUnconnectedOutLayers()``,
    which yields 1-based layer ids.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        A list of layer-name strings, in the order the ids were reported.
    """
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Depending on the OpenCV version the ids arrive either as a flat array
    # or as an Nx1 array; flatten so both shapes index the name list cleanly.
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
    # Ids are 1-based, the name list is 0-based.
    return [layersNames[int(layerId) - 1] for layerId in outLayerIds]


# Remove the bounding boxes with low confidence using non-maxima suppression
def postprocess(frame, outs):
    """Draw the surviving detections onto *frame* in place.

    Every row of every array in *outs* is read as one candidate: elements
    0-3 are the box centre x, centre y, width and height as fractions of the
    frame size, and elements from index 5 onward are per-class scores.
    Candidates whose best class score exceeds ``confThreshold`` are passed
    through non-maximum suppression (``nmsThreshold``); each kept box is
    drawn with its class label above and its confidence percentage below.

    Args:
        frame: Image array to annotate; modified in place.
        outs: Iterable of 2-D detection arrays from the network's output
            layers.

    Returns:
        None.
    """
    frameHeight, frameWidth = frame.shape[:2]

    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence <= confThreshold:
                continue
            center_x = int(detection[0] * frameWidth)
            center_y = int(detection[1] * frameHeight)
            width = int(detection[2] * frameWidth)
            height = int(detection[3] * frameHeight)
            # Convert centre/size to the top-left/size form NMSBoxes expects.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(classId)
            confidences.append(confidence)
            boxes.append([left, top, width, height])

    if not boxes:
        # Nothing passed the confidence threshold; nothing to suppress or draw.
        return

    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    # The kept indices come back as a flat array, an Nx1 array or an empty
    # tuple depending on the OpenCV version; flatten to plain ints.
    for i in np.asarray(indices, dtype=int).flatten():
        i = int(i)
        left, top, width, height = boxes[i]
        cv2.putText(frame, classes[classIds[i]], (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, str(round(confidences[i] * 100, 2)) + "%", (left, top + height + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        cv2.rectangle(frame, (left, top), (left + width, top + height), (0, 255, 0), 3)



# Process inputs: pull colour frames from the device, run the detector on
# each one and show the annotated result until a key is pressed.

# cap = cv2.VideoCapture(0)
counter = 0
time_elasped = 0

with dai.Device(pipeline) as device:
    # Output queue will be used to get the colour frames from the "rgb" stream
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)

    # The output layer names do not change between frames; look them up once.
    outputNames = getOutputsNames(net)

    # Restart the clock here so the FPS readout is not skewed by the time
    # spent loading the model and starting the device.
    start = time.time()

    try:
        while True:
            # get frame from the device
            inRgb = qRgb.get()
            frame = inRgb.getCvFrame()
            counter += 1

            # Create a 4D blob from a frame.
            blob = cv2.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)

            # Sets the input to the network
            net.setInput(blob)

            # Runs the forward pass to get output of the output layers
            outs = net.forward(outputNames)

            # Remove the bounding boxes with low confidence
            postprocess(frame, outs)
            cv2.putText(frame, "Q to Exit", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 255), 4)
            time_elasped = int(time.time() - start)
            # Whole seconds only; wait until more than one has passed so the
            # integer division below is meaningful and never divides by zero.
            if time_elasped > 1:
                cv2.putText(frame, "FPS: " + str(counter // time_elasped), (50, 100), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                            (255, 255, 255), 3)
                print("FPS: ", counter // time_elasped)

            frame = cv2.resize(frame, DIM)

            cv2.imshow("Object Detection YOLO", frame)

            # Poll the keyboard exactly once per frame so no key press is
            # swallowed; any key (including 'q') stops the loop.
            if cv2.waitKey(1) >= 0:
                break
    finally:
        # Close the preview window even if a frame fails mid-loop.
        cv2.destroyAllWindows()

    print("Done processing !!!")
    # cap.release()
    end = time.time()
    print("Time Elasped: ", int(end - start))
    print("FPS: ", counter // (end - start))

QUESTION 2 BUSINESS CARD

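This idea is implemented in three steps:
Image Detection - detect the reference image in the camera frame
Feature Matching - match ORB features between the reference image and the frame using a FLANN-based matcher
Augmented Reality - display the overlay image on top of the detected image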

In [1]:
import cv2
import numpy as np


# Lower bound on the number of reference-image keypoints required before a
# camera frame is matched against that reference.
MIN_MATCHES = 5

# ORB keypoint detector / descriptor extractor, capped at 5000 features.
detector = cv2.ORB_create(nfeatures=5000)

# FLANN matcher configured with a KD-tree index (5 trees, 100 checks per
# query). The matching helper converts descriptors to float32 before
# handing them to this matcher.
FLANN_INDEX_KDTREE = 1
index_params = {"algorithm": FLANN_INDEX_KDTREE, "trees": 5}
search_params = {"checks": 100}
flann = cv2.FlannBasedMatcher(index_params, search_params)


def load_input(inp):
    """Load reference image number *inp* together with its overlay image.

    Both images are read from fixed directories, resized to 300x400, and
    ORB keypoints/descriptors are computed on the grayscale reference.

    Args:
        inp: Value substituted into the ``img{0}.jpg`` file name.

    Returns:
        A tuple ``(gray_image, augment_image, keypoints, descriptors)``:
        the grayscale reference, the resized overlay image, and the
        reference keypoints and descriptors from the module-level detector.

    Raises:
        FileNotFoundError: If either image cannot be read.
    """
    input_path = r'E:\GSU\CV\Assignment4\Input\img{0}.jpg'.format(inp)
    augment_path = r'E:\GSU\CV\Assignment4\mask\img{0}.jpg'.format(inp)

    input_image = cv2.imread(input_path)
    augment_image = cv2.imread(augment_path)
    # imread signals failure by returning None rather than raising; report
    # the offending path instead of failing later inside cv2.resize.
    if input_image is None:
        raise FileNotFoundError("Could not read reference image: " + input_path)
    if augment_image is None:
        raise FileNotFoundError("Could not read overlay image: " + augment_path)

    input_image = cv2.resize(input_image, (300, 400), interpolation=cv2.INTER_AREA)
    augment_image = cv2.resize(augment_image, (300, 400))
    gray_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)
    # find the keypoints with ORB
    keypoints, descriptors = detector.detectAndCompute(gray_image, None)

    return gray_image, augment_image, keypoints, descriptors


def compute_matches(descriptors_input, descriptors_output, ratio=0.69):
    """Match two descriptor sets and keep the matches passing a ratio test.

    Each input descriptor is matched to its two nearest output descriptors
    with the module-level FLANN matcher; a match is kept when the best
    distance is below ``ratio`` times the second-best distance.

    Args:
        descriptors_input: Descriptors of the reference image, or None.
        descriptors_output: Descriptors of the current frame, or None.
        ratio: Ratio-test factor; defaults to 0.69.

    Returns:
        A list of the accepted best matches (possibly empty), or None when
        either descriptor set is missing or empty.
    """
    # Descriptor extraction yields None when no keypoints were found, so
    # check for that before calling len().
    if descriptors_input is None or descriptors_output is None:
        return None
    if len(descriptors_input) == 0 or len(descriptors_output) == 0:
        return None

    # The descriptors are converted to float32 before being handed to the
    # FLANN matcher.
    matches = flann.knnMatch(
        np.asarray(descriptors_input, np.float32),
        np.asarray(descriptors_output, np.float32),
        k=2,
    )

    good = []
    for pair in matches:
        # A query can come back with fewer than two neighbours; the ratio
        # test is undefined for it, so skip instead of failing to unpack.
        if len(pair) < 2:
            continue
        best, second = pair[0], pair[1]
        if best.distance < ratio * second.distance:
            good.append(best)
    return good


if __name__ == '__main__':
    # Webcam loop: find the current reference image in each frame and, when
    # enough features match, warp the overlay image onto it. Esc quits.

    NUM_REFERENCES = 2          # reference images are numbered 1..NUM_REFERENCES
    GOOD_MATCH_THRESHOLD = 10   # matches needed before a homography is attempted
    FRAME_SIZE = (600, 450)     # (width, height) every frame is resized to

    # Getting information from the input image. Loaded references are cached
    # so cycling between them does not hit the disk on every frame.
    count = 1
    reference_cache = {count: load_input(count)}
    input_image, aug_image, input_keypoints, input_descriptors = reference_cache[count]

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unavailable or stream ended: frame is not usable.
                break

            frame = cv2.resize(frame, FRAME_SIZE)
            output = frame

            # A reference with too few keypoints cannot be matched reliably.
            reference_usable = len(input_keypoints) >= MIN_MATCHES
            matches = None
            if reference_usable:
                frame_bw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                output_keypoints, output_descriptors = detector.detectAndCompute(frame_bw, None)
                # No descriptors means no features were found in the frame.
                if output_descriptors is not None:
                    matches = compute_matches(input_descriptors, output_descriptors)

            if matches is not None and len(matches) > GOOD_MATCH_THRESHOLD:
                src_pts = np.float32([input_keypoints[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
                dst_pts = np.float32([output_keypoints[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

                # Finally find the homography matrix
                M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                # findHomography yields None when no model could be fitted;
                # in that case just show the plain frame.
                if M is not None:
                    # Corners of the 300x400 reference, mapped into the frame.
                    pts = np.float32([[0, 0], [0, 399], [299, 399], [299, 0]]).reshape(-1, 1, 2)
                    dst = cv2.perspectiveTransform(pts, M)
                    M_aug = cv2.warpPerspective(aug_image, M, FRAME_SIZE)

                    # Black out the detected card, then add the warped
                    # overlay. fillConvexPoly needs 32-bit integer points,
                    # and cv2.add saturates instead of wrapping on overflow.
                    frameb = cv2.fillConvexPoly(frame, np.int32(dst), 0)
                    output = cv2.add(frameb, M_aug)
            elif matches is not None or not reference_usable:
                # Current reference was not found (or is unusable): move on
                # to the next one, wrapping back to the first.
                count = count % NUM_REFERENCES + 1
                if count not in reference_cache:
                    reference_cache[count] = load_input(count)
                input_image, aug_image, input_keypoints, input_descriptors = reference_cache[count]

            cv2.imshow('Final Output', output)
            # Always poll the keyboard so the window stays responsive.
            if cv2.waitKey(15) & 0xFF == 27:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

: 