<a href="https://colab.research.google.com/github/hany606/PAI_Fall21IU/blob/main/Final_Exam/PAI_Final_Exam_IUF21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Final Exam - PAI - IU - Fall21

### Hany Hamed

In [None]:
! pip install mediapy

Collecting mediapy
  Downloading mediapy-1.0.3-py3-none-any.whl (24 kB)
Installing collected packages: mediapy
Successfully installed mediapy-1.0.3


In [None]:
# Import libraries
%matplotlib inline
from matplotlib import pyplot as plt
import cv2
import numpy as np
from copy import deepcopy
import scipy.stats as st
import base64
from IPython.display import clear_output, Image
from skimage.metrics import structural_similarity as compare_ssim

import time
import os
import zipfile
import tarfile
from collections import namedtuple
from google.colab.patches import cv2_imshow
from xml.dom import minidom

# Default size of every inline figure, given as (width, height)
plt.rcParams.update({"figure.figsize": (20, 20)})


## Prepare YOLO model

In [None]:
# Download YOLO model
!wget https://s3-us-west-2.amazonaws.com/static.pyimagesearch.com/opencv-yolo/yolo-object-detection.zip?__s=1essnpgyhz7jwwcpjszi -O yolo-object-detection.zip
clear_output()

In [None]:
# Unpack the downloaded archive into the yolo_data directory
with zipfile.ZipFile('yolo-object-detection.zip', mode='r') as yolo_archive:
    yolo_archive.extractall(path='yolo_data')

In [None]:
# Source: ICV course - Assignment 2
# Directory that YOLO.__init__ reads coco.names, yolov3.weights and yolov3.cfg from
PATH_TO_YOLO = 'yolo_data/yolo-object-detection/yolo-coco'
# A detection is kept only if its class score is above this value (see YOLO.forward)
CONFIDENCE = 0.5
# Two same-class boxes with an IoU above this value are treated as one object
# (see YOLO.non_max_supression)
THRESHOLD = 0.3
def IoU(box1, box2):
    """
    intersection over union of two axis-aligned boxes

    box1: [x1,y1,x2,y2] coordinates of the ground truth box
    box2: [x1,y1,x2,y2] coordinates of the predicted box
    return: IoU between two boxes if they are overlapping, 0 otherwise
            (also 0 when both boxes have zero area, so the union is empty)

    with
    x1, y1:  coordinates of the upper left corner
    x2, y2: coordinates of the lower right corner
    """
    # determine coordinates of the intersection
    left_x = max(box1[0], box2[0])
    top_y = max(box1[1], box2[1])
    right_x = min(box1[2], box2[2])
    bottom_y = min(box1[3], box2[3])

    # compute intersection area; a negative extent means no overlap on that axis
    interArea = max(right_x - left_x, 0) * max(bottom_y - top_y, 0)

    # compute the area of both the prediction and ground-truth
    ground_area = abs((box1[0] - box1[2]) * (box1[1] - box1[3]))
    predicted_area = abs((box2[0] - box2[2]) * (box2[1] - box2[3]))

    # two degenerate (zero-area) boxes have an empty union: report no overlap
    # instead of dividing by zero
    union_area = ground_area + predicted_area - interArea
    if union_area == 0:
        return 0.0

    # compute the IoU
    return interArea / union_area


class YOLO:
    """
    YOLOv3 object detector (Darknet weights, COCO labels) run through cv2.dnn

    typical use: build it once, then call detect(image) for every image.
    after each forward()/detect() call the current detections are stored in
    self.boxes        rows of [x, y, w, h] with (x, y) the upper left corner
    self.confidences  score of the chosen class for every box
    self.classIDs     index into self.LABELS for every box
    """

    def __init__(self, PATH_TO_YOLO, CONFIDENCE, THRESHOLD):
        """
        PATH_TO_YOLO: directory containing coco.names, yolov3.weights, yolov3.cfg
        CONFIDENCE: a detection is kept only if its class score is above this
        THRESHOLD: IoU above which two same-class boxes count as one object
        """
        self.CONFIDENCE = CONFIDENCE
        self.THRESHOLD = THRESHOLD
        # load the COCO class labels our YOLO model was trained on
        labelsPath = os.path.sep.join([PATH_TO_YOLO, 'coco.names'])
        with open(labelsPath) as labels_file:
            self.LABELS = labels_file.read().strip().split("\n")

        # initialize a list of colors to represent each possible class label
        # (fixed seed so every class keeps the same color between runs)
        np.random.seed(42)
        self.COLORS = np.random.randint(0, 255, size=(len(self.LABELS), 3), dtype="uint8")

        # derive the paths to the YOLO weights and model configuration
        weightsPath = os.path.sep.join([PATH_TO_YOLO, "yolov3.weights"])
        configPath = os.path.sep.join([PATH_TO_YOLO, "yolov3.cfg"])

        # load YOLO object detector trained on COCO dataset (80 classes)
        net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

        self.net = net
        # determine only the *output* layer names that we need from YOLO
        self.ln = self._output_layer_names(net.getLayerNames(), net.getUnconnectedOutLayers())

        # per-class weights used only to pick the winning class in forward():
        # 80x80 @ 80x1 = 80x1
        # every class is weighted 0.5 except class index 15, which is weighted 1
        # NOTE(review): index 15 is presumably 'cat' in coco.names - verify
        self.class_weight = np.zeros((80,80))
        np.fill_diagonal(self.class_weight, 0.5)
        self.class_weight[15,15] = 1

    @staticmethod
    def _output_layer_names(layer_names, out_layer_ids):
        """
        map the 1-based ids from net.getUnconnectedOutLayers() to layer names

        depending on the OpenCV version the ids arrive either as an Nx1 array
        or as a flat array; flattening first handles both layouts
        """
        return [layer_names[int(layer_id) - 1] for layer_id in np.array(out_layer_ids).flatten()]

    def non_max_supression(self):
        """
        perform non-maximum supression over boxes

        reads self.boxes / self.confidences / self.classIDs and replaces them
        with the surviving detections, ordered by decreasing confidence
        """
        # can be done using cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, THRESHOLD)
        idxs = np.argsort(-self.confidences)
        confidences = self.confidences[idxs]
        boxes = self.boxes[idxs]
        classIDs = self.classIDs[idxs]

        for i in range(len(boxes)):
            x,y,w,h = boxes[i][0],  boxes[i][1], boxes[i][2], boxes[i][3]
            box1 = [x, y, x+w, y+h]
            for j in range(i+1, len(boxes)):
                # Get the coordinates for the 2nd box
                x,y,w,h = boxes[j][0],  boxes[j][1], boxes[j][2], boxes[j][3]
                box2 = [x, y, x+w, y+h]
                # If they are of the same class
                # and have a IoU above self.THRESHOLD
                if (classIDs[i] == classIDs[j] and IoU(box1, box2) > self.THRESHOLD):
                    # we regard them as describing the same object and
                    # set the confidence of the box with lower confidence to 0
                    low_confidence_idx = np.argmin([confidences[i], confidences[j]])
                    confidences[[i,j][low_confidence_idx]] = 0.0

        # keep only the boxes that were not zeroed out above
        idxs = np.where(confidences>0)
        self.boxes = boxes[idxs]
        self.confidences = confidences[idxs]
        self.classIDs = classIDs[idxs]

    def detect(self, image):
        """
        detect objects, supress non maximums, draw boxes

        image: image to annotate; the rectangles are drawn on it in place
               NOTE(review): presumably expected in OpenCV's BGR channel order,
               because forward() builds the blob with swapRB=True - confirm
               with the caller
        return image with boxes
        """
        self.forward(image)
        self.non_max_supression()
        # draw boxes
        for i in range(len(self.boxes)):
            # extract the bounding box coordinates as plain Python ints
            x, y, w, h = (int(value) for value in self.boxes[i][:4])

            # draw a bounding box rectangle on the image
            color = self.COLORS[self.classIDs[i]].tolist()
            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        return image

    def forward(self, image):
        """
        feed an image to YOLO network, filter weak boxes

        nothing is returned; the detections are stored in
        self.boxes ([x, y, w, h] rows), self.confidences and self.classIDs
        """
        (H, W) = image.shape[:2]
        # construct a blob from the input image and then perform a forward
        # pass of the YOLO object detector, giving us our bounding boxes and
        # associated probabilities
        blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        start = time.time()
        layerOutputs = self.net.forward(self.ln)
        end = time.time()

        print("[INFO] YOLO took {:.6f} seconds".format(end - start))

        # initialize our lists of detected bounding boxes, confidences, and
        # class IDs, respectively
        boxes = []
        confidences = []
        classIDs = []

        # loop over each of the layer outputs
        for output in layerOutputs:
            # loop over each of the detections
            for detection in output:
                # extract the class ID and confidence: the class is chosen on
                # the weighted scores, the confidence is the raw score of
                # that class
                scores = detection[5:]
                classID = np.argmax(self.class_weight@scores)
                confidence = scores[classID]

                # filter out weak predictions
                if confidence > self.CONFIDENCE:
                    # scale the bounding box coordinates back relative to the
                    # size of the image, keeping in mind that YOLO actually
                    # returns the center (x, y)-coordinates of the bounding
                    # box followed by the boxes' width and height
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")

                    # use the center (x, y)-coordinates to derive the top and
                    # and left corner of the bounding box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    # update our list of bounding box coordinates, confidences,
                    # and class IDs
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        self.boxes = np.array(boxes)
        self.confidences = np.array(confidences)
        self.classIDs = np.array(classIDs)

## Utils

In [None]:
# Source: lab3 from ICV
def showInRow(list_of_images, titles = None, disable_ticks = False):
  """
  Plot the given images next to each other in a single row and show the figure.

  list_of_images: images to plot, one subplot per image
  titles: optional titles, titles[k] is put above the k-th image
  disable_ticks: when True, remove the axis ticks of every subplot
  """
  n_images = len(list_of_images)
  for position, img in enumerate(list_of_images, start=1):
    axis = plt.subplot(1, n_images, position)
    if titles is not None:
      axis.set_title(titles[position - 1])

    # 2-D arrays and arrays with a single channel are drawn with the gray colormap
    is_grayscale = len(img.shape) == 2 or img.shape[2] == 1
    axis.imshow(img, cmap='gray' if is_grayscale else None)
    if disable_ticks:
      plt.xticks([])
      plt.yticks([])
  plt.show()


## Prepare the video

In [None]:
# Just display the video
from IPython.display import HTML
from base64 import b64encode
 
def show_video(video_path, video_width = 600):
  """
  Build an inline HTML5 player for a video file.

  The whole file is read and embedded in the page as a base64 data URL.

  video_path: path of the video file to embed
  video_width: value of the width attribute of the <video> element
  return: HTML object wrapping the <video> element
  """
  # Read-only binary mode is sufficient, and the context manager closes the file
  with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()

  video_url = f"data:video/mp4;base64,{b64encode(video_bytes).decode()}"
  return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")
 
# Last expression of the cell, so the returned HTML player for cat.mp4 is displayed
show_video("cat.mp4")


In [None]:
# Read cat.mp4 and keep every FRAME_STRIDE-th frame, converted from OpenCV's
# BGR order to RGB.
FRAME_STRIDE = 3

video_capture = cv2.VideoCapture("cat.mp4")
frames = []
i = 0  # index of the frame currently being read
try:
    while video_capture.isOpened():
        success, frame = video_capture.read()
        if not success:
            # no frame could be read (e.g. end of the video)
            break
        if i % FRAME_STRIDE == 0:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        i += 1
finally:
    # always free the capture, even if reading or converting a frame fails
    video_capture.release()



## Processing

In [None]:
# Build the detector once, then annotate every frame from index 30 onwards.
yolo = YOLO(PATH_TO_YOLO, CONFIDENCE, THRESHOLD)
detected_frames = []
for i, frame in enumerate(frames[30:]):
    print(i)
    # `frames` holds RGB images, while YOLO.forward builds its blob with
    # swapRB=True, i.e. it assumes OpenCV's BGR order. Convert before detecting
    # so the network receives the channels in the order it expects.
    # cvtColor returns a new array, so the stored frame is left untouched.
    image = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    image = yolo.detect(image)
    # back to RGB, the order the rest of the notebook works with
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detected_frames.append(image)

0
[INFO] YOLO took 2.495022 seconds
1
[INFO] YOLO took 1.869110 seconds
2
[INFO] YOLO took 1.882118 seconds
3
[INFO] YOLO took 1.857769 seconds
4
[INFO] YOLO took 1.857963 seconds
5
[INFO] YOLO took 1.857785 seconds
6
[INFO] YOLO took 1.864060 seconds
7
[INFO] YOLO took 1.856611 seconds
8
[INFO] YOLO took 1.864714 seconds
9
[INFO] YOLO took 1.851375 seconds
10
[INFO] YOLO took 1.867091 seconds
11
[INFO] YOLO took 1.880196 seconds
12
[INFO] YOLO took 1.870725 seconds
13
[INFO] YOLO took 1.874565 seconds
14
[INFO] YOLO took 1.875767 seconds
15
[INFO] YOLO took 1.855671 seconds
16
[INFO] YOLO took 1.858137 seconds
17
[INFO] YOLO took 1.853854 seconds
18
[INFO] YOLO took 1.882697 seconds
19
[INFO] YOLO took 1.859164 seconds
20
[INFO] YOLO took 1.864467 seconds
21
[INFO] YOLO took 1.877532 seconds
22
[INFO] YOLO took 1.866834 seconds
23
[INFO] YOLO took 1.877448 seconds
24
[INFO] YOLO took 1.859785 seconds
25
[INFO] YOLO took 1.872041 seconds
26
[INFO] YOLO took 1.869529 seconds
27
[INFO] Y

In [None]:

# Repeats the detection loop of the previous cell with the already constructed
# `yolo` object, so the model does not have to be loaded again.
detected_frames = []
for i, frame in enumerate(frames[30:]):
    print(i)
    # `frames` holds RGB images, while YOLO.forward builds its blob with
    # swapRB=True, i.e. it assumes OpenCV's BGR order. Convert before detecting
    # so the network receives the channels in the order it expects.
    # cvtColor returns a new array, so the stored frame is left untouched.
    image = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    image = yolo.detect(image)
    # back to RGB, the order the rest of the notebook works with
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detected_frames.append(image)

## Prepare the output video

In [None]:
from mediapy import VideoWriter
# Encode the annotated frames into Exam-Sol.mp4 at 15 frames per second
output_path = 'Exam-Sol.mp4'
# first two dimensions of the first annotated frame
frame_shape = detected_frames[0].shape[:2]
with VideoWriter(output_path, shape=frame_shape, fps=15) as writer:
    for annotated_frame in detected_frames:
        writer.add_image(annotated_frame)

In [None]:
# Last expression of the cell, so the returned HTML player for Exam-Sol.mp4 is displayed
show_video("Exam-Sol.mp4")

In [None]:
# Stack the annotated frames into one array and store it on disk
detected_frames_array = np.array(detected_frames)
np.save("Exam-array", detected_frames_array)