# Object Detection - The Holy Grail of Computer Vision

### This is a combination of image classification and localization - we don't just identify what is in an image, we identify where it is as well

## R-CNN - Regions with CNN
- Segment an image into various regions of interest
- Feed each region into a CNN to classify it
- Use linear regression to find optimal bounds for the prediction region for that classification


- We figured out a way (Fast R-CNN) to combine the 3 steps above (which used to all be separately trained models) into one model
- There is also the YOLO architecture (You Only Look Once), which further optimizes these models for real-time use

In [1]:
import numpy as np
import cv2

In [2]:
# Helper function to show an image with cv2
def cv_show_img(title, image, wait=0):
    """Show a single image in its own window and close it after the wait.

    This is the one-window case of ``cv_show_mult_img``: the window is
    created, the image is drawn, ``cv2.waitKey(wait)`` is called, and then
    all OpenCV windows are destroyed.

    Args:
        title: Name of the window to create.
        image: Image handed to ``cv2.imshow``.
        wait: Delay passed to ``cv2.waitKey``; the default of 0 blocks
            until a key is pressed.
    """
    # A single title/image pair goes through exactly the same
    # open -> wait -> teardown sequence as the multi-window helper.
    cv_show_mult_img([title], [image], wait)

# Helper function to show multiple images at the same time
def cv_show_mult_img(titleArr, imageArr, wait=0):
    """Open one window per title/image pair, wait, then close every window.

    Args:
        titleArr: Sequence of window titles.
        imageArr: Sequence of images; ``imageArr[k]`` is displayed in the
            window named ``titleArr[k]``, so it must be at least as long as
            ``titleArr``.
        wait: Delay passed to ``cv2.waitKey`` once all windows are open; the
            default of 0 blocks until a key is pressed.
    """
    for position, window_title in enumerate(titleArr):
        cv2.namedWindow(window_title)
        cv2.startWindowThread()
        cv2.imshow(window_title, imageArr[position])

    # Keep the windows on screen for the requested time / until a key press
    cv2.waitKey(wait)

    # Teardown: the short waitKey calls around destroyAllWindows presumably
    # give the GUI event loop a chance to actually close the windows.
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

## SSD

In [4]:
# --- SSD MobileNet v1 (COCO) model setup ---

# Graph description (.pbtxt) and frozen weights (.pb) for the network
pbtext = './model_objects/ssd_mobilenet_v1_coco.pbtxt'
weights = './model_objects/frozen_inference_graph.pb'

# Highest class id in the label map below (not used elsewhere in this cell)
classes = 90
# Detections whose confidence is not above this value are ignored
threshold = 0.2

# Build the network from the TensorFlow files
net = cv2.dnn.readNetFromTensorflow(weights, pbtext)

# cv2.imread yields BGR images; this flag is passed to blobFromImage so the
# first and last channels are swapped before the blob reaches the network
swapRB = True

# Class id -> human readable label (ids are not contiguous)
classNames = {
    0: 'background',
    # people
    1: 'person',
    # vehicles
    2: 'bicycle',
    3: 'car',
    4: 'motorcycle',
    5: 'airplane',
    6: 'bus',
    7: 'train',
    8: 'truck',
    9: 'boat',
    # street objects
    10: 'traffic light',
    11: 'fire hydrant',
    13: 'stop sign',
    14: 'parking meter',
    15: 'bench',
    # animals
    16: 'bird',
    17: 'cat',
    18: 'dog',
    19: 'horse',
    20: 'sheep',
    21: 'cow',
    22: 'elephant',
    23: 'bear',
    24: 'zebra',
    25: 'giraffe',
    # accessories
    27: 'backpack',
    28: 'umbrella',
    31: 'handbag',
    32: 'tie',
    33: 'suitcase',
    # sports
    34: 'frisbee',
    35: 'skis',
    36: 'snowboard',
    37: 'sports ball',
    38: 'kite',
    39: 'baseball bat',
    40: 'baseball glove',
    41: 'skateboard',
    42: 'surfboard',
    43: 'tennis racket',
    # kitchen
    44: 'bottle',
    46: 'wine glass',
    47: 'cup',
    48: 'fork',
    49: 'knife',
    50: 'spoon',
    51: 'bowl',
    # food
    52: 'banana',
    53: 'apple',
    54: 'sandwich',
    55: 'orange',
    56: 'broccoli',
    57: 'carrot',
    58: 'hot dog',
    59: 'pizza',
    60: 'donut',
    61: 'cake',
    # furniture
    62: 'chair',
    63: 'couch',
    64: 'potted plant',
    65: 'bed',
    67: 'dining table',
    70: 'toilet',
    # electronics
    72: 'tv',
    73: 'laptop',
    74: 'mouse',
    75: 'remote',
    76: 'keyboard',
    77: 'cell phone',
    # appliances
    78: 'microwave',
    79: 'oven',
    80: 'toaster',
    81: 'sink',
    82: 'refrigerator',
    # indoor objects
    84: 'book',
    85: 'clock',
    86: 'vase',
    87: 'scissors',
    88: 'teddy bear',
    89: 'hair drier',
    90: 'toothbrush',
}



In [5]:
# Run the SSD network over a few test images and show the detections.
#
# For each image: build a 300x300 input blob, run a forward pass, centre-crop
# the frame to the network's aspect ratio, draw a box and a label for every
# detection above `threshold`, then show the untouched original next to the
# annotated crop.

# Load some test images
image_paths = [
    './images/elephant.jpg',
    './images/traffic.jpg',
    './images/office.jpg',
    './images/zebra.jpg',
]
# cv2.imread does not raise on a missing/unreadable file, it returns None
frames = [cv2.imread(image_path) for image_path in image_paths]

# Network input geometry and normalisation -- identical for every image, so
# computed once instead of on every loop iteration
w = 300
h = 300
whRatio = w / float(h)
scaleFactor = 0.007843
meanVal = 127.5

for image_path, f in zip(image_paths, frames):
    if f is None:
        # Skip unreadable images instead of crashing on f.copy()
        print(f'Skipping {image_path}: image could not be read')
        continue

    # Keep an untouched copy to show next to the annotated result
    img = f.copy()

    # Convert image to 4D blob
    blob = cv2.dnn.blobFromImage(f, scaleFactor, (w, h), (meanVal, meanVal, meanVal), swapRB)

    # Get detections from blob
    net.setInput(blob)
    detections = net.forward()

    # Centre-crop the frame to the same aspect ratio as the network input
    # NOTE(review): box coordinates are scaled onto this crop, which matches
    # the network input only if blobFromImage also centre-crops (crop=True).
    # Recent OpenCV versions default to crop=False -- confirm which is wanted.
    rows, cols = f.shape[:2]

    if cols / float(rows) > whRatio:
        cropSize = (int(rows * whRatio), rows)
    else:
        cropSize = (cols, int(cols / whRatio))

    y1 = int((rows - cropSize[1]) / 2)
    y2 = y1 + cropSize[1]
    x1 = int((cols - cropSize[0]) / 2)
    x2 = x1 + cropSize[0]

    f = f[y1:y2, x1:x2]
    rows, cols = f.shape[:2]

    # Each detection row is
    # [image_id, class_id, confidence, left, top, right, bottom]
    # with the box corners given as fractions of the image size.
    for detection in detections[0, 0]:
        # Confidence is a fraction in [0, 1]; it must stay a float, casting
        # to int would turn nearly every score into 0
        confidence = float(detection[2])

        # Filter out detected boxes that don't meet our threshold
        if confidence <= threshold:
            continue

        # The class id is in column 1 (column 2 is the confidence)
        class_id = int(detection[1])
        x_left = int(detection[3] * cols)
        y_top = int(detection[4] * rows)
        x_right = int(detection[5] * cols)
        y_bottom = int(detection[6] * rows)

        # Draw the box
        cv2.rectangle(f, (x_left, y_top), (x_right, y_bottom), (0, 255, 0))

        # Label the image with the class & confidence of the object
        if class_id in classNames:
            label = f'{classNames[class_id]}: {confidence:.2f}'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Keep the label inside the image when the box touches the top
            y_top = max(y_top, labelSize[1])

            # White background behind the text
            cv2.rectangle(
                f,
                (x_left, y_top - labelSize[1]),
                (x_left + labelSize[0], y_top + baseLine),
                (255, 255, 255),
                cv2.FILLED
            )
            cv2.putText(f, label, (x_left, y_top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    cv_show_mult_img(
        ['Original', 'Detections'],
        [img, f]
    )




## YOLO

In [6]:
import argparse
import time
import os
from os import listdir
from os.path import isfile, join

: 

In [None]:
## Find a repo on GitHub