## YOLO (You Only Look Once)
YOLO is a state-of-the-art, real-time object detection algorithm. YOLO uses *Darknet*, an open-source deep neural network framework written by the creators of YOLO.

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

from utils import load_class_names,detect_objects,print_objects
from darknet import Darknet

In [2]:
# --- Detection thresholds ---
nms_thresh = 0.6  # NMS threshold
iou_thresh = 0.4  # IOU threshold

# --- Files describing the network ---
# Location and name of the cfg file (network architecture)
cfg_file = './cfg/yolov3.cfg'
# Location and name of the pre-trained weights file
weight_file = './weights/yolov3.weights'
# Location and name of the COCO object classes file
namesfile = 'data/coco.names'

# Build the network from its cfg file, then load the pre-trained weights into it
model = Darknet(cfg_file)
model.load_weights(weight_file)

# Read the COCO object class names using the helper imported from utils.py
class_names = load_class_names(namesfile)


Loading weights. Please Wait...100.00% Complete

In [17]:
# dict1 = {idx:name for idx,name in enumerate(class_names)}
# dict1

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorbike',
 4: 'aeroplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'sofa',
 58: 'pottedplant',
 59: 'bed',
 60: 'diningtable',
 61: 'toilet',
 62: 'tvmonitor',
 63: 'laptop',
 64: 

## Testing Object Detection on some of the sample images

In [4]:
def plot_boxes_videos(img, boxes, class_names, plot_labels, color = None):
    """Draw detection boxes (and optionally labels) onto ``img`` in place.

    Args:
        img: Image array indexed as (height, width, ...); it is drawn on
            with cv2.
        boxes: Iterable of detections. ``box[0]``/``box[1]`` are read as the
            box centre and ``box[2]``/``box[3]`` as its width/height, all
            scaled by the image width/height to get pixel coordinates. When
            a box has at least 7 entries, ``box[5]`` is used as the class
            confidence and ``box[6]`` as the class id.
        class_names: Sequence mapping a class id to its display name.
        plot_labels: When true, write "<name>: <confidence>" above each box
            that carries class information. Class ids 15-23 are all
            labelled "Animal".
        color: Optional colour tuple applied to every box. When ``None``
            (the default) class id 0 (person) is drawn in red and every
            other box in green.

    Returns:
        None. ``img`` is modified in place.
    """
    red = (0, 0, 255)
    green = (0, 255, 0)
    sports_ball_id = 32
    animal_ids = range(15, 24)

    # Get the width and height of the image
    height, width = img.shape[:2]

    # The label offsets depend only on the image size, so compute them once
    lxc = int((width * 0.266) / 100)
    lyc = int((height * 1.180) / 100)

    for box in boxes:
        # Class information is only present on boxes with at least 7 entries.
        # Reading it per box (instead of reusing whatever the previous box
        # left behind) avoids stale or undefined class ids.
        has_class = len(box) >= 7
        cls_conf = box[5] if has_class else None
        cls_id = box[6] if has_class else None

        # Ignore 'sports ball' detections entirely
        if cls_id == sports_ball_id:
            continue

        # Pixel coordinates of the top-left (x1, y1) and bottom-right (x2, y2)
        # corners of the bounding box
        x1 = int(np.around((box[0] - box[2]/2.0) * width))
        y1 = int(np.around((box[1] - box[3]/2.0) * height))
        x2 = int(np.around((box[0] + box[2]/2.0) * width))
        y2 = int(np.around((box[1] + box[3]/2.0) * height))

        # An explicit colour wins; otherwise persons are red, the rest green
        if color is not None:
            box_color = color
        elif cls_id == 0:
            box_color = red
        else:
            box_color = green

        # Draw the rectangle around the detected object
        cv2.rectangle(img, (x1, y1), (x2, y2), box_color, 2)

        # A label needs both class information on the box and a name table
        if plot_labels and has_class and class_names:
            if cls_id in animal_ids:
                label = "Animal"
            else:
                label = class_names[cls_id]
            conf_tx = label + ': {:.1f}'.format(cls_conf)

            # Object class name and class confidence, just above the box
            cv2.putText(img, conf_tx, (x1 + lxc, y1 - lyc),cv2.FONT_HERSHEY_SIMPLEX,1,box_color,2)

In [12]:
def capture_snapshots(img,img_org, boxes, class_names,prev_id = None,prev_conf = None,count = 0):
    """Decide whether the current frame should be saved as a snapshot.

    Nothing is captured while any detection has class id 0 (person).
    Otherwise only the first box is considered: if its class confidence is
    above 0.9 the untouched frame ``img_org`` is written to ``output_imgs/``
    when the class differs from ``prev_id``, or when it is the same class,
    fewer than two snapshots have been taken of it, and the confidence
    differs from ``prev_conf``.

    Args:
        img: Frame that status text is drawn onto (modified in place).
        img_org: Frame that is written to disk when a snapshot is taken.
        boxes: Detections; ``box[5]`` is read as the class confidence and
            ``box[6]`` as the class id.
        class_names: Unused; kept for interface compatibility.
        prev_id: Class id returned by the previous call.
        prev_conf: Class confidence returned by the previous call.
        count: Snapshot counter returned by the previous call.

    Returns:
        Tuple ``(prev_id, prev_conf, count)`` to pass into the next call.
        ``(0, 0, 0)`` is returned when a person is present.
    """
    max_count = 2
    red = (0,0,255)
    green = (0,255,0)
    output_dir = 'output_imgs'

    # Nothing detected: leave the tracking state untouched
    if len(boxes) == 0:
        return prev_id, prev_conf, count

    # Never capture while a person (class id 0) is in the frame
    if any(box[6] == 0 for box in boxes):
        cv2.putText(img,"Not Capturing" , (10, 25),cv2.FONT_HERSHEY_SIMPLEX,1,red,2)
        return 0,0,0

    # Take the first box
    box = boxes[0]
    cls_conf = box[5]
    cls_id = box[6]

    if cls_conf <= 0.9:
        return prev_id, prev_conf, count

    if cls_id != prev_id:
        # A different object class: restart the counter so that it also gets
        # up to ``max_count`` snapshots instead of inheriting the old count
        count = 0
    elif count >= max_count or cls_conf == prev_conf:
        # Same object: either enough snapshots already, or an identical
        # confidence to the last call
        return prev_id, prev_conf, count

    count += 1
    cv2.putText(img,f"Capturing Image {count} {cls_conf}" , (10, 25),cv2.FONT_HERSHEY_SIMPLEX,1,green,2)
    # Make sure the target directory exists before listing/writing into it
    os.makedirs(output_dir, exist_ok=True)
    num = len(os.listdir(output_dir))
    cv2.imwrite(f"output_imgs/img_{num}.png",img_org)
    return cls_id, cls_conf, count

In [16]:
path_to_video = "Final_2.mp4"
# path_to_video = "05.mp4"

# Frame source: the video file above (use cv2.VideoCapture(0) for the webcam)
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture(path_to_video)
if not cap.isOpened():
    raise IOError(f"Could not open video source: {path_to_video}")

# Frame count reported by the source.
# NOTE(review): a live source may report 0 or a negative value here, so it is
# only used as a stop condition when positive -- confirm against OpenCV docs.
last_frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)

prev_id = None
prev_conf = None
count = 0
frame_count = 0
try:
    while True:
        # Read the next frame; stop as soon as the source has none left
        ret, img = cap.read()
        if not ret:
            break
        frame_count += 1

        # Keep an untouched copy: `img` gets boxes and text drawn onto it,
        # `img_org` is what gets saved as a snapshot.
        # NOTE(review): frames are passed to the detector in the channel order
        # delivered by cap.read() (the RGB conversion is commented out), while
        # show_objects() converts to RGB first -- confirm which is intended.
        #     img_org = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_org = img.copy()

        # Resize the image to the input width and height of the first layer of the network
        img_resize = cv2.resize(img_org, (model.width, model.height))

        # Detect objects in the image
        boxes = detect_objects(model, img_resize, iou_thresh, nms_thresh)

        # Plot the bounding boxes around the detected objects
        plot_boxes_videos(img, boxes, class_names, plot_labels = True)

        # Capture snapshots of the frame when something was detected
        if len(boxes) > 0:
            prev_id,prev_conf,count = capture_snapshots(img,img_org, boxes, class_names,prev_id = prev_id, prev_conf = prev_conf,count = count)
        cv2.imshow('LIVE',img)
        key = cv2.waitKey(1)

        # Stop on the escape key or once the reported last frame was shown
        if key == 27 or (last_frame_num > 0 and frame_count >= last_frame_num):
            break
finally:
    # Always release the capture and close the windows, even after an error
    cap.release()
    cv2.destroyAllWindows()



Time Taken 0.771 seconds

Number of Objects Detected: 1 



Time Taken 0.989 seconds

Number of Objects Detected: 1 



Time Taken 0.909 seconds

Number of Objects Detected: 1 



Time Taken 0.914 seconds

Number of Objects Detected: 1 



Time Taken 0.876 seconds

Number of Objects Detected: 1 



Time Taken 0.911 seconds

Number of Objects Detected: 1 



Time Taken 0.798 seconds

Number of Objects Detected: 1 



Time Taken 0.933 seconds

Number of Objects Detected: 1 



Time Taken 0.838 seconds

Number of Objects Detected: 1 



Time Taken 0.890 seconds

Number of Objects Detected: 1 



Time Taken 0.932 seconds

Number of Objects Detected: 1 



Time Taken 0.900 seconds

Number of Objects Detected: 1 



Time Taken 0.780 seconds

Number of Objects Detected: 1 



Time Taken 0.802 seconds

Number of Objects Detected: 1 



Time Taken 0.809 seconds

Number of Objects Detected: 1 



Time Taken 0.797 seconds

Number of Objects Detected: 1 



Time Taken 0.895 seconds

Number of Ob



Time Taken 0.789 seconds

Number of Objects Detected: 0 



Time Taken 0.868 seconds

Number of Objects Detected: 0 



Time Taken 0.759 seconds

Number of Objects Detected: 1 



Time Taken 0.768 seconds

Number of Objects Detected: 1 



Time Taken 0.871 seconds

Number of Objects Detected: 1 



Time Taken 0.814 seconds

Number of Objects Detected: 1 



Time Taken 0.776 seconds

Number of Objects Detected: 1 



Time Taken 0.824 seconds

Number of Objects Detected: 1 



Time Taken 0.826 seconds

Number of Objects Detected: 1 



Time Taken 0.825 seconds

Number of Objects Detected: 1 



Time Taken 0.739 seconds

Number of Objects Detected: 1 



Time Taken 0.889 seconds

Number of Objects Detected: 1 



Time Taken 0.799 seconds

Number of Objects Detected: 1 



Time Taken 0.786 seconds

Number of Objects Detected: 1 



Time Taken 0.769 seconds

Number of Objects Detected: 1 



Time Taken 0.781 seconds

Number of Objects Detected: 0 



Time Taken 0.865 seconds

Number of Ob



Time Taken 0.777 seconds

Number of Objects Detected: 1 



Time Taken 0.841 seconds

Number of Objects Detected: 1 



Time Taken 0.794 seconds

Number of Objects Detected: 1 



Time Taken 0.830 seconds

Number of Objects Detected: 3 



Time Taken 0.825 seconds

Number of Objects Detected: 3 



Time Taken 0.914 seconds

Number of Objects Detected: 3 



Time Taken 0.811 seconds

Number of Objects Detected: 3 



Time Taken 0.902 seconds

Number of Objects Detected: 3 



Time Taken 0.781 seconds

Number of Objects Detected: 3 



Time Taken 0.813 seconds

Number of Objects Detected: 3 



Time Taken 0.889 seconds

Number of Objects Detected: 3 



Time Taken 0.810 seconds

Number of Objects Detected: 3 



Time Taken 0.806 seconds

Number of Objects Detected: 3 



Time Taken 0.858 seconds

Number of Objects Detected: 3 



Time Taken 0.833 seconds

Number of Objects Detected: 3 



Time Taken 0.871 seconds

Number of Objects Detected: 3 



Time Taken 0.817 seconds

Number of Ob

## Object Detection

The `show_objects` function takes an image file name as an argument and displays all the objects detected in that image.

In [None]:
# `os` is used below to build image paths and to list the image directory
import os

# Set the default figure size
plt.rcParams['figure.figsize'] = [12.0, 7.0]
# Directory that show_objects() loads its images from
imgs_dir = "../Park_Images"
# imgs_dir = "park_pics"

def show_objects(img_name = 'img5.jpg'):
    """Detect the objects in one image from ``imgs_dir`` and display them.

    The image is loaded with cv2, converted from BGR to RGB, resized to the
    network input size (``model.width`` x ``model.height``) and passed to
    ``detect_objects``. The detections are printed with ``print_objects``
    and drawn on the full-size RGB image with ``plot_boxes``.

    Args:
        img_name: File name of the image inside ``imgs_dir``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # NOTE(review): plot_boxes is not among the names imported at the top of
    # this file; it is presumably provided by utils.py next to
    # detect_objects/print_objects -- confirm.
    from utils import plot_boxes

    # Load the image
    image_path = os.path.join(imgs_dir,img_name)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread does not raise for a missing or unreadable file
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Convert the image to RGB
    original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize the image to the input width and height of the first layer of the network
    resized_image = cv2.resize(original_image, (model.width, model.height))

    # Detect objects in the image.
    # Each bounding box contains 7 parameters:
    #   the coordinates (x, y) of the center of the bounding box,
    #   the width w and height h of the bounding box,
    #   the confidence detection level,
    #   the object class probability
    #   and the object class id.
    boxes = detect_objects(model, resized_image, iou_thresh, nms_thresh)

    # Print the objects found and the confidence level
    print_objects(boxes, class_names)

    # Plot the image with bounding boxes and corresponding object class labels
    plot_boxes(original_image, boxes, class_names, plot_labels = True)

In [None]:

# Every entry of the image directory except the .mp4 video files
img_list = list(filter(lambda fname: not fname.endswith(".mp4"), os.listdir(imgs_dir)))
img_list

In [None]:
# img_list = ['img3.jpg','dog.jpg','city_scene.jpg', 'food.jpg','wine.jpg']

# Run the detector on every image and print a dashed rule between two
# consecutive results (but not after the last one)
length = len(img_list)
count = 0  # stays 0 when there are no images
for count in range(1, length + 1):
    show_objects(img_list[count - 1])
    if count < length:
        print('-'*100)

In [None]:
def capture_snapshots(img_org, boxes, class_names,prev_id = None,img = None):
    """Crop every confidently detected elephant out of ``img_org`` and save it.

    For each box whose class id (``box[6]``) is 20 and whose class
    confidence (``box[5]``) is above 0.95, the box region is cut out of
    ``img_org`` and written to ``elephants/ele_<n>.png``, where ``<n>`` is
    the number of entries already in that directory.

    NOTE(review): this definition has the same name as the snapshot function
    defined earlier in this file but a different signature; running this
    cell replaces the earlier one -- confirm that is intended.

    Args:
        img_org: Frame to crop from, indexed as (height, width, channels).
        boxes: Detections; ``box[0]``/``box[1]`` are read as the box centre
            and ``box[2]``/``box[3]`` as its width/height, scaled by the
            frame width/height to get pixel coordinates.
        class_names: Unused; kept for interface compatibility.
        prev_id: Unused; kept for interface compatibility.
        img: Unused; kept for interface compatibility. The frame size is
            taken from ``img_org`` because that is the array being cropped.

    Returns:
        None.
    """
    elephant_id = 20
    min_conf = 0.95
    out_dir = 'elephants'

    # Get the width and height of the frame that is cropped
    height, width = img_org.shape[:2]

    for box in boxes:
        cls_conf = box[5]
        cls_id = box[6]

        if cls_id == elephant_id and cls_conf > min_conf:
            # Pixel coordinates of the top-left (x1, y1) and bottom-right
            # (x2, y2) corners of the bounding box
            x1 = int(np.around((box[0] - box[2]/2.0) * width))
            y1 = int(np.around((box[1] - box[3]/2.0) * height))
            x2 = int(np.around((box[0] + box[2]/2.0) * width))
            y2 = int(np.around((box[1] + box[3]/2.0) * height))

            # Clamp to the frame: a negative start index would otherwise be
            # interpreted as counting from the end of the array
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, width), min(y2, height)

            # Only save when a non-empty region is left after clamping
            if x2 > x1 and y2 > y1:
                roi = img_org[y1:y2, x1:x2, :]

                # Make sure the target directory exists before using it
                os.makedirs(out_dir, exist_ok=True)
                num = len(os.listdir(out_dir))
                cv2.imwrite(f"elephants/ele_{num}.png",roi)
#         if cls_id == 0:
#             cv2.putText(img,"Not Capturing" , (10, 10),cv2.FONT_HERSHEY_SIMPLEX,1,red,2)
#         el
    