<a href="https://colab.research.google.com/github/abhigyan13/object_detection_with_yolo_tiny_v3/blob/master/object_detection_with_yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import argparse
import cv2 as cv
import subprocess
import time
import os
from google.colab.patches import cv_imshow
# Minimum class score for a detection to be kept; infer_image passes it to
# generate_boxes_confidences_classids (as tconf) and to cv.dnn.NMSBoxes as the
# score threshold.
confidence = 0.5
# Passed by infer_image as the last argument (the NMS threshold) of
# cv.dnn.NMSBoxes.
threshold = 0.3

In [7]:
# Mount Google Drive at /content/gdrive (Colab-only helper); the model, label
# and video paths used later in this notebook point into "gdrive/My Drive".
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [8]:


def draw_labels_and_boxes(img, boxes, confidences, classids, idxs, colors, labels):
    """Draw the selected detections (rectangle plus "label: score") on img.

    Args:
        img: Image handed to cv.rectangle / cv.putText, which draw on it.
        boxes: Sequence of [x, y, w, h] entries (top-left corner and size).
        confidences: Per-box scores, parallel to boxes.
        classids: Per-box class indices, parallel to boxes.
        idxs: Indices into boxes of the detections to draw. May be an
            (N, 1) array, a flat array, a list, or an empty tuple.
        colors: Indexable by class id; each entry is an iterable of
            colour components convertible to int.
        labels: Indexable by class id; each entry is the class name.

    Returns:
        The img object that was passed in.
    """
    if len(idxs) == 0:
        return img

    # Normalise the index container first: its layout differs between
    # OpenCV versions (column vector vs. flat array vs. tuple).
    for i in np.asarray(idxs).flatten():
        # Bounding box as top-left corner plus width/height
        x, y, w, h = boxes[i][:4]

        # OpenCV expects plain Python ints for the colour components
        color = [int(c) for c in colors[classids[i]]]

        # Draw the bounding box rectangle and label on the image
        cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
        # ".4f" = four decimals; the previous "4f" only set a minimum width
        text = "{}: {:.4f}".format(labels[classids[i]], confidences[i])
        cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return img


def generate_boxes_confidences_classids(outs, height, width, tconf):
    """Collect candidate boxes, scores and class ids from raw network outputs.

    Every row of every array in ``outs`` is read as
    ``[cx, cy, w, h, <entry 4, unused here>, score_0, score_1, ...]``.
    The first four entries are multiplied by the frame width/height to get
    pixel units. A row is kept only when its highest class score is strictly
    greater than ``tconf``.

    Args:
        outs: Iterable of 2-D arrays of detection rows.
        height: Frame height used to scale cy and h.
        width: Frame width used to scale cx and w.
        tconf: Score threshold (exclusive).

    Returns:
        Tuple ``(boxes, confidences, classids)`` of parallel lists, where each
        box is ``[x, y, w, h]`` with (x, y) the top-left corner, each
        confidence is a Python float, and each class id is the argmax index.
    """
    boxes, confidences, classids = [], [], []

    # Multiplier that maps the four box fields to pixel units.
    pixel_scale = np.array([width, height, width, height])

    for layer_output in outs:
        for row in layer_output:
            # Class scores start at index 5; pick the best-scoring class.
            class_scores = row[5:]
            best_class = np.argmax(class_scores)
            best_score = class_scores[best_class]

            # Skip anything that does not clear the threshold.
            if not (best_score > tconf):
                continue

            center_x, center_y, box_w, box_h = (row[0:4] * pixel_scale).astype('int')

            # Convert the centre point into the top-left corner.
            left = int(center_x - (box_w / 2))
            top = int(center_y - (box_h / 2))

            boxes.append([left, top, int(box_w), int(box_h)])
            confidences.append(float(best_score))
            classids.append(best_class)

    return boxes, confidences, classids

def infer_image(net, layer_names, height, width, img, colors, labels,
                boxes=None, confidences=None, classids=None, idxs=None, infer=True):
    """Run detection on img (optionally) and draw the resulting boxes on it.

    When ``infer`` is true, the image is turned into a 416x416 blob (pixel
    values scaled by 1/255, R and B channels swapped), passed through ``net``,
    and the outputs are filtered with the module-level ``confidence`` and
    ``threshold`` values. Any ``boxes`` / ``confidences`` / ``classids`` /
    ``idxs`` supplied by the caller are overwritten in that case.

    When ``infer`` is false, the caller must supply all four of those
    arguments; they are drawn as-is.

    Args:
        net: Network object providing ``setInput`` and ``forward``.
        layer_names: Output layer names passed to ``net.forward``.
        height: Frame height used to scale detections to pixels.
        width: Frame width used to scale detections to pixels.
        img: Image to run detection on and to draw on.
        colors: Per-class colours, indexable by class id.
        labels: Per-class names, indexable by class id.
        boxes: Precomputed [x, y, w, h] boxes (only used when infer is false).
        confidences: Precomputed scores (only used when infer is false).
        classids: Precomputed class ids (only used when infer is false).
        idxs: Precomputed indices of boxes to draw (only used when infer is
            false).
        infer: Whether to run the network or reuse the supplied detections.

    Returns:
        Tuple ``(img, boxes, confidences, classids, idxs)``.

    Raises:
        ValueError: If, after the optional inference step, any of boxes,
            confidences, classids or idxs is still None.
    """
    if infer:
        # Constructing a blob from the input image
        blob = cv.dnn.blobFromImage(img, 1 / 255.0, (416, 416),
                                    swapRB=True, crop=False)

        # Perform a forward pass of the YOLO object detector
        net.setInput(blob)

        # Getting the outputs from the output layers
        outs = net.forward(layer_names)

        # Generate the boxes, confidences, and classIDs
        boxes, confidences, classids = generate_boxes_confidences_classids(
            outs, height, width, confidence)

        # Apply Non-Maxima Suppression to suppress overlapping bounding boxes
        idxs = cv.dnn.NMSBoxes(boxes, confidences, confidence, threshold)

    if boxes is None or confidences is None or idxs is None or classids is None:
        # Raising a bare string is not valid in Python 3 (it would surface as
        # an unrelated TypeError), so raise a real exception with the message.
        raise ValueError('[ERROR] Required variables are set to None before drawing boxes on images.')

    # Draw labels and boxes on the image
    img = draw_labels_and_boxes(img, boxes, confidences, classids, idxs, colors, labels)

    return img, boxes, confidences, classids, idxs

In [9]:
# Model files and media locations. These are relative paths, so they resolve
# against the current working directory.
weights = "gdrive/My Drive/yolov3-tiny.weights"
config = "gdrive/My Drive/yolov3-tiny.cfg"
# Name of the output file
output = "gdrive/My Drive/outputs.mp4"
# Add your video here
video_path = "gdrive/My Drive/myvid.mp4"

label = "gdrive/My Drive/coco-labels"

# One class name per line; the context manager closes the file after reading.
with open(label) as label_file:
    labels = label_file.read().strip().split('\n')

# One random colour (3 uint8 components) per class
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

net = cv.dnn.readNetFromDarknet(config, weights)

# Resolve the names of the unconnected (output) layers. The ids are 1-based,
# and depending on the OpenCV version they come back as an (N, 1) or a flat
# array, so flatten before indexing.
layer_names = net.getLayerNames()
layer_names = [layer_names[i - 1]
               for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]




In [None]:
# Read the input video frame by frame, run detection on each frame and write
# the annotated frames to the output file.
vid = cv.VideoCapture(str(video_path))
height, width, writer = None, None, None

try:
    if not vid.isOpened():
        raise IOError('Could not open video: {}'.format(video_path))

    # Keep the source frame rate when the capture reports a usable one;
    # otherwise fall back to 30 fps ("not fps > 0" also catches NaN).
    fps = vid.get(cv.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30

    while True:
        grabbed, frame = vid.read()

        # No more frames (or a read failure): stop
        if not grabbed:
            break

        # Frame size is taken once, from the first frame
        if width is None or height is None:
            height, width = frame.shape[:2]

        frame, _, _, _, _ = infer_image(net, layer_names, height, width, frame, colors, labels)

        # Create the writer lazily so its size matches the actual frames
        if writer is None:
            fourcc = cv.VideoWriter_fourcc(*'mp4v')
            writer = cv.VideoWriter(output, fourcc, fps,
                                    (frame.shape[1], frame.shape[0]), True)

        writer.write(frame)
finally:
    # Release resources even on error; the writer only exists if at least
    # one frame was processed.
    if writer is not None:
        writer.release()
    vid.release()