1. Set up the environment

In [0]:
# Check python and CUDA version
!python --version
!nvcc --version

Python 3.6.7
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Tue_Jun_12_23:07:04_CDT_2018
Cuda compilation tools, release 9.2, V9.2.148


In [0]:
# Map your google drive 
from google.colab import drive
drive.mount('/content/drive/')
!ls "drive/My Drive/app"

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/
car.jpeg	  imagenet_class_index.json  run.mp4_yolo_out_py.avi
cat.png		  main.ipynb		     test.ipynb
dogs_and_cats	  mnist_cnn.py		     transferlearningUWB
hymenoptera_data  outout.txt		     uwb_ID


In [1]:
# Download pre-trained YOLOv3-tiny model
# You can download other YOLO versions to test 

!wget https://pjreddie.com/media/files/yolov3-tiny.weights
!wget https://github.com/pjreddie/darknet/blob/master/cfg/yolov3-tiny.cfg?raw=true -O ./yolov3-tiny.cfg
!wget https://github.com/pjreddie/darknet/blob/master/data/coco.names?raw=true -O ./coco.names

--2018-11-25 20:02:09--  https://pjreddie.com/media/files/yolov3-tiny.weights
Resolving pjreddie.com (pjreddie.com)... 128.208.3.39
Connecting to pjreddie.com (pjreddie.com)|128.208.3.39|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 35434956 (34M) [application/octet-stream]
Saving to: ‘yolov3-tiny.weights’


2018-11-25 20:02:12 (16.0 MB/s) - ‘yolov3-tiny.weights’ saved [35434956/35434956]

--2018-11-25 20:02:14--  https://github.com/pjreddie/darknet/blob/master/cfg/yolov3-tiny.cfg?raw=true
Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113
Connecting to github.com (github.com)|192.30.253.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/pjreddie/darknet/raw/master/cfg/yolov3-tiny.cfg [following]
--2018-11-25 20:02:14--  https://github.com/pjreddie/darknet/raw/master/cfg/yolov3-tiny.cfg
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location:

In [0]:
# Initialize the parameters
confThreshold = 0.5  #Confidence threshold
nmsThreshold = 0.4   #Non-maximum suppression threshold
inpWidth = 416       #Width of network's input image
inpHeight = 416      #Height of network's input image

In [4]:
import cv2 as cv

# Load names of classes
classesFile = "coco.names";
classes = None
i = 0
print("Class names:")
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')
    print(classes)
 
# Give the configuration and weight files for the model and load the network using them.
modelConfiguration = "yolov3.cfg";
modelWeights = "yolov3.weights";
 
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

print("Layer list:")
print(net.getLayerNames())

Class names:
1 :  ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
Layer list:
['conv_0', 'bn_0', 'relu_0', 'conv_1', 'bn_1', 'relu_1', 'conv_2', 'bn_2', 'relu_2', 'conv_3', 'bn_3', '

In [0]:
# Input video file
# You can change the input video from your own google drive
cap = cv.VideoCapture('drive/My Drive/app/street.mp4')
length = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
print('Total number of frames in the input video: ',length )


# Output video file with bounding boxes surrounding objects
outputFile = "street_yolo.avi"
vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))
print('Output will be written to: ', outputFile)

Total number of frames in the input video:  150
Output will be written to:  street_yolo.avi


In [0]:
# Some helper functions for post processing the output from darknet

# Get the names of the output layers
def getOutputsNames(net):
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# Remove the bounding boxes with low confidence using non-maxima suppression
def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]
 
    classIds = []
    confidences = []
    boxes = []
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
 
    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
  
# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(frame, (left, top), (right, bottom), (0, 0, 255))
     
    label = '%.2f' % conf
         
    # Get the label for the class name and its confidence
    if classes:
        assert(classId < len(classes))
        label = '%s:%s' % (classes[classId], label)
 
    #Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255))

    


In [0]:
import numpy as np

i = 0
while (1):
    # get frame from the video
    hasFrame, frame = cap.read()
     
    # Stop the program if reached end of video
    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        break

    i = i+1
    print('Frame #',i)


    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
 
    # Sets the input to the network
    net.setInput(blob)
 
    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))
 
    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)
 
    # Put efficiency information. The function getPerfProfile returns the 
    # overall time for inference(t) and the timings for each of the layers(in layersTimes)
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
 
    vid_writer.write(frame.astype(np.uint8));

vid_writer.release()
cap.release()
        

Frame # 1
Frame # 2
Frame # 3
Frame # 4
Frame # 5
Frame # 6
Frame # 7
Frame # 8
Frame # 9
Frame # 10
Frame # 11
Frame # 12
Frame # 13
Frame # 14
Frame # 15
Frame # 16
Frame # 17
Frame # 18
Frame # 19
Frame # 20
Frame # 21
Frame # 22
Frame # 23
Frame # 24
Frame # 25
Frame # 26
Frame # 27
Frame # 28
Frame # 29
Frame # 30
Frame # 31
Frame # 32
Frame # 33
Frame # 34
Frame # 35
Frame # 36
Frame # 37
Frame # 38
Frame # 39
Frame # 40
Frame # 41
Frame # 42
Frame # 43
Frame # 44
Frame # 45
Frame # 46
Frame # 47
Frame # 48
Frame # 49
Frame # 50
Frame # 51
Frame # 52
Frame # 53
Frame # 54
Frame # 55
Frame # 56
Frame # 57
Frame # 58
Frame # 59
Frame # 60
Frame # 61
Frame # 62
Frame # 63
Frame # 64
Frame # 65
Frame # 66
Frame # 67
Frame # 68
Frame # 69
Frame # 70
Frame # 71
Frame # 72
Frame # 73
Frame # 74
Frame # 75
Frame # 76
Frame # 77
Frame # 78
Frame # 79
Frame # 80
Frame # 81
Frame # 82
Frame # 83
Frame # 84
Frame # 85
Frame # 86
Frame # 87
Frame # 88
Frame # 89
Frame # 90
Frame # 91
Frame # 

In [0]:
# Copy the output file to google drive

!cp street_yolo.avi drive/My\ Drive/app/