<a href="https://colab.research.google.com/github/jahongirazim/yolov3_object_detection/blob/master/colab_darknet_yolov3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Fetch a fresh copy of darknet from GitHub. Any previous checkout is removed
# first so that the clone does not fail on an already existing directory.
!ls
!rm -fr darknet
!git clone https://github.com/pjreddie/darknet

In [0]:
#Check whether it is cloned correctly
!ls
%cd darknet
!ls
% cd ..

In [0]:
# Installing CUDA pre-requisites: download NVIDIA's local repository package
# for CUDA 8.0 and register it with dpkg.
# NOTE(review): the package name targets Ubuntu 16.04 - confirm that it still
# installs on the runtime this notebook is executed on.
!wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
!dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb

In [0]:
!apt update
!apt upgrade

In [0]:
# Install the CUDA 8.0 toolkit package
!apt install cuda-8-0 -y

# If the CUDA installation fails, comment out the command above and
# uncomment the command below, which repairs broken dependencies while
# installing cuda-visual-tools-8-0.

#!apt --fix-broken install cuda-visual-tools-8-0 -y

In [0]:
import os

# Make the CUDA binaries directory visible to the shell commands run by later cells.
_cuda_bin = '/usr/local/cuda/bin'
# Append only when missing, so that re-running this cell does not keep
# growing PATH with duplicate entries.
if _cuda_bin not in os.environ.get('PATH', '').split(':'):
    os.environ['PATH'] = os.environ.get('PATH', '') + ':' + _cuda_bin

In [0]:
# Install the GCC 5 / G++ 5 compilers; the next cell links them into the CUDA bin directory.
# NOTE(review): presumably needed because CUDA 8.0 requires an older host compiler - confirm.
!apt install gcc-5 g++-5 -y

In [0]:
!ln -s /usr/bin/gcc-5 /usr/local/cuda/bin/gcc 
!ln -s /usr/bin/g++-5 /usr/local/cuda/bin/g++

In [0]:
import os

# Ensure the CUDA binaries directory (which now also holds the gcc/g++ links)
# is on PATH. An earlier cell performs the same step, so only append when the
# entry is missing instead of adding a duplicate.
_cuda_bin = '/usr/local/cuda/bin'
if _cuda_bin not in os.environ.get('PATH', '').split(':'):
    os.environ['PATH'] = os.environ.get('PATH', '') + ':' + _cuda_bin

In [0]:
#Changing the variables to include OpenCV and GPU in the Makefile, for traning you should uncomment CUDNN,and OPENMP should be enabled to be able to use high performance CPU or TPU
% cd darknet
!sed -i 's/OPENCV=0/OPENCV=1/g' Makefile
!sed -i 's/GPU=0/GPU=1/g' Makefile
#!sed -i 's/CUDNN=0/CUDNN=1/g' Makefile
#!sed -i 's/OPENMP=0/OPENMP=1/g' Makefile
!sed -i 's/LIBSO=0/LIBSO=1/g' Makefile

In [0]:
# Print the patched Makefile.
# Verify that it now contains OPENCV=1 and GPU=1.
!cat Makefile

In [0]:

# Apparently we need to install this so that OpenCV can work without any issues
# when we are making the file
!apt-get install libopencv-dev

In [0]:
# Build darknet with the Makefile settings patched above
# (run from inside the darknet directory).
!make

In [0]:
# Downloading YOLO weights
!wget https://pjreddie.com/media/files/yolov3.weights

In [0]:
# Smoke test: run the darknet detector on one of the sample images in data/.
# The next cell reads predictions.jpg, which this command is expected to write.
# Uncomment one of the other lines to try a different sample image.

!./darknet detect cfg/yolov3.cfg yolov3.weights data/person.jpg

#!./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg

#!./darknet detect cfg/yolov3.cfg yolov3.weights data/eagle.jpg

#!./darknet detect cfg/yolov3.cfg yolov3.weights data/horses.jpg

#!./darknet detect cfg/yolov3.cfg yolov3.weights data/kite.jpg

#!./darknet detect cfg/yolov3.cfg yolov3.weights data/scream.jpg

In [0]:
#The DOG, The TRUCK and The Bicycle!
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

image = cv2.imread("predictions.jpg")
height, width = image.shape[:2]
resized_image = cv2.resize(image,(3*width, 3*height), interpolation = cv2.INTER_CUBIC)

fig = plt.gcf()
fig.set_size_inches(18, 10)
plt.axis("off")
#plt.rcParams['figure.figsize'] = [10, 5]
plt.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
plt.show()

#rom google.colab.patches import cv2_imshow
#cv2_imshow(resized_image)
#cv2.waitKey(0)

In [0]:
# Mount Google Drive at /content/gdrive; the video processed in the last cell
# is read from "/content/gdrive/My Drive/...".
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# Work from the darknet checkout: the network config, weights and data files
# loaded at the bottom of this cell are given as relative paths.
%cd /content/darknet
!ls

from ctypes import *
import math
import random
import cv2
#import numpy as np

def sample(probs):
    """Draw a random index, weighted by the entries of `probs`.

    The weights are normalised by their sum, a uniform number in [0, 1] is
    drawn, and the normalised weights are subtracted from it one by one.
    The index at which the remainder first drops to zero or below is
    returned; if that never happens the last index is returned.
    """
    total = sum(probs)
    normalised = [weight / total for weight in probs]
    remaining = random.uniform(0, 1)
    for index, share in enumerate(normalised):
        remaining -= share
        if remaining <= 0:
            return index
    return len(normalised) - 1

def c_array(ctype, values):
    """Return a new ctypes array of element type `ctype` filled from `values`.

    The array has exactly len(values) elements.
    """
    array_type = ctype * len(values)
    buffer = array_type()
    buffer[:] = values
    return buffer

class BOX(Structure):
    """ctypes structure with four C float fields: x, y, w, h (in that order).

    NOTE(review): presumably mirrors darknet's C `box` struct - the field
    order and types must match the C definition; confirm against the library.
    """
    _fields_ = [(field, c_float) for field in ("x", "y", "w", "h")]

class DETECTION(Structure):
    """ctypes structure describing one detection returned by the library.

    Fields, in declaration order: the bounding box, the class count, a
    pointer to per-class floats (`prob`), a float pointer `mask`, a float
    `objectness` and an int `sort_class`.
    NOTE(review): presumably mirrors darknet's C `detection` struct - the
    field order and types must match the C definition; confirm.
    """
    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]


class IMAGE(Structure):
    """ctypes structure with int fields w, h, c followed by a float pointer `data`.

    NOTE(review): presumably mirrors darknet's C `image` struct - the field
    order and types must match the C definition; confirm.
    """
    _fields_ = [(dim, c_int) for dim in ("w", "h", "c")] + [("data", POINTER(c_float))]

class METADATA(Structure):
    """ctypes structure holding an int `classes` and a pointer to C strings `names`.

    NOTE(review): presumably `names` has `classes` entries - confirm against the library.
    """
    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]

    

# Load the darknet shared library and declare the argument / return types of
# every C function used by the Python helpers below. Declaring `argtypes` and
# `restype` lets ctypes convert arguments and results correctly; functions
# without an explicit `restype` use the ctypes default (C int).
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("/content/darknet/libdarknet.so", RTLD_GLOBAL)
# The network handle is passed around as an opaque void pointer.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# Raw forward pass on a float buffer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# The final POINTER(c_int) argument receives the number of detections
# (see how `detect` reads it back through its `pnum` pointer).
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# Releases a detection array obtained from get_network_boxes.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# NOTE: refers to the same C function as `predict` above (lib.network_predict).
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_net(cfg_path, weights_path, int) -> opaque network handle; the paths
# must be bytes objects (c_char_p).
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# load_meta(data_file_path) -> METADATA (class count and class names).
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

# NOTE(review): by its name this swaps the red and blue channels in place - confirm.
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

# Forward pass on an IMAGE struct; returns a pointer to the output floats.
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

def array_to_image(arr):
    """Wrap a 3-D array in an IMAGE struct without copying it into C memory.

    `arr` is indexed as (height, width, channels); it is reordered to
    channel-first, flattened, converted to float32 and divided by 255.

    Returns a tuple `(im, pixels)`: the IMAGE whose `data` pointer refers to
    the buffer of `pixels`, and `pixels` itself. The caller must keep
    `pixels` alive for as long as `im` is in use, otherwise Python may free
    the memory the pointer refers to.
    """
    import numpy as np
    channel_first = arr.transpose(2, 0, 1)
    c, h, w = channel_first.shape[0], channel_first.shape[1], channel_first.shape[2]
    pixels = np.ascontiguousarray(channel_first.flat, dtype=np.float32) / 255.0
    pixel_ptr = pixels.ctypes.data_as(POINTER(c_float))
    return IMAGE(w, h, c, pixel_ptr), pixels

def classify(net, meta, im):
    """Run the network on `im` and return (class name, score) pairs.

    One pair is produced for each of the `meta.classes` classes, and the
    list is ordered by descending score.
    """
    scores = predict_image(net, im)
    ranked = [(meta.names[k], scores[k]) for k in range(meta.classes)]
    ranked.sort(key=lambda pair: -pair[1])
    return ranked

def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
    """Detect objects in an image file.

    `image` is the path of the image as bytes; it is loaded through the
    library. Non-maximum suppression is applied when `nms` is truthy.

    Returns a list of `(class name, probability, (x, y, w, h))` tuples for
    every class with a probability above zero, ordered by descending
    probability. The loaded image and the detection array are freed before
    returning.
    """
    im = load_image(image, 0, 0)
    count = c_int(0)
    count_ptr = pointer(count)
    predict_image(net, im)
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, count_ptr)
    num = count_ptr[0]
    if nms:
        do_nms_obj(dets, num, meta.classes, nms)

    found = []
    for det_index in range(num):
        det = dets[det_index]
        for class_index in range(meta.classes):
            if det.prob[class_index] > 0:
                box = det.bbox
                found.append((meta.names[class_index],
                              det.prob[class_index],
                              (box.x, box.y, box.w, box.h)))
    found.sort(key=lambda item: -item[1])
    free_image(im)
    free_detections(dets, num)
    return found

def detect_mine(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
    """Detect objects in an in-memory image (e.g. a video frame).

    Parameters:
        net: network handle returned by `load_net`.
        meta: METADATA returned by `load_meta`.
        image: 3-D array accepted by `array_to_image` (height, width, channels).
        thresh, hier_thresh: thresholds forwarded to `get_network_boxes`.
        nms: non-maximum-suppression threshold; NMS is skipped when falsy.

    Returns:
        A list of `(class name, probability, (x, y, w, h))` tuples, one for
        every class with a non-zero probability, ordered by descending
        probability.
    """
    # `pixels` owns the memory that `im.data` points to, so it must stay
    # referenced until the network has consumed the image.
    im, pixels = array_to_image(image)
    rgbgr_image(im)
    num = c_int(0)
    pnum = pointer(num)
    predict_image(net, im)
    dets = get_network_boxes(net, im.w, im.h, thresh,
                             hier_thresh, None, 0, pnum)
    num = pnum[0]

    try:
        if nms:
            do_nms_obj(dets, num, meta.classes, nms)

        res = []
        for j in range(num):
            probs = dets[j].prob[0:meta.classes]
            b = dets[j].bbox
            for i, prob in enumerate(probs):
                if prob != 0:
                    res.append((meta.names[i], prob, (b.x, b.y, b.w, b.h)))
        res.sort(key=lambda x: -x[1])
    finally:
        # Always release the detection array: it is allocated by the library
        # on every call, so skipping this leaks memory for each frame.
        # The image is deliberately NOT passed to free_image: its buffer
        # belongs to the numpy array `pixels`, not to the C library.
        free_detections(dets, num)

    return res

import numpy as np
import time
import matplotlib.pyplot as plt
    
if __name__ == "__main__":
    # Load the YOLOv3 network and the COCO metadata. The paths are relative,
    # so the working directory must be the darknet checkout.
    net = load_net(b"cfg/yolov3.cfg", b"yolov3.weights", 0)
    meta = load_meta(b"cfg/coco.data")

    # Single-image alternative to the video loop below:
    #   r_mine = detect_mine(net, meta, cv2.imread("data/dog.jpg"))
    #   r = detect(net, meta, b"data/dog.jpg")

    # Input video (on the mounted Google Drive) and annotated output file.
    video_in = "/content/gdrive/My Drive/Videos/Vehicles/tmroad.mp4"
    video_out = '/content/darknet/res_tmroad.avi'
    output_fps = 20

    # Colour tuples handed to the cv2 drawing calls.
    colors = [(57, 255, 25), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255), (255, 127, 255), (127, 0, 255), (127, 0, 127)]
    # Classes drawn with a dedicated colour (index into `colors`);
    # every other class uses `default_color`.
    label_color = {'bicycle': 0, 'bus': 1, 'car': 2, 'motorbike': 3, 'person': 4, 'truck': 5}
    default_color = 7

    # Create a VideoCapture object
    cap = cv2.VideoCapture(video_in)

    if not cap.isOpened():
        # Nothing can be processed; do not create an empty output file.
        print("Unable to read camera feed")
        cap.release()
    else:
        # The frame size is reported as float and must be converted to int.
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Motion-JPEG encoded writer for the annotated frames.
        out = cv2.VideoWriter(video_out, cv2.VideoWriter_fourcc('M','J','P','G'), output_fps, (frame_width, frame_height))

        counter = 0
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # End of the video (or a read error): stop processing.
                    break

                counter += 1
                print("Frame id:", counter)

                # Time the detection on this frame.
                start_ = time.time()
                results = detect_mine(net, meta, frame)
                end_ = time.time()
                print(end_ - start_)

                for name, prob, (center_x, center_y, box_w, box_h) in results:
                    # Class names come back from the C library as bytes.
                    label = name.decode() if isinstance(name, bytes) else str(name)
                    confidence = round(prob * 100, 2)
                    text = label + ":" + str(confidence)

                    # Convert the centre/size box to corner coordinates.
                    left = int(center_x - box_w / 2.0)
                    top = int(center_y - box_h / 2.0)
                    right = int(left + box_w)
                    bottom = int(top + box_h)

                    color = colors[label_color.get(label, default_color)]
                    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                    cv2.putText(frame, text, (left, top - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

                # Append the annotated frame to the output video.
                out.write(frame)
                # No window is ever opened, so there are no key events to
                # poll and no windows to destroy afterwards.
        finally:
            # Release both objects even if detection fails midway; releasing
            # the writer finalises the output file.
            cap.release()
            out.release()
