## FOR WEBCAM

In [None]:
import cv2
import numpy as np

# Open capture device 0 (the only camera when a single one is connected).
cap = cv2.VideoCapture(0)

# Class labels: one name per line; a label's position in the list is its class id.
with open('coco.names', 'rt') as names_file:
    raw_names = names_file.read()
classNames = raw_names.rstrip('\n').split('\n')

# Build the network from its Darknet config + weights and run it with the
# OpenCV backend on the CPU.
net = cv2.dnn.readNetFromDarknet("yolov3-320.cfg", "yolov3-320.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


def findObjects(outputs, img, confThreshold=0.5, scoreThreshold=0.6, nmsThreshold=0.5):
    """Draw a labelled box on `img` for every detection that survives filtering and NMS.

    Args:
        outputs: iterable of detection arrays (the callers in this file pass the
            result of ``net.forward``). Each row is read as
            ``[cx, cy, w, h, <index 4, unused here>, per-class scores...]`` and the
            four box values are treated as fractions of the image width/height.
        img: image array of shape (height, width[, channels]); annotated in place.
        confThreshold: a detection is kept only if its best class score exceeds this.
        scoreThreshold: score threshold handed to ``cv2.dnn.NMSBoxes``.
        nmsThreshold: overlap threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns:
        None. The label text is looked up in the module-level ``classNames`` list.
    """
    # Only height and width are needed; slicing also accepts single-channel images.
    hT, wT = img.shape[:2]

    bbox = []      # boxes as [x, y, w, h] in pixels, (x, y) being the top-left corner
    classIds = []  # best class index of each kept detection
    confs = []     # score of that class, as a plain float

    for output in outputs:
        for det in output:
            scores = det[5:]  # per-class scores start at index 5
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence > confThreshold:
                w, h = int(det[2] * wT), int(det[3] * hT)
                # det[0], det[1] is the box centre; shift by half the size to get
                # the top-left corner that cv2.rectangle / NMSBoxes expect.
                x, y = int(det[0] * wT - w / 2), int(det[1] * hT - h / 2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(confidence)

    if not bbox:
        # Nothing passed the confidence filter, so there is nothing to suppress or draw.
        return

    # NMSBoxes(bboxes, scores, score_threshold, nms_threshold) returns the indices
    # of the boxes that are kept after non-maximum suppression.
    indices = cv2.dnn.NMSBoxes(bbox, confs, scoreThreshold, nmsThreshold)

    # Depending on the OpenCV release the indices come back as an (N, 1) array, a
    # flat array, or an empty tuple; flattening handles all three.
    for i in np.array(indices).flatten():
        i = int(i)
        x, y, w, h = bbox[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
                    (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
    
# The output layers of the network do not change between frames, so resolve their
# names once. getUnconnectedOutLayersNames() also avoids indexing the result of
# getUnconnectedOutLayers(), whose shape differs between OpenCV releases.
outputNames = net.getUnconnectedOutLayersNames()

try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # No frame could be grabbed (camera unavailable / stream ended).
            break

        # blobFromImage(image, scalefactor, size, mean, swapRB, crop):
        #   scalefactor 1/255 rescales pixel values to [0, 1],
        #   size (320, 320) is the spatial size fed to the network,
        #   mean [0, 0, 0] subtracts nothing,
        #   swapRB=True swaps the first and last channels (BGR -> RGB),
        #   crop=False resizes directly, without cropping or keeping aspect ratio.
        blob = cv2.dnn.blobFromImage(img, 1/255, (320,320), [0,0,0], swapRB=True, crop=False)

        net.setInput(blob)
        outputs = net.forward(outputNames)

        # Draws the surviving detections directly onto img.
        findObjects(outputs, img)

        cv2.imshow('Image', img)

        # Press 'q' to quit.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Free the capture device and close the preview window, even on an error.
    cap.release()
    cv2.destroyAllWindows()

## FOR VIDEOS

In [None]:
import cv2
import numpy as np

# Open the input video file as the frame source.
cap = cv2.VideoCapture("video/pedestrians.mp4")

# Class labels: one name per line; a label's position in the list is its class id.
with open('coco.names', 'rt') as names_file:
    raw_names = names_file.read()
classNames = raw_names.rstrip('\n').split('\n')

# Build the network from its Darknet config + weights and run it with the
# OpenCV backend on the CPU.
net = cv2.dnn.readNetFromDarknet("yolov3-320.cfg", "yolov3-320.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


def findObjects(outputs, img, confThreshold=0.5, scoreThreshold=0.6, nmsThreshold=0.5):
    """Draw a labelled box on `img` for every detection that survives filtering and NMS.

    Args:
        outputs: iterable of detection arrays (the callers in this file pass the
            result of ``net.forward``). Each row is read as
            ``[cx, cy, w, h, <index 4, unused here>, per-class scores...]`` and the
            four box values are treated as fractions of the image width/height.
        img: image array of shape (height, width[, channels]); annotated in place.
        confThreshold: a detection is kept only if its best class score exceeds this.
        scoreThreshold: score threshold handed to ``cv2.dnn.NMSBoxes``.
        nmsThreshold: overlap threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns:
        None. The label text is looked up in the module-level ``classNames`` list.
    """
    # Only height and width are needed; slicing also accepts single-channel images.
    hT, wT = img.shape[:2]

    bbox = []      # boxes as [x, y, w, h] in pixels, (x, y) being the top-left corner
    classIds = []  # best class index of each kept detection
    confs = []     # score of that class, as a plain float

    for output in outputs:
        for det in output:
            scores = det[5:]  # per-class scores start at index 5
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence > confThreshold:
                w, h = int(det[2] * wT), int(det[3] * hT)
                # det[0], det[1] is the box centre; shift by half the size to get
                # the top-left corner that cv2.rectangle / NMSBoxes expect.
                x, y = int(det[0] * wT - w / 2), int(det[1] * hT - h / 2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(confidence)

    if not bbox:
        # Nothing passed the confidence filter, so there is nothing to suppress or draw.
        return

    # NMSBoxes(bboxes, scores, score_threshold, nms_threshold) returns the indices
    # of the boxes that are kept after non-maximum suppression.
    indices = cv2.dnn.NMSBoxes(bbox, confs, scoreThreshold, nmsThreshold)

    # Depending on the OpenCV release the indices come back as an (N, 1) array, a
    # flat array, or an empty tuple; flattening handles all three.
    for i in np.array(indices).flatten():
        i = int(i)
        x, y, w, h = bbox[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
                    (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
    
# The output layers of the network do not change between frames, so resolve their
# names once. getUnconnectedOutLayersNames() also avoids indexing the result of
# getUnconnectedOutLayers(), whose shape differs between OpenCV releases.
outputNames = net.getUnconnectedOutLayersNames()

try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # End of the video (or the file could not be read): stop cleanly
            # instead of passing an empty frame to blobFromImage.
            break

        # blobFromImage(image, scalefactor, size, mean, swapRB, crop):
        #   scalefactor 1/255 rescales pixel values to [0, 1],
        #   size (320, 320) is the spatial size fed to the network,
        #   mean [0, 0, 0] subtracts nothing,
        #   swapRB=True swaps the first and last channels (BGR -> RGB),
        #   crop=False resizes directly, without cropping or keeping aspect ratio.
        blob = cv2.dnn.blobFromImage(img, 1/255, (320,320), [0,0,0], swapRB=True, crop=False)

        net.setInput(blob)
        outputs = net.forward(outputNames)

        # Draws the surviving detections directly onto img.
        findObjects(outputs, img)

        cv2.imshow('Image', img)

        # Press 'q' to quit.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Free the video handle and close the preview window, even on an error.
    cap.release()
    cv2.destroyAllWindows()

## FOR IMAGES

In [None]:
import cv2
import numpy as np

# Load the input picture from disk.
img = cv2.imread("images/busy_street.jpg")
# Optional downscale for very large pictures:
#img = cv2.resize(img, None, fx=0.3, fy=0.3)

# Class labels: one name per line; a label's position in the list is its class id.
with open('coco.names', 'rt') as names_file:
    raw_names = names_file.read()
classNames = raw_names.rstrip('\n').split('\n')

# Build the network from its Darknet config + weights and run it with the
# OpenCV backend on the CPU.
net = cv2.dnn.readNetFromDarknet("yolov3-320.cfg", "yolov3-320.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


def findObjects(outputs, img, confThreshold=0.5, scoreThreshold=0.6, nmsThreshold=0.5):
    """Draw a labelled box on `img` for every detection that survives filtering and NMS.

    Args:
        outputs: iterable of detection arrays (the callers in this file pass the
            result of ``net.forward``). Each row is read as
            ``[cx, cy, w, h, <index 4, unused here>, per-class scores...]`` and the
            four box values are treated as fractions of the image width/height.
        img: image array of shape (height, width[, channels]); annotated in place.
        confThreshold: a detection is kept only if its best class score exceeds this.
        scoreThreshold: score threshold handed to ``cv2.dnn.NMSBoxes``.
        nmsThreshold: overlap threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns:
        None. The label text is looked up in the module-level ``classNames`` list.
    """
    # Only height and width are needed; slicing also accepts single-channel images.
    hT, wT = img.shape[:2]

    bbox = []      # boxes as [x, y, w, h] in pixels, (x, y) being the top-left corner
    classIds = []  # best class index of each kept detection
    confs = []     # score of that class, as a plain float

    for output in outputs:
        for det in output:
            scores = det[5:]  # per-class scores start at index 5
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence > confThreshold:
                w, h = int(det[2] * wT), int(det[3] * hT)
                # det[0], det[1] is the box centre; shift by half the size to get
                # the top-left corner that cv2.rectangle / NMSBoxes expect.
                x, y = int(det[0] * wT - w / 2), int(det[1] * hT - h / 2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(confidence)

    if not bbox:
        # Nothing passed the confidence filter, so there is nothing to suppress or draw.
        return

    # NMSBoxes(bboxes, scores, score_threshold, nms_threshold) returns the indices
    # of the boxes that are kept after non-maximum suppression.
    indices = cv2.dnn.NMSBoxes(bbox, confs, scoreThreshold, nmsThreshold)

    # Depending on the OpenCV release the indices come back as an (N, 1) array, a
    # flat array, or an empty tuple; flattening handles all three.
    for i in np.array(indices).flatten():
        i = int(i)
        x, y, w, h = bbox[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
                    (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
    
# Single-image inference: run the network once, draw the detections, show the
# result until a key is pressed. (No loop is needed for one picture.)

# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("cv2.imread returned None - check the input image path")

# blobFromImage(image, scalefactor, size, mean, swapRB, crop):
#   scalefactor 1/255 rescales pixel values to [0, 1],
#   size (320, 320) is the spatial size fed to the network,
#   mean [0, 0, 0] subtracts nothing,
#   swapRB=True swaps the first and last channels (BGR -> RGB),
#   crop=False resizes directly, without cropping or keeping aspect ratio.
blob = cv2.dnn.blobFromImage(img, 1/255, (320,320), [0,0,0], swapRB=True, crop=False)

net.setInput(blob)

# getUnconnectedOutLayersNames() gives the output layer names directly and avoids
# indexing the result of getUnconnectedOutLayers(), whose shape differs between
# OpenCV releases.
outputNames = net.getUnconnectedOutLayersNames()
outputs = net.forward(outputNames)

# Draws the surviving detections directly onto img.
findObjects(outputs, img)

cv2.imshow('Image', img)

# Block until any key is pressed, then close the window.
cv2.waitKey(0)
cv2.destroyAllWindows()