In [23]:
import cv2
import numpy as np

# Open camera device 0 once; the streaming cells further down read their frames from `cap`.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail right here with a clear message instead of crashing later on a `None` frame.
    raise RuntimeError("Could not open video capture device 0")

### Read and split the labels

In [3]:
# Load the class names from a comma-separated file. The `with` block guarantees
# the handle is closed even if reading fails part-way.
with open("model/labels.txt", 'r') as label_file:
    raw_labels = label_file.read().split(',')

# Remove quote characters plus any surrounding whitespace/newline so that
# labels[i] is the clean name for class id i (the list order is the id order
# used when indexing `labels` with a predicted class id).
labels = [word.replace('"','').replace("'",'').strip() for word in raw_labels]
print(len(labels),labels)
print(labels[0])

80 ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'trafficlight', 'firehydrant', 'stopsign', 'parkingmeter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sportsball', 'kite', 'baseballbat', 'baseballglove', 'skateboard', 'surfboard', 'tennisracket', 'bottle', 'wineglass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cellphone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddybear', 'hairdrier', 'toothbrush']
person


### Create color list for bounding boxes

In [4]:
# Eight base colours, each written as a "c0,c1,c2" string of channel values.
palette_specs = ["0,255,0","0,0,255","255,0,0","0,120,30","0,30,120","50,50,50","50,0,50","50,50,100"]

# Parse every spec into a row of three integers -> integer array of shape (8, 3).
palette = np.array(
    [[int(channel) for channel in spec.split(",")] for spec in palette_specs],
    dtype="int",
)

# Repeat the palette 10 times along the rows so there are 8 * 10 = 80 colour
# rows; row i is later looked up with a predicted class id.
colors = np.tile(palette, (10, 1))
len(colors)

80

### Read the model and layers

In [6]:
# Build the network from the Darknet config and weight files.
model = cv2.dnn.readNetFromDarknet("model/yolov3.cfg","model/yolov3.weights")
layers = model.getLayerNames()

# getUnconnectedOutLayers() gives 1-based indices into `layers` (hence the -1).
# Depending on the OpenCV build the indices arrive either as a flat array or as
# an (N, 1) array; flattening first handles both and avoids calling int() on a
# 1-element array, which recent NumPy versions deprecate.
output_layers = [layers[int(layer)-1] for layer in np.array(model.getUnconnectedOutLayers()).flatten()]

### Stream from the video camera and detect objects

In [28]:
# Per-frame pipeline: grab a frame from `cap`, run it through `model`, keep the
# detections that survive non-maximum suppression, draw them, and show the
# annotated frame in an OpenCV window. Press "q" in the window to stop.
try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            # No frame was delivered (camera closed, unplugged or already
            # released): stop cleanly instead of failing on `frame.shape`.
            break
        frame_height, frame_width = frame.shape[:2]

        # Scale pixel values by 1/255, resize to 416x416 and swap the red and
        # blue channels before feeding the network.
        frame_blob = cv2.dnn.blobFromImage(frame, 1/255, (416,416), swapRB=True,crop=False)
        model.setInput(frame_blob)
        detection_layers = model.forward(output_layers)

        ## non-maximum suppression step 1: collect every candidate ##
        ids_list = []
        boxes_list = []
        confidences_list = []
        for detection_layer in detection_layers:
            for object_detection in detection_layer:
                # Entries from index 5 onward are the per-class scores.
                scores = object_detection[5:]
                predicted_id = np.argmax(scores)
                confidence = scores[predicted_id]
                if confidence > 0.20:  # keep only candidates whose best class score exceeds 0.20
                    # The first four entries are relative (center_x, center_y,
                    # width, height); scale them to pixel units of this frame.
                    bounding_box = object_detection[0:4] * np.array([frame_width,frame_height,frame_width,frame_height])
                    (box_center_x, box_center_y,box_width, box_height) = bounding_box.astype("int")

                    # Convert the center point to the top-left corner.
                    start_x = int(box_center_x - (box_width/2))
                    start_y = int(box_center_y - (box_height/2))

                    ## non-maximum suppression step 2: remember the candidate ##
                    ids_list.append(predicted_id)
                    confidences_list.append(float(confidence))
                    boxes_list.append([start_x,start_y,int(box_width),int(box_height)])

        ## non-maximum suppression step 3: keep only the surviving boxes ##
        max_ids = cv2.dnn.NMSBoxes(boxes_list, confidences_list, 0.5, 0.4)
        # NMSBoxes returns the kept indices as a flat array, an (N, 1) array or
        # an empty tuple depending on the OpenCV build and on whether anything
        # was kept; flattening covers all three.
        for max_class_id in np.array(max_ids).flatten():
            max_class_id = int(max_class_id)
            start_x, start_y, box_width, box_height = boxes_list[max_class_id]

            predicted_id = ids_list[max_class_id]
            label = labels[predicted_id]
            confidence = confidences_list[max_class_id]

            end_x = start_x + box_width
            end_y = start_y + box_height

            # OpenCV drawing calls want plain Python ints for the colour.
            box_color = [int(each) for each in colors[predicted_id]]

            label = "{}: {:.2f}%".format(label,confidence*100)
            print("predicted object: {}".format(label))

            cv2.rectangle(frame, (start_x, start_y),(end_x, end_y),box_color,2)
            cv2.putText(frame, label, (start_x, start_y-10),cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color,1)
        cv2.imshow("Detection window", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even when the loop is left
    # through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()
# Kept from the original cell; with the window already destroyed this appears
# to return immediately (the recorded cell output is -1).
cv2.waitKey(0)

predicted object: person: 76.65%
predicted object: person: 97.93%
predicted object: person: 99.99%
predicted object: person: 99.98%
predicted object: person: 99.96%
predicted object: person: 99.77%
predicted object: person: 99.46%
predicted object: person: 99.66%
predicted object: person: 99.87%
predicted object: person: 99.82%
predicted object: person: 99.84%
predicted object: person: 99.90%
predicted object: person: 99.90%
predicted object: person: 99.71%
predicted object: cup: 99.03%
predicted object: person: 99.71%
predicted object: cup: 88.04%
predicted object: cup: 99.64%
predicted object: person: 99.59%
predicted object: cup: 98.82%
predicted object: person: 98.42%
predicted object: cup: 98.58%
predicted object: person: 86.44%
predicted object: cup: 99.52%
predicted object: person: 97.54%
predicted object: cup: 99.70%
predicted object: person: 97.71%
predicted object: cup: 99.80%
predicted object: person: 99.12%
predicted object: person: 99.71%
predicted object: cup: 95.85%
pred

predicted object: cup: 99.72%
predicted object: cellphone: 98.96%
predicted object: person: 97.76%
predicted object: cellphone: 60.07%
predicted object: cup: 99.73%
predicted object: cellphone: 98.77%
predicted object: person: 98.39%
predicted object: cup: 99.77%
predicted object: person: 98.30%
predicted object: cellphone: 97.50%
predicted object: cellphone: 60.47%
predicted object: cup: 99.74%
predicted object: cellphone: 98.35%
predicted object: person: 97.40%
predicted object: cup: 99.76%
predicted object: cellphone: 97.22%
predicted object: person: 96.76%
predicted object: cup: 99.74%
predicted object: person: 98.99%
predicted object: cellphone: 95.37%
predicted object: cup: 99.66%
predicted object: person: 98.63%
predicted object: cellphone: 98.13%
predicted object: cup: 99.49%
predicted object: person: 98.81%
predicted object: cellphone: 97.65%
predicted object: cup: 99.65%
predicted object: cellphone: 98.01%
predicted object: person: 97.89%
predicted object: cup: 99.79%
predict

-1

### Stream from the video camera and detect objects (inline display)

In [None]:
from PIL import Image
import cv2 
from IPython.display import display
from IPython.display import clear_output
# Inline variant of the detection loop: the per-frame pipeline is the same as in
# the windowed cell, but each annotated frame is rendered in the notebook output
# via PIL/IPython instead of an OpenCV window.
# NOTE(review): no `except`/`finally` clause for this `try` is visible in this
# view, and the loop has no exit condition (the "q" check is commented out) —
# confirm how the cell is meant to be stopped and where `cap` gets released.
# NOTE(review): the windowed cell ends with `cap.release()`; if that cell ran
# first, `cap.read()` here presumably yields no frame — verify.
try:
    while True:
        # NOTE(review): `ret` is not checked, so `frame.shape` fails if no frame was read.
        ret, frame = cap.read()
        frame_width = frame.shape[1]
        frame_height = frame.shape[0]

        # Scale pixel values by 1/255, resize to 416x416 and swap the red and
        # blue channels before feeding the network.
        frame_blob = cv2.dnn.blobFromImage(frame, 1/255, (416,416), swapRB=True,crop=False)
        model.setInput(frame_blob)
        detection_layers = model.forward(output_layers)

        # Candidates collected for non-maximum suppression (parallel lists).
        ids_list = []
        boxes_list = []
        confidences_list = []
        for detection_layer in detection_layers:
            for object_detection in detection_layer:
                # Entries from index 5 onward are treated as per-class scores.
                scores = object_detection[5:]
                predicted_id = np.argmax(scores)
                confidence = scores[predicted_id]
                if confidence > 0.20: # keep the candidate only if its best class score exceeds 0.20
                    label = labels[predicted_id]
                    # First four entries are scaled by frame width/height and
                    # unpacked as (center_x, center_y, width, height) in pixels.
                    bounding_box = object_detection[0:4] * np.array([frame_width,frame_height,frame_width,frame_height])
                    (box_center_x, box_center_y,box_width, box_height) = bounding_box.astype("int")

                    # Convert the center point to the top-left corner.
                    start_x = int(box_center_x - (box_width/2))
                    start_y = int(box_center_y - (box_height/2))

                    ## non-maximum suppression step 2: remember the candidate ##
                    ids_list.append(predicted_id)
                    confidences_list.append(float(confidence))
                    boxes_list.append([start_x,start_y,int(box_width),int(box_height)])
                    ## end of non-maximum suppression step 2 ##

        ## non-maximum suppression step 3: keep only the surviving boxes ##
        max_ids = cv2.dnn.NMSBoxes(boxes_list, confidences_list, 0.5, 0.4)
        for max_id in max_ids:
            # NOTE(review): assumes each returned entry is itself indexable
            # (e.g. a 1-element array); some OpenCV builds may return plain
            # integers here — confirm against the installed version.
            max_class_id = max_id[0]
            box = boxes_list[max_class_id]
            start_x = box[0]
            start_y = box[1]
            box_width = box[2]
            box_height = box[3]

            predicted_id = ids_list[max_class_id]
            label = labels[predicted_id]
            confidence = confidences_list[max_class_id]
        ## end of non-maximum suppression step 3 ##

            # Bottom-right corner of the box.
            end_x = start_x + box_width
            end_y = start_y + box_height

            # Look up the colour row for this class and convert it to plain ints.
            box_color = colors[predicted_id]
            box_color = [int(each) for each in box_color]

            label = "{}: {:.2f}%".format(label,confidence*100)
            print("predicted object: {}".format(label))

            # Draw the box and put the caption 10 px above its top-left corner.
            cv2.rectangle(frame, (start_x, start_y),(end_x, end_y),box_color,2)
            cv2.putText(frame, label, (start_x, start_y-10),cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color,1)
        #cv2.imshow("Detection window", frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Converting BGR to RGB
        # Render the frame in the cell output; clear_output(wait=True) defers
        # clearing until the next output arrives.
        display(Image.fromarray(frame))
        clear_output(wait=True)
        #if cv2.waitKey(1) & 0xFF == ord("q"):
            #break
#cap.release()
#cv2.destroyAllWindows()
#cv2.waitKey(0)