# Tests for Object detection with OpenCV
The following notebook contains some tests with different models and images for object detection.

In [3]:
import time
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Report the OpenCV build in use, then confirm that all imports succeeded
print("OpenCV: {}".format(cv2.__version__))
print("IMPORTS OK")

OpenCV: 4.5.3
IMPORTS OK


In [4]:
# Resolution of the robot camera in pixels; it is printed next to the size
# of the loaded picture further down
WIDTH_ROBOT, HEIGHT_ROBOT = 960, 600

# Picture that the detection cells of this notebook work on
image_path = "./img/obj_top.png"

# Set to True to render preview images inline in the notebook
show_image = False

In [5]:
# Read the test image as a 3-channel colour array
img_original = cv2.imread(image_path, cv2.IMREAD_COLOR)
# cv2.imread does not raise when the file is missing or unreadable, it
# returns None; fail here with a clear message instead of an
# AttributeError on `.shape` below
if img_original is None:
    raise FileNotFoundError("Could not read image: " + str(image_path))

# Image size in pixels: shape[0] is the height (rows), shape[1] the width
(h, w) = img_original.shape[:2]
WIDTH = w
HEIGHT = h
print("dimensions image upload: "+ str((h,w)) )
print("dimensions robot camera: "+ str((HEIGHT_ROBOT, WIDTH_ROBOT)) )
# Compare against the robot camera resolution. The previous check compared
# (h, w) with (HEIGHT, WIDTH), which are copies of h and w, so it could
# never report a mismatch.
print("Same dimensions: "+str((h,w)==(HEIGHT_ROBOT, WIDTH_ROBOT)))

def imshow(image_passed, save=False, save_path="./img/save.png"):
    """Preview an image inline and optionally write it to disk.

    Parameters
    ----------
    image_passed : array
        Image in BGR channel order (it is converted BGR -> RGB for display).
    save : bool, optional
        When True the image is written to ``save_path``.
    save_path : str, optional
        Destination file. Defaults to "./img/save.png", the location this
        notebook has always written to.

    The preview is only drawn when the notebook-level ``show_image`` flag
    is True.
    """
    if show_image:
        # cv2.imshow("image", image_passed) is not working in a Jupyter
        # notebook, so the picture is rendered through matplotlib instead
        plt.imshow(cv2.cvtColor(image_passed, cv2.COLOR_BGR2RGB))
        plt.show()
    if save:
        # cv2.imwrite signals a failed write through its boolean return
        # value; only announce success when the file was really written
        if cv2.imwrite(save_path, image_passed):
            print("Saved")
        else:
            print("Could not save image to " + str(save_path))

# Preview the freshly loaded picture (drawn only when show_image is True)
imshow(img_original, save=False)

dimensions image upload: (600, 960)
dimensions robot camera: (600, 960)
Same dimensions: True


## MobileNet SSD Object Detection

In [4]:
# Caffe network description (prototxt) and trained weights (caffemodel)
PROTO, MODEL = (
    "./models/MobileNetSSD_deploy.prototxt",
    "./models/MobileNetSSD_deploy.caffemodel",
)
# Build the MobileNet SSD detector through OpenCV's dnn module
net = cv2.dnn.readNetFromCaffe(PROTO, MODEL)
print("Net Loaded")

Net Loaded


In [5]:
# Draw on a separate copy so that img_original is left untouched
img_obj = img_original.copy()

# Preview the input picture (drawn only when show_image is True)
imshow(img_obj, save=False)

# Input size the image is resized to (noted as the dimensions the net was
# trained on)
RESIZED_DIMENSIONS = (300, 300)
# Scale factor handed to blobFromImage; 0.007843 is approximately 1 / 127.5
IMG_NORM_RATIO = 0.007843

# Pascal VOC class names; the class id reported by the network is used as
# an index into this list
classes = [
    "background",
    "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog",
    "horse", "motorbike", "person", "pottedplant",
    "sheep", "sofa", "train", "tvmonitor",
]


#FORWARD image
# Pre-process the frame into the input blob for the network: the picture
# is resized to RESIZED_DIMENSIONS, the mean value 127.5 is passed for
# subtraction and IMG_NORM_RATIO is passed as the scale factor.
blob = cv2.dnn.blobFromImage(cv2.resize(img_obj, RESIZED_DIMENSIONS),
             IMG_NORM_RATIO, RESIZED_DIMENSIONS, 127.5)

# Set the input for the neural network
net.setInput(blob)
# Predict the objects in the image and time the forward pass
start = time.time()
neural_network_output = net.forward()
print('Prediction took {:.5f} seconds'.format(time.time() - start))

# The detections are indexed along axis 2 of the output (see the drawing
# loop, which reads neural_network_output[0, 0, i, ...]). The previous check
# used len(), which only measures the first axis, so it did not reflect
# whether any detection was returned.
num_detections = neural_network_output.shape[2]
if num_detections != 0:
    print("Detections: " + str(num_detections))
else:
    print("No Detections")

conf_threshold = 0.15

# Draw a labelled rectangle for every detection above the threshold.
# Row layout as used below: index 1 = class id, index 2 = confidence,
# indices 3..6 = box corners, which are multiplied by the image size.
box_scale = np.array([WIDTH, HEIGHT, WIDTH, HEIGHT])
for i in range(neural_network_output.shape[2]):
    detection = neural_network_output[0, 0, i]
    confidence = detection[2]
    if not confidence > conf_threshold:
        continue  # too weak, skip this detection

    idx = int(detection[1])
    bounding_box = detection[3:7] * box_scale
    (startX, startY, endX, endY) = bounding_box.astype("int")

    label = "{}: {:.2f}%".format(classes[idx], confidence * 100)
    cv2.rectangle(img_obj, (startX, startY), (endX, endY), (255,0,0), 2)

    # Label goes 15 px above the box top, or 15 px below it when the box
    # starts within 30 px of the upper image edge
    if startY > 30:
        y = startY - 15
    else:
        y = startY + 15
    cv2.putText(img_obj, label, (startX, y),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

# Show the annotated picture (if previews are enabled) and write it to disk
imshow(img_obj, save=True)

Prediction took 2.29557 seconds
Detections: 4
Saved


## YOLO V3

### COCO

In [7]:
# Darknet network description and trained weights for YOLOv3 (COCO).
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = "./models/yolov3.cfg", "./models/yolov3.weights"
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

Net Loaded
Classes: 80


### OPEN IMAGES (OFFICIAL)
https://github.com/AlexeyAB/darknet#pre-trained-models

In [8]:
# Darknet network description and trained weights for YOLOv3 (Open Images).
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = (
    "./models/yolov3-openimages.cfg",
    "./models/yolov3-openimages.weights",
)
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/open_images_yolo.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

Net Loaded
Classes: 601


### OPEN IMAGES (SPP TRAINED)
credits: https://github.com/radekosmulski/yolo_open_images

In [9]:
# Darknet network description and trained weights for the SPP variant of
# YOLOv3 trained on Open Images.
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = (
    "./models/yolov3-openimages-spp.cfg",
    "./models/yolov3-openimages-spp.weights",
)
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/open_images.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

Net Loaded
Classes: 500


In [10]:
# Draw on a separate copy so that img_original is left untouched
img_yolo = img_original.copy()

# Preview the input picture (drawn only when show_image is True)
imshow(img_yolo, save=False)

# Build the network input: scale factor 1/255, target size 416x416,
# zero mean, R and B channels swapped, no cropping
blob = cv2.dnn.blobFromImage(
    img_yolo,
    scalefactor=1/255,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

# Hand the blob to the currently loaded network
net.setInput(blob)

# function to get the output layer names 
# in the architecture
def get_output_layers(net):
    """Return the names of the network's unconnected output layers.

    ``net`` must provide ``getLayerNames()`` and
    ``getUnconnectedOutLayers()`` (e.g. a network loaded with cv2.dnn).
    The ids returned by ``getUnconnectedOutLayers()`` are treated as
    1-based positions in ``getLayerNames()``.

    The ids are flattened first, so both an (N, 1) array (what the original
    ``i[0]`` indexing expected) and a flat (N,) array (returned by newer
    OpenCV releases) are accepted.
    """
    layer_names = net.getLayerNames()
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

# function to draw bounding box on the detected object with class name
def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection (rectangle plus "name: confidence" text) on img_yolo.

    (x, y) and (x_plus_w, y_plus_h) are the two opposite corners of the
    rectangle in pixels. The class name and the colour are looked up in the
    notebook-level ``classes`` and ``colors`` with ``class_id``.
    """
    label = str(classes[class_id])
    # Per-class colour converted to a list of plain Python ints
    box_color = [int(channel) for channel in colors[class_id]]
    cv2.rectangle(img_yolo, (x, y), (x_plus_w, y_plus_h), box_color, 2)

    caption = '{}: {:.2f}'.format(label, confidence)
    # Boxes that start at the very top get their label moved inside the image
    if y < 5:
        x, y = x + 15, y + 30
    # The text is written twice: in black first, then in the class colour
    # shifted by one pixel
    cv2.putText(img_yolo, caption, (x - 6, y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
    cv2.putText(img_yolo, caption, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)
    

# Fixed seed: every run produces the same colour table, so a class keeps
# its colour between runs
np.random.seed(42)
num_classes = len(classes)
# One random 3-channel colour per class (the upper bound 255 is exclusive)
colors = np.random.randint(0, 255, size=(num_classes, 3), dtype='uint8')
print("Colors generated: {}".format(colors.shape[0]))
    
    
# Timed forward pass: collect the predictions of all output layers
start = time.time()
output_layer_names = get_output_layers(net)
outs = net.forward(output_layer_names)
elapsed = time.time() - start
print('Prediction took {:.5f} seconds'.format(elapsed))

# Accumulators, one entry per detection that passes the threshold
class_ids, confidences, boxes = [], [], []
conf_threshold = 0.1
nms_threshold = 0.6 #lower=stronger

# Walk over every detection row of every output layer. Row layout as used
# here: [0], [1] = box centre, [2], [3] = box width and height (all
# multiplied by the image size), [5:] = per-class scores.
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if not confidence > conf_threshold:
            continue  # weak detection, ignore it

        center_x = int(detection[0] * WIDTH)
        center_y = int(detection[1] * HEIGHT)
        w = int(detection[2] * WIDTH)
        h = int(detection[3] * HEIGHT)
        # Convert the centre to the top-left corner of the box
        x = center_x - w / 2
        y = center_y - h / 2

        boxes.append([x, y, w, h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

# Apply non-max suppression to the collected boxes
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
# The original code indexed every entry with `i[0]`, i.e. it expected an
# (N, 1) array. Newer OpenCV releases return a flat array, and an empty
# result may be an empty tuple. Flattening makes all three cases work.
indices = np.array(indices).flatten()
if len(indices) != 0:
    print("Detections: " + str(indices.shape[0]))
else:
    print("No Detections")

# Go through the detections remaining after NMS and draw their boxes
for i in indices:
    i = int(i)
    x, y, w, h = boxes[i]
    draw_bounding_box(img_yolo, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))


#SHOW IMAGE
imshow(img_yolo, save=True)

Colors generated: 500
Prediction took 1.48988 seconds
Detections: 2
Saved


## Yolo V4 (requires OpenCV>4.3)
https://github.com/AlexeyAB/darknet#pre-trained-models

[YoloCSP](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp) 

Yolo9000 is currently not supported by OpenCV.


In [95]:
# Darknet network description and trained weights for YOLOv4.
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = "./models/yolov4.cfg", "./models/yolov4.weights"
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

# Thresholds read by the detection cell: minimum class score to keep a
# detection, and the NMS threshold (lower = stronger suppression)
conf_threshold, nms_threshold = 0.1, 0.7

Net Loaded
Classes: 80


In [97]:
# Darknet network description and trained weights for the "yolov4_new" model.
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = "./models/yolov4_new.cfg", "./models/yolov4_new.weights"
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

# Suggested thresholds for this model: minimum class score to keep a
# detection, and the NMS threshold (lower = stronger suppression)
conf_threshold, nms_threshold = 0.35, 0.05

Net Loaded
Classes: 80


In [88]:
# Darknet network description and trained weights for YOLOv4-CSP.
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = "./models/yolov4-csp.cfg", "./models/yolov4-csp.weights"
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

# Suggested thresholds for this model: minimum class score to keep a
# detection, and the NMS threshold (lower = stronger suppression)
conf_threshold, nms_threshold = 0.3, 0.01

Net Loaded
Classes: 80


In [93]:
# Darknet network description and trained weights for YOLOv4x-mish.
# Rebinds `net` and `classes`, which the detection cell further down reads.
modelConfig, modelWeigths = "./models/yolov4x-mish.cfg", "./models/yolov4x-mish.weights"
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

# The names file holds one class label per line
with open('./models/coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()
print("Classes: " + str(len(classes)))

# Suggested thresholds for this model: minimum class score to keep a
# detection, and the NMS threshold (lower = stronger suppression)
conf_threshold, nms_threshold = 0.3, 0.01

Net Loaded
Classes: 80


In [98]:
# Draw on a separate copy so that img_original is left untouched
img_yolo = img_original.copy()

# Preview the input picture (drawn only when show_image is True)
imshow(img_yolo, save=False)

# Build the network input: scale factor 1/255, target size 416x416,
# zero mean, R and B channels swapped, no cropping
blob = cv2.dnn.blobFromImage(
    img_yolo,
    scalefactor=1/255,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

# Hand the blob to the currently loaded network
net.setInput(blob)

# function to get the output layer names 
# in the architecture
def get_output_layers(net):
    """Return the names of the network's unconnected output layers.

    ``net`` must provide ``getLayerNames()`` and
    ``getUnconnectedOutLayers()`` (e.g. a network loaded with cv2.dnn).
    The ids returned by ``getUnconnectedOutLayers()`` are treated as
    1-based positions in ``getLayerNames()``.

    The ids are flattened first, so both an (N, 1) array (what the original
    ``i[0]`` indexing expected) and a flat (N,) array (returned by newer
    OpenCV releases) are accepted.
    """
    layer_names = net.getLayerNames()
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

# function to draw bounding box on the detected object with class name
def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection (rectangle plus "name: confidence" text) on img_yolo.

    (x, y) and (x_plus_w, y_plus_h) are the two opposite corners of the
    rectangle in pixels. The class name and the colour are looked up in the
    notebook-level ``classes`` and ``colors`` with ``class_id``.
    """
    label = str(classes[class_id])
    # Per-class colour converted to a list of plain Python ints
    box_color = [int(channel) for channel in colors[class_id]]
    cv2.rectangle(img_yolo, (x, y), (x_plus_w, y_plus_h), box_color, 2)

    caption = '{}: {:.2f}'.format(label, confidence)
    # Boxes that start at the very top get their label moved inside the image
    if y < 5:
        x, y = x + 15, y + 30
    # The text is written twice: in black first, then in the class colour
    # shifted by one pixel
    cv2.putText(img_yolo, caption, (x - 6, y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
    cv2.putText(img_yolo, caption, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)
    

# Fixed seed: every run produces the same colour table, so a class keeps
# its colour between runs
np.random.seed(42)
num_classes = len(classes)
# One random 3-channel colour per class (the upper bound 255 is exclusive)
colors = np.random.randint(0, 255, size=(num_classes, 3), dtype='uint8')
print("Colors generated: {}".format(colors.shape[0]))
    
    
# Timed forward pass: collect the predictions of all output layers
start = time.time()
output_layer_names = get_output_layers(net)
outs = net.forward(output_layer_names)
elapsed = time.time() - start
print('Prediction took {:.5f} seconds'.format(elapsed))

# Accumulators, one entry per detection that passes the threshold
class_ids, confidences, boxes = [], [], []

# Walk over every detection row of every output layer. Row layout as used
# here: [0], [1] = box centre, [2], [3] = box width and height (all
# multiplied by the image size), [5:] = per-class scores.
# conf_threshold comes from the model loading cell that was run.
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if not confidence > conf_threshold:
            continue  # weak detection, ignore it

        center_x = int(detection[0] * WIDTH)
        center_y = int(detection[1] * HEIGHT)
        w = int(detection[2] * WIDTH)
        h = int(detection[3] * HEIGHT)
        # Convert the centre to the top-left corner of the box
        x = center_x - w / 2
        y = center_y - h / 2

        boxes.append([x, y, w, h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

# Apply non-max suppression to the collected boxes
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
# The original code indexed every entry with `i[0]`, i.e. it expected an
# (N, 1) array. Newer OpenCV releases return a flat array, and an empty
# result may be an empty tuple. Flattening makes all three cases work.
indices = np.array(indices).flatten()
if len(indices) != 0:
    print("Detections: " + str(indices.shape[0]))
else:
    print("No Detections")

# Go through the detections remaining after NMS and draw their boxes
for i in indices:
    i = int(i)
    x, y, w, h = boxes[i]
    draw_bounding_box(img_yolo, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))


#SHOW IMAGE
imshow(img_yolo, save=True)

Colors generated: 80
Prediction took 1.35407 seconds
Detections: 19
Saved
