# Tests for Object detection with OpenCV
The following notebook contains some tests with different models and images for object detection.

In [10]:
import time
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Report the OpenCV version in use and confirm that every import succeeded.
print("OpenCV: {}".format(cv2.__version__))
print("IMPORTS OK")

OpenCV: 3.4.5
IMPORTS OK


In [11]:
# --- Notebook configuration ---
# Image fed to every detector below.
image_path = "./img/obj.png"
# Frame size reported below as the "robot camera" dimensions, in pixels.
WIDTH_ROBOT, HEIGHT_ROBOT = 960, 600
# Set to True to render the matplotlib previews inline in the notebook.
show_image = False

In [12]:
# Read the test image from disk as a 3-channel BGR array.
img = cv2.imread(image_path, cv2.IMREAD_COLOR)
# cv2.imread signals failure by returning None instead of raising, so fail
# here with a clear message rather than with an AttributeError on img.shape.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# Image height and width in pixels (shape is (rows, cols, channels)).
(h, w) = img.shape[:2]
WIDTH = w
HEIGHT = h
print("dimensions image upload: "+ str((h,w)) )
print("dimensions robot camera: "+ str((HEIGHT_ROBOT, WIDTH_ROBOT)) )
# Compare against the robot camera size. Comparing with (HEIGHT, WIDTH) would
# always print True because those were just copied from (h, w).
print("Same dimensions: "+str((h,w)==(HEIGHT_ROBOT, WIDTH_ROBOT)))

if show_image:
    # cv2.imshow does not work inside a Jupyter notebook, so use matplotlib.
    # OpenCV stores pixels as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

dimensions image upload: (600, 960)
dimensions robot camera: (600, 960)
Same dimensions: True


## MobileNet SSD Object Detection

In [13]:
# MobileNet-SSD in Caffe format: network description (.prototxt) and the
# matching weights file (.caffemodel).
PROTO = "./models/MobileNetSSD_deploy.prototxt"
MODEL = "./models/MobileNetSSD_deploy.caffemodel"

# Build the network from the two files above.
net = cv2.dnn.readNetFromCaffe(PROTO, MODEL)
print("Net Loaded")

Net Loaded


In [14]:
# Work on a copy so the boxes drawn below leave the original `img` untouched.
img_obj = img.copy()

if show_image:
    print("BEFORE:")
    # OpenCV images are BGR; convert to RGB so matplotlib shows true colours.
    plt.imshow(cv2.cvtColor(img_obj, cv2.COLOR_BGR2RGB))
    plt.show()

# Spatial size the image is resized to before the forward pass.
RESIZED_DIMENSIONS = (300, 300)
# Scale factor applied to the pixel values when building the blob
# (approximately 1 / 127.5).
IMG_NORM_RATIO = 0.007843

# Class names, indexed by the class id found in each detection row.
classes = [
    "background",
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

# Image size in pixels; used below to scale the relative box coordinates.
h, w = img_obj.shape[0], img_obj.shape[1]

# --- Forward pass ---
# Shrink the image to the network input size, then pack it into the input
# blob: 127.5 is passed as the mean to subtract and IMG_NORM_RATIO as the
# scale factor applied to the pixel values.
resized_img = cv2.resize(img_obj, RESIZED_DIMENSIONS)
blob = cv2.dnn.blobFromImage(resized_img, IMG_NORM_RATIO, RESIZED_DIMENSIONS, 127.5)

# Feed the blob to the network and run inference.
net.setInput(blob)
neural_network_output = net.forward()
# shape[2] is the number of detection rows returned by the network; the
# confidence filter is only applied afterwards, in the drawing loop.
print("detected: " + str(neural_network_output.shape[2]))


# Draw a labelled bounding box for every sufficiently confident detection.
# Columns used from each detection row: 1 = class id, 2 = confidence,
# 3..6 = box corners, which are multiplied by the image width/height below.
for i in range(neural_network_output.shape[2]):
    detection = neural_network_output[0, 0, i]
    confidence = detection[2]
    # Skip anything that is not above 20% confidence.
    if not confidence > 0.20:
        continue

    idx = int(detection[1])

    # Scale the box corners up to pixel coordinates.
    scale_to_pixels = np.array([w, h, w, h])
    bounding_box = detection[3:7] * scale_to_pixels
    (startX, startY, endX, endY) = bounding_box.astype("int")

    label = "{}: {:.2f}%".format(classes[idx], confidence * 100)

    cv2.rectangle(img_obj, (startX, startY), (endX, endY), (255,0,0), 2)

    # Put the label above the box unless that lands too close to the top
    # edge, in which case it goes just below the box's top line.
    label_offset = startY - 15
    y = label_offset if label_offset > 15 else startY + 15

    cv2.putText(img_obj, label, (startX, y),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

if show_image:
    print("RESULT:")
    # img_obj is in OpenCV's BGR order; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img_obj, cv2.COLOR_BGR2RGB))
    plt.show()

# Save the annotated image. The call is kept as the last expression of the
# cell so its return value (True on success) is shown as the cell output.
cv2.imwrite("./img/save.png",img_obj)

detected: 1


True

## Yolo Obj Detector


In [15]:
# YOLOv3 in Darknet format: network configuration (.cfg) and weights file.
modelConfig = "./models/yolov3-openimages.cfg"
modelWeigths = "./models/yolov3-openimages.weights"

# Build the network from the two files above.
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

Net Loaded


In [16]:
# Work on a copy so the boxes drawn below leave the original `img` untouched.
img_yolo = img.copy()

if show_image:
    print("BEFORE:")
    # OpenCV images are BGR; convert to RGB so matplotlib shows true colours.
    plt.imshow(cv2.cvtColor(img_yolo, cv2.COLOR_BGR2RGB))
    plt.show()

# One class name per line; the list is indexed by class id when labelling.
with open('./models/open_images.names', 'r') as f:
    classes = f.read().splitlines()
print("classes: " + str(len(classes)))


# `net` already holds this YOLO model: the previous cell loaded it from the
# same modelConfig / modelWeigths files, so the weights are not read again.

# Create the input blob: scale pixel values by ~1/255, resize to 416x416
# without cropping, subtract no mean, and swap the B and R channels.
scale = 0.00392
blob = cv2.dnn.blobFromImage(img_yolo, scale, (416, 416), (0, 0, 0), swapRB=True, crop=False)

# Set the input blob for the network.
net.setInput(blob)

# function to get the output layer names
# in the architecture
def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Parameters
    ----------
    net : object exposing getLayerNames() and getUnconnectedOutLayers(),
        such as the cv2.dnn network used in this notebook.

    Returns
    -------
    list of str
        Layer names, in the order reported by getUnconnectedOutLayers().
    """
    layer_names = net.getLayerNames()

    # getUnconnectedOutLayers() yields 1-based layer ids. Depending on the
    # OpenCV version they arrive as an (N, 1) array or as a flat (N,) array.
    # Flattening handles both, whereas indexing each item with [0] fails on
    # the flat form.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()

    return [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

# function to draw bounding box on the detected object with class name
def draw_bounding_box(img_yolo, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one labelled detection box on img_yolo, modifying it in place.

    Parameters
    ----------
    img_yolo : image array handed to cv2.rectangle / cv2.putText.
    class_id : index into the module-level `classes` list.
    confidence : accepted for interface compatibility; not used for drawing.
    x, y : first corner of the rectangle (top-left at the call site), pixels.
    x_plus_w, y_plus_h : opposite corner of the rectangle, pixels.
    """
    label = str(classes[class_id])

    #color = COLORS[class_id]
    color = (255,0,0)

    cv2.rectangle(img_yolo, (x,y), (x_plus_w,y_plus_h), color, 2)

    # The label normally sits slightly up and to the left of the corner. For
    # boxes touching the top or left edge that position is outside the image,
    # so clamp x at 0 and drop the text just inside the box instead.
    text_x = max(x - 10, 0)
    text_y = y - 10 if y - 10 > 15 else y + 15

    cv2.putText(img_yolo, label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    
    
# Run inference through the network and gather the predictions of every
# output layer.
outs = net.forward(get_output_layers(net))

# initialization
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.9
nms_threshold = 0.9

# For each detection from each output layer get the confidence, class id and
# bounding box parameters, ignoring weak detections.
# The same conf_threshold is used here and in NMSBoxes below: NMSBoxes filters
# by that score anyway, so a lower hard-coded cut-off here only collected
# boxes that were discarded afterwards.
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > conf_threshold:
            # detection[0:4] holds centre x, centre y, width and height,
            # scaled here by the image size to get pixels.
            center_x = int(detection[0] * WIDTH)
            center_y = int(detection[1] * HEIGHT)
            box_w = int(detection[2] * WIDTH)
            box_h = int(detection[3] * HEIGHT)
            # Keep the whole box integer-valued: NMSBoxes and the cv2 drawing
            # calls expect ints (https://github.com/opencv/opencv/issues/12789).
            box_x = int(center_x - box_w / 2)
            box_y = int(center_y - box_h / 2)
            class_ids.append(int(class_id))
            confidences.append(float(confidence))
            boxes.append([box_x, box_y, box_w, box_h])

# apply non-max suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

# Go through the detections remaining after NMS and draw their boxes.
# Depending on the OpenCV version and on whether anything was kept, NMSBoxes
# returns an empty tuple, an (N, 1) array or a flat (N,) array; flattening
# handles all of them.
for i in np.array(indices).flatten():
    i = int(i)
    box_x, box_y, box_w, box_h = boxes[i]
    draw_bounding_box(img_yolo, class_ids[i], confidences[i],
                      box_x, box_y, box_x + box_w, box_y + box_h)

# TODO: confirm this cell runs on both OpenCV 3 and OpenCV 4

if show_image:
    print("RESULT:")
    # img_yolo is in OpenCV's BGR order; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img_yolo, cv2.COLOR_BGR2RGB))
    plt.show()

# Save the annotated image. The call is kept as the last expression of the
# cell so its return value (True on success) is shown as the cell output.
# NOTE(review): this is the same path the MobileNet SSD cell writes to, so
# that earlier result is overwritten here - confirm this is intended.
cv2.imwrite("./img/save.png",img_yolo)

classes: 500


True

## YOLO 

In [17]:
# YOLOv3 in Darknet format: network configuration (.cfg) and weights file.
modelConfig = "./models/yolov3-openimages.cfg"
modelWeigths = "./models/yolov3-openimages.weights"

# Build the network from the two files above.
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeigths)
print("Net Loaded")

Net Loaded


In [18]:
# Work on a copy so the boxes drawn below leave the original `img` untouched.
img_yolo = img.copy()

if show_image:
    print("BEFORE:")
    # OpenCV images are BGR; convert to RGB so matplotlib shows true colours.
    plt.imshow(cv2.cvtColor(img_yolo, cv2.COLOR_BGR2RGB))
    plt.show()

# One class name per line; `labels` is indexed by class number further down.
with open('./models/open_images.names', 'r') as f:
    classes = f.read().splitlines()
print("classes: " + str(len(classes)))
labels = classes
# Setting minimum probability to eliminate weak predictions
probability_minimum = 0.1

# Setting threshold for non maximum suppression
threshold = 0.1

#from https://www.kaggle.com/code/valentynsichkar/yolo-v3-with-opencv/notebook

# The previous cell already loaded this exact model (same modelConfig and
# modelWeigths) into `net`, so reuse it instead of reading the weights again.
network = net

# Getting names of all layers
layers_names_all = network.getLayerNames()

# Getting only the output layers' names that we need from the YOLO algorithm.
# getUnconnectedOutLayers() yields 1-based layer ids, as an (N, 1) array or a
# flat (N,) array depending on the OpenCV version; flatten() copes with both,
# whereas indexing each item with [0] fails on the flat form.
layers_names_output = [layers_names_all[int(layer_id) - 1]
                       for layer_id in np.array(network.getUnconnectedOutLayers()).flatten()]

# Check point
print(layers_names_output)  # ['yolo_82', 'yolo_94', 'yolo_106']

# Getting image shape
image_input_shape = img_yolo.shape

# Check point
print(image_input_shape)  # tuple of (h, w, 3)

if show_image:
    plt.rcParams['figure.figsize'] = (10.0, 10.0)
    plt.imshow(cv2.cvtColor(img_yolo, cv2.COLOR_BGR2RGB))
    plt.show()


# Build the network input: pixel values scaled by 1/255, image resized to
# 416x416 without cropping, B and R channels swapped.
blob = cv2.dnn.blobFromImage(img_yolo, 1 / 255.0, (416, 416), swapRB=True, crop=False)

# Check point
print(img_yolo.shape)  # (h, w, 3)
print(blob.shape)  # (1, 3, 416, 416)

# Drop the leading axis and move the channel axis last for display.
blob_to_show = np.transpose(blob[0], (1, 2, 0))
print(blob_to_show.shape)  # (416, 416, 3)

if show_image:
    # Showing 'blob_to_show'
    plt.rcParams['figure.figsize'] = (5.0, 5.0)
    plt.imshow(blob_to_show)
    plt.show()


# Time the forward pass only: the input is set before the clock starts.
network.setInput(blob)
start = time.time()
output_from_network = network.forward(layers_names_output)
end = time.time()

# Report how long the forward pass took.
elapsed_seconds = end - start
print('YOLO v3 took {:.5f} seconds'.format(elapsed_seconds))

# Check point
print(type(output_from_network))  # <class 'list'>
print(type(output_from_network[0]))  # <class 'numpy.ndarray'>

# Fixed seed so every class gets the same colour on every run.
np.random.seed(42)
# One colour per label: three uint8 values drawn from [0, 255).
colours = np.random.randint(low=0, high=255, size=(len(labels), 3), dtype='uint8')

# Check point
print(colours.shape)  # (number of labels, 3)
print(colours[0])  # [102 220 225]

# Accumulators for the detections that pass the probability filter:
# boxes as [x_min, y_min, width, height], their confidences and class numbers.
bounding_boxes, confidences, class_numbers = [], [], []

# Spatial size of the input image (first two entries of its shape).
h, w = image_input_shape[0], image_input_shape[1]

# Check point
print(h, w)  # image height and width

# Factors that scale a YOLO (x_center, y_center, width, height) box up to
# the size of the original image.
box_scale = np.array([w, h, w, h])

for result in output_from_network:
    # Every row of an output layer is one candidate detection.
    for detection in result:
        # Entries from index 5 onwards are the per-class scores.
        scores = detection[5:]
        class_current = np.argmax(scores)
        confidence_current = scores[class_current]

        # Drop weak predictions.
        if not confidence_current > probability_minimum:
            continue

        # YOLO stores the box centre plus width and height, so an
        # element-wise multiplication rescales all four at once.
        box_current = detection[0:4] * box_scale

        # Convert centre-based coordinates to the top-left corner.
        x_center, y_center, box_width, box_height = box_current.astype('int')
        x_min = int(x_center - (box_width / 2))
        y_min = int(y_center - (box_height / 2))

        bounding_boxes.append([x_min, y_min, int(box_width), int(box_height)])
        confidences.append(float(confidence_current))
        class_numbers.append(class_current)
            
# It is needed to make sure the data type of the boxes is 'int'
# and the type of the confidences is 'float'
# https://github.com/opencv/opencv/issues/12789
results = cv2.dnn.NMSBoxes(bounding_boxes, confidences, probability_minimum, threshold)

# Check point
# Labels of every detection that passed probability_minimum. This list is
# built from class_numbers, i.e. before non-maximum suppression is applied.
detected_labels = [labels[int(class_number)] for class_number in class_numbers]
for detected_label in detected_labels:
    print(detected_label)

# Saving found labels, one per line. Without the newline the labels were
# written back to back and could not be told apart in the file.
with open('found_labels.txt', 'w') as f:
    for detected_label in detected_labels:
        f.write(detected_label + '\n')
        

# Indexes kept by non-maximum suppression. Depending on the OpenCV version
# and on whether anything was kept, NMSBoxes returns an empty tuple, an
# (N, 1) array or a flat (N,) array; flattening handles all of them and
# makes the loop a no-op when nothing was detected.
kept_indexes = np.array(results).flatten()

for i in kept_indexes:
    i = int(i)

    # Getting current bounding box coordinates
    x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
    box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]

    # Preparing colour for current bounding box (plain ints for OpenCV)
    colour_box_current = [int(j) for j in colours[class_numbers[i]]]

    # Drawing bounding box on the original image
    cv2.rectangle(img_yolo, (x_min, y_min), (x_min + box_width, y_min + box_height),
                  colour_box_current, 5)

    # Preparing text with label and confidence for current bounding box
    text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])], confidences[i])

    # Putting text with label and confidence on the original image
    cv2.putText(img_yolo, text_box_current, (x_min, y_min - 7), cv2.FONT_HERSHEY_SIMPLEX,
                1, colour_box_current, 2)

if show_image:
    print("RESULT:")
    plt.rcParams['figure.figsize'] = (10.0, 10.0)
    # img_yolo is in OpenCV's BGR order; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img_yolo, cv2.COLOR_BGR2RGB))
    plt.show()

classes: 500
['yolo_82', 'yolo_94', 'yolo_106']
(600, 960, 3)
(600, 960, 3)
(1, 3, 416, 416)
(416, 416, 3)
YOLO v3 took 1.48017 seconds
<class 'list'>
<class 'numpy.ndarray'>
(500, 3)
[102 220 225]
600 960
Plumbing fixture
Tin can
