In [1]:
import cv2
import numpy as np
import pandas as pd

In [2]:
from os import listdir
from os.path import isfile, join, basename
import random
import glob

path = "../data/main_task_data/kitchen/"
images_names = glob.glob(path + "*.jpg")
# images_names = [f for f in listdir(path) if isfile(join(path, f))]

In [3]:
with open("./yolo/yolov3.txt", 'r') as f:
    classes = [line.strip() for line in f.readlines()]
    
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

In [4]:
# c = classes.copy()
# c.sort()
# for i in c:
#     print(i)

In [5]:
net = cv2.dnn.readNet("./yolo/yolov3.weights", "./yolo/yolov3.cfg")

In [6]:
# function to get the output layer names 
# in the architecture
def get_output_layers(net):
    
    layer_names = net.getLayerNames()
    
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    return output_layers

# function to draw bounding box on the detected object with class name
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):

    label = str(classes[class_id])

    color = COLORS[class_id]

    cv2.rectangle(img, (x,y), (x_plus_w,y_plus_h), color, 2)

    cv2.putText(img, label, (x-10,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

In [7]:
skyhack_classes = ["couch", "chair", "refrigerator", "toilet"]
# yolo_for_sky_outs = pd.DataFrame(columns=skyhack_classes)
yolo_for_sky_out_dict = {"filename":"None" ,"couch": 0, "chair": 0, "refrigerator": 0, "toilet": 0}
yolo_for_sky_outs = pd.DataFrame(columns=skyhack_classes)

In [8]:
# run inference through the network
# and gather predictions from output layers
#outs = net.forward(get_output_layers(net))

# initialization
#class_ids = []
#confidences = []
#boxes = []
#conf_threshold = 0.5
#nms_threshold = 0.4

# for each detetion from each output layer 
# get the confidence, class id, bounding box params
# and ignore weak detections (confidence < 0.5)
#new_predicted_label = yolo_for_sky_out_dict.copy()
#for out in outs:
#    for detection in out:
#        scores = detection[5:]
#        class_id = np.argmax(scores)
#        confidence = scores[class_id]
#        if classes[class_id] in skyhack_classes: 
#            if confidence > 0.5 :
#                center_x = int(detection[0] * width)
#                center_y = int(detection[1] * height)
#                w = int(detection[2] * width)
#                h = int(detection[3] * height)
#                x = center_x - w / 2
#                y = center_y - h / 2
#                class_ids.append(class_id)
#                confidences.append(float(confidence))
#                boxes.append([x, y, w, h])
#                new_predicted_label[classes[class_id]] = 1
#yolo_for_sky_outs = yolo_for_sky_outs.append(new_predicted_label, ignore_index=True)
#yolo_for_sky_outs

In [9]:
for file in images_names[0:100]:
    image = cv2.imread(file)

    width = image.shape[1]
    height = image.shape[0]
    
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416))
    net.setInput(blob)
    
    outs = net.forward(get_output_layers(net))
    
    # initialization
    class_ids = []
    confidences = []
    boxes = []
    conf_threshold = 0.5
    nms_threshold = 0.4
    
    # for each detetion from each output layer 
    # get the confidence, class id, bounding box params
    # and ignore weak detections (confidence < 0.5)
    new_predicted_label = yolo_for_sky_out_dict.copy()
    new_predicted_label["filename"] = basename(file)
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if classes[class_id] in skyhack_classes: 
                if confidence > 0.5 :
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    x = center_x - w / 2
                    y = center_y - h / 2
                    class_ids.append(class_id)
                    confidences.append(float(confidence))
                    boxes.append([x, y, w, h])
                    new_predicted_label[classes[class_id]] = 1
    yolo_for_sky_outs = yolo_for_sky_outs.append(new_predicted_label, ignore_index=True)
yolo_for_sky_outs

Unnamed: 0,couch,chair,refrigerator,toilet,filename
0,0,0,0,0,4C6022BE5337FFFA80CEE46BFEF3FEDB3027B6AD.jpg
1,0,0,1,0,B865AE190DD591890B04178499CC1E8E8EE3744E.jpg
2,0,0,0,0,B4C4F4C30D29D867850CCA180C1CEFFB459BB5A1.jpg
3,0,1,0,0,30E0F7C4D7C4FDC159EE5ED2DACBEA64265AA5DB.jpg
4,0,1,1,0,AB7F820A5C99687DA079DEFBFF7D3246D10DE0B7.jpg
5,0,1,0,0,F3B3EC13026C70E0C9294C665FD078E2D31F10EF.jpg
6,0,0,0,0,E36E6D491668A24E3554B9DAABEC5EFFE04AD104.jpg
7,0,0,0,0,A5AF7620A92C42A90D4CC3A5EA27C1934B778F55.jpg
8,0,1,0,0,8F8B29518A7A7B76FEA00ED1B5C226ECD221B789.jpg
9,0,0,0,0,3EEDD21C8BFC352E56357520041BBC66F97776BB.jpg


In [10]:
# apply non-max suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

# go through the detections remaining
# after nms and draw bounding box
for i in indices:
    i = i[0]
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    
    draw_bounding_box(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))

# display output image    
img = cv2.imshow("object detection", image)

# wait until any key is pressed
cv2.waitKey()
    
 # save output image to disk
# cv2.imwrite("object-detection.jpg", image)

# release resources
cv2.destroyAllWindows()

In [11]:
labels = pd.read_csv("./labels.csv")
labels = labels[['filename', 'Couch', 'Chair', 'Refrigerator', 'Toilet']]
i = 0
index_list = []
for l in labels['filename']:
    for k in yolo_for_sky_outs["filename"]:
        if k == l:
            index_list.append(i)
                
    i+=1
index_list

labels = labels.loc[index_list]
labels.sort_values(axis=0, by='filename', inplace=True)
yolo_for_sky_outs.sort_values(axis=0, by='filename', inplace=True)

correct = 0
for j in range(len(index_list)):
    if yolo_for_sky_outs.iloc[j]['couch'] == labels.iloc[j]['Couch']:
            correct += 1 / 4
    if yolo_for_sky_outs.iloc[j]['chair'] == labels.iloc[j]['Chair'] :
        correct += 1 / 4
    if yolo_for_sky_outs.iloc[j]['refrigerator'] == labels.iloc[j]['Refrigerator']:
        correct += 1 / 4
    if yolo_for_sky_outs.iloc[j]['toilet'] == labels.iloc[j]['Toilet']:
        correct += 1 / 4
        
print(correct / len(index_list))

0.8675


True