In [1]:
import cv2
import numpy as np
import sys

In [2]:
model = './yolo_v3/yolov3.weights'
config = './yolo_v3/yolov3.cfg'
class_labels = './yolo_v3/coco.names.txt'

In [3]:
## 테스트 영상
img_files = ['./yolo_v3/fig/dog.jpg',
            './yolo_v3/fig/kite.jpg',
            './yolo_v3/fig/person.jpg',
            './yolo_v3/fig/sheep.jpg']
print(img_files)

['./yolo_v3/fig/dog.jpg', './yolo_v3/fig/kite.jpg', './yolo_v3/fig/person.jpg', './yolo_v3/fig/sheep.jpg']


In [4]:
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed')
    sys.exit()

In [5]:
classes = []

with open(class_labels, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

classes

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [6]:
# 색상을 랜덤하게 채워주는 코드
colors = np.random.uniform(0, 255, size = (80, 3))

print(colors)

[[ 94.12224811 158.46464905   0.66759641]
 [214.68587324   7.71591695 209.08297186]
 [141.36310433 204.05799382 176.68481301]
 [230.06862206 106.40446117 113.02022982]
 [127.8770135  162.13839429 184.18481292]
 [143.60476585 176.01189833 193.5267615 ]
 [ 91.76818599  54.48644566 169.39118272]
 [164.61531604 217.70640152 166.89958631]
 [187.92972776  88.04569845 128.1862838 ]
 [ 51.66552733 102.62539335 169.37206668]
 [ 97.70433021  41.88994658 105.46270162]
 [121.86269297  63.73388111 217.76769741]
 [ 61.12340805  80.91965014 180.90537117]
 [119.55179114   4.60639668  88.84649428]
 [111.01261216  65.15980552 107.52094878]
 [ 58.02700179 128.16907048 250.26319277]
 [252.94082243 174.74611514 115.59994411]
 [215.32827578 243.58301575 114.55896654]
 [ 67.28299067 234.65224035  74.32596967]
 [155.69005135  59.70068989  76.53120048]
 [199.85057686 141.94664822 251.28409182]
 [246.81820895 246.06062484 165.92601177]
 [166.05015677 220.13953224 119.56361507]
 [ 50.12802444 118.72657652  86.92

In [7]:
layer_names = net.getLayerNames()
layer_names

('conv_0',
 'bn_0',
 'leaky_1',
 'conv_1',
 'bn_1',
 'leaky_2',
 'conv_2',
 'bn_2',
 'leaky_3',
 'conv_3',
 'bn_3',
 'leaky_4',
 'shortcut_4',
 'conv_5',
 'bn_5',
 'leaky_6',
 'conv_6',
 'bn_6',
 'leaky_7',
 'conv_7',
 'bn_7',
 'leaky_8',
 'shortcut_8',
 'conv_9',
 'bn_9',
 'leaky_10',
 'conv_10',
 'bn_10',
 'leaky_11',
 'shortcut_11',
 'conv_12',
 'bn_12',
 'leaky_13',
 'conv_13',
 'bn_13',
 'leaky_14',
 'conv_14',
 'bn_14',
 'leaky_15',
 'shortcut_15',
 'conv_16',
 'bn_16',
 'leaky_17',
 'conv_17',
 'bn_17',
 'leaky_18',
 'shortcut_18',
 'conv_19',
 'bn_19',
 'leaky_20',
 'conv_20',
 'bn_20',
 'leaky_21',
 'shortcut_21',
 'conv_22',
 'bn_22',
 'leaky_23',
 'conv_23',
 'bn_23',
 'leaky_24',
 'shortcut_24',
 'conv_25',
 'bn_25',
 'leaky_26',
 'conv_26',
 'bn_26',
 'leaky_27',
 'shortcut_27',
 'conv_28',
 'bn_28',
 'leaky_29',
 'conv_29',
 'bn_29',
 'leaky_30',
 'shortcut_30',
 'conv_31',
 'bn_31',
 'leaky_32',
 'conv_32',
 'bn_32',
 'leaky_33',
 'shortcut_33',
 'conv_34',
 'bn_34',
 'l

In [9]:
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
output_layers                          # 끊어진 인덱스의 한 단계 전 layer가 우리가 사용하고 싶은 layer

['yolo_82', 'yolo_94', 'yolo_106']

In [18]:
confThreshold = 0.5
nmsThreshold = 0.4

In [28]:
for i in img_files:
    img = cv2.imread(i)

    if img is None:
        continue

    # 블롭 생성 & 추론
    blob = cv2.dnn.blobFromImage(img, 1/255., (320, 320), swapRB=True)
    net.setInput(blob)
    outs = net.forward(output_layers) #

    # outs는 3개의 ndarray 리스트.
    # outs[0].shape=(507, 85), 13*13*3=507
    # outs[1].shape=(2028, 85), 26*26*3=2028
    # outs[2].shape=(8112, 85), 52*52*3=8112

    h, w = img.shape[:2]

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # detection: 4(bounding box) + 1(objectness_score) + 80(class confidence)
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > confThreshold:
                # 바운딩 박스 중심 좌표 & 박스 크기
                cx = int(detection[0] * w)
                cy = int(detection[1] * h)
                bw = int(detection[2] * w)
                bh = int(detection[3] * h)

                # 바운딩 박스 좌상단 좌표
                sx = int(cx - bw / 2)
                sy = int(cy - bh / 2)

                boxes.append([sx, sy, bw, bh])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))

    # 비최대 억제, Non Max Suppression
#     https://www.visiongeek.io/2018/07/yolo-object-detection-opencv-python.html
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)

    for i in indices:

        sx, sy, bw, bh = boxes[i]
        label = f'{classes[class_ids[i]]}: {confidences[i]:.2}'
        color = colors[class_ids[i]]
        cv2.rectangle(img, (sx, sy, bw, bh), color, 2)
        cv2.putText(img, label, (sx, sy - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

    t, _ = net.getPerfProfile()
    
#  net.getPerfProfile(): Returns overall time for inference and timings (in ticks) for layers
#  cv2.getTickFrequency(): Returns the number of ticks per second.

#     label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    label = 'Inference time: {:.3f} ms'.format(t * 1000.0 / cv2.getTickFrequency())

    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 2, cv2.LINE_AA) 

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()

In [29]:
print(outs[0].shape)
print(outs[1].shape)
print(outs[2].shape)

(300, 85)
(1200, 85)
(4800, 85)


In [None]:
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
output_layers

In [3]:
import cv2
import numpy as np
import sys

confThreshold = 0.5
nmsThreshold = 0.4

model = './yolo_v3/yolov3.weights'
config = './yolo_v3/yolov3.cfg'
class_labels = './yolo_v3/coco.names.txt'

net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed')
    sys.exit()
    
classes = []

with open(class_labels, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print('cam open failed')
    sys.exit()

cv2.namedWindow('frame', cv2.WINDOW_AUTOSIZE)

while True:
    ret, frame = cap.read()
    
    if ret is None:
        print('Video read failed')
        break
        
    blob = cv2.dnn.blobFromImage(frame, 1/255., (320, 320), swapRB=True)
    net.setInput(blob)
    outs = net.forward()
    
    h, w = frame.shape[:2]
    
    class_ids = []
    confidences = []
    boxes = []
    
    for out in outs:
        scores = out[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        
        if confidence > confThreshold:
            # 바운딩 박스 중심 좌표 & 박스 크기
            cx = int(out[0] * w)
            cy = int(out[1] * h)
            bw = int(out[2] * w)
            bh = int(out[3] * h)

            # 바운딩 박스 좌상단 좌표
            sx = int(cx - bw / 2)
            sy = int(cy - bh / 2)

            boxes.append([sx, sy, bw, bh])
            confidences.append(float(confidence))
            class_ids.append(int(class_id))

    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)

    for i in indices:

        sx, sy, bw, bh = boxes[i]
        label = f'{classes[class_ids[i]]}: {confidences[i]:.2}'
        color = colors[class_ids[i]]
        cv2.rectangle(img, (sx, sy, bw, bh), color, 2)
        cv2.putText(img, label, (sx, sy - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

    t, _ = net.getPerfProfile()

    label = 'Inference time: {:.3f} ms'.format(t * 1000.0 / cv2.getTickFrequency())

    cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 2, cv2.LINE_AA) 

    cv2.imshow('frame', frame)
    cv2.waitKey(30)
    
cap.release()
cv2.destroyAllWindows()

error: OpenCV(4.5.5) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\concat_layer.cpp:104: error: (-201:Incorrect size of input array) Inconsistent shape for ConcatLayer in function 'cv::dnn::ConcatLayerImpl::getMemoryShapes'
