In [1]:
# Mount my Google Drive at /content/gdrive so files stored there are accessible from this notebook
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# 필요한 패키지와 모듈을 불러옴
import cv2
import numpy as np
import time
import io
import base64
from IPython.display import HTML

In [3]:
# Display the original video inline before running detection.
# The file is opened read-only inside a `with` block so the handle is closed
# as soon as the bytes are read (the whole file is held in memory and then
# embedded in the page as a base64 data URI).
with io.open('/content/gdrive/MyDrive/Custom_YOLO3/custom_fruit.mp4', 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
HTML(data='''<video width="50%" controls>
                <source src="data:video/mp4;base64,{0}" type="video/mp4"/>
             </video>'''.format(encoded.decode('ascii')))

Output hidden; open in https://colab.research.google.com to view.

In [4]:
file_name = '/content/gdrive/MyDrive/Custom_YOLO3/custom_fruit.mp4'  # source video to run detection on
min_confidence = 0.5   # minimum class score (confidence) for a detection to be accepted
nms_threshold = 0.4    # non-max suppression threshold
output_name = 'fruit_output_video.mp4'  # file name of the annotated output video
elapsed_time = 0       # running total of per-frame processing time, initialised to zero

In [5]:
def detectAndDisplay(frame):
    """Run YOLOv3 detection on one frame, annotate it and append it to the output video.

    The frame is scaled to 90% of its size, passed through the network, filtered
    by `min_confidence` and non-max suppression, and the surviving boxes are
    drawn with a "<class name>: <score>" label. Each kept detection and the
    per-frame processing time are printed.

    Reads the notebook globals `net`, `output_layers`, `classes`, `color_lists`,
    `min_confidence`, `nms_threshold` and `output_name`; updates the globals
    `elapsed_time` (accumulated processing time) and `writer` (created lazily
    on the first call).

    Args:
        frame: image array for a single video frame, as produced by
            `cv2.VideoCapture.read()`.

    Returns:
        None. The annotated frame is written through `writer`.
    """
    global elapsed_time, writer

    start_time = time.time()
    img = cv2.resize(frame, None, fx=0.9, fy=0.9)
    height, width = img.shape[:2]

    # Network input is 416x416 (the commonly used YOLOv3 input sizes are
    # 320x320, 416x416 and 608x608); 0.00392 is approximately 1/255.
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), swapRB=True, crop=False)

    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []      # class id of every accepted detection
    confidences = []    # class score of every accepted detection
    boxes = []          # [x, y, w, h] of every accepted detection, in pixels
    colors = []         # drawing colour of every accepted detection (one colour per class)

    for out in outs:
        for detection in out:
            # Elements 0-3 are used as centre x, centre y, width and height
            # relative to the image size; elements from 5 onwards as class scores.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if confidence > min_confidence:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Convert centre coordinates to the top-left corner.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
                colors.append(color_lists[class_id])

    # Non-max suppression collapses overlapping boxes into one.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, nms_threshold)
    # The result may be an empty tuple, an Nx1 array or a flat array depending
    # on the OpenCV version; normalise it once instead of scanning it per box.
    # Sorting keeps the original drawing/printing order (ascending box index).
    kept = sorted(set(np.array(indexes, dtype=int).flatten().tolist()))

    font = cv2.FONT_HERSHEY_PLAIN
    for i in kept:
        x, y, w, h = boxes[i]
        label = "{}: {:.2f}".format(classes[class_ids[i]], confidences[i]*100)
        print(i, label)
        color = colors[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 1)        # box outline
        cv2.rectangle(img, (x, y - 25), (x + w, y), color, -1)      # filled label background
        cv2.putText(img, label, (x + 2, y - 10), font, 1, (255, 255, 255), 1)

    process_time = time.time() - start_time
    elapsed_time += process_time   # accumulate total processing time
    print("=== A frame took {:.3f} seconds".format(process_time))

    # Create the video writer lazily, once the output frame size is known.
    if writer is None and output_name is not None:
        # 'mp4v' matches the .mp4 container of the configured output name.
        # NOTE(review): the frame rate is fixed at 30 fps regardless of the
        # source video's rate - confirm this is intended.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(output_name, fourcc, 30,
                (width, height), True)

    # Append the annotated frame to the output video.
    if writer is not None:
        writer.write(img)

In [6]:
# Load the custom-trained YOLOv3 network (weights + cfg).
net = cv2.dnn.readNet("/content/gdrive/MyDrive/Custom_YOLO3/backup/custom-train-yolo3_final.weights", "/content/gdrive/MyDrive/Custom_YOLO3/custom/custom-train-yolo3.cfg")

# Class names, one per line; a detection's class id is used as the index into this list.
with open("/content/gdrive/MyDrive/Custom_YOLO3/custom/classes.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids, shaped Nx1 or flat
# depending on the OpenCV version; flattening handles both.
output_layers = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
color_lists = np.random.uniform(0, 255, size=(len(classes), 3))  # one random colour per class

# Open the source video stream.
cap = cv2.VideoCapture(file_name)
writer = None
elapsed_time = 0   # reset so re-running this cell does not add to a previous run's total
if not cap.isOpened():
    cap.release()
    # Raise instead of exit(0): a failed open is an error, not a successful exit.
    raise IOError('--(!)Error opening video capture: ' + file_name)

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print('--(!) No captured frame -- Break!')
            print("elapsed time {:.3f} seconds".format(elapsed_time))
            break
        detectAndDisplay(frame)
finally:
    # Always release the capture and (if one was created) the writer, so the
    # output file is finalised even when the loop is interrupted or fails.
    cap.release()
    if writer is not None:
        writer.release()

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
3 Tomato: 99.60
=== A frame took 1.079 seconds
0 Apple: 99.99
1 Oriental melon: 96.67
2 Apple: 99.96
3 Tomato: 99.64
=== A frame took 1.092 seconds
0 Apple: 99.99
1 Oriental melon: 96.31
2 Apple: 99.96
3 Tomato: 99.63
=== A frame took 1.088 seconds
0 Apple: 99.99
1 Oriental melon: 94.87
2 Apple: 99.97
3 Tomato: 99.59
=== A frame took 1.092 seconds
0 Apple: 99.99
1 Oriental melon: 94.83
2 Apple: 99.96
3 Tomato: 99.52
=== A frame took 1.111 seconds
0 Apple: 99.99
1 Oriental melon: 92.14
2 Apple: 99.96
3 Tomato: 99.54
=== A frame took 1.078 seconds
0 Oriental melon: 92.99
1 Apple: 99.99
2 Apple: 99.96
3 Tomato: 99.59
=== A frame took 1.094 seconds
0 Oriental melon: 96.68
1 Apple: 99.99
2 Apple: 99.96
3 Tomato: 99.70
=== A frame took 1.090 seconds
0 Oriental melon: 96.74
1 Apple: 99.99
2 Apple: 99.96
3 Tomato: 99.65
=== A frame took 1.087 seconds
0 Oriental melon: 96.01
1 Apple: 99.99
2 Apple: 99.96
3 Tomato: 99.68
=== A frame took 1.097 se