In [1]:
# Clone the repository.
# !git clone https://github.com/ultralytics/YOLOv5

In [2]:
# !pip install -r C:/Users/admin/Documents/GitHub/deepLearning/YOLOv5/requirements.txt
# !pip install onnx

In [3]:
# Download .pt model.
# !wget -O YOLOv5s.pt https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt

In [4]:
# !python /content/YOLOv5/export.py --weights /content/YOLOv5s.pt --include onnx

In [3]:
# 라이브러리 추가


import cv2
import numpy as np
import onnx

In [4]:
# Detection parameters
INPUT_WIDTH, INPUT_HEIGHT = 640, 640  # blob size handed to cv2.dnn.blobFromImage in pre_process
SCORE_THRESHOLD = 0.5                 # minimum best-class score kept by post_process
NMS_THRESHOLD = 0.45                  # overlap threshold passed to cv2.dnn.NMSBoxes
CONFIDENCE_THRESHOLD = 0.45           # minimum row[4] confidence; also the NMSBoxes score threshold

# Text rendering parameters
FONT_FACE = cv2.FONT_HERSHEY_PLAIN
FONT_SCALE, THICKNESS = 0.7, 1

# Colours (OpenCV drawing calls take channels in B, G, R order)
BLACK = (0, 0, 0)
BLUE = (255, 178, 50)
YELLOW = (0, 255, 255)

In [5]:
def draw_label(im, label, x, y):
    """Draw ``label`` on ``im`` over a filled black box anchored at (x, y).

    The box is sized from ``cv2.getTextSize`` using the module-level
    FONT_FACE, FONT_SCALE and THICKNESS, so it covers the text plus its
    baseline. ``im`` is modified in place; nothing is returned.

    Args:
        im: image to draw on.
        label: text to render.
        x, y: top-left corner of the label box, in pixels.
    """
    # getTextSize returns ((width, height), baseline).
    (text_w, text_h), baseline = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)

    # Filled black background behind the text.
    box_top_left = (x, y)
    box_bottom_right = (x + text_w, y + text_h + baseline)
    cv2.rectangle(im, box_top_left, box_bottom_right, (0, 0, 0), cv2.FILLED)

    # putText expects the bottom-left corner of the text, hence the height offset.
    text_origin = (x, y + text_h)
    cv2.putText(im, label, text_origin, FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)

In [6]:
def pre_process(input_image, net):
    """Run one forward pass of ``net`` on ``input_image`` and return the raw outputs.

    The image is converted to a 4-D blob scaled by 1/255, resized to
    (INPUT_WIDTH, INPUT_HEIGHT) without cropping, with the R and B channels
    swapped and no mean subtraction.

    Args:
        input_image: frame to run detection on.
        net: network object providing setInput / forward /
            getUnconnectedOutLayersNames (e.g. from cv2.dnn.readNetFromONNX).

    Returns:
        Whatever ``net.forward`` returns for the unconnected output layers.
    """
    blob = cv2.dnn.blobFromImage(
        input_image,
        scalefactor=1/255,
        size=(INPUT_WIDTH, INPUT_HEIGHT),
        mean=[0, 0, 0],
        swapRB=True,
        crop=False,
    )

    # Feed the blob to the network.
    net.setInput(blob)

    # getUnconnectedOutLayersNames() provides the names of the output layers,
    # so forward() returns the output of each of them.
    output_layer_names = net.getUnconnectedOutLayersNames()
    return net.forward(output_layer_names)

In [7]:
def post_process(input_image, outputs, class_names=None):
    """Filter the raw network output, apply NMS and draw the kept detections.

    Post-process steps:
        1. Keep only rows whose confidence (column 4) is at least
           CONFIDENCE_THRESHOLD.
        2. For each kept row, take the index of the best class score
           (columns 5 onward) and discard the row unless that score is
           greater than SCORE_THRESHOLD.
        3. Convert the centre/size box (columns 0-3) to left/top/width/height
           and rescale it from the network input size to the image size.
        4. Run cv2.dnn.NMSBoxes and draw a box and label for each survivor.

    Args:
        input_image: image to draw on (modified in place).
        outputs: network outputs as returned by pre_process; only
            ``outputs[0][0]`` is read, one detection per row.
        class_names: optional sequence mapping class id to label text.
            When omitted, the module-level ``classes`` list is used, which
            keeps existing ``post_process(image, outputs)`` calls working.

    Returns:
        ``input_image`` with the detections drawn on it.
    """
    names = classes if class_names is None else class_names

    # Scale factors from network input coordinates back to image coordinates.
    image_height, image_width = input_image.shape[:2]
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    # Drop low-confidence rows with one vectorised comparison instead of
    # visiting every row in a Python loop.
    predictions = outputs[0][0]
    candidates = predictions[predictions[:, 4] >= CONFIDENCE_THRESHOLD]

    class_ids = []
    confidences = []
    boxes = []
    for row in candidates:
        classes_scores = row[5:]
        # Index of the best class score.
        class_id = int(np.argmax(classes_scores))
        if classes_scores[class_id] > SCORE_THRESHOLD:
            cx, cy, w, h = row[0], row[1], row[2], row[3]
            left = int((cx - w/2) * x_factor)
            top = int((cy - h/2) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)
            # Plain Python ints/floats are accepted by every cv2 binding version.
            boxes.append([left, top, width, height])
            confidences.append(float(row[4]))
            class_ids.append(class_id)

    # Nothing passed the thresholds: nothing to suppress or draw.
    if not boxes:
        return input_image

    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

    # Depending on the OpenCV version NMSBoxes returns a flat array, an
    # (N, 1) array, or an empty tuple; flattening handles all three.
    for i in np.array(indices).flatten():
        i = int(i)
        left, top, width, height = boxes[i]
        # Draw the bounding box.
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
        # Class label with its confidence.
        label = "{}:{:.2f}".format(names[class_ids[i]], confidences[i])
        draw_label(input_image, label, left, top)
    return input_image

In [6]:
if __name__ == '__main__':
    # Load the class names (one per line). post_process reads the
    # module-level `classes` list, so it must be assigned here.
    classesFile = 'C:/localRepository/opencv/three.names'
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')

    # Load the network before opening the camera so that a bad model path
    # does not leave the capture device open.
    modelWeights = 'C:/localRepository/opencv/best_l.onnx'
    net = cv2.dnn.readNetFromONNX(modelWeights)

    # Alternative inputs:
    # frame = cv2.imread('C:/localRepository/deepLearning/crosswalk_17_jpg.rf.10f8bfeac778e28bd47f3e53b69dcf19.jpg')
    # cap = cv2.VideoCapture('C:/localRepository/deepLearning/crosswalk.mp4')
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError('Could not open video source: 0')

    # Set to True to overlay the inference time reported by
    # net.getPerfProfile() (overall time for the last forward pass).
    show_inference_time = False

    try:
        while True:
            ret, frame = cap.read()
            # read() signals a failed grab through `ret`; stop instead of
            # passing an empty frame to the network.
            if not ret or frame is None:
                break

            detections = pre_process(frame, net)
            img = post_process(frame.copy(), detections)

            if show_inference_time:
                t, _ = net.getPerfProfile()
                label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
                cv2.putText(img, label, (20,40), FONT_FACE, FONT_SCALE, (0,0,255), THICKNESS, cv2.LINE_AA)

            cv2.imshow('Output', img)

            # A single waitKey call both refreshes the window and polls the
            # keyboard; ESC (27) exits.
            k = cv2.waitKey(30) & 0xff
            if k == 27:
                break
    finally:
        # Always free the camera and close the window, even on errors.
        cap.release()
        cv2.destroyAllWindows()

In [8]:
if __name__ == '__main__':
    # Load the class names (one per line). post_process reads the
    # module-level `classes` list, so it must be assigned here.
    classesFile = 'C:/localRepository/opencv/three.names'
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')

    # Give the weight file to the model and load the network. This happens
    # before opening the video so a bad model path does not leak the capture.
    modelWeights = 'C:/localRepository/opencv/best_m.onnx'
    net = cv2.dnn.readNetFromONNX(modelWeights)

    # Load video
    videoFile = 'C:/localRepository/opencv/normal.mp4'
    # Alternative inputs:
    # cap = cv2.VideoCapture('C:/localRepository/deepLearning/crosswalk.mp4')
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture(videoFile)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError('Could not open video source: ' + videoFile)

    # Set to True to overlay the inference time reported by
    # net.getPerfProfile() (overall time for the last forward pass).
    show_inference_time = False

    try:
        while True:
            ret, frame = cap.read()
            # At the end of the file read() reports failure; stop instead of
            # passing an empty frame to the network.
            if not ret or frame is None:
                break

            # Process image
            detections = pre_process(frame, net)
            img = post_process(frame.copy(), detections)

            if show_inference_time:
                t, _ = net.getPerfProfile()
                label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
                cv2.putText(img, label, (20,40), FONT_FACE, FONT_SCALE, (0,0,255), THICKNESS, cv2.LINE_AA)

            cv2.imshow('Output', img)

            # A single waitKey call both refreshes the window and polls the
            # keyboard; ESC (27) exits.
            k = cv2.waitKey(30) & 0xff
            if k == 27:
                break
    finally:
        # Always release the video and close the window, even on errors.
        cap.release()
        cv2.destroyAllWindows()

In [11]:
# 영상 정보 불러오기 테스트용
# cap = cv2.VideoCapture('C:\localRepository\deepLearning\crosswalk.mp4')

# width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
# height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
# count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
# fps = cap.get(cv2.CAP_PROP_FPS)

# print('가로 :', str(width))
# print('세로 :', str(height))
# print('총 프레임 수 :', str(count))
# print('FPS :', str(fps))