In [3]:
import pandas as pd

# Read the class table from the spreadsheet that sits one directory above the notebook.
df_class_info = pd.read_excel("../df_class_info.xlsx")

# Keep only the "class_name" column as a plain Python list; later cells index
# this list with the class id predicted by the model.
class_info = df_class_info.loc[:, "class_name"].tolist()
class_info

['no helmet', 'helmet']

모델 로드

In [1]:
import torch
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

# Weights of the fine-tuned detector used by every prediction cell below.
model_path = "runs/train/exp13/weights/best.pt"

# Device index 0 is handed to select_device; the logged output of this cell reports CUDA:0.
device = select_device(0)
model = DetectMultiBackend(model_path, device=device)

# Expose the attributes that predict() reads as notebook-level names.
stride = model.stride
names = model.names
pt = model.pt

YOLOv5  2022-3-15 torch 1.11.0+cu113 CUDA:0 (NVIDIA GeForce RTX 2080 SUPER, 8192MiB)

Fusing layers... 
Model Summary: 290 layers, 20856975 parameters, 0 gradients, 48.0 GFLOPs


In [2]:
import os
import cv2
import numpy as np
from utils.augmentations import letterbox
from utils.general import non_max_suppression

conf_thres = 0.25  # confidence threshold passed to non_max_suppression
iou_thres = 0.45  # IoU threshold passed to non_max_suppression
max_det = 300  # max_det passed to non_max_suppression


def predict(src, imgsz=640):
    """Run the loaded detector on a single image.

    Args:
        src: H x W x 3 image array in OpenCV channel order (as returned by
            ``cv2.imread`` or ``VideoCapture.read``).
        imgsz: Target size handed to ``letterbox``; an int or an
            ``(height, width)`` pair. Defaults to 640, the value that was
            previously hard-coded.

    Returns:
        tuple: ``(dst, pred)`` where ``dst`` is a copy of the letterboxed image
        (still H x W x C) whose shape callers use to map boxes back onto
        ``src``, and ``pred`` is the value returned by ``non_max_suppression``
        (callers read ``pred[0]`` rows as x1, y1, x2, y2, confidence, class).

    Raises:
        ValueError: If ``src`` is ``None`` (e.g. ``cv2.imread`` could not read
            the file).
    """
    if src is None:
        raise ValueError("predict() received None instead of an image; check that the file/frame was read")

    img = letterbox(src, imgsz, stride=stride, auto=True)[0]
    dst = img.copy()
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    # Warm the model up only on the first call. Calling model.warmup() for
    # every frame repeats the warm-up work before each real inference.
    if not getattr(predict, "_warmed_up", False):
        warm_hw = (imgsz, imgsz) if isinstance(imgsz, int) else tuple(imgsz)
        model.warmup(imgsz=(1, 3, *warm_hw))
        predict._warmed_up = True

    # Inference only: make sure autograd never records these operations.
    with torch.no_grad():
        im = torch.from_numpy(img).to(device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # add the batch dimension

        pred = model(im)
        pred = non_max_suppression(pred, conf_thres, iou_thres, None, False, max_det=max_det)

    return dst, pred

In [38]:
# Predict on still images
factory_img_path = r"E:\aihub_safety_dataset\Training_unziped\image_factory"
img_list = os.listdir(factory_img_path)
# BGR box colours indexed by class id.
colors = [
    (0, 50, 200),
    (0, 200, 50)
]
start_no = 3200  # index of the first listed file to show; five files are shown


def _letterbox_to_source(xyxy, letterbox_shape, source_shape):
    """Map an (x1, y1, x2, y2) box from the letterboxed image to source-image pixels.

    The letterboxed image is the source scaled by one uniform gain and then
    padded, so the padding has to be subtracted before dividing by the gain.
    Dividing by the letterboxed width/height alone (the previous approach)
    shifts and stretches the boxes whenever padding was added.
    Results are truncated to int and clipped to the source image bounds.
    """
    lb_h, lb_w = letterbox_shape[:2]
    src_h, src_w = source_shape[:2]
    gain = min(lb_h / src_h, lb_w / src_w)
    pad_x = (lb_w - src_w * gain) / 2
    pad_y = (lb_h - src_h * gain) / 2
    x1, y1, x2, y2 = xyxy
    return (
        int(min(max((x1 - pad_x) / gain, 0), src_w)),
        int(min(max((y1 - pad_y) / gain, 0), src_h)),
        int(min(max((x2 - pad_x) / gain, 0), src_w)),
        int(min(max((y2 - pad_y) / gain, 0), src_h)),
    )


for img_name in img_list[start_no:start_no + 5]:
    src = cv2.imread(os.path.join(factory_img_path, img_name))
    if src is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # instead of crashing inside predict().
        print("skipped unreadable file:", img_name)
        continue

    dst, pred = predict(src)
    # One transfer to Python floats for all rows: x1, y1, x2, y2, conf, class.
    for bx1, by1, bx2, by2, _conf, cls in pred[0].tolist():
        class_no = int(cls)
        label = class_info[class_no]
        x1, y1, x2, y2 = _letterbox_to_source((bx1, by1, bx2, by2), dst.shape, src.shape)

        cv2.rectangle(src, (x1, y1), (x2, y2), colors[class_no], 1)
        (text_w, text_h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)
        # Filled label background; the "- 20" geometry is kept from the original cell.
        cv2.rectangle(src, (x1, y1 - text_h), (x1 + text_w, y1 + text_h - 20), colors[class_no], -1)
        cv2.putText(src, label, (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255))

    cv2.imshow("result", src)
    cv2.waitKey()  # wait for a key press before showing the next image
    cv2.destroyAllWindows()

In [8]:
# Predict from the live camera
test_video_folder = r"C:\Users\poscohrd\Desktop\yolo_transfer_learning\test_video"

# BGR box colours indexed by class id (0 = "no helmet", 1 = "helmet").
colors = [
    (0, 50, 200),
    (0, 200, 50)
]
class0_conf_thres = 0.8  # "no helmet" boxes below this confidence are dropped
area_thres = 0.2  # boxes covering more than this fraction of the frame are dropped
fallback_fps = 30.0  # used when the camera does not report a usable frame rate


def _letterbox_to_source(xyxy, letterbox_shape, source_shape):
    """Map an (x1, y1, x2, y2) box from the letterboxed image to source-image pixels.

    The letterboxed image is the source scaled by one uniform gain and then
    padded, so the padding has to be subtracted before dividing by the gain.
    Dividing by the letterboxed width/height alone (the previous approach)
    shifts and stretches the boxes whenever padding was added.
    Results are truncated to int and clipped to the source image bounds.
    """
    lb_h, lb_w = letterbox_shape[:2]
    src_h, src_w = source_shape[:2]
    gain = min(lb_h / src_h, lb_w / src_w)
    pad_x = (lb_w - src_w * gain) / 2
    pad_y = (lb_h - src_h * gain) / 2
    x1, y1, x2, y2 = xyxy
    return (
        int(min(max((x1 - pad_x) / gain, 0), src_w)),
        int(min(max((y1 - pad_y) / gain, 0), src_h)),
        int(min(max((x2 - pad_x) / gain, 0), src_w)),
        int(min(max((y2 - pad_y) / gain, 0), src_h)),
    )


def _collect_detections(det_rows, letterbox_shape, frame_shape):
    """Turn detection rows (x1, y1, x2, y2, conf, class) into dicts in frame coordinates."""
    detections = []
    for bx1, by1, bx2, by2, conf, cls in det_rows:
        x1, y1, x2, y2 = _letterbox_to_source((bx1, by1, bx2, by2), letterbox_shape, frame_shape)
        class_no = int(cls)
        detections.append({
            "class_no": class_no,
            "label_str": "{} : {:.2f}".format(class_info[class_no], conf),
            "x1": x1,
            "y1": y1,
            "x2": x2,
            "y2": y2,
            "conf": conf,
        })
    return detections


def _draw_detections(frame, detections, conf_floor, max_area_ratio, palette, box_thickness):
    """Filter the detections and draw the surviving boxes and labels onto ``frame`` in place."""
    # Two boxes count as "the same head" when their top-left corners are
    # closer than 5% of the frame height on both axes.
    corner_tol = frame.shape[0] * 0.05
    frame_area = frame.shape[0] * frame.shape[1]
    helmet_boxes = [d for d in detections if d["class_no"] == 1]

    for bbox in detections:
        is_no_helmet = bbox["class_no"] == 0

        # Drop low-confidence "no helmet" boxes.
        if is_no_helmet and bbox["conf"] < conf_floor:
            continue

        # Drop a "no helmet" box that coincides with a "helmet" box.
        if is_no_helmet and any(
            abs(bbox["x1"] - hb["x1"]) < corner_tol and abs(bbox["y1"] - hb["y1"]) < corner_tol
            for hb in helmet_boxes
        ):
            continue

        # Drop boxes that cover too much of the frame.
        area = (bbox["x2"] - bbox["x1"]) * (bbox["y2"] - bbox["y1"])
        if area / frame_area > max_area_ratio:
            cv2.putText(frame, "too close", (30, 30), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 0, 0))
            continue

        color = palette[bbox["class_no"]]
        x1, y1 = bbox["x1"], bbox["y1"]
        cv2.rectangle(frame, (x1, y1), (bbox["x2"], bbox["y2"]), color, box_thickness)

        (text_w, text_h), _ = cv2.getTextSize(bbox["label_str"], cv2.FONT_HERSHEY_DUPLEX, 1, 1)
        cv2.rectangle(frame, (x1, y1), (x1 + text_w, y1 + text_h + 4), color, -1)
        cv2.putText(frame, bbox["label_str"], (x1, y1 + text_h), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255))


cap = cv2.VideoCapture(0)
out = None
# try/finally guarantees the camera and the writer are released even when the
# loop is interrupted or predict() raises; otherwise the device stays locked.
try:
    if not cap.isOpened():
        raise RuntimeError("could not open camera 0")

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920.0)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080.0)
    print(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    print(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    w = round(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # frame width in pixels
    h = round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # frame height in pixels
    fps = cap.get(cv2.CAP_PROP_FPS)  # frames per second
    if not fps > 0:
        # Cameras may report 0 (or NaN); a VideoWriter needs a positive rate.
        fps = fallback_fps
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(os.path.join(test_video_folder, "camera_result.mp4"), fourcc, fps, (w, h))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        dst, pred = predict(frame)
        # One transfer to Python floats for all rows of the first (only) image.
        result_list = _collect_detections(pred[0].tolist(), dst.shape, frame.shape)
        _draw_detections(frame, result_list, class0_conf_thres, area_thres, colors, 4)

        cv2.imshow("result", frame)
        out.write(frame)
        if cv2.waitKey(1) == 27:  # ESC stops the capture
            break
finally:
    if out is not None:
        out.release()
    cap.release()
    cv2.destroyAllWindows()

1920.0
1080.0


In [11]:
# Release the capture handle held in `cap`.
# NOTE(review): presumably kept as a manual cleanup for when the camera cell is
# interrupted before reaching its own cap.release() — confirm before removing.
cap.release()

In [5]:
# Predict on video files
test_video_folder = r"C:\Users\poscohrd\Desktop\yolo_transfer_learning\test_video"

# BGR box colours indexed by class id (0 = "no helmet", 1 = "helmet").
colors = [
    (0, 50, 200),
    (0, 200, 50)
]
class0_conf_thres = 0.8  # "no helmet" boxes below this confidence are dropped
area_thres = 0.3  # boxes covering more than this fraction of the frame are dropped
result_suffix = "_result"  # appended to the input file stem to name the output
max_videos = 7  # number of input videos processed per run
fallback_fps = 30.0  # used when the container does not report a usable frame rate

# Results are written into the same folder, so leave previously generated
# "*_result" files out of the inputs; sort for a reproducible selection.
video_file_list = sorted(
    name for name in os.listdir(test_video_folder)
    if not os.path.splitext(name)[0].endswith(result_suffix)
)


def _letterbox_to_source(xyxy, letterbox_shape, source_shape):
    """Map an (x1, y1, x2, y2) box from the letterboxed image to source-image pixels.

    The letterboxed image is the source scaled by one uniform gain and then
    padded, so the padding has to be subtracted before dividing by the gain.
    Dividing by the letterboxed width/height alone (the previous approach)
    shifts and stretches the boxes whenever padding was added.
    Results are truncated to int and clipped to the source image bounds.
    """
    lb_h, lb_w = letterbox_shape[:2]
    src_h, src_w = source_shape[:2]
    gain = min(lb_h / src_h, lb_w / src_w)
    pad_x = (lb_w - src_w * gain) / 2
    pad_y = (lb_h - src_h * gain) / 2
    x1, y1, x2, y2 = xyxy
    return (
        int(min(max((x1 - pad_x) / gain, 0), src_w)),
        int(min(max((y1 - pad_y) / gain, 0), src_h)),
        int(min(max((x2 - pad_x) / gain, 0), src_w)),
        int(min(max((y2 - pad_y) / gain, 0), src_h)),
    )


def _collect_detections(det_rows, letterbox_shape, frame_shape):
    """Turn detection rows (x1, y1, x2, y2, conf, class) into dicts in frame coordinates."""
    detections = []
    for bx1, by1, bx2, by2, conf, cls in det_rows:
        x1, y1, x2, y2 = _letterbox_to_source((bx1, by1, bx2, by2), letterbox_shape, frame_shape)
        class_no = int(cls)
        detections.append({
            "class_no": class_no,
            "label_str": "{} : {:.2f}".format(class_info[class_no], conf),
            "x1": x1,
            "y1": y1,
            "x2": x2,
            "y2": y2,
            "conf": conf,
        })
    return detections


def _draw_detections(frame, detections, conf_floor, max_area_ratio, palette, box_thickness):
    """Filter the detections and draw the surviving boxes and labels onto ``frame`` in place."""
    # Two boxes count as "the same head" when their top-left corners are
    # closer than 5% of the frame height on both axes.
    corner_tol = frame.shape[0] * 0.05
    frame_area = frame.shape[0] * frame.shape[1]
    helmet_boxes = [d for d in detections if d["class_no"] == 1]

    for bbox in detections:
        is_no_helmet = bbox["class_no"] == 0

        # Drop low-confidence "no helmet" boxes.
        if is_no_helmet and bbox["conf"] < conf_floor:
            continue

        # Drop a "no helmet" box that coincides with a "helmet" box.
        if is_no_helmet and any(
            abs(bbox["x1"] - hb["x1"]) < corner_tol and abs(bbox["y1"] - hb["y1"]) < corner_tol
            for hb in helmet_boxes
        ):
            continue

        # Drop boxes that cover too much of the frame.
        area = (bbox["x2"] - bbox["x1"]) * (bbox["y2"] - bbox["y1"])
        if area / frame_area > max_area_ratio:
            cv2.putText(frame, "too close", (30, 30), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 0, 0))
            continue

        # Boxes are mapped through the letterbox padding, so the former
        # hand-tuned "-20" pixel vertical correction is no longer applied.
        color = palette[bbox["class_no"]]
        x1, y1 = bbox["x1"], bbox["y1"]
        cv2.rectangle(frame, (x1, y1), (bbox["x2"], bbox["y2"]), color, box_thickness)

        (text_w, text_h), _ = cv2.getTextSize(bbox["label_str"], cv2.FONT_HERSHEY_DUPLEX, 1, 1)
        cv2.rectangle(frame, (x1, y1), (x1 + text_w, y1 + text_h + 4), color, -1)
        cv2.putText(frame, bbox["label_str"], (x1, y1 + text_h), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255))


for video in video_file_list[:max_videos]:
    cap = cv2.VideoCapture(os.path.join(test_video_folder, video))
    out = None
    # try/finally releases the reader/writer even if a frame fails mid-way.
    try:
        if not cap.isOpened():
            # Not a decodable video (or a stray non-video file): skip it
            # instead of creating a 0x0 output file.
            print("skipped unreadable video:", video)
            continue

        w = round(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # frame width in pixels
        h = round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # frame height in pixels
        fps = cap.get(cv2.CAP_PROP_FPS)  # frames per second
        if not fps > 0:
            fps = fallback_fps

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # splitext keeps dotted file names intact ("a.b.mp4" -> "a.b_result.mp4").
        result_name = os.path.splitext(video)[0] + result_suffix + ".mp4"
        out = cv2.VideoWriter(os.path.join(test_video_folder, result_name), fourcc, fps, (w, h))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            dst, pred = predict(frame)
            # One transfer to Python floats for all rows of the first (only) image.
            result_list = _collect_detections(pred[0].tolist(), dst.shape, frame.shape)
            _draw_detections(frame, result_list, class0_conf_thres, area_thres, colors, 2)

            cv2.imshow("result", frame)
            out.write(frame)
            if cv2.waitKey(1) == 27:  # ESC skips to the next video
                break
    finally:
        if out is not None:
            out.release()
        cap.release()
        cv2.destroyAllWindows()