In [4]:
import torch
from torchvision.io import read_image
from torchvision import models, transforms, datasets
from torchsummary import summary
import cv2
import matplotlib.pyplot as plt
import numpy as np
import requests

## OpenCV DNN 얼굴검출 : SSD

In [1]:
import numpy as np
import sys
import cv2
import pandas as pd

# Input image (alternative sample: './fig/face/sunglass.png').
img = cv2.imread('./fig/face/king_face.png')

if img is None:
    print('image read failed')
    sys.exit()

# Face detector: Caffe SSD (Single Shot MultiBox Detector) weights + network definition.
model = './opencv_face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel'
config = './opencv_face_detector/deploy.prototxt'

# TensorFlow variant of the detector:
# model = './opencv_face_detector/opencv_face_detector_uint8.pb'
# config = './opencv_face_detector/opencv_face_detector.pbtxt'

face_net = cv2.dnn.readNet(model, config)

if face_net.empty():
    print('Net open failed')
    sys.exit()

# blobFromImage(image[, scalefactor[, size[, mean[, swapRB[, crop[, ddepth]]]]]]) -> retval
# The image is resized to 300x300 and the per-channel mean (104, 177, 123) is
# subtracted; channel order is left untouched (swapRB=False).
blob = cv2.dnn.blobFromImage(img, 1, (300, 300), (104, 177, 123), swapRB=False)

face_net.setInput(blob)
out = face_net.forward()

# 2-D view of the network output: one row per candidate detection with the
# columns named in `labels`. The box columns are normalized to the 0..1 range.
detect = out[0, 0, :, :]
labels = ["img_id", "is_face", "confidence", "left", "top", "right", "bottom"]
out_df = pd.DataFrame(detect, columns=labels)
print(out_df)

h, w = img.shape[:2]

for row in detect:
    confidence = row[2]

    # Ignore candidates that do not exceed the confidence threshold.
    if not confidence > 0.15:
        continue

    # Scale the normalized corners back to pixel coordinates of the input image.
    x1, y1 = int(row[3] * w), int(row[4] * h)
    x2, y2 = int(row[5] * w), int(row[6] * h)

    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255))

    # Confidence is drawn as a percentage just above the box.
    text = f'Face: {round(confidence*100, 2)}%'
    cv2.putText(img, text, (x1, y1-1), cv2.FONT_HERSHEY_SIMPLEX,
                0.8, (0, 0, 255), 1, cv2.LINE_AA)

cv2.imshow('image', img)

# Block until ESC (key code 27) is pressed.
while cv2.waitKey() != 27:
    pass

cv2.destroyAllWindows()

     img_id  is_face  confidence      left       top     right    bottom
0       0.0      1.0    0.988467  0.825121  0.502939  0.893849  0.658860
1       0.0      1.0    0.948228  0.147494  0.511041  0.215415  0.681851
2       0.0      1.0    0.945645  0.288223  0.444901  0.359005  0.628938
3       0.0      1.0    0.919919  0.499226  0.392553  0.590081  0.570998
4       0.0      1.0    0.826739  0.642784  0.463184  0.720662  0.659801
..      ...      ...         ...       ...       ...       ...       ...
195     0.0      0.0    0.000000  0.000000  0.000000  0.000000  0.000000
196     0.0      0.0    0.000000  0.000000  0.000000  0.000000  0.000000
197     0.0      0.0    0.000000  0.000000  0.000000  0.000000  0.000000
198     0.0      0.0    0.000000  0.000000  0.000000  0.000000  0.000000
199     0.0      0.0    0.000000  0.000000  0.000000  0.000000  0.000000

[200 rows x 7 columns]


## OpenCV DNN webcam 얼굴검출

In [None]:
import sys
import numpy as np
import cv2

## Caffe model: trained weights + network definition
model = 'opencv_face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel'
config = 'opencv_face_detector/deploy.prototxt'

## TensorFlow model (alternative)
# model = 'opencv_face_detector/opencv_face_detector_uint8.pb'
# config = 'opencv_face_detector/opencv_face_detector.pbtxt'

# Load the network BEFORE opening the camera: if loading fails we exit without
# ever holding the capture device (a notebook kernel outlives the cell, so a
# camera that is opened but never released stays locked).
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print('Camera open failed!')
    cap.release()
    sys.exit()

try:
    while True:
        ret, frame = cap.read()

        # Stop when the camera delivers no more frames.
        if not ret:
            break

        blob = cv2.dnn.blobFromImage(frame, 1, (300, 300), (104, 177, 123))
        net.setInput(blob)
        out = net.forward()  # out.shape = (1, 1, 200, 7)

        # The two leading axes are unused; keep the (detections, 7) matrix.
        detect = out[0, 0, :, :]
        (h, w) = frame.shape[:2]

        for i in range(detect.shape[0]):
            confidence = detect[i, 2]

            if confidence > 0.5:
                # Box corners in the output are normalized to 0..1;
                # scale them to pixel coordinates of the current frame.
                x1 = int(detect[i, 3] * w)
                y1 = int(detect[i, 4] * h)
                x2 = int(detect[i, 5] * w)
                y2 = int(detect[i, 6] * h)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0))

                label = f'Face: {confidence:4.2f}'
                cv2.putText(frame, label, (x1, y1 - 1), cv2.FONT_HERSHEY_SIMPLEX,
                            0.8, (0, 255, 0), 1, cv2.LINE_AA)

        cv2.imshow('frame', frame)

        # ESC (key code 27) ends the preview.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Always give the camera back and close the preview window, even when the
    # loop is interrupted by an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

## Yolo v8 객체검출 with pytorch

In [11]:
# YOLO model class and the package-level settings object from ultralytics.
from ultralytics import YOLO
from ultralytics import settings
# Print the active ultralytics settings. NOTE(review): the saved output of this
# cell shows a local settings.json path, a datasets directory and a uuid;
# consider clearing the output before sharing the notebook.
print(settings)

JSONDict("C:\Users\user\AppData\Roaming\Ultralytics\settings.json"):
{
  "settings_version": "0.0.6",
  "datasets_dir": "D:\\OneDrive\\Documents\\lecture_2019\\Academy\\27_KOSA\\1th_standard\\kosa_cv_nlp_rl\\2_cv\\DL_ML_Opencv\\datasets",
  "weights_dir": "weights",
  "runs_dir": "runs",
  "uuid": "0c445b1c348ff2017d32abd479a34336cf4abf0ea76639b3010e4eeb86a6a47e",
  "sync": true,
  "api_key": "",
  "openai_api_key": "",
  "clearml": true,
  "comet": true,
  "dvc": true,
  "hub": true,
  "mlflow": true,
  "neptune": true,
  "raytune": true,
  "tensorboard": true,
  "wandb": true,
  "vscode_msg": true
}


In [5]:
## coco dataset
# Lookup table from class index (list position) to a display label.
# The detection loop indexes this list with the integer class id of each box,
# so the ORDER of the entries must not change.
classNames = [
    # 0
    "person",
    # 1-8
    "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    # 9-13
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # 14-23
    "bird", "cat", "dog", "horse", "sheep",
    "cow", "elephant", "bear", "zebra", "giraffe",
    # 24-28
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # 29-38
    "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    # 39-45
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # 46-55
    "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake",
    # 56-61
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # 62-67
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # 68-72
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # 73-79
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
# Sanity check: the table is expected to hold 80 entries.
print(len(classNames))

80


In [14]:
# YOLOv8 object detection on a video file, with a per-frame FPS overlay.
# Relies on names from earlier cells: cv2, sys, YOLO (ultralytics) and classNames.

# Create the detector explicitly in this cell. Without this, `model` is whatever
# an earlier cell left behind (a file-path string from the face-detection
# cells), and calling it fails on a fresh Restart & Run All.
# Other checkpoints that can be swapped in: "yolov10n.pt", "yolo11n.pt";
# YOLO("yolov8n.yaml") builds an untrained model from scratch.
model = YOLO("yolov8n.pt")  # load a pretrained model

CONFIDENCE_THRESHOLD = 0.6

# with open('./yolov8_pretrained/coco128.txt', 'r') as f:
#     data = f.read()
#     class_list = data.split('\n')

cap = cv2.VideoCapture("./fig/object_detection/vtest.avi")
# cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
# cap.set(cv2.CAP_PROP_FPS, 30)

if not cap.isOpened():
    print("camera open failed")
    cap.release()
    sys.exit()

tm = cv2.TickMeter()

try:
    while True:
        # Time one full iteration: frame read + inference + drawing.
        tm.reset()
        tm.start()

        ret, frame = cap.read()

        if not ret:
            print('frame read failed')
            break

        detection = model(frame, verbose=False)[0]

        # Each row is indexed as: 0-3 box corners, 4 confidence, 5 class id.
        for data in detection.boxes.data.tolist():
            confidence = data[4]
            if confidence < CONFIDENCE_THRESHOLD:
                continue

            xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
            label = int(data[5])
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            # confidence is a 0..1 fraction, so scale it before appending '%'.
            cv2.putText(frame, f'{classNames[label]} {confidence * 100:.1f}%',
                        (xmin, ymin), cv2.FONT_ITALIC, 0.5, (255, 255, 255), 1)

        # Timing, overlay and display happen once per FRAME (not once per
        # detection), so frames without any confident detection are shown too.
        tm.stop()
        elapsed_ms = tm.getTimeMilli()

        # getTimeMilli() is in milliseconds: frames per second = 1000 / ms.
        fps = f'FPS: {1000.0 / elapsed_ms:.2f}' if elapsed_ms > 0 else 'FPS: n/a'
        cv2.putText(frame, fps, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1)
        cv2.imshow('frame', frame)

        # ESC (key code 27) stops playback.
        if cv2.waitKey(20) == 27:
            break
finally:
    # Release the capture and close the window even if inference raises.
    cap.release()
    cv2.destroyAllWindows()


## Yolo v3 객체검출 with opencv dnn

In [18]:
# https://pjreddie.com/darknet/yolo/

# NMSBoxes(bboxes, scores, score_threshold, nms_threshold) -> indices
# nms_threshold: a threshold used in non-maximum suppression

# getPerfProfile() -> retval, timings
# .   @brief Returns overall time for inference and timings (in ticks) for layers.

# https://github.com/pjreddie/darknet/blob/master/data/coco.names

In [None]:
import sys
import numpy as np
import cv2

# YOLOv3 object detection with the OpenCV DNN module on a list of still images.

# Model weights, network configuration and class-name list
model = './yolo_v3_pb/yolov3.weights'
config = './yolo_v3_pb/yolov3.cfg'
class_labels = './yolo_v3_pb/coco.names'

# Test image files
img_files = ['../fig/object_detection/dog.jpg', 
             '../fig/object_detection/person.jpg', 
             '../fig/object_detection/sheep.jpg', 
             '../fig/object_detection/kite.jpg']

# img_files = ['yolo_v3/fig/peoples.jpg']

# The list is a literal and can never be None; the meaningful guard is "empty".
if not img_files:
    print('No input images given')
    sys.exit()


# Build the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

# Load the class names: one name per line, line index == class id
with open(class_labels, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# colors = np.random.uniform(0, 255, size=(len(classes), 3))
# colors = np.array([[0, 0, 255], 
#                    [255, 0, 0],
#                    [0, 255, 0],
#                    [0, 255, 255],
#                    [255, 255, 0],
#                    [255, 0, 255]])

# Names of the output layers. Asking the network for the names directly avoids
# indexing getLayerNames() with the ids from getUnconnectedOutLayers(), whose
# return shape differs between OpenCV versions.
# e.g. output_layers = ['yolo_82', 'yolo_94', 'yolo_106']
output_layers = list(net.getUnconnectedOutLayersNames())

print(output_layers)

confThreshold = 0.5
nmsThreshold = 0.4

# Run detection on every image

for img_path in img_files:
    img = cv2.imread(img_path)

    if img is None:
        # Report the unreadable file instead of skipping it silently.
        print(f'Image read failed: {img_path}')
        continue

    # Build the input blob and run inference
    blob = cv2.dnn.blobFromImage(img, 1/255., (320, 320), swapRB=True)
    # blob = cv2.dnn.blobFromImage(img, 1/255., (416, 416), swapRB=True)
    # blob = cv2.dnn.blobFromImage(img, 1/255., (608, 608), swapRB=True)

    net.setInput(blob)
    outs = net.forward(output_layers) 

    # outs is a list with one 2-D array per output layer; each row holds
    # 4 (bounding box) + 1 (objectness score) + 80 (class confidences) = 85 values.
    # Row counts noted by the original author: 13*13*3=507, 26*26*3=2028,
    # 52*52*3=8112. NOTE(review): those presumably correspond to the 416x416
    # blob; with the active 320x320 blob the counts will differ - confirm.

    h, w = img.shape[:2]

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        # Best class and its score for every row at once, so the Python loop
        # below only visits rows that pass the confidence threshold.
        scores = out[:, 5:]
        best_ids = scores.argmax(axis=1)
        best_scores = scores[np.arange(out.shape[0]), best_ids]

        for row in np.flatnonzero(best_scores > confThreshold):
            detection = out[row]

            # Box centre and size, scaled to the image size
            cx = int(detection[0] * w)
            cy = int(detection[1] * h)
            bw = int(detection[2] * w)
            bh = int(detection[3] * h)

            # Top-left corner of the box
            sx = int(cx - bw / 2)
            sy = int(cy - bh / 2)

            boxes.append([sx, sy, bw, bh])
            confidences.append(float(best_scores[row]))
            class_ids.append(int(best_ids[row]))

    # Non-maximum suppression (NMS)
    # https://deep-learning-study.tistory.com/403

    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)

    # Depending on the OpenCV version the indices come back flat or as an
    # (N, 1) array (and as an empty tuple when nothing survives); flattening
    # handles all three cases.
    for idx in np.array(indices).flatten():
        idx = int(idx)
        sx, sy, bw, bh = boxes[idx]
        label = f'{classes[class_ids[idx]]}: {confidences[idx]:.2}'
        # color = colors[class_ids[idx]]
        color = (0, 0, 255)
        cv2.rectangle(img, (sx, sy, bw, bh), color, 2)
        cv2.putText(img, label, (sx, sy - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

    # getPerfProfile() returns the overall inference time in ticks; convert to ms.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                1, (0, 0, 255), 1, cv2.LINE_AA)
    
    cv2.namedWindow('img', cv2.WINDOW_NORMAL)

    cv2.imshow('img', img)
    # Wait for any key press before moving on to the next image.
    cv2.waitKey() 

cv2.destroyAllWindows()