## Googlenet 영상인식 모델

In [2]:
# Opencv Deep learning Tutorial
# https://github.com/opencv/opencv/wiki/Deep-Learning-in-OpenCV

# Caffe Model Zoo : github.com/BVLC/caffe
## 모델 파일 : dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel
## 설정 파일 : github.com/BVLC/caffe/blob/master/models/bvlc_googlenet/deploy.prototxt


# ONNX Model Zoo : github.com/onnx/models
# 모델파일: https://github.com/onnx/models/tree/master/vision/classification/inception_and_googlenet/googlenet

# 클래스 이름 파일 : github.com/opencv/opencv/blob/4.1.0/samples/data/dnn/

# readNet(model, config)
# model, config

# 실행순서
# cv2.dnn.readNet(model, config)-> ret, 객체생성
# blobFromImage(image, scalefactor, size, mean, swapRB, crop) -> retval
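# scalefactor: multiplier applied to the image values
# mean: values subtracted from the channels; they are expected in (mean-R, mean-G, mean-B) order if image has BGR ordering and swapRB is true.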

In [7]:
import sys
import numpy as np
import cv2

########### GoogLeNet image classification
# Input size: 224 x 224
# Channel order: BGR
# Mean intensity values: (104, 117, 123)


########## Load the input image

# Alternative test image:
# filename = 'googlenet/apple2.png'
filename = './googlenet/fig/scooter.jpg'

# Optionally take the image path from the command line:
# if len(sys.argv) > 1:
#     filename = sys.argv[1]

img = cv2.imread(filename)

# Stop right away when the image could not be loaded.
if img is None:
    print('Image load failed!')
    sys.exit()

######### Load the network

# Caffe weights + network description
model = 'googlenet/bvlc_googlenet.caffemodel'
config = 'googlenet/deploy.prototxt'

# ONNX alternative (no separate config file):
# model = 'googlenet/googlenet-9.onnx'
# config = ''

net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Network load failed!')
    sys.exit()

########## Load the class names (one label per line)

classNames = []
with open('googlenet/classification_classes_ILSVRC2012.txt', 'rt') as f:
    label_text = f.read()
# Drop trailing newlines so the split does not yield an empty last label.
classNames = label_text.rstrip('\n').split('\n')


########### Inference
# blobFromImage(image[, scalefactor[, size[, mean[, swapRB[, crop[, ddepth]]]]]]) -> retval
# retval: numpy.ndarray with shape (N, C, H, W), dtype = numpy.float32
## N = number of images, C = channels, H = height, W = width

blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1,
    size=(224, 224),
    mean=(104, 117, 123),
    swapRB=False,
)
net.setInput(blob)
prob = net.forward()
print(prob.shape)

########### Inspect the result & show it on screen

out = prob.flatten()  # 1-D array of per-class scores
classId = out.argmax()  # index of the highest score
confidence = out[classId]
print(confidence)

# Label: "<class name> (<score as percent>%)"
text = '{} ({:4.2f}%)'.format(classNames[classId], confidence * 100)
cv2.putText(
    img,
    text,
    (10, 30),
    cv2.FONT_HERSHEY_SIMPLEX,
    0.8,
    (0, 0, 255),
    thickness=1,
    lineType=cv2.LINE_AA,
)

cv2.imshow('img', img)
cv2.waitKey()
cv2.destroyAllWindows()


(1, 1000)
0.59207493


## OpenCV DNN 얼굴검출

In [None]:
# https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector
# deploy.prototxt.txt, download-weights.py.txt, opencv_face_detector.pbtxt.text 다운로드

# Caffe    https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel

# Tensorflow  https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180220_uint8/opencv_face_detector_uint8.pb

## 참고 사이트
# https://deep-learning-study.tistory.com/299

In [14]:
import numpy as np
import sys
import cv2

# Detect faces in a single image with a Caffe SSD model loaded through
# OpenCV DNN, draw each accepted detection, and show the result until ESC.
img = cv2.imread('opencv_face_detector/fig/sunglass.png')

if img is None:
    print('image read failed')
    sys.exit()

model = './opencv_face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel'
config = './opencv_face_detector/deploy.prototxt'

face_net = cv2.dnn.readNet(model, config)

if face_net.empty():
    print('Net open failed')
    sys.exit()

# 300x300 input, per-channel mean subtraction, channels left in BGR order.
blob = cv2.dnn.blobFromImage(img, 1, (300, 300), (104, 177, 123),
                             swapRB=False)

face_net.setInput(blob)
out = face_net.forward()

# Each row of `detect` is one candidate: column 2 holds the confidence and
# columns 3..6 hold x1, y1, x2, y2.
detect = out[0, 0, :, :]
h, w = img.shape[:2]

for detection in detect:
    confidence = detection[2]

    if confidence > 0.5:
        # The box coordinates in the output matrix are normalized to 0~1,
        # so scale them back to pixel coordinates of the original image.
        x1 = int(detection[3] * w)
        y1 = int(detection[4] * h)
        x2 = int(detection[5] * w)
        y2 = int(detection[6] * h)

        cv2.rectangle(img, (x1, y1), (x2, y2),
                      (0, 0, 255))

        # The confidence is a 0~1 score; convert it to an actual percentage
        # so the '%' suffix is correct (previously printed e.g. "0.98%").
        text = f'Face: {confidence * 100:.2f}%'
        cv2.putText(img, text, (x1, y1 - 1), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (0, 0, 255), 1, cv2.LINE_AA)

cv2.imshow('image', img)

# Keep the window open until ESC (key code 27) is pressed.
while True:
    if cv2.waitKey() == 27:
        break

cv2.destroyAllWindows()

## YOLOv3를 이용한 객체 검출

In [None]:
# https://pjreddie.com/darknet/yolo/

# NMSBoxes(bboxes, scores, score_threshold, nms_threshold) -> indices
# nms_threshold: a threshold used in non-maximum suppression

# getPerfProfile() -> retval, timings
# Returns overall time for inference and timings (in ticks) for layers.

## 

In [6]:
import sys
import numpy as np
import cv2


# Model weights, network configuration and class-label files
model = 'yolo_v3/yolov3.weights'
config = 'yolo_v3/yolov3.cfg'
class_labels = 'yolo_v3/coco.names'

confThreshold = 0.5  # minimum class score for a detection to be kept
nmsThreshold = 0.4   # passed as nms_threshold to cv2.dnn.NMSBoxes

# Test image files
img_files = ['yolo_v3/dog.jpg', 'yolo_v3/person.jpg',
             'yolo_v3/sheep.jpg', 'yolo_v3/kite.jpg']

# Create the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

# Load the class names (one label per line)

classes = []
with open(class_labels, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# One random color per class, used when drawing boxes and labels
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Collect the names of the output layers

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Older OpenCV releases
# return them as an Nx1 array while newer ones return a flat array, so
# flatten first instead of indexing each entry with [0].
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layers = [layer_names[layer_id - 1] for layer_id in out_layer_ids]
# output_layers = ['yolo_82', 'yolo_94', 'yolo_106']

# Run detection on every test image and display the annotated result

for img_file in img_files:
    img = cv2.imread(img_file)

    if img is None:
        # Report unreadable files instead of skipping them silently.
        print(f'Image load failed: {img_file}')
        continue

    # Build the blob & run inference
    blob = cv2.dnn.blobFromImage(img, 1/255., (320, 320), swapRB=True)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # outs holds one ndarray per output layer; each row is one candidate box
    # with 85 values. With a 416x416 input the row counts would be
    # 13*13*3=507, 26*26*3=2028 and 52*52*3=8112.
    # NOTE(review): with the 320x320 blob used here the counts should be
    # 300, 1200 and 4800 -- confirm.

    h, w = img.shape[:2]

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # detection: 4 (bounding box) + 1 (objectness score) + 80 (class confidences)
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > confThreshold:
                # Box center & size, scaled to the original image
                cx = int(detection[0] * w)
                cy = int(detection[1] * h)
                bw = int(detection[2] * w)
                bh = int(detection[3] * h)

                # Top-left corner of the box
                sx = int(cx - bw / 2)
                sy = int(cy - bh / 2)

                boxes.append([sx, sy, bw, bh])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))

    # Non-maximum suppression
    # https://www.visiongeek.io/2018/07/yolo-object-detection-opencv-python.html
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)

    # NMSBoxes returns an Nx1 array in older OpenCV releases, a flat array in
    # newer ones, and an empty tuple when nothing survives; flattening
    # handles all three without indexing each entry with [0].
    for idx in np.asarray(indices).flatten():
        idx = int(idx)
        sx, sy, bw, bh = boxes[idx]
        label = f'{classes[class_ids[idx]]}: {confidences[idx]:.2}'
        color = colors[class_ids[idx]]
        cv2.rectangle(img, (sx, sy, bw, bh), color, 2)
        cv2.putText(img, label, (sx, sy - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

    # getPerfProfile() reports the inference time in ticks; convert to ms.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 1, cv2.LINE_AA)

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()
