# Emotions Recognition

In [1]:
# Download the FP16 variants of the face-detection and emotion-recognition models.
# NOTE(review): no --output_dir is passed here, while the loading cells read the
# files from "models/<model-name>/FP16/..." — confirm the download location
# matches that path (or that the files were moved there).
!omz_downloader --name face-detection-adas-0001 --precision FP16
!omz_downloader --name emotions-recognition-retail-0003 --precision FP16

################|| Downloading face-detection-adas-0001 ||################

... 100%, 304 KB, 308 KB/s, 0 seconds passed

... 49%, 1024 KB, 546 KB/s, 1 seconds passed
... 99%, 2048 KB, 672 KB/s, 3 seconds passed
... 100%, 2056 KB, 671 KB/s, 3 seconds passed

################|| Downloading emotions-recognition-retail-0003 ||################

... 100%, 54 KB, 112 KB/s, 0 seconds passed

... 21%, 1024 KB, 532 KB/s, 1 seconds passed
... 42%, 2048 KB, 526 KB/s, 3 seconds passed
... 63%, 3072 KB, 535 KB/s, 5 seconds passed
... 84%, 4096 KB, 578 KB/s, 7 seconds passed
... 100%, 4848 KB, 623 KB/s, 7 seconds passed



In [3]:
import openvino as ov

core = ov.Core()

# The notebook was written for a machine whose second GPU is "GPU.1". On hosts
# without that device, compile_model would fail, so fall back to OpenVINO's
# automatic device selection instead.
DEVICE = "GPU.1" if "GPU.1" in core.available_devices else "AUTO"

# Face detector: read the IR file, compile it, and keep handles to its I/O ports.
model1 = core.read_model(model="models/face-detection-adas-0001/FP16/face-detection-adas-0001.xml")
face_model = core.compile_model(model=model1, device_name=DEVICE)
face_input_layer = face_model.input(0)
face_output_layer = face_model.output(0)
print("Face Input layer shape : ", face_input_layer.shape)
print("Face Output layer shape : ", face_output_layer.shape)

# Emotion classifier: same procedure.
model2 = core.read_model(model="models/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.xml")
emotion_model = core.compile_model(model=model2, device_name=DEVICE)
emotion_input_layer = emotion_model.input(0)
emotion_output_layer = emotion_model.output(0)
print("Emotion Input layer shape : ", emotion_input_layer.shape)
print("Emotion Output layer shape : ", emotion_output_layer.shape)

Face Input layer shape :  [1,3,384,672]
Face Output layer shape :  [1,1,200,7]
Emotion Input layer shape :  [1,3,64,64]
Emotion Output layer shape :  [1,5,1,1]


In [24]:
import cv2
import numpy as np
# cv2.imread returns None (it does not raise) when the file cannot be read,
# so fail early with a clear message instead of a cryptic resize error.
frame = cv2.imread("data/emotions.jpg")
if frame is None:
    raise FileNotFoundError("Could not read image: data/emotions.jpg")

# The face detector's input layer is [1,3,384,672]:
# resize to width 672 / height 384, move channels first, then add a batch axis.
resized_frame = cv2.resize(src=frame, dsize=(672, 384))
transposed_frame = resized_frame.transpose(2, 0, 1)
input_frame = np.expand_dims(transposed_frame, 0)

In [35]:
def DrawBoundingBoxes(output, frame, conf=0.5):
    """Draw a red rectangle on ``frame`` for every detection above ``conf``.

    Args:
        output: Detection result indexed as ``output[0][0]`` to obtain rows of
            ``[image_id, label, conf, x_min, y_min, x_max, y_max]``. The four
            coordinates are multiplied by the frame width/height, i.e. they
            are treated as fractions of the frame size.
        frame: Image array whose first two axes are (height, width). It is
            modified in place: the rectangles are drawn onto it.
        conf: Confidence threshold; only rows whose confidence is strictly
            greater are kept.

    Returns:
        A list with one integer array ``[xmin, ymin, xmax, ymax]`` per kept
        detection, in pixel coordinates clipped to the frame bounds so the
        values can be used directly for slicing ``frame``.
    """
    # Only the frame size is needed; no copy of the image is required.
    h, w = frame.shape[:2]

    predictions = output[0][0]
    confidence = predictions[:, 2]
    top_predictions = predictions[confidence > conf]

    scale = np.array([w, h, w, h])
    boxes = []
    for detection in top_predictions:
        box = (detection[3:7] * scale).astype("int")
        # Detections near the border can fall outside the image. A negative
        # coordinate would wrap around when used as a slice index, so clip.
        box = np.clip(box, 0, scale)
        # Hand plain Python ints to OpenCV for the drawing call.
        xmin, ymin, xmax, ymax = (int(v) for v in box)
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
        boxes.append(box)

    return boxes
def DrawText(output, frame, face_position):
    """Write the top-scoring emotion label onto ``frame``.

    Args:
        output: Classifier result; the scores are read from
            ``output[0, :, 0, 0]`` and the index of the largest one selects
            the label.
        frame: Image to draw on (modified in place).
        face_position: Indexable whose first two items give the x/y position
            at which the text is placed.
    """
    labels = dict(enumerate(("neutral", "happy", "sad", "surprise", "anger")))

    scores = output[0, :, 0, 0]
    label = labels[np.argmax(scores)]

    anchor = (face_position[0], face_position[1])
    cv2.putText(frame, label, anchor, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

# Composite the frame onto a background image
def AddBackground(frame, bg, new_h=500, right_margin=400, bottom_margin=175):
    """Paste a resized copy of ``frame`` onto a copy of ``bg``.

    The frame is scaled to ``new_h`` pixels high (keeping its aspect ratio)
    and placed so that its right edge is ``right_margin`` pixels from the
    right border of the background and its bottom edge is ``bottom_margin``
    pixels from the bottom border.

    Args:
        frame: Image to embed; its first two axes are (height, width).
        bg: Background image. It is not modified; a copy is returned, so the
            same background can be reused for later calls without residue.
        new_h: Height in pixels of the embedded frame.
        right_margin: Gap in pixels between the frame and the right border.
        bottom_margin: Gap in pixels between the frame and the bottom border.

    Returns:
        A new array with the shape of ``bg`` containing the composited image.

    Raises:
        ValueError: If the resized frame does not fit inside the background
            at the requested position.
    """
    frame_h, frame_w = frame.shape[0], frame.shape[1]
    new_w = int((new_h / frame_h) * frame_w)

    bg_h, bg_w = bg.shape[0], bg.shape[1]
    xmax = bg_w - right_margin
    ymax = bg_h - bottom_margin
    xmin = xmax - new_w
    ymin = ymax - new_h

    # Out-of-range indices would wrap or truncate the slice and end in an
    # obscure broadcasting error, so report the real cause instead.
    if new_w <= 0 or new_h <= 0 or xmin < 0 or ymin < 0 or xmax > bg_w or ymax > bg_h:
        raise ValueError(
            f"Resized frame ({new_w}x{new_h}) does not fit into the "
            f"background ({bg_w}x{bg_h}) at the requested margins"
        )

    frame_resize = cv2.resize(frame, (new_w, new_h))
    composed = bg.copy()
    composed[ymin:ymax, xmin:xmax] = frame_resize
    return composed

In [36]:
# Run the face detector on the preprocessed image and pick its output tensor,
# then draw the detected boxes on the original frame and keep their positions.
face_results = face_model([input_frame])
face_output = face_results[face_output_layer]
boxes = DrawBoundingBoxes(face_output, frame, conf=0.5)

In [37]:
# Classify the emotion of every detected face and label it on the image.
# DrawBoundingBoxes always returns a list, so an empty result simply skips the loop.
for box in boxes:
    xmin, ymin, xmax, ymax = box

    # A face touching the image border can yield negative coordinates, which
    # would wrap around as slice indices; clamp them to the image instead.
    xmin, ymin = max(xmin, 0), max(ymin, 0)
    emotion_input = frame[ymin:ymax, xmin:xmax]     # crop the face region
    if emotion_input.size == 0:
        continue                                    # cv2.resize fails on an empty crop

    # Preprocess the crop for the emotion model (input layer shape [1,3,64,64]).
    resized_image = cv2.resize(src=emotion_input, dsize=(64, 64))      # (64,64,3)
    transposed_image = resized_image.transpose(2, 0, 1)                # (3,64,64)
    input_image = np.expand_dims(transposed_image, 0)                  # (1,3,64,64)

    emotion_output = emotion_model([input_image])[emotion_output_layer]  # run emotion inference
    DrawText(emotion_output, frame, box)   # write the predicted label on the image

In [29]:
# Show the annotated image and block until any key is pressed, then close the window.
window_title = "emotion-recognition"
cv2.imshow(window_title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [38]:
background = "data/bg.jpg"
bg = cv2.imread(background)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if bg is None:
    raise FileNotFoundError(f"Could not read background image: {background}")

# Pass a copy so `bg` stays pristine for later cells, regardless of whether
# AddBackground writes into the array it receives.
deployment = AddBackground(frame, bg.copy())
cv2.imshow("emotion-recognition", deployment)

cv2.waitKey(0)
cv2.destroyAllWindows()

# 웹캠

In [40]:
import openvino as ov
import cv2
import numpy as np

core = ov.Core()

# The notebook was written for a machine whose second GPU is "GPU.1". On hosts
# without that device, compile_model would fail, so fall back to OpenVINO's
# automatic device selection instead.
DEVICE = "GPU.1" if "GPU.1" in core.available_devices else "AUTO"

# Face detector: read the IR file, compile it, and keep handles to its I/O ports.
model1 = core.read_model(model="models/face-detection-adas-0001/FP16/face-detection-adas-0001.xml")
face_model = core.compile_model(model=model1, device_name=DEVICE)
face_input_layer = face_model.input(0)
face_output_layer = face_model.output(0)
print("Face Input layer shape : ", face_input_layer.shape)
print("Face Output layer shape : ", face_output_layer.shape)

# Emotion classifier: same procedure.
model2 = core.read_model(model="models/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.xml")
emotion_model = core.compile_model(model=model2, device_name=DEVICE)
emotion_input_layer = emotion_model.input(0)
emotion_output_layer = emotion_model.output(0)
print("Emotion Input layer shape : ", emotion_input_layer.shape)
print("Emotion Output layer shape : ", emotion_output_layer.shape)

def DrawBoundingBoxes(output, frame, conf=0.5):
    """Draw a red rectangle on ``frame`` for every detection above ``conf``.

    Args:
        output: Detection result indexed as ``output[0][0]`` to obtain rows of
            ``[image_id, label, conf, x_min, y_min, x_max, y_max]``. The four
            coordinates are multiplied by the frame width/height, i.e. they
            are treated as fractions of the frame size.
        frame: Image array whose first two axes are (height, width). It is
            modified in place: the rectangles are drawn onto it.
        conf: Confidence threshold; only rows whose confidence is strictly
            greater are kept.

    Returns:
        A list with one integer array ``[xmin, ymin, xmax, ymax]`` per kept
        detection, in pixel coordinates clipped to the frame bounds so the
        values can be used directly for slicing ``frame``.
    """
    # Only the frame size is needed; no copy of the image is required.
    h, w = frame.shape[:2]

    predictions = output[0][0]
    confidence = predictions[:, 2]
    top_predictions = predictions[confidence > conf]

    scale = np.array([w, h, w, h])
    boxes = []
    for detection in top_predictions:
        box = (detection[3:7] * scale).astype("int")
        # Detections near the border can fall outside the image. A negative
        # coordinate would wrap around when used as a slice index, so clip.
        box = np.clip(box, 0, scale)
        # Hand plain Python ints to OpenCV for the drawing call.
        xmin, ymin, xmax, ymax = (int(v) for v in box)
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
        boxes.append(box)

    return boxes
    
def DrawText(output, frame, face_position):
    """Write the top-scoring emotion label onto ``frame``.

    Args:
        output: Classifier result; the scores are read from
            ``output[0, :, 0, 0]`` and the index of the largest one selects
            the label.
        frame: Image to draw on (modified in place).
        face_position: Indexable whose first two items give the x/y position
            at which the text is placed.
    """
    labels = dict(enumerate(("neutral", "happy", "sad", "surprise", "anger")))

    scores = output[0, :, 0, 0]
    label = labels[np.argmax(scores)]

    anchor = (face_position[0], face_position[1])
    cv2.putText(frame, label, anchor, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

def AddBackground(frame, bg, new_h=500, right_margin=300, bottom_margin=175):
    """Paste a resized copy of ``frame`` onto a copy of ``bg``.

    The frame is scaled to ``new_h`` pixels high (keeping its aspect ratio)
    and placed so that its right edge is ``right_margin`` pixels from the
    right border of the background and its bottom edge is ``bottom_margin``
    pixels from the bottom border.

    Args:
        frame: Image to embed; its first two axes are (height, width).
        bg: Background image. It is not modified; a copy is returned, so the
            same background can be reused for every frame without residue.
        new_h: Height in pixels of the embedded frame.
        right_margin: Gap in pixels between the frame and the right border.
        bottom_margin: Gap in pixels between the frame and the bottom border.

    Returns:
        A new array with the shape of ``bg`` containing the composited image.

    Raises:
        ValueError: If the resized frame does not fit inside the background
            at the requested position.
    """
    frame_h, frame_w = frame.shape[0], frame.shape[1]
    new_w = int((new_h / frame_h) * frame_w)

    bg_h, bg_w = bg.shape[0], bg.shape[1]
    xmax = bg_w - right_margin
    ymax = bg_h - bottom_margin
    xmin = xmax - new_w
    ymin = ymax - new_h

    # Out-of-range indices would wrap or truncate the slice and end in an
    # obscure broadcasting error, so report the real cause instead.
    if new_w <= 0 or new_h <= 0 or xmin < 0 or ymin < 0 or xmax > bg_w or ymax > bg_h:
        raise ValueError(
            f"Resized frame ({new_w}x{new_h}) does not fit into the "
            f"background ({bg_w}x{bg_h}) at the requested margins"
        )

    frame_resize = cv2.resize(frame, (new_w, new_h))
    composed = bg.copy()
    composed[ymin:ymax, xmin:xmax] = frame_resize
    return composed

# Load the background here so this cell does not depend on a `bg` variable
# left over (and possibly already drawn on) from an earlier cell.
bg = cv2.imread("data/bg.jpg")
if bg is None:
    raise FileNotFoundError("Could not read background image: data/bg.jpg")

cam = cv2.VideoCapture(0)
try:
    if not cam.isOpened():
        raise RuntimeError("Could not open webcam (device index 0)")

    # waitKey returns a negative value while no key is pressed, so any key
    # press (the spacebar included) ends the loop.
    while cv2.waitKey(33) < 0:
        ret, frame = cam.read()
        if not ret or frame is None:
            break    # camera disconnected or no frame delivered

        # Preprocess for the face detector (input layer shape [1,3,384,672]).
        resized_frame = cv2.resize(src=frame, dsize=(672, 384))
        transposed_frame = resized_frame.transpose(2, 0, 1)
        input_frame = np.expand_dims(transposed_frame, 0)

        face_output = face_model([input_frame])[face_output_layer]

        # DrawBoundingBoxes always returns a list; an empty one skips the loop.
        boxes = DrawBoundingBoxes(face_output, frame, conf=0.5)
        for box in boxes:
            xmin, ymin, xmax, ymax = box

            # A face touching the frame border can yield negative coordinates,
            # which would wrap around as slice indices; clamp them instead.
            xmin, ymin = max(xmin, 0), max(ymin, 0)
            emotion_input = frame[ymin:ymax, xmin:xmax]     # crop the face region
            if emotion_input.size == 0:
                continue                                    # cv2.resize fails on an empty crop

            # Preprocess the crop for the emotion model (input layer shape [1,3,64,64]).
            resized_image = cv2.resize(src=emotion_input, dsize=(64, 64))      # (64,64,3)
            transposed_image = resized_image.transpose(2, 0, 1)                # (3,64,64)
            input_image = np.expand_dims(transposed_image, 0)                  # (1,3,64,64)

            emotion_output = emotion_model([input_image])[emotion_output_layer]  # run emotion inference
            DrawText(emotion_output, frame, box)   # write the predicted label on the frame

        deployment = AddBackground(frame, bg)

        cv2.imshow('Press Spacebar to Exit', deployment)
finally:
    # Always free the camera and close the window, even if a frame fails.
    cam.release()
    cv2.destroyAllWindows()




Face Input layer shape :  [1,3,384,672]
Face Output layer shape :  [1,1,200,7]
Emotion Input layer shape :  [1,3,64,64]
Emotion Output layer shape :  [1,5,1,1]
