In [1]:
import tensorflow as tf
import numpy as np
import cv2
import mediapipe as mp
import time
import glob

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

In [2]:
emotions = {
    0: ['Angry', (0,0,255), (255,255,255)],
    1: ['Disgust', (0,102,0), (255,255,255)],
    2: ['Fear', (255,255,153), (0,51,51)],
    3: ['Happy', (153,0,153), (255,255,255)],
    4: ['Sad', (255,0,0), (255,255,255)],
    5: ['Surprise', (0,255,0), (255,255,255)],
    6: ['Neutral', (160,160,160), (255,255,255)]
}
num_classes = len(emotions)
input_shape = (48, 48, 1)
weights_1 = 'vggnet.h5'
weights_2 = 'vggnet_up.h5'

In [3]:
class VGGNet(Sequential):
    def __init__(self, input_shape, num_classes, checkpoint_path, lr=1e-3):
        super().__init__()
        self.add(Rescaling(1./255, input_shape=input_shape))
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'))
        self.add(BatchNormalization())
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Flatten())
        
        self.add(Dense(1024, activation='relu'))
        self.add(Dropout(0.5))
        self.add(Dense(256, activation='relu'))

        self.add(Dense(num_classes, activation='softmax'))

        self.compile(optimizer=Adam(learning_rate=lr),
                    loss=categorical_crossentropy,
                    metrics=['accuracy'])
        
        self.checkpoint_path = checkpoint_path

In [4]:
model_1 = VGGNet(input_shape, num_classes, weights_1)
model_1.load_weights(model_1.checkpoint_path)

model_2 = VGGNet(input_shape, num_classes, weights_2)
model_2.load_weights(model_2.checkpoint_path)

In [5]:
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

In [6]:
def detection_preprocessing(image, h_max=360):
    h, w, _ = image.shape
    if h > h_max:
        ratio = h_max / h
        w_ = int(w * ratio)
        image = cv2.resize(image, (w_,h_max))
    return image

def resize_face(face):
    x = tf.expand_dims(tf.convert_to_tensor(face), axis=2)
    return tf.image.resize(x, (48,48))

def recognition_preprocessing(faces):
    x = tf.convert_to_tensor([resize_face(f) for f in faces])
    return x

In [7]:
def inference(image):
    H, W, _ = image.shape
    
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    if results.detections:
        faces = []
        pos = []
        for detection in results.detections:
            box = detection.location_data.relative_bounding_box
            # mp_drawing.draw_detection(image, detection)

            x = int(box.xmin * W)
            y = int(box.ymin * H)
            w = int(box.width * W)
            h = int(box.height * H)

            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(x + w, W)
            y2 = min(y + h, H)

            face = image[y1:y2,x1:x2]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            faces.append(face)
            pos.append((x1, y1, x2, y2))
    
        x = recognition_preprocessing(faces)

        y_1 = model_1.predict(x)
        y_2 = model_2.predict(x)
        l = np.argmax(y_1+y_2, axis=1)

        for i in range(len(faces)):
            cv2.rectangle(image, (pos[i][0],pos[i][1]),
                            (pos[i][2],pos[i][3]), emotions[l[i]][1], 2, lineType=cv2.LINE_AA)
            
            cv2.rectangle(image, (pos[i][0],pos[i][1]-20),
                            (pos[i][2]+20,pos[i][1]), emotions[l[i]][1], -1, lineType=cv2.LINE_AA)
            
            cv2.putText(image, f'{emotions[l[i]][0]}', (pos[i][0],pos[i][1]-5),
                            0, 0.6, emotions[l[i]][2], 2, lineType=cv2.LINE_AA)
    
    return image

In [8]:
video = 'C:/Users/user/Downloads/lecture.mp4'
cap = cv2.VideoCapture(video)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
target_h = 360
target_w = int(target_h * frame_width / frame_height)
out = cv2.VideoWriter('run/out.avi',cv2.VideoWriter_fourcc('M','J','P','G'),
                      fps, (target_w,target_h))

while True:
    success, image = cap.read()
    if success:
        # image = resize_image(image)
        result = inference(image)
        out.write(result)
        cv2.imshow('Emotion Detection', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
    
cap.release()
out.release()
cv2.destroyAllWindows()



In [9]:
import cv2

def onTrackbarChange(pos, *userdata):
    # Set the video capture position based on the trackbar position
    cv2.setTrackbarPos("Position", "Emotion Detection", pos)
    cap.set(cv2.CAP_PROP_POS_FRAMES, pos)

video_path = 'C:/Users/user/Downloads/lecture.mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
target_h = 360
target_w = int(target_h * frame_width / frame_height)

out = cv2.VideoWriter('run/out.avi', cv2.VideoWriter_fourcc('M','J','P','G'), fps, (target_w, target_h))

paused = False

# Create a trackbar to control video playback
cv2.namedWindow('Emotion Detection')
cv2.createTrackbar("Position", "Emotion Detection", 0, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), onTrackbarChange)

while True:
    if not paused:
        success, image = cap.read()
        if success:
            # Assuming 'inference' returns an image
            result = inference(image)
            
            # Write the processed frame to the output video
            out.write(result)
            
            # Display the original frame (or processed frame if needed)
            cv2.imshow('Emotion Detection', image)
            
            # Update the trackbar position based on the current frame
            cv2.setTrackbarPos("Position", "Emotion Detection", int(cap.get(cv2.CAP_PROP_POS_FRAMES)))
            
            # Check for 'p' key to pause/unpause the video
            key = cv2.waitKey(1) & 0xFF
            if key == ord('p'):
                paused = not paused
            elif key == ord('q'):
                break
        else:
            break
    else:
        # If paused, just display the last frame
        cv2.imshow('Emotion Detection', image)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('p'):
            paused = not paused
        elif key == ord('q'):
            break

cap.release()
out.release()
cv2.destroyAllWindows()



In [None]:
import tensorflow as tf
import numpy as np
import cv2
import mediapipe as mp
import time
import glob

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam


emotions = {
    0: ['Angry', (0,0,255), (255,255,255)],
    1: ['Disgust', (0,102,0), (255,255,255)],
    2: ['Fear', (255,255,153), (0,51,51)],
    3: ['Happy', (153,0,153), (255,255,255)],
    4: ['Sad', (255,0,0), (255,255,255)],
    5: ['Surprise', (0,255,0), (255,255,255)],
    6: ['Neutral', (160,160,160), (255,255,255)]
}
num_classes = len(emotions)
input_shape = (48, 48, 1)
weights_1 = 'vggnet.h5'
weights_2 = 'vggnet_up.h5'


class VGGNet(Sequential):
    def __init__(self, input_shape, num_classes, checkpoint_path, lr=1e-3):
        super().__init__()
        self.add(Rescaling(1./255, input_shape=input_shape))
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'))
        self.add(BatchNormalization())
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Flatten())
        
        self.add(Dense(1024, activation='relu'))
        self.add(Dropout(0.5))
        self.add(Dense(256, activation='relu'))

        self.add(Dense(num_classes, activation='softmax'))

        self.compile(optimizer=Adam(learning_rate=lr),
                    loss=categorical_crossentropy,
                    metrics=['accuracy'])
        
        self.checkpoint_path = checkpoint_path


model_1 = VGGNet(input_shape, num_classes, weights_1)
model_1.load_weights(model_1.checkpoint_path)

model_2 = VGGNet(input_shape, num_classes, weights_2)
model_2.load_weights(model_2.checkpoint_path)

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)


def detection_preprocessing(image, h_max=360):
    h, w, _ = image.shape
    if h > h_max:
        ratio = h_max / h
        w_ = int(w * ratio)
        image = cv2.resize(image, (w_,h_max))
    return image

def resize_face(face):
    x = tf.expand_dims(tf.convert_to_tensor(face), axis=2)
    return tf.image.resize(x, (48,48))

def recognition_preprocessing(faces):
    x = tf.convert_to_tensor([resize_face(f) for f in faces])
    return x

def inference(image):
    H, W, _ = image.shape
    
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    if results.detections:
        faces = []
        pos = []
        for detection in results.detections:
            box = detection.location_data.relative_bounding_box
            # mp_drawing.draw_detection(image, detection)

            x = int(box.xmin * W)
            y = int(box.ymin * H)
            w = int(box.width * W)
            h = int(box.height * H)

            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(x + w, W)
            y2 = min(y + h, H)

            face = image[y1:y2,x1:x2]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            faces.append(face)
            pos.append((x1, y1, x2, y2))
    
        x = recognition_preprocessing(faces)

        y_1 = model_1.predict(x)
        y_2 = model_2.predict(x)
        l = np.argmax(y_1+y_2, axis=1)

        for i in range(len(faces)):
            cv2.rectangle(image, (pos[i][0],pos[i][1]),
                            (pos[i][2],pos[i][3]), emotions[l[i]][1], 2, lineType=cv2.LINE_AA)
            
            cv2.rectangle(image, (pos[i][0],pos[i][1]-20),
                            (pos[i][2]+20,pos[i][1]), emotions[l[i]][1], -1, lineType=cv2.LINE_AA)
            
            cv2.putText(image, f'{emotions[l[i]][0]}', (pos[i][0],pos[i][1]-5),
                            0, 0.6, emotions[l[i]][2], 2, lineType=cv2.LINE_AA)
    
    return image


# Video 감정분석
def onTrackbarChange(pos, *userdata):
    # 트랙바 위치를 기준으로 비디오 캡처 위치 설정
    cv2.setTrackbarPos("Position", "Emotion Detection", pos)
    cap.set(cv2.CAP_PROP_POS_FRAMES, pos)

video_path = 'C:/Users/user/Downloads/lecture.mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
target_h = 360
target_w = int(target_h * frame_width / frame_height)

out = cv2.VideoWriter('run/out.avi', cv2.VideoWriter_fourcc('M','J','P','G'), fps, (target_w, target_h))

paused = False

# 비디오 재생을 제어할 트랙바 생성
cv2.namedWindow('Emotion Detection')
cv2.createTrackbar("Position", "Emotion Detection", 0, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), onTrackbarChange)

while True:
    if not paused:
        success, image = cap.read()
        if success:
            # 'inference' 이미지 가정 추정
            result = inference(image)
            
            # 처리된 프레임을 출력 비디오에 기록
            out.write(result)
            
            # 원 프레임 표시
            cv2.imshow('Emotion Detection', image)
            
            # 현재 프레임을 기준으로 트랙바 위치 업데이트
            cv2.setTrackbarPos("Position", "Emotion Detection", int(cap.get(cv2.CAP_PROP_POS_FRAMES)))
            
            # 비디오 중지 'p' 키
            key = cv2.waitKey(1) & 0xFF
            if key == ord('p'):
                paused = not paused
            elif key == ord('q'):
                break
        else:
            break
    else:
        # 일시 중지된 경우 마지막 프레임만 표시
        cv2.imshow('Emotion Detection', image)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('p'):
            paused = not paused
        elif key == ord('q'):
            break

cap.release()
out.release()
cv2.destroyAllWindows()



In [1]:
import tensorflow as tf
import numpy as np
import cv2
import mediapipe as mp
import time
import glob

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam


emotions = {
    0: ['Angry', (0,0,255), (255,255,255)],
    1: ['Disgust', (0,102,0), (255,255,255)],
    2: ['Fear', (255,255,153), (0,51,51)],
    3: ['Happy', (153,0,153), (255,255,255)],
    4: ['Sad', (255,0,0), (255,255,255)],
    5: ['Surprise', (0,255,0), (255,255,255)],
    6: ['Neutral', (160,160,160), (255,255,255)]
}
num_classes = len(emotions)
input_shape = (48, 48, 1)
weights_1 = 'vggnet.h5'
weights_2 = 'vggnet_up.h5'


class VGGNet(Sequential):
    def __init__(self, input_shape, num_classes, checkpoint_path, lr=1e-3):
        super().__init__()
        self.add(Rescaling(1./255, input_shape=input_shape))
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'))
        self.add(BatchNormalization())
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Flatten())
        
        self.add(Dense(1024, activation='relu'))
        self.add(Dropout(0.5))
        self.add(Dense(256, activation='relu'))

        self.add(Dense(num_classes, activation='softmax'))

        self.compile(optimizer=Adam(learning_rate=lr),
                    loss=categorical_crossentropy,
                    metrics=['accuracy'])
        
        self.checkpoint_path = checkpoint_path


model_1 = VGGNet(input_shape, num_classes, weights_1)
model_1.load_weights(model_1.checkpoint_path)

model_2 = VGGNet(input_shape, num_classes, weights_2)
model_2.load_weights(model_2.checkpoint_path)

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)


def detection_preprocessing(image, h_max=360):
    h, w, _ = image.shape
    if h > h_max:
        ratio = h_max / h
        w_ = int(w * ratio)
        image = cv2.resize(image, (w_,h_max))
    return image

def resize_face(face):
    x = tf.expand_dims(tf.convert_to_tensor(face), axis=2)
    return tf.image.resize(x, (48,48))

def recognition_preprocessing(faces):
    x = tf.convert_to_tensor([resize_face(f) for f in faces])
    return x

def inference(image):
    H, W, _ = image.shape
    
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    if results.detections:
        faces = []
        pos = []
        for detection in results.detections:
            box = detection.location_data.relative_bounding_box
            # mp_drawing.draw_detection(image, detection)

            x = int(box.xmin * W)
            y = int(box.ymin * H)
            w = int(box.width * W)
            h = int(box.height * H)

            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(x + w, W)
            y2 = min(y + h, H)

            face = image[y1:y2,x1:x2]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            faces.append(face)
            pos.append((x1, y1, x2, y2))
    
        x = recognition_preprocessing(faces)

        y_1 = model_1.predict(x)
        y_2 = model_2.predict(x)
        l = np.argmax(y_1+y_2, axis=1)

        for i in range(len(faces)):
            cv2.rectangle(image, (pos[i][0],pos[i][1]),
                            (pos[i][2],pos[i][3]), emotions[l[i]][1], 2, lineType=cv2.LINE_AA)
            
            cv2.rectangle(image, (pos[i][0],pos[i][1]-20),
                            (pos[i][2]+20,pos[i][1]), emotions[l[i]][1], -1, lineType=cv2.LINE_AA)
            
            cv2.putText(image, f'{emotions[l[i]][0]}', (pos[i][0],pos[i][1]-5),
                            0, 0.6, emotions[l[i]][2], 2, lineType=cv2.LINE_AA)
    
    return image


# Video 감정분석
def onTrackbarChange(pos, *userdata):
    # 트랙바 위치를 기준으로 비디오 캡처 위치 설정
    cv2.setTrackbarPos("Position", "Emotion Detection", pos)
    cap.set(cv2.CAP_PROP_POS_FRAMES, pos)

video_path = 'C:/Users/user/Downloads/lecture.mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
target_h = 360
target_w = int(target_h * frame_width / frame_height)

out = cv2.VideoWriter('run/out.avi', cv2.VideoWriter_fourcc('M','J','P','G'), fps, (target_w, target_h))

paused = False

# 비디오 재생을 제어할 트랙바 생성
cv2.namedWindow('Emotion Detection')
cv2.createTrackbar("Position", "Emotion Detection", 0, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), onTrackbarChange)


# 감정별 카운트 초기화
emotion_counts = {emotion_label: 0 for emotion_label in emotions}


while True:
    if not paused:
        success, image = cap.read()
        if success:
            # 'inference' 이미지 가정 추정
            result = inference(image)

            # 얼굴과 감정 분석
            emotion_predictions = []  # 각 얼굴에 대한 감정 분석 결과를 리스트로 저장
            faces = []  # 얼굴 이미지를 저장하는 리스트 추가
            
            for i in range(len(faces)):
                x = recognition_preprocessing([faces[i]])
                y_1 = model_1.predict(x)
                y_2 = model_2.predict(x)
                l = np.argmax(y_1 + y_2, axis=1)
                emotion_predictions.append(l[0])

                # 각 얼굴에 대한 감정 카운트 증가
                emotion_counts[l[0]] += 1
            
            # 처리된 프레임을 출력 비디오에 기록
            out.write(result)
            
            # 원 프레임 표시
            cv2.imshow('Emotion Detection', image)
            
            # 현재 프레임을 기준으로 트랙바 위치 업데이트
            cv2.setTrackbarPos("Position", "Emotion Detection", int(cap.get(cv2.CAP_PROP_POS_FRAMES)))
            
            # 비디오 중지 'p' 키
            key = cv2.waitKey(1) & 0xFF
            if key == ord('p'):
                paused = not paused
            elif key == ord('q'):
                break
        else:
            break
    else:
        # 일시 중지된 경우 마지막 프레임만 표시
        cv2.imshow('Emotion Detection', image)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('p'):
            paused = not paused
        elif key == ord('q'):
            break


# 최종 결과 출력
print("감정 분석 결과:")
for label, count in emotion_counts.items():
    emotion_name = emotions[label][0]
    print(f"{emotion_name}: {count} 번")

cap.release()
out.release()
cv2.destroyAllWindows()

감정 분석 결과:
Angry: 0 번
Disgust: 0 번
Fear: 0 번
Happy: 0 번
Sad: 0 번
Surprise: 0 번
Neutral: 0 번


In [None]:
# ValueError: Layer count mismatch when loading weights from file. 에러 해결방법 - 모델 다시 저장
model.save_weights('fer2013_model_weights.h5')

NameError: name 'model' is not defined

In [None]:
import cv2
import mediapipe as mp
import numpy as np

# 미디어파이프 초기화
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# Face Detection 모델 초기화
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

# 감정 레이블
emotions = {
    0: 'Angry',
    1: 'Disgust',
    2: 'Fear',
    3: 'Happy',
    4: 'Sad',
    5: 'Surprise',
    6: 'Neutral'
}

# 감정 분석 함수
def analyze_emotion(face_image):
    # 여기에 감정 분석 모델을 적용하는 코드를 추가하세요.
    # 예를 들어, TensorFlow를 사용하여 감정을 분석할 수 있습니다.

    # 이 함수에서는 단순히 랜덤하게 감정을 선택하는 것으로 대체합니다.
    return emotions[np.random.randint(0, len(emotions))]

# 비디오 캡처 초기화
cap = cv2.VideoCapture('C:/Users/user/Downloads/lecture.mp4')

# 감정 결과 카운트 딕셔너리
emotion_counts = {emotion: 0 for emotion in emotions.values()}

while cap.isOpened():
    success, image = cap.read()
    if not success:
        continue

    # 미디어파이프 Face Detection 적용
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    # 감정 분석 결과 출력
    if results.detections:
        for detection in results.detections:
            bboxC = detection.location_data.relative_bounding_box
            ih, iw, _ = image.shape
            bbox = int(bboxC.xmin * iw), int(bboxC.ymin * ih), \
                   int(bboxC.width * iw), int(bboxC.height * ih)
            
            face_image = image[bbox[1]:bbox[1]+bbox[3], bbox[0]:bbox[0]+bbox[2]]

            # 감정 분석 결과
            emotion_result = analyze_emotion(face_image)

            # 결과 출력
            cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 0), 2)
            cv2.putText(image, f'Emotion: {emotion_result}', (bbox[0], bbox[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # 감정 결과 카운트 업데이트
            emotion_counts[emotion_result] += 1

    # 화면에 표시
    cv2.imshow('Emotion Analysis', image)

    # 종료 키 확인
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# 종료 시 감정 결과 카운트 출력
print("감정 결과 카운트:")
for emotion, count in emotion_counts.items():
    print(f"{emotion}: {count}")

# 종료
cap.release()
cv2.destroyAllWindows()

감정 결과 카운트:
Angry: 27
Disgust: 16
Fear: 25
Happy: 25
Sad: 12
Surprise: 17
Neutral: 12


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# 미디어파이프 초기화
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# Face Detection 모델 초기화
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

# 감정 레이블
emotions = {
    0: 'Angry',
    1: 'Disgust',
    2: 'Fear',
    3: 'Happy',
    4: 'Sad',
    5: 'Surprise',
    6: 'Neutral'
}

# 감정 분석 모델
def create_model(input_shape):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(7, activation='softmax'))
    return model

# 모델 생성
model = create_model((48, 48, 1))

# 모델 컴파일
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 모델 로드 (훈련된 가중치)
model.load_weights('fer2013_model_weights.h5')

# 비디오 캡처 초기화
cap = cv2.VideoCapture('C:/Users/user/Downloads/lecture.mp4')

while cap.isOpened():
    success, image = cap.read()
    if not success:
        break

     # 이미지 확인
    if image is None or image.size == 0:
        continue  # 이미지가 비어 있으면 다음 프레임으로 넘어갑니다.

    # 이미지 차원 확인
    if image.shape[0] == 0 or image.shape[1] == 0:
        continue  # 이미지의 높이 또는 너비가 0이면 다음 프레임으로 넘어갑니다.

    # 미디어파이프 Face Detection 적용
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    # 감정 분석 결과 출력
    if results.detections:
        for detection in results.detections:
            bboxC = detection.location_data.relative_bounding_box
            ih, iw, _ = image.shape
            bbox = int(bboxC.xmin * iw), int(bboxC.ymin * ih), \
                   int(bboxC.width * iw), int(bboxC.height * ih)
            
            face_image = cv2.cvtColor(image[bbox[1]:bbox[1]+bbox[3], bbox[0]:bbox[0]+bbox[2]], cv2.COLOR_BGR2GRAY)
            face_image = cv2.resize(face_image, (48, 48))
            face_image = np.expand_dims(face_image, axis=-1)
            face_image = np.expand_dims(face_image, axis=0)

            # 감정 예측
            predictions = model.predict(face_image)
            emotion_label = np.argmax(predictions)
            emotion_result = emotions[emotion_label]

            # 결과 출력
            cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 0), 2)
            cv2.putText(image, f'Emotion: {emotion_result}', (bbox[0], bbox[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # 화면에 표시
    cv2.imshow('Emotion Analysis', image)

    # 종료 키 확인
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# 종료
cap.release()
cv2.destroyAllWindows()



error: OpenCV(4.8.1) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


: 

In [None]:
# ValueError: Layer count mismatch when loading weights from file. 에러 해결방법 - 모델 다시 저장
model.save_weights('fer2013_model_weights.h5')

In [1]:
# 최신

import tensorflow as tf
import numpy as np
import cv2
import mediapipe as mp
import time
import glob
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam


emotions = {
    0: ['Angry', (0,0,255), (255,255,255)],
    1: ['Disgust', (0,102,0), (255,255,255)],
    2: ['Fear', (255,255,153), (0,51,51)],
    3: ['Happy', (153,0,153), (255,255,255)],
    4: ['Sad', (255,0,0), (255,255,255)],
    5: ['Surprise', (0,255,0), (255,255,255)],
    6: ['Neutral', (160,160,160), (255,255,255)]
}
num_classes = len(emotions)
input_shape = (48, 48, 1)
weights_1 = 'vggnet.h5'
weights_2 = 'vggnet_up.h5'


class VGGNet(Sequential):
    def __init__(self, input_shape, num_classes, checkpoint_path, lr=1e-3):
        super().__init__()
        self.add(Rescaling(1./255, input_shape=input_shape))
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'))
        self.add(BatchNormalization())
        self.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.5))

        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'))
        self.add(BatchNormalization())
        self.add(MaxPool2D())
        self.add(Dropout(0.4))

        self.add(Flatten())
        
        self.add(Dense(1024, activation='relu'))
        self.add(Dropout(0.5))
        self.add(Dense(256, activation='relu'))

        self.add(Dense(num_classes, activation='softmax'))

        self.compile(optimizer=Adam(learning_rate=lr),
                    loss=categorical_crossentropy,
                    metrics=['accuracy'])
        
        self.checkpoint_path = checkpoint_path


model_1 = VGGNet(input_shape, num_classes, weights_1)
model_1.load_weights(model_1.checkpoint_path)

model_2 = VGGNet(input_shape, num_classes, weights_2)
model_2.load_weights(model_2.checkpoint_path)

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)


def detection_preprocessing(image, h_max=360):
    h, w, _ = image.shape
    if h > h_max:
        ratio = h_max / h
        w_ = int(w * ratio)
        image = cv2.resize(image, (w_,h_max))
    return image

def resize_face(face):
    x = tf.expand_dims(tf.convert_to_tensor(face), axis=2)
    return tf.image.resize(x, (48,48))

def recognition_preprocessing(faces):
    x = tf.convert_to_tensor([resize_face(f) for f in faces])
    return x

def inference(image):
    H, W, _ = image.shape
    
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    faces = []  # 빈 리스트로 초기화

    if results.detections:
        pos = []
        for detection in results.detections:
            box = detection.location_data.relative_bounding_box
            x = int(box.xmin * W)
            y = int(box.ymin * H)
            w = int(box.width * W)
            h = int(box.height * H)

            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(x + w, W)
            y2 = min(y + h, H)

            face = image[y1:y2, x1:x2]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            faces.append(face)
            pos.append((x1, y1, x2, y2))
    
        x = recognition_preprocessing(faces)

        y_1 = model_1.predict(x)
        y_2 = model_2.predict(x)
        l = np.argmax(y_1 + y_2, axis=1)

        for i in range(len(faces)):
            cv2.rectangle(image, (pos[i][0], pos[i][1]),
                          (pos[i][2], pos[i][3]), emotions[l[i]][1], 2, lineType=cv2.LINE_AA)

            cv2.rectangle(image, (pos[i][0], pos[i][1]-20),
                          (pos[i][2]+20, pos[i][1]), emotions[l[i]][1], -1, lineType=cv2.LINE_AA)

            cv2.putText(image, f'{emotions[l[i]][0]}', (pos[i][0], pos[i][1]-5),
                        0, 0.6, emotions[l[i]][2], 2, lineType=cv2.LINE_AA)
    
    return image, faces


# Video 감정분석
def onTrackbarChange(pos, *userdata):
    # 트랙바 위치를 기준으로 비디오 캡처 위치 설정
    cv2.setTrackbarPos("Position", "Emotion Detection", pos)
    cap.set(cv2.CAP_PROP_POS_FRAMES, pos)

video_path = 'C:/Users/user/Downloads/test_media/01강유리수의소수표현(1)_EBS중학뉴런수학2(상).mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
target_h = 360
target_w = int(target_h * frame_width / frame_height)

out = cv2.VideoWriter('run/out.avi', cv2.VideoWriter_fourcc('M','J','P','G'), fps, (target_w, target_h))

paused = False

# 비디오 재생을 제어할 트랙바 생성
cv2.namedWindow('Emotion Detection')
cv2.createTrackbar("Position", "Emotion Detection", 0, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), onTrackbarChange)


# 감정별 카운트 초기화
emotion_counts = {emotion_label: 0 for emotion_label in emotions}


while True:
    if not paused:
        success, image = cap.read()
        if success:
            # 'inference' 이미지 가정 추정
            result, faces = inference(image)  # 'inference' 함수에서 얼굴을 가져옵니다

            # 얼굴과 감정 분석
            emotion_predictions = []  # 각 얼굴에 대한 감정 분석 결과를 리스트로 저장

            for i in range(len(faces)):
                x = recognition_preprocessing([faces[i]])
                y_1 = model_1.predict(x)
                y_2 = model_2.predict(x)
                l = np.argmax(y_1 + y_2, axis=1)
                emotion_predictions.append(l[0])

                # 각 얼굴에 대한 감정 카운트 증가
                emotion_counts[l[0]] += 1
            
            # 처리된 프레임을 출력 비디오에 기록
            out.write(result)
            
            # 원 프레임 표시
            cv2.imshow('Emotion Detection', image)
            
            # 현재 프레임을 기준으로 트랙바 위치 업데이트
            cv2.setTrackbarPos("Position", "Emotion Detection", int(cap.get(cv2.CAP_PROP_POS_FRAMES)))
            
            # 비디오 중지 'p' 키
            key = cv2.waitKey(1) & 0xFF
            if key == ord('p'):
                paused = not paused
            elif key == ord('q'):
                break
        else:
            break
    else:
        # 일시 중지된 경우 마지막 프레임만 표시
        cv2.imshow('Emotion Detection', image)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('p'):
            paused = not paused
        elif key == ord('q'):
            break


# 감정 분석 결과를 그래프로 나타내기
emotion_names = [emotions[label][0] for label in emotion_counts.keys()]
emotion_values = list(emotion_counts.values())

# 감정 색상 값을 0에서 1로 정규화
colors = [(emotions[label][1][0]/255, emotions[label][1][1]/255, emotions[label][1][2]/255) for label in emotion_counts.keys()]

plt.figure(figsize=(10, 6))
plt.bar(emotion_names, emotion_values, color=colors)
plt.xlabel('Emotion')
plt.ylabel('Count')
plt.title('Emotion Analysis Results')
plt.show()

cap.release()
out.release()
cv2.destroyAllWindows()



error: OpenCV(4.8.1) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window.cpp:766: error: (-215:Assertion failed) trackbar in function 'cv::setTrackbarPos'
