In [1]:
import os
import numpy as np
import cv2
import torch
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.metrics import Precision, Recall
from flask import Flask, Response, render_template

# Flask application that serves the landing page and the annotated video stream.
app = Flask(__name__)

# Local clone of the YOLOv9 repository (used by torch.hub with source='local')
# and the custom-trained weights file stored inside it.
yolo_repo_dir = '/home/nvinhhung/yolo/yolo9_face/yolov9-face-detection/yolov9'
yolo_weights_path = os.path.join(yolo_repo_dir, 'best.pt')

# Fail fast with a clear error: continuing without the weights file would only
# postpone the failure to the load call below, with a less direct message.
if not os.path.exists(yolo_weights_path):
    raise FileNotFoundError(
        "Error: YOLO weights file path does not exist: " + yolo_weights_path)

# Load the YOLOv9 model with PyTorch from the local repository clone.
yolo_model = torch.hub.load(yolo_repo_dir, 'custom', path=yolo_weights_path, source='local')

# Load the trained model for emotions (same fail-fast rule as above).
model_path = '/home/nvinhhung/best_model_2.keras'
if not os.path.exists(model_path):
    raise FileNotFoundError(
        "Error: Model file path does not exist: " + model_path)

emotion_model = load_model(model_path)
# Re-compile with the optimizer, loss, and metrics listed here.
# NOTE(review): this file only calls predict() on the model; presumably the
# compile settings mirror the training setup -- confirm whether they are needed.
emotion_model.compile(optimizer=Adamax(learning_rate=0.001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy', Precision(), Recall()])

# List of emotion labels, indexed by the argmax of the emotion model's output.
# NOTE(review): assumes this order matches the class order used in training -- confirm.
emotion_labels = ['anger', 'disgust', 'fear', 'happy', 'pain', 'sad']

# Create a video stream and perform emotion prediction
def _detect_faces(frame, conf_threshold=0.5, target_class=0):
    """Run the YOLO detector on one BGR frame and return in-bounds boxes.

    The torch.hub 'custom' model is wrapped in AutoShape. AutoShape only
    applies its preprocessing, non-max suppression, and rescaling of boxes to
    the original image when it receives an image (e.g. a numpy HWC array);
    a raw torch.Tensor input skips all of that and returns unprocessed head
    output. The frame is therefore passed as an RGB numpy array.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.
        conf_threshold: Detections must score strictly above this value.
        target_class: Class id to keep.
            NOTE(review): presumably class 0 is "face" for these custom
            weights (not COCO "person") -- confirm against the training config.

    Returns:
        List of ``(x1, y1, x2, y2)`` integer tuples, clamped to the frame and
        guaranteed to enclose a non-empty region.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = yolo_model(rgb_frame)
    # One (n, 6) tensor per input image: x1, y1, x2, y2, confidence, class.
    detections = results.xyxy[0].cpu().numpy()

    frame_h, frame_w = frame.shape[:2]
    boxes = []
    for row in detections:
        conf, class_id = row[4], row[5]
        if int(class_id) != target_class or conf <= conf_threshold:
            continue
        # Clamp to the frame: negative indices would wrap around when slicing,
        # and an empty crop would make cv2.resize raise.
        x1 = max(0, min(int(row[0]), frame_w))
        y1 = max(0, min(int(row[1]), frame_h))
        x2 = max(0, min(int(row[2]), frame_w))
        y2 = max(0, min(int(row[3]), frame_h))
        if x2 > x1 and y2 > y1:
            boxes.append((x1, y1, x2, y2))
    return boxes


def _predict_emotion(face_bgr):
    """Classify a non-empty BGR face crop and return its emotion label.

    The crop is resized to 299x299, converted to RGB, scaled to [0, 1], and
    given a batch dimension before being passed to ``emotion_model``.
    """
    face = cv2.resize(face_bgr, (299, 299))
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    face = img_to_array(face) / 255.0
    batch = np.expand_dims(face, axis=0)

    # verbose=0: avoid printing a progress bar for every face of every frame.
    prediction = emotion_model.predict(batch, verbose=0)
    return emotion_labels[int(np.argmax(prediction))]


def generate_frames():
    """Yield annotated camera frames as parts of a multipart JPEG stream.

    Opens the default camera (index 0), detects faces in each frame, predicts
    an emotion per face, draws the box and label onto the frame, and yields
    the JPEG-encoded frame wrapped in a ``--frame`` multipart chunk.

    The generator stops when the camera cannot be opened, a frame cannot be
    read, or JPEG encoding fails. The capture is always released.
    """
    cap = cv2.VideoCapture(0)  # Use the default camera
    try:
        if not cap.isOpened():
            print("Error: Could not open video capture.")
            return

        while True:
            success, frame = cap.read()
            if not success:
                print("Error: Could not read frame from video capture.")
                break

            # Classify every face before drawing anything, so annotations of
            # one face never leak into the crop of an overlapping face.
            annotations = [
                ((x1, y1, x2, y2), _predict_emotion(frame[y1:y2, x1:x2]))
                for (x1, y1, x2, y2) in _detect_faces(frame)
            ]

            for (x1, y1, x2, y2), emotion in annotations:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                # Keep the label inside the image for faces touching the top edge.
                label_origin = (x1, max(y1 - 10, 20))
                cv2.putText(frame, emotion, label_origin,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            # Convert the frame to JPEG
            ret, buffer = cv2.imencode('.jpg', frame)
            if not ret:
                print("Error: Could not encode frame to JPEG.")
                break

            jpeg_bytes = buffer.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + jpeg_bytes + b'\r\n')

    finally:
        cap.release()

@app.route('/')
def index():
    """Handle the root URL by rendering the 'index.html' template."""
    page = render_template('index.html')
    return page

@app.route('/video_feed')
def video_feed():
    """Stream the output of generate_frames() as a multipart response.

    The mimetype declares 'frame' as the part boundary, matching the
    ``--frame`` marker that generate_frames() puts in front of each JPEG.
    """
    frame_stream = generate_frames()
    return Response(
        frame_stream,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )

if __name__ == '__main__':
    # Start Flask's built-in server when this code runs as the main module.
    # Host '0.0.0.0' listens on all addresses (see the "Running on all
    # addresses" line in the server output), on port 5000.
    app.run(port=5000, host='0.0.0.0')


YOLOv5 🚀 2024-11-1 Python-3.6.9 torch-1.10.1+cu102 CPU





Fusing layers... 
yolov9-c summary: 604 layers, 50698278 parameters, 0 gradients
Adding AutoShape... 


 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on all addresses.
 * Running on http://192.168.95.177:5000/ (Press CTRL+C to quit)
192.168.96.99 - - [01/Nov/2024 15:20:11] "GET / HTTP/1.1" 200 -
192.168.96.99 - - [01/Nov/2024 15:20:14] "GET /video_feed HTTP/1.1" 200 -
