In [24]:
import cv2
import csv
import os
import time
from ultralytics import YOLO
import mediapipe as mp

# Load the YOLO emotion model from the saved weights file.
emotion_model = YOLO("runs/detect/train2/weights/last.pt")

# Emotion name -> class index, following the class order in data.yaml.
emotion_mapping = {
    emotion_name: class_index
    for class_index, emotion_name in enumerate(
        (
            "Anger",
            "Contempt",
            "Disgust",
            "Fear",
            "Happy",
            "Neutral",
            "Sad",
            "Surprise",
        )
    )
}


In [25]:
def get_last_stt(csv_path):
    """Return the STT (sequence number) stored in the last data row of a CSV file.

    The first row is treated as the header. Blank rows (for example a trailing
    empty line left by a manual edit) are ignored instead of crashing the lookup.

    Args:
        csv_path: Path of the CSV file whose first column holds the STT.

    Returns:
        The integer STT of the last non-blank data row, or 0 when the file does
        not exist, is empty, or contains only a header.

    Raises:
        ValueError: If the first cell of the last data row is not an integer.
    """
    if not os.path.exists(csv_path):
        return 0

    last_stt_cell = None
    # newline="" is the mode the csv module expects for file objects.
    with open(csv_path, "r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader, None)  # Skip the header row; harmless on an empty file.
        # Stream the rows and remember only the last usable one, so the whole
        # file never has to be held in memory.
        for row in csv_reader:
            if row and row[0].strip():
                last_stt_cell = row[0]

    if last_stt_cell is None:  # No data rows found (only header or empty)
        return 0
    return int(last_stt_cell)

In [26]:
def create_directory_structure(base_path, sub_dir_name):
    """Create the ``Data/<sub_dir_name>/images`` and ``.../labels`` folders.

    Both folders are created under ``base_path``; folders that already exist
    are left untouched.

    Args:
        base_path: Directory under which the ``Data`` folder lives.
        sub_dir_name: Name of the sub-folder inside ``Data``.

    Returns:
        A ``(data_path, images_path, labels_path)`` tuple of path strings.
    """
    data_path = os.path.join(base_path, "Data")
    session_root = os.path.join(data_path, sub_dir_name)
    images_path, labels_path = (
        os.path.join(session_root, leaf) for leaf in ("images", "labels")
    )

    for folder in (images_path, labels_path):
        os.makedirs(folder, exist_ok=True)

    return data_path, images_path, labels_path

In [27]:
def realtime_camera_with_labels(video_capture, images_path, labels_path, sub_dir_name, start_stt, num_images=15, delay=1):
    """Show a live face-detection preview and periodically save frames with labels.

    Each frame read from ``video_capture`` is passed through MediaPipe face
    detection and displayed with the detected boxes drawn on it. Every
    ``delay`` seconds the current frame is saved (without the drawn boxes)
    together with a YOLO-format label file holding one line per detected face.
    The loop stops after ``num_images`` captures, when a frame cannot be read,
    or when the user presses ``q``.

    Args:
        video_capture: Object whose ``read()`` returns ``(ok, frame)``
            (e.g. ``cv2.VideoCapture``).
        images_path: Folder in which the captured ``.jpg`` files are written.
        labels_path: Folder in which the ``.txt`` label files are written.
        sub_dir_name: Name embedded in the file names and stored in each CSV row.
        start_stt: Last STT already used; new rows continue from ``start_stt + 1``.
        num_images: Number of images to capture.
        delay: Minimum number of seconds between two captures.

    Returns:
        A list of ``[stt, image_name, sub_dir_name]`` rows, one per saved image.

    Note:
        Files are numbered with the per-call capture counter (1, 2, ...), not
        with the STT, so a second call with the same ``sub_dir_name`` and
        folders overwrites the files of the first call.
    """
    mp_face_detection = mp.solutions.face_detection

    csv_data = []
    captured_count = 0
    current_stt = start_stt
    start_time = time.time()

    # The context manager releases the detector's resources when the loop ends,
    # including when it ends through an exception.
    with mp_face_detection.FaceDetection(min_detection_confidence=0.2) as face_detection:
        while captured_count < num_images:
            ret, frame = video_capture.read()
            if not ret:
                print("Error capturing video frame.")
                break

            # Convert frame to RGB for MediaPipe processing
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(rgb_frame)

            # Boxes are drawn on a copy so the saved image stays clean
            frame_with_boxes = frame.copy()
            ih, iw = frame.shape[:2]

            bounding_boxes = []  # YOLO-normalised (x_center, y_center, width, height)

            # `detections` is None when no face is found
            for detection in results.detections or []:
                bboxC = detection.location_data.relative_bounding_box
                x, y = int(bboxC.xmin * iw), int(bboxC.ymin * ih)
                w, h = int(bboxC.width * iw), int(bboxC.height * ih)

                # Draw the bounding box on the preview frame
                cv2.rectangle(frame_with_boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)

                # A detected box can extend past the frame edge (face near the
                # border); clip it so the stored YOLO coordinates stay in [0, 1].
                left, top = max(x, 0), max(y, 0)
                right, bottom = min(x + w, iw), min(y + h, ih)
                if right <= left or bottom <= top:
                    continue  # Nothing of the box lies inside the frame

                bounding_boxes.append((
                    (left + right) / 2 / iw,
                    (top + bottom) / 2 / ih,
                    (right - left) / iw,
                    (bottom - top) / ih,
                ))

            # Display the video feed with bounding boxes
            cv2.imshow("Realtime Camera", frame_with_boxes)

            # Capture image and save labels every `delay` seconds
            if time.time() - start_time >= delay:
                captured_count += 1
                current_stt += 1
                image_name = f"image_{sub_dir_name}_{captured_count}.jpg"
                image_path = os.path.join(images_path, image_name)

                # Save the original frame without bounding boxes
                cv2.imwrite(image_path, frame)

                label_name = f"image_{sub_dir_name}_{captured_count}.txt"
                label_path = os.path.join(labels_path, label_name)

                # Save bounding box data to label file; class 0 is only a
                # placeholder because no emotion recognition happens here.
                with open(label_path, "w") as label_file:
                    for (x_center, y_center, width, height) in bounding_boxes:
                        label_file.write(f"0 {x_center} {y_center} {width} {height}\n")

                csv_data.append([current_stt, image_name, sub_dir_name])
                start_time = time.time()  # Reset the timer

            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    return csv_data

In [28]:
import cv2

def _nearest_center_index(point, centers):
    """Return the index of the entry in ``centers`` closest to ``point``."""
    px, py = point
    return min(
        range(len(centers)),
        key=lambda i: (centers[i][0] - px) ** 2 + (centers[i][1] - py) ** 2,
    )


def yolo_detect_labels(frame, label_path, emotion_model, emotion_mapping):
    """Rewrite a YOLO label file so each face box carries a detected emotion class.

    The face boxes already stored in ``label_path`` keep their coordinates.
    Each one receives the class of the emotion detection whose centre is
    closest to it, so frames with several faces get one class per face rather
    than the class of the first detection for all of them. When the model
    detects nothing, the class is written as 0.

    Args:
        frame: Image passed to the model as ``source``.
        label_path: Path of the label file to read and overwrite. Each
            non-blank line must hold ``class x_center y_center width height``.
        emotion_model: Object whose ``predict(source=..., save=False)`` returns
            a sequence of results exposing ``boxes.xywhn`` and ``boxes.cls``
            (an Ultralytics YOLO model).
        emotion_mapping: Unused; kept so existing callers keep working.

    Raises:
        FileNotFoundError: If ``label_path`` does not exist.
        ValueError: If a non-blank label line does not hold five numbers.
    """
    # Read the bounding boxes from the existing label file
    existing_boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # Tolerate blank lines
            _, x, y, w, h = map(float, fields)
            existing_boxes.append((x, y, w, h))

    # Run YOLO once per image, and only when there is a box to label
    detected_centers = []
    detected_classes = []
    if existing_boxes:
        results = emotion_model.predict(source=frame, save=False)
        if len(results) > 0 and len(results[0].boxes) > 0:
            detected = results[0].boxes
            # xywhn holds normalised (x_center, y_center, width, height) rows,
            # the same coordinate convention as the label file.
            detected_centers = [(row[0], row[1]) for row in detected.xywhn.tolist()]
            detected_classes = [int(cls_value) for cls_value in detected.cls.tolist()]

    # Build the new label lines
    new_labels = []
    for x_center, y_center, width, height in existing_boxes:
        if detected_classes:
            nearest = _nearest_center_index((x_center, y_center), detected_centers)
            emotion_class = detected_classes[nearest]
        else:
            # No emotion detected: keep class 0
            emotion_class = 0
        new_labels.append(f"{emotion_class} {x_center} {y_center} {width} {height}")

    # Overwrite the old label file with the new labels
    with open(label_path, 'w') as f:
        for label in new_labels:
            f.write(label + '\n')

In [30]:
def main():
    """Capture labelled face images from the webcam and log them to ``Data/data.csv``.

    Steps:
        1. Ask for a sub-folder name, then capture images and write the face
           bounding boxes as placeholder labels.
        2. Run the emotion model on every captured image and update the class
           in its label file.
        3. Append one row per captured image to the CSV file.

    Returns early, doing nothing, when the folder name is empty or the camera
    cannot be opened.
    """
    sub_dir_name = input("Điền thư mục ảnh: ").strip()
    if not sub_dir_name:
        # An empty name would place the images directly inside the Data folder.
        print("Error: The image folder name must not be empty.")
        return

    base_path = os.getcwd()
    data_path, images_path, labels_path = create_directory_structure(base_path, sub_dir_name)
    csv_path = os.path.join(data_path, "data.csv")
    start_stt = get_last_stt(csv_path)

    video_capture = cv2.VideoCapture(0)
    if not video_capture.isOpened():
        print("Error: Unable to access the camera.")
        return

    try:
        # Step 1: capture images and save the initial bounding boxes
        print("Bước 1: Đang chụp ảnh và detect khuôn mặt...")
        csv_data = realtime_camera_with_labels(video_capture, images_path, labels_path,
                                               sub_dir_name, start_stt)
    finally:
        # The camera and preview window are only needed while capturing, so
        # free them before the slower emotion-detection step.
        video_capture.release()
        cv2.destroyAllWindows()

    # Step 2: run emotion detection and update the labels
    print("\nBước 2: Đang nhận diện cảm xúc...")
    for _stt, image_name, _sub_dir in csv_data:
        # The label file shares the image's base name. It must not be rebuilt
        # from the STT: the STT continues across runs while the file counter
        # restarts at 1, so the two differ once the CSV already has rows.
        image_path = os.path.join(images_path, image_name)
        label_path = os.path.join(labels_path, os.path.splitext(image_name)[0] + ".txt")

        # Read the image
        frame = cv2.imread(image_path)
        if frame is None:
            print(f"Không thể đọc ảnh: {image_path}")
            continue

        # Update the labels with the detected emotion
        print(f"Đang xử lý ảnh {image_name}...")
        yolo_detect_labels(frame, label_path, emotion_model, emotion_mapping)

    # Step 3: append the rows to the CSV
    print("\nBước 3: Đang lưu thông tin vào CSV...")
    # Write the header only for a new or empty file. Checking start_stt == 0
    # would add a second header to a file that so far holds only its header.
    write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    with open(csv_path, "a", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        if write_header:
            csv_writer.writerow(["stt", "file", "path"])
        csv_writer.writerows(csv_data)

    print(f"\nHoàn thành! CSV đã được lưu tại {csv_path}")

if __name__ == "__main__":
    main()

Bước 1: Đang chụp ảnh và detect khuôn mặt...

Bước 2: Đang nhận diện cảm xúc...
Đang xử lý ảnh image_123_1.jpg...

0: 480x640 1 Neutral, 59.0ms
Speed: 2.0ms preprocess, 59.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Đang xử lý ảnh image_123_2.jpg...

0: 480x640 1 Neutral, 60.0ms
Speed: 2.0ms preprocess, 60.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Đang xử lý ảnh image_123_3.jpg...

0: 480x640 1 Neutral, 94.0ms
Speed: 1.0ms preprocess, 94.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Đang xử lý ảnh image_123_4.jpg...

0: 480x640 1 Neutral, 56.0ms
Speed: 2.0ms preprocess, 56.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Đang xử lý ảnh image_123_5.jpg...

0: 480x640 1 Neutral, 58.0ms
Speed: 1.0ms preprocess, 58.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Đang xử lý ảnh image_123_6.jpg...

0: 480x640 1 Neutral, 57.0ms
Speed: 2.0ms preprocess, 57.0ms inference, 1.0ms po

In [1]:
from PIL import Image
import os

def convert_images_to_png(input_folder, output_folder):
    """Convert every readable image file in ``input_folder`` to PNG.

    Each regular file directly inside ``input_folder`` is opened with Pillow
    and saved to ``output_folder`` under the same base name with a ``.png``
    extension. Sub-folders are skipped. A file that cannot be converted is
    reported on stdout and does not stop the run.

    Args:
        input_folder: Folder containing the source images.
        output_folder: Folder receiving the PNG files; created if missing.

    Note:
        Two inputs that differ only by extension (``a.jpg`` / ``a.jpeg``) map
        to the same output name, so the one processed later wins.
    """
    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order and the printed log reproducible.
    for filename in sorted(os.listdir(input_folder)):
        input_path = os.path.join(input_folder, filename)
        if not os.path.isfile(input_path):
            continue
        try:
            with Image.open(input_path) as img:
                output_path = os.path.join(output_folder, os.path.splitext(filename)[0] + ".png")
                # The PNG writer cannot store these colour modes, so convert
                # them to RGB rather than failing on the file.
                png_ready = img.convert("RGB") if img.mode in ("CMYK", "YCbCr", "LAB", "HSV") else img
                png_ready.save(output_path, "PNG")
                print(f"Converted: {filename} -> {output_path}")
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Failed to convert {filename}: {e}")

# Change the input and output folder paths here
input_folder = "D:/Downloads/Compressed/Train/FEC_dataset/all_images"  # Folder containing the original images
output_folder = "D:/Downloads/Compressed/Train/FEC_dataset/images"  # Folder where the PNG images are saved

convert_images_to_png(input_folder, output_folder)

Converted: $_1.JPG -> D:/Downloads/Compressed/Train/FEC_dataset/images\$_1.png
Converted: %25D0%2592%25D0%25BE%25D0%25BB%25D0%25B5%25D0%25BD-%25D0%25A1%25D0%25B8%25D0%25B4%25D0%25B5%25D1%2580%25D0%25BE%25D0%25B2-2.jpg -> D:/Downloads/Compressed/Train/FEC_dataset/images\%25D0%2592%25D0%25BE%25D0%25BB%25D0%25B5%25D0%25BD-%25D0%25A1%25D0%25B8%25D0%25B4%25D0%25B5%25D1%2580%25D0%25BE%25D0%25B2-2.png
Converted: %25D0%25B1%25D1%2580%25D0%25B0%25D0%25B9%25D0%25B0%25D0%25BD.jpg -> D:/Downloads/Compressed/Train/FEC_dataset/images\%25D0%25B1%25D1%2580%25D0%25B0%25D0%25B9%25D0%25B0%25D0%25BD.png
Converted: %25D1%2582%25D0%25BE%25D1%2581%25D0%25B5.jpg -> D:/Downloads/Compressed/Train/FEC_dataset/images\%25D1%2582%25D0%25BE%25D1%2581%25D0%25B5.png
Converted: %25D8%25A8%25D8%25A7%25D9%2586%2B%25D9%2583%25D9%258A%2B%25D9%2585%25D9%2588%25D9%2586.jpg -> D:/Downloads/Compressed/Train/FEC_dataset/images\%25D8%25A8%25D8%25A7%25D9%2586%2B%25D9%2583%25D9%258A%2B%25D9%2585%25D9%2588%25D9%2586.png
Converted: 