In [24]:
import os
import cv2
from pathlib import Path
from tqdm.notebook import tqdm

def process_emotion_dataset(input_dir: str, output_dir: str, team_id: str = "08", image_size: tuple = (640, 480)):
    """
    Resize every readable image of a class-per-folder dataset and save it as PNG
    using the Milestone 1 naming scheme ``C_T_N.png``.

    Each sub-directory of ``input_dir`` is treated as one class. Its upper-cased
    name becomes the output sub-directory, and the first letter of that name
    becomes the ``C`` prefix of every file written for the class. Files are
    visited in sorted order and numbered from ``0000`` upwards; only images that
    were actually written to disk consume a serial number, so the numbering
    stays contiguous and the reported count matches the files on disk.

    Args:
        input_dir (str): Path to input directory containing class folders.
        output_dir (str): Path to save processed images (created if missing).
        team_id (str): 2-digit team identifier, e.g., '01', '12'.
        image_size (tuple): Output image size (width, height), default is (640, 480).

    Raises:
        AssertionError: If ``team_id`` is not a string of exactly two digits.
    """
    # Ensure team ID is valid
    assert len(team_id) == 2 and team_id.isdigit(), "Team ID must be a 2-digit number string."

    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_root = output_dir.resolve()

    # Sorted so classes are processed in the same order on every filesystem.
    class_folders = sorted(entry for entry in input_dir.iterdir() if entry.is_dir())

    for class_folder in class_folders:
        # If the output directory lives inside the input directory, it must not
        # be mistaken for a class folder and re-processed.
        if class_folder.resolve() == output_root:
            continue

        class_name = class_folder.name.upper()
        class_output_dir = output_dir / class_name
        class_output_dir.mkdir(parents=True, exist_ok=True)

        print(f"Processing class: {class_name}")
        image_count = 0

        # Keep only regular files so the progress-bar total reflects real candidates.
        image_files = sorted(entry for entry in class_folder.iterdir() if entry.is_file())

        for file in tqdm(image_files, desc=class_name):
            # Read image; cv2.imread signals failure by returning None instead of raising
            img = cv2.imread(str(file))
            if img is None:
                print(f"⚠️ Skipping unreadable image: {file}")
                continue

            # Resize (cv2.resize expects the size as (width, height))
            img_resized = cv2.resize(img, image_size)

            # Filename: C_T_N.png (C=first letter of CLASS, T=team_id, N=4-digit number)
            serial_number = f"{image_count:04d}"
            output_filename = f"{class_name[0]}_{team_id}_{serial_number}.png"
            output_path = class_output_dir / output_filename

            # Save image; cv2.imwrite reports failure through its return value,
            # so only count (and consume a serial number for) successful writes.
            if not cv2.imwrite(str(output_path), img_resized):
                print(f"⚠️ Failed to write image: {output_path}")
                continue
            image_count += 1

        print(f"[{class_name}] ✅ Processed {image_count} images.")

    print("🎉 Dataset preprocessing complete according to Milestone 1 format.")


In [25]:
# Convert the raw class folders under ./Project into ./final_dataset,
# relying on the function's default team ID and output image size.
process_emotion_dataset(input_dir="./Project", output_dir="./final_dataset")

Processing class: ANGRY


  0%|          | 0/115 [00:00<?, ?it/s]

[ANGRY] ✅ Processed 115 images.
Processing class: HAPPY


  0%|          | 0/114 [00:00<?, ?it/s]

[HAPPY] ✅ Processed 114 images.
Processing class: NEUTRAL


  0%|          | 0/112 [00:00<?, ?it/s]

[NEUTRAL] ✅ Processed 112 images.
Processing class: SAD


  0%|          | 0/45 [00:00<?, ?it/s]

[SAD] ✅ Processed 45 images.
🎉 Dataset preprocessing complete according to Milestone 1 format.
