In [1]:
import os
import shutil
import uuid
from deepface import DeepFace
from tqdm import tqdm




In [2]:
# Source dataset root.
# NOTE: edit this by hand before every run so it points at the dataset
# that should be filtered next.
DATASET_FOLDER = r"C:\Users\Tuf\Downloads\Compressed\AffectNet"

# Destination root for the categorized copies. It is reused by every run, so
# the output of successive datasets accumulates in the same place.
FILTERED_DATASET_FOLDER = "Filtered_Dataset"

# Emotion labels expected from DeepFace; each label gets its own subfolder
# under FILTERED_DATASET_FOLDER.
EMOTION_CATEGORIES = [
    "angry",
    "disgust",
    "fear",
    "happy",
    "sad",
    "surprise",
    "neutral",
]

In [3]:
# Step 1: Create the "Filtered_Dataset" folder with emotion subfolders
def initialize_filtered_dataset(base_folder=None, categories=None):
    """Create the output folder and one subfolder per emotion category.

    Safe to call repeatedly: folders that already exist are left untouched.

    Args:
        base_folder: Root folder to create. Defaults to the module-level
            ``FILTERED_DATASET_FOLDER`` (looked up at call time).
        categories: Iterable of subfolder names to create under
            ``base_folder``. Defaults to the module-level
            ``EMOTION_CATEGORIES`` (looked up at call time).

    Raises:
        OSError: If a folder cannot be created, e.g. permission denied or a
            regular file already occupies one of the paths.
    """
    if base_folder is None:
        base_folder = FILTERED_DATASET_FOLDER
    if categories is None:
        categories = EMOTION_CATEGORIES

    # exist_ok=True replaces the "check, then create" pattern, which can fail
    # if the folder appears between the check and the makedirs call.
    os.makedirs(base_folder, exist_ok=True)
    for emotion in categories:
        os.makedirs(os.path.join(base_folder, emotion), exist_ok=True)

In [4]:
# Step 2: Recursively find all image files in the dataset
def get_all_images(dataset_path, extensions=(".jpg", ".jpeg", ".png")):
    """Recursively collect image file paths under ``dataset_path``.

    Args:
        dataset_path: Root directory to search.
        extensions: File-name suffix (or iterable of suffixes) to accept.
            Matching is case-insensitive. Defaults to JPEG and PNG files.

    Returns:
        A sorted list of paths, each built as ``os.path.join(root, file)``.
        Sorting makes the processing order reproducible, because the order in
        which ``os.walk`` lists entries is not guaranteed. The list is empty
        when nothing matches or when ``dataset_path`` cannot be listed
        (``os.walk`` ignores listing errors by default).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    image_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(dataset_path)
        for file in files
        if file.lower().endswith(suffixes)
    ]
    image_paths.sort()
    return image_paths

In [5]:
# Step 3: Process images and copy them into categorized folders
def process_images(dataset_folder=None, output_folder=None):
    """Classify each image by dominant emotion and copy it to a matching folder.

    Every image found under ``dataset_folder`` is analyzed with
    ``DeepFace.analyze`` and copied (the source file is left in place) to
    ``<output_folder>/<dominant_emotion>/<prefix>_<original file name>``.

    The 8-character prefix is derived deterministically from the absolute
    source path, so rerunning after an interruption does not create duplicate
    copies: images whose copy already exists are skipped before the expensive
    analysis step.

    Args:
        dataset_folder: Root of the dataset to scan. Defaults to the
            module-level ``DATASET_FOLDER`` (looked up at call time).
        output_folder: Root of the categorized output. Defaults to the
            module-level ``FILTERED_DATASET_FOLDER`` (looked up at call time).

    Returns:
        None. Progress, per-image problems, and a final summary are printed.
    """
    if dataset_folder is None:
        dataset_folder = DATASET_FOLDER
    if output_folder is None:
        output_folder = FILTERED_DATASET_FOLDER

    image_paths = get_all_images(dataset_folder)
    print(f"Found {len(image_paths)} images in the dataset: {dataset_folder}")

    copied = already_present = skipped = failed = 0

    for img_path in tqdm(image_paths, desc="Processing Images"):
        try:
            # Stable name: the same source path always maps to the same output
            # file name, while equal base names from different folders still
            # get different prefixes.
            prefix = uuid.uuid5(uuid.NAMESPACE_URL, os.path.abspath(img_path)).hex[:8]
            unique_name = prefix + "_" + os.path.basename(img_path)

            # Resume support: if a previous run already copied this image into
            # any emotion folder, do not analyze or copy it again.
            if any(
                os.path.exists(os.path.join(output_folder, emotion, unique_name))
                for emotion in EMOTION_CATEGORIES
            ):
                already_present += 1
                continue

            # Analyze emotion with enforce_detection=False to prevent errors.
            # NOTE(review): with enforce_detection=False DeepFace appears to
            # return a result even when no face is found (the whole image is
            # analyzed), so the empty-result check below is only a safety net
            # - confirm against the installed DeepFace version.
            result = DeepFace.analyze(img_path=img_path, actions=['emotion'], enforce_detection=False)

            # NOTE(review): some older DeepFace releases reportedly return a
            # single dict instead of a list of dicts; normalize so result[0]
            # works either way.
            if isinstance(result, dict):
                result = [result]

            if not result:
                print(f"Skipping {img_path}: No face detected.")
                skipped += 1
                continue

            # Only the first analyzed face decides the category.
            dominant_emotion = result[0]['dominant_emotion'].lower()

            # Report, rather than silently drop, labels we have no folder for.
            if dominant_emotion not in EMOTION_CATEGORIES:
                print(f"Skipping {img_path}: Unrecognized emotion '{dominant_emotion}'.")
                skipped += 1
                continue

            target_folder = os.path.join(output_folder, dominant_emotion)
            # Do not depend on initialize_filtered_dataset() having run first.
            os.makedirs(target_folder, exist_ok=True)

            # Copy (not move) the image into its emotion folder.
            shutil.copy(img_path, os.path.join(target_folder, unique_name))
            copied += 1

        except Exception as e:
            # Best effort: one unreadable or problematic image must not abort
            # the whole run.
            print(f"Error processing {img_path}: {e}")
            failed += 1
            continue  # Skip the current image and proceed

    print(
        f"Summary: {copied} copied, {already_present} already present, "
        f"{skipped} skipped, {failed} failed."
    )

In [6]:
# Entry point: build the output folder tree, classify the configured dataset,
# then tell the user how to continue with the next dataset.
if __name__ == "__main__":
    initialize_filtered_dataset()
    process_images()

    for message in (
        f"Processing completed for dataset: {DATASET_FOLDER}",
        "You can now update DATASET_FOLDER to the next dataset and rerun the script.",
    ):
        print(message)

Found 41553 images in the dataset: C:\Users\Tuf\Downloads\Compressed\AffectNet


Processing Images:   1%|          | 289/41553 [01:08<2:42:02,  4.24it/s] 


KeyboardInterrupt: 