In [1]:
!pip install facenet-pytorch
!pip install torch torchvision


Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_

In [5]:
!pip uninstall pillow -y
!pip install pillow


Found existing installation: pillow 10.2.0
Uninstalling pillow-10.2.0:
  Successfully uninstalled pillow-10.2.0
Collecting pillow
  Downloading pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Downloading pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m53.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pillow
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
facenet-pytorch 2.6.0 requires Pillow<10.3.0,>=10.2.0, but you have pillow 11.1.0 which is incompatible.
mlxtend 0.23.3 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.
plotnine 0.14.4 requires matplotlib>=3.8.0, but you have matplotlib 3.7.5 which is incompatible.[0m[31m
[0mSuccessfully installed pillow-11.1.0


In [None]:
import os
import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from zipfile import ZipFile
import time

# Configuration
INPUT_DIR = "/kaggle/input/affectnet-cleaned"  # Input dataset root (contains Train and Test folders)
OUTPUT_DIR = "/kaggle/working/processed_dataset"  # Directory to save cropped and augmented images
ZIP_FILE = "/kaggle/working/processed_dataset.zip"  # Path to save ZIP file
IMG_HEIGHT, IMG_WIDTH = 331, 331
TARGET_SIZE = None  # Will be calculated based on the largest class size

# GPU Optimization
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Initialize MTCNN for face cropping
mtcnn = MTCNN(keep_all=True, device="cuda")

# Data Augmentation Generator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True
)

# Step 1: Preprocess Image (Resize Before MTCNN)
def preprocess_image(image_path, target_size=(512, 512)):
    """
    Resize the input image to a smaller size for faster MTCNN processing.

    Args:
        image_path (str): Path to the input image.
        target_size (tuple): Desired size for resizing (width, height).

    Returns:
        PIL.Image: Resized image.
    """
    img = Image.open(image_path).convert("RGB")
    img = img.resize(target_size)
    return img

# Step 2: Crop Face from Images
def crop_face(image_path):
    """
    Detect and crop the largest face in the image using MTCNN.

    Args:
        image_path (str): Path to the input image.

    Returns:
        np.array: Cropped face image resized to IMG_HEIGHT x IMG_WIDTH.
    """
    img = preprocess_image(image_path)
    img_array = np.array(img)
    boxes, _ = mtcnn.detect(img_array)

    if boxes is not None:
        # Select the largest box
        areas = [(x2 - x1) * (y2 - y1) for x1, y1, x2, y2 in boxes]
        largest_box = boxes[np.argmax(areas)].astype(int)
        x1, y1, x2, y2 = largest_box
        cropped = img_array[y1:y2, x1:x2]
        return Image.fromarray(cropped).resize((IMG_WIDTH, IMG_HEIGHT))
    return None

# Step 3: Oversample with Cropping and Augmentation
def process_and_augment(dataset_dir, output_dir, datagen):
    """
    Process and augment a dataset directory (Train or Test) while preserving structure.

    Args:
        dataset_dir (str): Path to the dataset directory.
        output_dir (str): Path to save the processed images.
        datagen (ImageDataGenerator): Data augmentation generator.

    Returns:
        None
    """
    class_dirs = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d))]

    # Analyze class distribution and calculate target size
    global TARGET_SIZE
    class_distribution = {
        class_name: len(os.listdir(os.path.join(dataset_dir, class_name)))
        for class_name in class_dirs
    }
    TARGET_SIZE = max(class_distribution.values())

    print(f"Class Distribution in {dataset_dir} Before Augmentation:", class_distribution)
    print(f"Target samples per class: {TARGET_SIZE}")

    for class_name in class_dirs:
        class_input_dir = os.path.join(dataset_dir, class_name)
        class_output_dir = os.path.join(output_dir, class_name)
        os.makedirs(class_output_dir, exist_ok=True)

        existing_images = os.listdir(class_input_dir)
        num_existing_samples = len(existing_images)
        augment_needed = TARGET_SIZE - num_existing_samples

        print(f"Processing class '{class_name}'...")

        count = 0
        start_time = time.time()

        for img_name in existing_images:
            img_path = os.path.join(class_input_dir, img_name)
            cropped_face = crop_face(img_path)

            if cropped_face is not None:
                # Save the cropped face
                cropped_path = os.path.join(class_output_dir, f"cropped_{img_name}")
                cropped_face.save(cropped_path)

                # Augment if needed
                if augment_needed > 0:
                    cropped_face_array = np.expand_dims(np.array(cropped_face), axis=0)
                    augment_count = 0
                    for batch in datagen.flow(cropped_face_array, batch_size=1, save_to_dir=class_output_dir,
                                              save_prefix="aug", save_format="jpg"):
                        augment_count += 1
                        if augment_count >= augment_needed:
                            break

            count += 1
            if count % 100 == 0:
                elapsed_time = time.time() - start_time
                print(f"Processed {count} images for class '{class_name}' in {elapsed_time:.2f} seconds...")

        print(f"Class '{class_name}' completed. Total images processed: {count}")

# Step 4: Create ZIP Archive
def create_zip(output_dir, zip_file):
    """
    Compress the processed dataset into a ZIP file.

    Args:
        output_dir (str): Path to the processed dataset directory.
        zip_file (str): Path to save the ZIP file.

    Returns:
        None
    """
    print(f"Creating ZIP file at {zip_file}...")
    with ZipFile(zip_file, 'w') as zipf:
        for root, dirs, files in os.walk(output_dir):
            for file in files:
                zipf.write(os.path.join(root, file),
                           os.path.relpath(os.path.join(root, file), output_dir))
    print(f"ZIP file created at {zip_file}")

# Main Workflow
def main():
    # Process Train and Test datasets
    for split in ["Train", "Test"]:
        input_split_dir = os.path.join(INPUT_DIR, split)
        output_split_dir = os.path.join(OUTPUT_DIR, split)
        print(f"Processing {split} dataset...")
        process_and_augment(input_split_dir, output_split_dir, datagen)

    # Create ZIP file for the processed dataset
    create_zip(OUTPUT_DIR, ZIP_FILE)

if __name__ == "__main__":
    main()


Processing Train dataset...
Class Distribution in /kaggle/input/affectnet-cleaned/Train Before Augmentation: {'surprise': 1200, 'fear': 1680, 'neutral': 1919, 'sad': 1787, 'happy': 2356, 'anger': 1470}
Target samples per class: 2356
Processing class 'surprise'...
