In [1]:
import os

In [2]:
# Root directory of the dataset. The "Train" and "Test" sub-directories
# are resolved relative to this path further down in the notebook.
INPUT_DIR = "/kaggle/input/affectnet-fnal"

In [3]:
def analyze_dataset(dataset_dir, extensions=('.jpg', '.jpeg', '.png')) -> dict:
    """
    Count the images in each class folder and report how unbalanced the
    dataset is.

    Every immediate sub-directory of ``dataset_dir`` is treated as one class.
    Only regular files directly inside a class folder are counted (no
    recursion), and a file counts as an image when its name ends with one of
    ``extensions``. A summary is printed to stdout as a side effect.

    Args:
        dataset_dir (str): Path to the dataset directory.
        extensions (str | tuple[str, ...]): File-name suffixes that identify
            an image. Matching is case-insensitive, so '.jpg' also matches
            '.JPG'. Defaults to ('.jpg', '.jpeg', '.png').

    Returns:
        dict: A dictionary with the following keys:
              - class_counts: Number of images per class (keys sorted by
                class name).
              - total_images: Total number of images in the dataset.
              - max_class_count: Number of images in the largest class
                (0 when there are no classes).
              - augment_needed: Per class, how many extra images are needed
                to reach the size of the largest class.

    Raises:
        FileNotFoundError: If ``dataset_dir`` does not exist.
        NotADirectoryError: If ``dataset_dir`` is not a directory.
    """
    # Accept a bare string as a convenience and lower-case the suffixes once
    # so the per-file comparison below is case-insensitive.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # Sort class folders by name: directory listing order is arbitrary, and a
    # stable order keeps the printed report reproducible between runs.
    with os.scandir(dataset_dir) as entries:
        class_dirs = sorted(
            (entry for entry in entries if entry.is_dir()),
            key=lambda entry: entry.name,
        )

    class_counts = {}
    for class_dir in class_dirs:
        with os.scandir(class_dir.path) as files:
            # is_file() keeps a sub-directory named e.g. "crops.jpg" from
            # being counted as an image.
            class_counts[class_dir.name] = sum(
                1
                for item in files
                if item.is_file() and item.name.lower().endswith(suffixes)
            )

    total_images = sum(class_counts.values())
    # default=0 covers a dataset directory that contains no class folders.
    max_class_count = max(class_counts.values(), default=0)

    # Calculate augmentations needed for each class
    augment_needed = {
        class_name: max_class_count - count
        for class_name, count in class_counts.items()
    }

    # Print summary
    print("Dataset Analysis:")
    print("-----------------")
    for class_name, count in class_counts.items():
        print(f"Class '{class_name}': {count} images (Needs {augment_needed[class_name]} more to balance)")
    print(f"\nTotal images in dataset: {total_images}")
    print(f"Largest class size: {max_class_count}")
    print(f"Total augmentations needed: {sum(augment_needed.values())}")

    return {
        "class_counts": class_counts,
        "total_images": total_images,
        "max_class_count": max_class_count,
        "augment_needed": augment_needed,
    }


In [4]:
# Resolve the Train and Test split folders under the dataset root in one pass.
train_dir, test_dir = (
    os.path.join(INPUT_DIR, split_name) for split_name in ("Train", "Test")
)

# Report class balance for the Train split.
print("Analyzing Train dataset...")
train_analysis = analyze_dataset(train_dir)

# Report class balance for the Test split.
print("\nAnalyzing Test dataset...")
test_analysis = analyze_dataset(test_dir)


Analyzing Train dataset...
Dataset Analysis:
-----------------
Class 'surprise': 1709 images (Needs 621 more to balance)
Class 'fear': 1709 images (Needs 621 more to balance)
Class 'neutral': 1846 images (Needs 484 more to balance)
Class 'sad': 1709 images (Needs 621 more to balance)
Class 'happy': 2330 images (Needs 0 more to balance)
Class 'anger': 1709 images (Needs 621 more to balance)

Total images in dataset: 11012
Largest class size: 2330
Total augmentations needed: 2968

Analyzing Test dataset...
Dataset Analysis:
-----------------
Class 'surprise': 742 images (Needs 250 more to balance)
Class 'fear': 742 images (Needs 250 more to balance)
Class 'neutral': 798 images (Needs 194 more to balance)
Class 'sad': 802 images (Needs 190 more to balance)
Class 'happy': 992 images (Needs 0 more to balance)
Class 'anger': 742 images (Needs 250 more to balance)

Total images in dataset: 4818
Largest class size: 992
Total augmentations needed: 1134
