In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split


In [4]:
import os

def count_images_in_directory(directory):
    """Count image files in a directory tree.

    Walks ``directory`` recursively and counts only the files whose name ends
    with ``.png``, ``.jpg`` or ``.jpeg`` (compared case-insensitively, so
    ``photo.JPG`` is counted too). Non-image files that share a folder with
    images (e.g. ``.DS_Store`` or ``Thumbs.db``) are not counted.

    Args:
        directory: Path of the root directory to scan.

    Returns:
        The number of image files found; 0 if the directory is empty or does
        not exist (``os.walk`` yields nothing for a missing path).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    return sum(
        1
        for _root, _dirs, files in os.walk(directory)
        for file in files
        if file.lower().endswith(image_extensions)
    )

# Root folders of the raw dataset and of its augmented counterpart
original_base_path = '../data/Faces/'
augmented_base_path = '../data/Faces (Augmented)/'

categories = ['Angry', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

# Gather (category, raw count, augmented count) for every category up front
category_counts = [
    (
        category,
        count_images_in_directory(os.path.join(original_base_path, category)),
        count_images_in_directory(os.path.join(augmented_base_path, category)),
    )
    for category in categories
]

# Per-category report; the trailing empty string yields the blank separator line
for category, raw_images_count, augmented_images_count in category_counts:
    print(
        f"Category '{category}':",
        f"  Raw images: {raw_images_count}",
        f"  Augmented images: {augmented_images_count}",
        "",
        sep="\n",
    )

# Dataset-wide totals
total_raw_images = sum(raw for _, raw, _ in category_counts)
total_augmented_images = sum(augmented for _, _, augmented in category_counts)

print("Total number of raw images:", total_raw_images)
print("Total number of augmented images:", total_augmented_images)
print("Total number of images in dataset:", total_raw_images + total_augmented_images)


Category 'Angry':
  Raw images: 31
  Augmented images: 189

Category 'Fear':
  Raw images: 27
  Augmented images: 161

Category 'Happy':
  Raw images: 26
  Augmented images: 154

Category 'Neutral':
  Raw images: 26
  Augmented images: 154

Category 'Sad':
  Raw images: 26
  Augmented images: 154

Category 'Surprise':
  Raw images: 28
  Augmented images: 168

Total number of raw images: 164
Total number of augmented images: 980
Total number of images in dataset: 1144


In [3]:
# Input roots: each is joined with a category name to locate that category's images.
original_base_path = '../data/Faces/'
augmented_base_path = '../data/Faces (Augmented)/'
# Output root: Train/<category> and Validation/<category> folders are created under it.
output_base_path = '../data/Processed/'


# Passed to train_test_split as train_size: the fraction of each category's
# images that goes to the training split; the remainder goes to validation.
train_size = 0.8

def ensure_directory_exists(directory):
    """Create ``directory`` (and any missing parents) if it is not already there.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between "check" and "create" in which another process could
    create the directory and make ``os.makedirs`` fail.

    Args:
        directory: Path of the directory that must exist after the call.

    Raises:
        FileExistsError: If ``directory`` already exists but is not a
            directory (e.g. a regular file), since the guarantee cannot be met.
    """
    os.makedirs(directory, exist_ok=True)

def _list_images(directory):
    """Return the sorted full paths of image files directly inside ``directory``.

    Only ``.png``/``.jpg``/``.jpeg`` names are kept (case-insensitive). The
    result is sorted because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make a seeded split non-reproducible.
    Returns an empty list when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return []
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )


def process_category(category):
    """Split one category's images into Train/Validation folders by copying.

    Collects the images found under ``original_base_path/category`` and
    ``augmented_base_path/category``, splits them with ``train_test_split``
    (``train_size`` fraction for training, ``random_state=42``), and copies
    each file into ``output_base_path/Train/category`` or
    ``output_base_path/Validation/category``. Source files are left in place.

    Args:
        category: Name of the category sub-folder to process.

    NOTE(review): files are copied under their basename, so an original and an
    augmented image sharing a filename would overwrite each other in the
    output folder -- confirm the two datasets use distinct names.
    NOTE(review): original and augmented images are pooled before splitting;
    if augmented files are derived from the originals, variants of one face
    may land in both splits -- confirm this is acceptable for evaluation.
    """
    # Combined, deterministically ordered list of original + augmented images
    images = _list_images(os.path.join(original_base_path, category))
    images += _list_images(os.path.join(augmented_base_path, category))

    # Output directories for training and validation splits
    train_directory = os.path.join(output_base_path, 'Train', category)
    validation_directory = os.path.join(output_base_path, 'Validation', category)
    ensure_directory_exists(train_directory)
    ensure_directory_exists(validation_directory)

    if len(images) < 2:
        # train_test_split raises ValueError when a split would be empty;
        # with zero or one image there is nothing to split, so keep whatever
        # exists in the training set and leave validation empty.
        train_files, validation_files = images, []
    else:
        train_files, validation_files = train_test_split(
            images, train_size=train_size, random_state=42
        )

    # Copy (not move) files to their respective directories
    for file in train_files:
        shutil.copy(file, os.path.join(train_directory, os.path.basename(file)))
    for file in validation_files:
        shutil.copy(file, os.path.join(validation_directory, os.path.basename(file)))


# Category names; each one is used as the sub-folder name under the input and output roots.
categories = ['Angry', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']  
# Build the Train/Validation split for every category in turn.
for category in categories:
    process_category(category)
