# In [1]:
import os
import shutil
import random
import csv
import re

def age_to_class(age):
    """Map an age in years to one of nine age-group class indices.

    Groups: 0 -> 0-2, 1 -> 3-9, 2 -> 10-19, 3 -> 20-29, 4 -> 30-39,
    5 -> 40-49, 6 -> 50-59, 7 -> 60-69, 8 -> 70 and above.

    Returns:
        The class index, or None when the age matches no group (negative
        ages, or non-integer values that fall between two groups, e.g. 2.5).
    """
    # Inclusive (low, high) bounds; the position in the tuple is the class index.
    bounded_groups = (
        (0, 2),
        (3, 9),
        (10, 19),
        (20, 29),
        (30, 39),
        (40, 49),
        (50, 59),
        (60, 69),
    )
    for class_index, (low, high) in enumerate(bounded_groups):
        if low <= age <= high:
            return class_index

    # The last group is open-ended: everything from 70 upwards.
    if age >= 70:
        return len(bounded_groups)
    return None

def extract_number(filename):
    """Return the first run of digits found in ``filename`` as an int.

    Returns 0 when the name contains no digits at all.
    """
    first_digits = re.search(r'\d+', filename)
    if first_digits is None:
        return 0
    return int(first_digits.group())

def _collect_labeled_images(source_dir):
    """Return ``(relative_path, class_label)`` pairs for every eligible image.

    Only sub-folders of ``source_dir`` whose name parses as an integer age
    between 0 and 90 (inclusive) are scanned; every regular file inside such
    a folder is treated as an image of that age.
    """
    labeled = []
    # Listings are sorted so the candidate pool has a stable order regardless
    # of the order the filesystem happens to return entries in.
    for folder_name in sorted(os.listdir(source_dir)):
        folder_path = os.path.join(source_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue
        try:
            age = int(folder_name)
        except ValueError:
            # Folder name is not an age; ignore it.
            continue
        class_label = age_to_class(age)
        if class_label is None or not 0 <= age <= 90:
            continue
        for file_name in sorted(os.listdir(folder_path)):
            if os.path.isfile(os.path.join(folder_path, file_name)):
                labeled.append((os.path.join(folder_name, file_name), class_label))
    return labeled


def _unique_image_name(rel_path, taken):
    """Pick a file name for ``rel_path`` that is not yet in ``taken``.

    The original base name is kept when it is free. On a clash the source
    folder name is prepended, and a numeric suffix is appended if that is
    still not unique. Names are compared case-insensitively so the result is
    also safe on case-insensitive filesystems. The chosen name is recorded
    in ``taken`` before returning.
    """
    folder_name, base_name = os.path.split(rel_path)
    candidate = base_name
    if candidate.casefold() in taken:
        candidate = folder_name + '_' + base_name
    stem, ext = os.path.splitext(candidate)
    suffix = 1
    while candidate.casefold() in taken:
        candidate = stem + '_' + str(suffix) + ext
        suffix += 1
    taken.add(candidate.casefold())
    return candidate


def _write_dataset(source_dir, target_dir, images_set):
    """Copy ``images_set`` into ``target_dir/images`` and write ``labels.csv``."""
    images_dir = os.path.join(target_dir, 'images')
    os.makedirs(images_dir, exist_ok=True)
    labels_file_path = os.path.join(target_dir, 'labels.csv')

    taken = set()
    with open(labels_file_path, 'w', newline='') as csvfile:
        labelwriter = csv.writer(csvfile)
        labelwriter.writerow(['image_name', 'class_label'])

        for rel_path, label in images_set:
            # Images from different age folders may share a base name; without
            # a unique target name the later copy would overwrite the earlier
            # one and labels.csv would contain duplicate image names.
            image_name = _unique_image_name(rel_path, taken)
            shutil.copy(os.path.join(source_dir, rel_path),
                        os.path.join(images_dir, image_name))
            labelwriter.writerow([image_name, label])


def create_datasets(source_dir, target_dir_base, n=50, seed=None):
    """Build two disjoint, randomly sampled labelled image datasets.

    Scans ``source_dir`` for sub-folders named after an integer age (0-90),
    labels every file in them with ``age_to_class(age)``, draws ``2 * n``
    images at random without replacement and splits them into
    ``<target_dir_base>/clean_dataset`` and
    ``<target_dir_base>/adversarial_dataset``. Each dataset gets an
    ``images`` directory with the copied files and a ``labels.csv`` with the
    columns ``image_name`` and ``class_label``.

    Target directories are created if missing; files already present in an
    ``images`` directory are not removed, while ``labels.csv`` is rewritten.

    Args:
        source_dir: Directory containing the age-named sub-folders.
        target_dir_base: Directory under which both datasets are created.
        n: Number of images per dataset.
        seed: Optional seed for a private random generator, making the
            selection reproducible. When None, the module-level ``random``
            state is used.

    Raises:
        ValueError: If fewer than ``2 * n`` eligible images are available.
    """
    all_images = _collect_labeled_images(source_dir)

    # Both datasets are drawn from one sample, so 2*n distinct images are needed.
    if len(all_images) < 2 * n:
        raise ValueError("Not enough images to create two datasets of size n each.")

    rng = random if seed is None else random.Random(seed)
    selected_images = rng.sample(all_images, 2 * n)

    splits = (
        ('clean_dataset', selected_images[:n]),
        ('adversarial_dataset', selected_images[n:]),
    )
    for dataset_suffix, images_set in splits:
        _write_dataset(source_dir,
                       os.path.join(target_dir_base, dataset_suffix),
                       images_set)

# Build the clean/adversarial datasets (500 images each) from the local
# face_age folder; both paths are specific to the author's machine.
source_dir = '/Users/samin/Desktop/Classes/9.60/9.60-Project/face_age'
target_dir_base = '/Users/samin/Desktop/Classes/9.60/9.60-Project/new_datasets'
create_datasets(source_dir=source_dir, target_dir_base=target_dir_base, n=500)
