In [33]:
import os
import shutil
import random
from PIL import Image
import torch
from torchvision import transforms
from tqdm import tqdm

# Source dataset root and destination root for the preprocessed copy
base_dir = "../DATA_PREPARE_ATT_02/AffectNet"  # Replace with your dataset directory
output_dir = "AffPreProcessed"

# Preprocessing pipeline: resize to `resolution`, reduce to a single
# grayscale channel, then convert the image to a tensor.
resolution = (260, 260)
transform = transforms.Compose(
    [
        transforms.Resize(resolution),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]
)

# Helper function to create directory structure
def create_dir_structure(base_dir, output_dir):
    """Create ``output_dir/<split>/<category>`` for every split and category.

    Categories are the entries found under ``base_dir/train``,
    ``base_dir/val`` and ``base_dir/test``. Every category is created under
    all three output splits: images are re-split after shuffling, so a
    category that exists in only one source folder can still be assigned to
    any output split and needs a directory there.

    Args:
        base_dir: Root of the source dataset containing the ``train``,
            ``val`` and ``test`` folders.
        output_dir: Root under which the mirrored structure is created.
            Existing directories are left untouched.

    Raises:
        FileNotFoundError: If one of the three source folders is missing
            (propagated from ``os.listdir``).
    """
    splits = ("train", "val", "test")

    # Union of the categories seen in any source split.
    categories = set()
    for folder in splits:
        categories.update(os.listdir(os.path.join(base_dir, folder)))

    for folder in splits:
        for category in sorted(categories):
            os.makedirs(os.path.join(output_dir, folder, category), exist_ok=True)

# Function to convert tensor back to a PIL image
def tensor_to_pil(tensor):
    """Return the PIL image produced by applying ``ToPILImage`` to *tensor*."""
    to_pil = transforms.ToPILImage()
    return to_pil(tensor)

# Helper: pick an output file name that is not already taken
def _unique_png_path(directory, prefix):
    """Return a path ``directory/<prefix><8 random digits>.png`` that does not exist yet.

    A random 8-digit suffix alone can repeat; retrying until the path is free
    prevents a later image from silently overwriting an earlier one.
    """
    while True:
        candidate = os.path.join(
            directory, f"{prefix}{random.randint(10000000, 99999999)}.png"
        )
        if not os.path.exists(candidate):
            return candidate


# Function to preprocess and shuffle dataset
def preprocess_and_shuffle(base_dir, output_dir, transform, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1):
    """Preprocess every image and redistribute it into train/val/test outputs.

    For each source folder (``train``, ``val``, ``test``) and each category
    inside it, the images are shuffled, passed through *transform*, converted
    back to a PIL image and saved as PNG under
    ``output_dir/<split>/<category>/``. The output split is chosen by the
    image's position in the shuffled list, not by the folder it came from.

    Args:
        base_dir: Root of the source dataset.
        output_dir: Root of the preprocessed dataset.
        transform: Callable applied to each RGB PIL image; its result is
            passed to ``tensor_to_pil``.
        train_ratio: Fraction of each category's images written to ``train``.
        val_ratio: Fraction written to ``val``.
        test_ratio: Accepted for interface compatibility but not used in the
            computation; everything after the train and val portions goes to
            ``test``.
    """
    create_dir_structure(base_dir, output_dir)
    splits = ("train", "val", "test")
    for folder in splits:
        for category in os.listdir(os.path.join(base_dir, folder)):
            source_dir = os.path.join(base_dir, folder, category)
            images = os.listdir(source_dir)
            random.shuffle(images)

            # Split boundaries (indices into the shuffled list)
            total = len(images)
            train_split = int(train_ratio * total)
            val_split = train_split + int(val_ratio * total)

            # Make sure every destination exists, even for a category that is
            # absent from one of the source folders.
            target_dirs = {
                split: os.path.join(output_dir, split, category) for split in splits
            }
            for target_dir in target_dirs.values():
                os.makedirs(target_dir, exist_ok=True)

            for i, img_name in enumerate(tqdm(images, desc=f"Processing {folder}/{category}")):
                img_path = os.path.join(source_dir, img_name)
                # Close the source file as soon as the pixel data is converted.
                with Image.open(img_path) as source_img:
                    img = source_img.convert('RGB')
                processed_tensor = transform(img)
                processed_img = tensor_to_pil(processed_tensor)  # Convert tensor to PIL image

                # Determine target split
                if i < train_split:
                    target_folder = "train"
                elif i < val_split:
                    target_folder = "val"
                else:
                    target_folder = "test"

                # Save image under a name that is guaranteed not to clash
                target_path = _unique_png_path(target_dirs[target_folder], category)
                processed_img.save(target_path)

# Kick off preprocessing with the paths and pipeline configured above
preprocess_and_shuffle(
    base_dir=base_dir,
    output_dir=output_dir,
    transform=transform,
)


Processing train/0: 100%|██████████| 5000/5000 [03:32<00:00, 23.58it/s]
Processing train/1: 100%|██████████| 5000/5000 [03:30<00:00, 23.79it/s]
Processing train/2: 100%|██████████| 5000/5000 [03:12<00:00, 25.96it/s]
Processing train/3: 100%|██████████| 5000/5000 [03:29<00:00, 23.81it/s]
Processing train/4: 100%|██████████| 5000/5000 [03:08<00:00, 26.51it/s]
Processing train/5: 100%|██████████| 5000/5000 [03:21<00:00, 24.82it/s]
Processing train/6: 100%|██████████| 5000/5000 [03:15<00:00, 25.52it/s]
Processing train/7: 100%|██████████| 5000/5000 [04:02<00:00, 20.66it/s]
Processing val/0: 100%|██████████| 100/100 [00:02<00:00, 41.74it/s]
Processing val/1: 100%|██████████| 100/100 [00:02<00:00, 42.04it/s]
Processing val/2: 100%|██████████| 100/100 [00:02<00:00, 33.44it/s]
Processing val/3: 100%|██████████| 100/100 [00:02<00:00, 38.94it/s]
Processing val/4: 100%|██████████| 100/100 [00:02<00:00, 41.14it/s]
Processing val/5: 100%|██████████| 100/100 [00:02<00:00, 43.70it/s]
Processing val/6