In [2]:
import os
from PIL import Image
from torchvision import transforms
from tqdm import tqdm  # For progress tracking

# Where the raw training images live, and where their preprocessed copies go.
DATASET_DIR = "./dataset/train/images"
PROCESSED_DIR = "./dataset/train/processed"

# Preprocessing pipeline applied to every image: resize to a fixed 224x224
# canvas (aspect ratio is not preserved), then convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
    ]
)

# Preprocess images
def preprocess_images(input_dir, output_dir):
    """Preprocess every image under ``input_dir`` and write it to ``output_dir``.

    The input directory is scanned one level deep:

    * files directly inside ``input_dir`` are written directly into
      ``output_dir``;
    * for each immediate subdirectory, a subdirectory of the same name is
      created under ``output_dir`` and the files inside it are written there.

    Anything nested deeper than one subdirectory is ignored. Each file is
    handed to ``process_and_save_image``, which reports (rather than raises)
    per-file failures, so one unreadable file does not stop the run.

    Args:
        input_dir: Directory containing the source images and/or one level
            of subdirectories with images.
        output_dir: Directory to write processed images to; created if it
            does not exist.
    """
    # exist_ok avoids the check-then-create race and matches how the
    # per-folder output directories are created below.
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the processing order (and any error output) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for item in tqdm(sorted(os.listdir(input_dir)), desc="Processing items"):
        item_path = os.path.join(input_dir, item)

        # Handle directories
        if os.path.isdir(item_path):
            output_folder = os.path.join(output_dir, item)
            os.makedirs(output_folder, exist_ok=True)

            for img_file in sorted(os.listdir(item_path)):
                img_path = os.path.join(item_path, img_file)
                # Only regular files are images; skip nested directories
                # instead of feeding them to the image loader, mirroring the
                # isfile check used for top-level entries.
                if os.path.isfile(img_path):
                    process_and_save_image(img_path, output_folder)
        # Handle files directly in the parent directory
        elif os.path.isfile(item_path):
            process_and_save_image(item_path, output_dir)

def process_and_save_image(img_path, output_dir):
    """Load one image, run it through ``transform``, and save the result.

    The image is opened, converted to RGB, passed through the module-level
    ``transform`` pipeline, converted back to a PIL image, and saved in
    ``output_dir`` under the same file name as the source.

    Failures are reported on stdout and otherwise ignored, so that a single
    bad file does not abort a batch run.

    Args:
        img_path: Path of the image file to process.
        output_dir: Existing directory to save the processed image into.
    """
    try:
        # Image.open is lazy and keeps the file handle open; the context
        # manager guarantees it is closed even if decoding fails.
        # convert() loads the pixel data and returns an independent image,
        # so it stays usable after the source file is closed.
        with Image.open(img_path) as source_img:
            img = source_img.convert("RGB")
        processed_img = transform(img)  # Apply transformations
        # Convert the tensor back to a PIL image so it can be written to disk.
        processed_img_pil = transforms.ToPILImage()(processed_img)
        output_path = os.path.join(output_dir, os.path.basename(img_path))
        processed_img_pil.save(output_path)  # Save the processed image
    except Exception as e:
        # Deliberately broad: this is a best-effort batch step, and any
        # failure (unreadable file, unsupported format, save error) should be
        # logged for that file only.
        print(f"Error processing {img_path}: {e}")

# Run the preprocessing pass over the training images.
preprocess_images(input_dir=DATASET_DIR, output_dir=PROCESSED_DIR)


Processing items: 100%|████████████████████████████████████████████████████████████| 3546/3546 [00:56<00:00, 62.38it/s]
