In [30]:
import torch
print(torch.cuda.is_available())

True


In [1]:
import os
import shutil
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import random

RAW_DIR = "data/raw"
PROCESSED_DIR = "data/processed"
CATEGORIES = ["fake", "real"]
TRAIN_DIR = os.path.join(PROCESSED_DIR, "train")
TEST_DIR = os.path.join(PROCESSED_DIR, "test")


split_ratio = 0.1
random.seed(42)


BATCH_SIZE = 16
IMAGE_SIZE = (1024, 1024)

resize_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE)
])

augmentation_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.3, contrast=0.4, saturation=0.3, hue=0.2),
    transforms.ToTensor()
])

save_transform = transforms.ToPILImage()



In [2]:
def process_images(category):    
    raw_path = os.path.join(RAW_DIR, category)
    processed_path = PROCESSED_DIR
    os.makedirs(processed_path, exist_ok=True)

    images = [os.path.join(raw_path, f) for f in os.listdir(raw_path) if f.endswith((".png", ".jpg", ".jpeg"))]

    image_counter = 0
    
    for i in tqdm(range(0, len(images), BATCH_SIZE), desc=f"Processing {category}"):
        batch = images[i:i + BATCH_SIZE]
        for img_path in batch:
            try:
                img = Image.open(img_path).convert("RGB")  # Kép betöltése

                # Save a simple resized version
                resized_img = resize_transform(img)
                resized_save_path = os.path.join(processed_path, f"{category}_{image_counter}.jpg")
                resized_img.save(resized_save_path)

                image_counter += 1

                # Save two augmented versions
                for aug_idx in range(2):
                    augmented_img = augmentation_transforms(img)  # Apply augmentation
                    augmented_img = save_transform(augmented_img)  # Convert tensor to PIL image
                    aug_save_path = os.path.join(processed_path, f"{category}_{image_counter}.jpg")
                    augmented_img.save(aug_save_path)
                    image_counter += 1

            except Exception as e:
                print(f"Error processing {img_path}: {e}")

In [3]:
if os.path.exists(PROCESSED_DIR):
        shutil.rmtree(PROCESSED_DIR )

for category in CATEGORIES:
    process_images(category)

Processing fake: 100%|██████████| 34/34 [03:15<00:00,  5.75s/it]
Processing real: 100%|██████████| 28/28 [02:45<00:00,  5.91s/it]


In [4]:
def split_and_move_data():
    # Get all image paths from the processed directory
    image_paths = [os.path.join(PROCESSED_DIR, f) for f in os.listdir(PROCESSED_DIR) if f.endswith((".png", ".jpg", ".jpeg"))]
    
    # Shuffle the image paths for random split
    random.shuffle(image_paths)
    
    # Calculate the split index
    split_index = int(len(image_paths) * split_ratio)
    
    # Split the images into train and test sets
    train_images = image_paths[:split_index]
    test_images = image_paths[split_index:]
    
    # Move images to the 'train' directory
    for img_path in train_images:
        shutil.move(img_path, os.path.join(TRAIN_DIR, os.path.basename(img_path)))
    
    # Move images to the 'test' directory
    for img_path in test_images:
        shutil.move(img_path, os.path.join(TEST_DIR, os.path.basename(img_path)))

    print(f"Total images: {len(image_paths)}")
    print(f"Training set: {len(train_images)}")
    print(f"Test set: {len(test_images)}")

In [5]:
os.makedirs(TRAIN_DIR, exist_ok=True)
os.makedirs(TEST_DIR, exist_ok=True)

split_and_move_data()


Total images: 2910
Training set: 291
Test set: 2619
