# Libraries

In [1]:
import torch
import os
import shutil
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import random
from glob import glob
import numpy as np
import cv2

# Preparation:


Some global variables

In [2]:
RAW_DIR = "data/raw/140k face"
PROCESSED_DIR = "data/processed"

CATEGORIES = ["fake", "real"]
TRAIN_DIR = os.path.join(PROCESSED_DIR, "train/rgb")
VALID_DIR = os.path.join(PROCESSED_DIR, "valid/rgb")
TEST_DIR = os.path.join(PROCESSED_DIR, "test/rgb")

SHARPEN_TRAIN_DIR = os.path.join(PROCESSED_DIR, "train/sharpen")
SHARPEN_VALID_DIR = os.path.join(PROCESSED_DIR, "valid/sharpen")
SHARPEN_TEST_DIR = os.path.join(PROCESSED_DIR, "test/sharpen")

EDGE_TRAIN_DIR = os.path.join(PROCESSED_DIR, "train/edge")
EDGE_VALID_DIR = os.path.join(PROCESSED_DIR, "valid/edge")
EDGE_TEST_DIR = os.path.join(PROCESSED_DIR, "test/edge")

train_ratio = 0.7
val_ratio = 0.2
random.seed(12347556)

AUG_AMOUNT=0 #the amount of augmented version of each pictures. Turned of because of the overwhelming amount of pictures existing already
BATCH_SIZE = 1000

Delete the whole processed_dir structure

In [9]:
#Here we delete the whole 

if os.path.exists(PROCESSED_DIR):
         shutil.rmtree(PROCESSED_DIR )

# Transformation

The transformation structures

Processing normal images

In [3]:
def process_label(label, size):
    print(f"Working on {label}")
    raw_path = os.path.join(RAW_DIR, label)
    processed_path = PROCESSED_DIR
    os.makedirs(processed_path, exist_ok=True)

    image_paths = [os.path.join(raw_path, f) for f in os.listdir(raw_path) if f.endswith((".png", ".jpg", ".jpeg"))]

    random.shuffle(image_paths)
    
    # Calculate the split index
    train_split_index = int(len(image_paths) * train_ratio)
    val_split_index = int(len(image_paths) * (train_ratio + val_ratio))

    train_paths = image_paths[:train_split_index]
    val_paths = image_paths[train_split_index:val_split_index]
    test_paths = image_paths[val_split_index:]

    create_images(train_paths, "train", size, label)

    create_images(val_paths, "valid", size, label)

    create_test(test_paths, "test", size, label)
   

In [4]:
def get_unique_filename(base_path, base_name, counter):

    image_name=f"{base_name}_{counter}.jpg"
    
    while True:
        filename = os.path.join(base_path, image_name)
        if not os.path.exists(filename):
            return image_name
        counter += 1

In [5]:
def create_images(images, category, size, label):

    resize_transform = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
    ])
    
    augmentation_transforms = transforms.Compose([
        transforms.RandomResizedCrop(size, scale=(0.7, 0.9)),
        transforms.RandomApply([
            transforms.ColorJitter(contrast=0.5) 
        ], p=0.5),
        transforms.Resize(size),
        transforms.ToTensor(),
    ])
    save_transform = transforms.ToPILImage()



    ROOT_DIR=os.path.join(PROCESSED_DIR, f"{size[0]}")
    
    if (category=="train"):
        rgb_path=os.path.join(ROOT_DIR, "train/rgb")
        edge_path=os.path.join(ROOT_DIR, "train/edge")
        sharpen_path=os.path.join(ROOT_DIR, "train/sharpen")
    elif(category=="valid"):
        rgb_path=os.path.join(ROOT_DIR, "valid/rgb")
        edge_path=os.path.join(ROOT_DIR, "valid/edge")
        sharpen_path=os.path.join(ROOT_DIR, "valid/sharpen")
    else:
        rgb_path=os.path.join(ROOT_DIR, "test/rgb")
        edge_path=os.path.join(ROOT_DIR, "test/edge")
        sharpen_path=os.path.join(ROOT_DIR, "test/sharpen")

    os.makedirs(rgb_path, exist_ok=True)

    image_counter=0
    for i in tqdm(range(0, len(images), BATCH_SIZE), desc=f"Processing {category}"): #len(images)
        batch = images[i:i + BATCH_SIZE]
        for img_path in batch:

            img = Image.open(img_path).convert("RGB")  # Kép betöltése

            resized_img_name = get_unique_filename(rgb_path, label, image_counter)

                # Save a simple resized version
            resized_img = resize_transform(img)
            make_filters(resized_img, resized_img_name , edge_path, sharpen_path)
            resized_img = save_transform(resized_img)


            resized_img_path=os.path.join(rgb_path, resized_img_name)
                
            resized_img.save(resized_img_path)


                
            image_counter += 1

            if (category=="train"):
                for aug_idx in range(AUG_AMOUNT):
                    aug_img_name= get_unique_filename(rgb_path, label, image_counter)
                    
                    aug_img = augmentation_transforms(img)  # Apply augmentation
                    make_filters(aug_img, aug_img_name , edge_path, sharpen_path)
    
                    
                    aug_img = save_transform(aug_img)  # Convert tensor to PIL image
    
    
                    aug_img_path=os.path.join(rgb_path, aug_img_name)
                        
                    aug_img.save(aug_img_path)
    
                        
                    image_counter += 1

    
    

 move data into respective folder

In [6]:
def make_filters(tensor_image, filename, edge_path, sharpen_path):

    image = tensor_image.permute(1, 2, 0).numpy()

    os.makedirs(edge_path, exist_ok=True)
    os.makedirs(sharpen_path, exist_ok=True)
    
    kernel_edge = np.array([[-1, -1, -1],
                            [-1, 8, -1],
                            [-1, -1, -1]])


    kernel_sharpen = np.array([ [0, -1, 0],
                                [-1, 5, -1],
                                [0, -1, 0]])
    
    # Apply Edge Detection filter
    edge_result = cv2.filter2D(image, -1, kernel_edge)

    # Save the Edge Detection filtered image
    edge_output_path = os.path.join(edge_path, filename)
    cv2.imwrite(edge_output_path, edge_result)

    # Apply Sharpen filter
    sharpen_result = cv2.filter2D(image, -1, kernel_sharpen)

    # Save the Sharpen filtered image
    sharpen_output_path = os.path.join(sharpen_path, filename)
    cv2.imwrite(sharpen_output_path, sharpen_result)

In [7]:
for cat in CATEGORIES:
    process_images(cat, (64,64))

Working on fake


Processing train:  32%|███▏      | 36/112 [08:19<17:34, 13.88s/it]


OSError: [Errno 28] No space left on device

In [10]:
for cat in CATEGORIES:
    process_images(cat, (128,128))

Working on fake


Processing train: 100%|██████████| 112/112 [21:04<00:00, 11.29s/it]
Processing valid: 100%|██████████| 32/32 [06:08<00:00, 11.52s/it]


Working on real


Processing train: 100%|██████████| 49/49 [09:40<00:00, 11.84s/it]
Processing valid: 100%|██████████| 14/14 [02:34<00:00, 11.05s/it]


In [9]:
for cat in CATEGORIES:
    process_images(cat, (256,256))

Working on fake


Processing train: 100%|██████████| 112/112 [33:17<00:00, 17.84s/it]
Processing valid: 100%|██████████| 32/32 [09:19<00:00, 17.47s/it]


Working on real


Processing train: 100%|██████████| 49/49 [15:29<00:00, 18.96s/it]
Processing valid: 100%|██████████| 14/14 [04:08<00:00, 17.73s/it]
