In [1]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

In [2]:
# Root folder of all image sets, given relative to the current working directory.
IMG_DATA_DIR = os.path.join("..", "data", "img")

# Class names; the cells below also use them as sub-folder names in the output tree.
LABEL_0, LABEL_1 = "real", "fake"

# Edge length, in pixels, of the square images written to the target directory.
SIZE = 224

# Input folders: one per class.
REAL_IMG_SOURCE_DIR = os.path.join(IMG_DATA_DIR, "raw")
FAKE_IMG_SOURCE_DIR = os.path.join(IMG_DATA_DIR, "dreamlike-photoreal-2")

# Output folder that receives the resized train/val images.
IMG_TARGET_DIR = os.path.join(IMG_DATA_DIR, "vit")

In [8]:
# Count the JPEGs in each source folder. The extension is matched with
# endswith() rather than a substring test, so names that merely contain
# ".jpg" (e.g. "0001.jpg.bak") are not counted as images.
real_jpgs = [name for name in os.listdir(REAL_IMG_SOURCE_DIR) if name.endswith(".jpg")]
fake_jpgs = [name for name in os.listdir(FAKE_IMG_SOURCE_DIR) if name.endswith(".jpg")]
number_of_images = len(real_jpgs)

# Both classes must have the same number of images; report both counts on failure.
assert number_of_images == len(fake_jpgs), (
    f"image count mismatch: {number_of_images} real vs {len(fake_jpgs)} fake"
)

In [11]:
from sklearn.model_selection import train_test_split

In [15]:
# Image ids run from 1 to number_of_images inclusive; 20% of them are held out
# for validation, with a fixed seed so the split is repeatable.
image_ids = np.arange(1, number_of_images + 1)
train_indexes, val_indexes = train_test_split(image_ids, test_size=0.2, random_state=42)

# Turn each id into a file name: zero-padded to four digits plus ".jpg".
# NOTE(review): presumes the source files follow this naming scheme - confirm.
train_imgs = [f"{int(idx):04d}.jpg" for idx in train_indexes]
val_imgs = [f"{int(idx):04d}.jpg" for idx in val_indexes]

In [19]:
def resize_image(input_image_path, output_image_path, size):
    """Resize an image to a ``size`` x ``size`` square and save it.

    The aspect ratio is not preserved: the image is stretched or squeezed to
    fit the square. Resampling uses the Lanczos filter.

    Args:
        input_image_path: Path of the image to read.
        output_image_path: Path the resized image is written to; Pillow picks
            the output format from this path's extension.
        size: Edge length, in pixels, of the square output.
    """
    # The context manager closes the source file as soon as the pixels have
    # been read, so handles do not pile up when this is called in a long loop.
    with Image.open(input_image_path) as original_image:
        resized_image = original_image.resize((size, size), Image.Resampling.LANCZOS)
    resized_image.save(output_image_path)

In [22]:
# Create the <target>/<split>/<label> folder tree; folders that already exist are left alone.
for split_name in ("train", "val"):
    for label_name in (LABEL_0, LABEL_1):
        os.makedirs(os.path.join(IMG_TARGET_DIR, split_name, label_name), exist_ok=True)

In [23]:
# Resize every real image into the train or val folder for LABEL_0.
# A set gives constant-time membership checks (the list made each check a linear scan).
train_names = set(train_imgs)

# Only JPEGs are processed, so stray entries in the folder (hidden files,
# sub-folders, ...) no longer reach Image.open and abort the loop.
real_filenames = [name for name in os.listdir(REAL_IMG_SOURCE_DIR) if name.endswith(".jpg")]

for filename in tqdm(real_filenames):
    input_path = os.path.join(REAL_IMG_SOURCE_DIR, filename)
    # Any image that is not in the training split goes to validation.
    split = "train" if filename in train_names else "val"
    output_path = os.path.join(IMG_TARGET_DIR, split, LABEL_0, filename)
    resize_image(input_path, output_path, SIZE)

100%|██████████| 4319/4319 [01:06<00:00, 64.74it/s]


In [27]:
# Resize every generated image into the train or val folder for LABEL_1.
# A set gives constant-time membership checks (the list made each check a linear scan).
train_names = set(train_imgs)

# Only JPEGs are processed, so stray entries in the folder (hidden files,
# sub-folders, ...) no longer reach Image.open and abort the loop.
fake_filenames = [name for name in os.listdir(FAKE_IMG_SOURCE_DIR) if name.endswith(".jpg")]

for filename in tqdm(fake_filenames):
    input_path = os.path.join(FAKE_IMG_SOURCE_DIR, filename)
    # Any image that is not in the training split goes to validation.
    split = "train" if filename in train_names else "val"
    output_path = os.path.join(IMG_TARGET_DIR, split, LABEL_1, filename)
    resize_image(input_path, output_path, SIZE)

100%|██████████| 4319/4319 [00:49<00:00, 87.93it/s]
