In [None]:
import os
import cv2
import random

def load_images_by_class(input_folder, class_name):
    """Collect the image files found directly inside one class folder.

    Args:
        input_folder: Directory to scan (not recursive).
        class_name: Label attached to every image that is found.

    Returns:
        A list of ``(path, class_name)`` tuples ordered by file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # listing stable between runs and machines.
    for entry in sorted(os.listdir(input_folder)):
        if not entry.lower().endswith(image_extensions):
            continue
        path = os.path.join(input_folder, entry)
        # A directory may carry an image-like name; only real files count.
        if os.path.isfile(path):
            image_paths.append((path, class_name))
    return image_paths

def split_data_balanced(class_data, train_ratio=0.7, val_ratio=0.15):
    """Randomly split the samples of one class into train/val/test parts.

    Args:
        class_data: Sequence of samples belonging to a single class. It is
            left untouched; a shuffled copy is split instead.
        train_ratio: Fraction of the samples assigned to the training part.
        val_ratio: Fraction of the samples assigned to the validation part.

    Returns:
        A dict with the keys ``"train"``, ``"val"`` and ``"test"``; the test
        part receives whatever remains after the other two are cut.
    """
    # Shuffle a copy so the caller's list is not reordered as a side effect
    # (this also lets tuples and other sequences be passed in).
    shuffled = list(class_data)
    random.shuffle(shuffled)

    total = len(shuffled)
    # int() truncates, so rounding leftovers end up in the test part.
    train_end = int(train_ratio * total)
    val_end = train_end + int(val_ratio * total)
    return {
        "train": shuffled[:train_end],
        "val": shuffled[train_end:val_end],
        "test": shuffled[val_end:]
    }

def resize_by_width(img, target_width=1000):
    """Resize an image to a given width while keeping its aspect ratio.

    Args:
        img: Image array whose first two dimensions are height and width.
        target_width: Width of the returned image in pixels.

    Returns:
        The resized image as produced by ``cv2.resize``.
    """
    h, w = img.shape[:2]
    scale = target_width / w
    # Truncation could yield a height of 0 for extremely wide images, which
    # cv2.resize rejects; keep at least one row.
    new_h = max(1, int(h * scale))
    # INTER_AREA is meant for shrinking; OpenCV recommends a cubic (or
    # linear) kernel when the image is being enlarged.
    interpolation = cv2.INTER_AREA if scale <= 1 else cv2.INTER_CUBIC
    return cv2.resize(img, (target_width, new_h), interpolation=interpolation)

import numpy as np

def center_crop(img, crop_ratio=0.8):
    """Cut out the central region of an image.

    Args:
        img: Image array whose first two dimensions are height and width.
        crop_ratio: Fraction of the height and of the width to keep; must
            lie in the interval ``(0, 1]``.

    Returns:
        A slice (view) of ``img`` centred on the image middle.

    Raises:
        ValueError: If ``crop_ratio`` is outside ``(0, 1]``.
    """
    # A ratio above 1 would produce negative start offsets and silently
    # return an unrelated slice; a non-positive one would return nothing.
    if not 0 < crop_ratio <= 1:
        raise ValueError(f"crop_ratio must be in (0, 1], got {crop_ratio}")

    h, w = img.shape[:2]
    # Keep at least one pixel per axis so the crop is never empty.
    ch = max(1, int(h * crop_ratio))
    cw = max(1, int(w * crop_ratio))
    start_y = (h - ch) // 2
    start_x = (w - cw) // 2
    return img[start_y:start_y + ch, start_x:start_x + cw]

def adjust_brightness_contrast(img, brightness=30, contrast=30):
    """Apply a random brightness offset and contrast gain to an image.

    Args:
        img: Input image passed to ``cv2.convertScaleAbs``.
        brightness: Largest absolute offset (beta) that may be added.
        contrast: Largest gain deviation from 1.0, expressed in percent.

    Returns:
        The adjusted image returned by ``cv2.convertScaleAbs``.
    """
    # randint's upper bound is exclusive: the +1 makes the offset range
    # symmetric and keeps brightness=0 from raising ValueError.
    beta = np.random.randint(-brightness, brightness + 1)
    # Gain is 1.0 plus or minus a random share of `contrast` percent.
    alpha = 1.0 + (np.random.rand() * contrast / 100.0) * random.choice([-1, 1])
    return cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

def random_affine_transform(img, max_shift=15):
    """Warp an image with a small random affine transform.

    Three reference points placed 5 pixels inside the top-left, top-right
    and bottom-left corners are each moved by a random offset, and the
    affine map between the old and new points is applied to the image.

    Args:
        img: Image array whose first two dimensions are height and width.
        max_shift: Largest offset, in pixels, applied to each coordinate of
            the reference points (inclusive, in both directions).

    Returns:
        The warped image, with the same width and height as the input.
    """
    rows, cols = img.shape[:2]
    pts1 = np.float32([[5, 5], [cols - 5, 5], [5, rows - 5]])
    # randint's upper bound is exclusive; +1 keeps the jitter symmetric.
    jitter = np.random.randint(-max_shift, max_shift + 1, pts1.shape)
    pts2 = pts1 + jitter.astype(np.float32)
    M = cv2.getAffineTransform(pts1, pts2)
    return cv2.warpAffine(img, M, (cols, rows))

def augment_and_save(image_list, output_folder, scale_widths=(600, 800, 1000)):
    """Write scaled and augmented variants of every image to a folder.

    Each readable image is resized to every width in ``scale_widths`` and,
    per width, six variants are saved: the scaled image, a centre crop, a
    180 degree rotation, a horizontal flip, a random brightness/contrast
    change and a random affine warp.

    Output files are named ``{label}_{name}_s{scale index}_{variant}{ext}``.

    Args:
        image_list: Iterable of ``(source_path, label)`` pairs.
        output_folder: Directory receiving the files; created if missing.
        scale_widths: Target widths in pixels used for the scaled copies.
    """
    os.makedirs(output_folder, exist_ok=True)

    for src_path, label in image_list:
        img = cv2.imread(src_path)
        # cv2.imread signals an unreadable file by returning None.
        if img is None:
            print(f"Warning: Could not read {src_path}")
            continue

        name, ext = os.path.splitext(os.path.basename(src_path))

        for idx, width in enumerate(scale_widths):
            base_img = resize_by_width(img, target_width=width)

            # Augmentation set
            augmentations = [
                ("0", base_img),  # Original scaled
                ("1", center_crop(base_img)),  # Center cropped
                ("2", cv2.rotate(base_img, cv2.ROTATE_180)),  # Rotated
                ("3", cv2.flip(base_img, 1)),  # Horizontal flip
                ("4", adjust_brightness_contrast(base_img)),  # Brightness/contrast
                ("5", random_affine_transform(base_img)),  # Affine transform
            ]

            for aug_code, aug_img in augmentations:
                out_name = f"{label}_{name}_s{idx}_{aug_code}{ext}"
                out_path = os.path.join(output_folder, out_name)
                # cv2.imwrite reports failure through its return value;
                # surface it instead of silently losing the sample.
                if not cv2.imwrite(out_path, aug_img):
                    print(f"Warning: Could not write {out_path}")


def prepare_dataset(real_folder, not_real_folder, output_base, seed=None):
    """Build augmented train/val/test folders from two class folders.

    The images of each class are split separately so every split receives
    the same share of both classes, then each split is augmented and
    written to ``<output_base>/train``, ``/val`` and ``/test``.

    Args:
        real_folder: Folder holding the images labelled ``"real"``.
        not_real_folder: Folder holding the images labelled ``"not_real"``.
        output_base: Directory under which the split folders are created.
        seed: Optional seed applied to both ``random`` and ``numpy.random``
            (global state) so a run can be repeated, provided the folder
            listings come back in the same order. ``None`` leaves the
            generators untouched.
    """
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # Load class-wise images
    real_images = load_images_by_class(real_folder, "real")
    not_real_images = load_images_by_class(not_real_folder, "not_real")

    # An empty class would quietly yield a single-class dataset.
    for class_name, class_images in (("real", real_images),
                                     ("not_real", not_real_images)):
        if not class_images:
            print(f"Warning: No images found for class '{class_name}'")

    # Split with class balance; split_data_balanced shuffles each class
    # itself, so no extra shuffle is needed beforehand.
    real_split = split_data_balanced(real_images)
    not_real_split = split_data_balanced(not_real_images)

    # Merge splits
    train_data = real_split["train"] + not_real_split["train"]
    val_data = real_split["val"] + not_real_split["val"]
    test_data = real_split["test"] + not_real_split["test"]

    # Shuffle final combined splits so the classes are interleaved
    random.shuffle(train_data)
    random.shuffle(val_data)
    random.shuffle(test_data)

    # Augment and save
    print("Augmenting training data...")
    augment_and_save(train_data, os.path.join(output_base, "train"))

    print("Augmenting validation data...")
    augment_and_save(val_data, os.path.join(output_base, "val"))

    print("Augmenting test data...")
    augment_and_save(test_data, os.path.join(output_base, "test"))

    print("Dataset preparation completed.")

# main
# Guarded so importing this file does not start the (slow, disk-writing)
# pipeline; running it as a script or notebook cell still does.
if __name__ == "__main__":
    real_input = "./final dataset/real"
    not_real_input = "./final dataset/not_real"
    output_folder = "./data random/split"

    prepare_dataset(real_input, not_real_input, output_folder)

Augmenting training data...
Augmenting validation data...
Augmenting test data...
Dataset preparation completed.
