In [73]:
import cv2
import numpy as np
import os
from PIL import Image
import imgaug.augmenters as iaa

In [74]:
# Root of the raw dataset; the preprocessing loop treats each entry inside it as one class folder.
dataset_folder = "../Train"
# Root under which the resized and augmented images are written, mirroring the class folders.
output_folder = "../Preprocessed-Data"

In [96]:
# RESIZE & AUGMENTATION

# Resize image 64x64
def resize_image(image, size=(64, 64)):
    """Return a resized version of ``image``.

    Args:
        image: Any object exposing a ``resize(size)`` method; the notebook
            passes images opened with ``PIL.Image.open``.
        size: Target size forwarded unchanged to ``image.resize``.
            Defaults to ``(64, 64)``.

    Returns:
        Whatever ``image.resize(size)`` returns.
    """
    resized = image.resize(size)
    return resized

# Augmentation function
def augment_data(image):
    """Apply a random flip and a small random rotation to ``image``.

    Args:
        image: Image-like object convertible with ``np.array`` (the notebook
            passes a PIL image).

    Returns:
        The augmented image as produced by ``augment_image`` on the array
        form of ``image``.
    """
    # Pipeline: horizontal flip with probability 0.5, followed by a rotation
    # drawn from the range -10 to 10 degrees.
    steps = [
        iaa.Fliplr(0.5),
        iaa.Affine(rotate=(-10, 10)),
    ]
    pipeline = iaa.Sequential(steps)
    pixels = np.array(image)
    return pipeline.augment_image(pixels)

# Walk every class folder of the raw dataset; for each image, write a resized
# copy and one augmented copy into the mirrored folder under output_folder.
for class_folder in os.listdir(dataset_folder):
    class_path = os.path.join(dataset_folder, class_folder)
    # Stray files next to the class folders (e.g. .DS_Store) would make the
    # inner os.listdir fail, so only directories are treated as classes.
    if not os.path.isdir(class_path):
        continue
    output_class_path = os.path.join(output_folder, class_folder)
    os.makedirs(output_class_path, exist_ok=True)

    # Process every image inside the class folder
    for image_file in os.listdir(class_path):
        image_path = os.path.join(class_path, image_file)
        # Nested folders cannot be opened as images; skip them.
        if not os.path.isfile(image_path):
            continue

        # Open with a context manager so the file handle is released promptly;
        # resize() returns a new in-memory image that outlives the handle.
        with Image.open(image_path) as image:
            # Resize the image to 64x64
            resized_image = resize_image(image)

        # Save the resized image (as RGB) into the output folder under its original name
        output_image_path = os.path.join(output_class_path, image_file)
        resized_image_rgb = resized_image.convert('RGB')
        resized_image_rgb.save(output_image_path)

        # Augment the RGB version rather than the raw-mode image: for palette
        # or CMYK sources the raw pixel array is not RGB data, so augmenting it
        # and rebuilding an image from the array would yield wrong colours.
        augmented_image = augment_data(resized_image_rgb)

        # Convert the augmented array back to an RGB PIL image
        augmented_image_rgb = Image.fromarray(augmented_image).convert("RGB")

        # Save the augmented image next to the resized one, with "_augmented" appended to the file stem
        augmented_image_path = os.path.join(output_class_path, f"{os.path.splitext(image_file)[0]}_augmented.jpg")
        augmented_image_rgb.save(augmented_image_path)


In [108]:
# DATASET SPLITTING

import os
import shutil
import random

def split_dataset(source_dir, train_dir, val_dir, test_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=None):
    """Copy the files of ``source_dir`` into train/validation/test folders.

    The regular files directly inside ``source_dir`` are shuffled and cut into
    three consecutive slices whose sizes are ``int(ratio * total)``. Files
    left over after the three slices (caused by the truncation, or by ratios
    summing to less than 1) are added to the validation set so that every
    file ends up in exactly one split.

    Args:
        source_dir: Folder containing the files to split.
        train_dir: Destination folder for the training split.
        val_dir: Destination folder for the validation split.
        test_dir: Destination folder for the test split.
        train_ratio: Fraction of files for the training split.
        val_ratio: Fraction of files for the validation split.
        test_ratio: Fraction of files for the test split.
        seed: Optional seed. When given, the shuffle is reproducible, so
            re-running the split sends each file to the same folder instead
            of scattering copies of it across several splits. When ``None``
            the module-level ``random`` generator is used.

    Raises:
        ValueError: If any ratio is negative.
    """
    if min(train_ratio, val_ratio, test_ratio) < 0:
        raise ValueError("split ratios must be non-negative")

    for target_dir in (train_dir, val_dir, test_dir):
        os.makedirs(target_dir, exist_ok=True)

    # shutil.copy only handles regular files, so sub-directories are ignored.
    # Sorting removes the filesystem-dependent listing order, which is what
    # makes a seeded shuffle reproducible.
    image_files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)

    total_samples = len(image_files)
    train_samples = int(train_ratio * total_samples)
    val_samples = int(val_ratio * total_samples)
    test_samples = int(test_ratio * total_samples)

    train_end = train_samples
    val_end = train_end + val_samples
    test_end = val_end + test_samples

    # Leftover files go to validation. The previous per-file check against
    # the size of train_dir could never select the training folder, because
    # the training slice had already been copied there.
    assignments = (
        (train_dir, image_files[:train_end]),
        (val_dir, image_files[train_end:val_end] + image_files[test_end:]),
        (test_dir, image_files[val_end:test_end]),
    )
    for target_dir, files in assignments:
        for file in files:
            shutil.copy(os.path.join(source_dir, file), os.path.join(target_dir, file))

# Preprocessed images of the RottenTomato class that are to be split
source_dir = '../Preprocessed-Data/RottenTomato'
# Destination folders, one per split
train_dir = '../Datasets/Train/RottenTomato'
val_dir = '../Datasets/Validation/RottenTomato'
test_dir = '../Datasets/Test/RottenTomato'

# Split with the default ratios of split_dataset
split_dataset(
    source_dir=source_dir,
    train_dir=train_dir,
    val_dir=val_dir,
    test_dir=test_dir,
)