Augmentasi Dataset

In [1]:
import os
import random
import shutil
import cv2
import numpy as np
import albumentations as A

# Source dataset (one sub-folder per class) and the folder that will receive
# the originals together with their augmented copies.
base_dir = '../../Dataset/Dataset_Nusantara_7'
augmented_dir = '../../Dataset/Dataset_Nusantara_7_augmented_1100'

# Make sure the output folder exists; nothing happens if it is already there.
os.makedirs(augmented_dir, exist_ok=True)

# Every sub-directory directly under the source dataset is one class label.
classes = list(
    filter(
        lambda entry: os.path.isdir(os.path.join(base_dir, entry)),
        os.listdir(base_dir),
    )
)

# Pool of augmentation transforms. The loop further down picks one of these at
# random (random.choice) for every generated image; every transform is built
# with p=1.0.
# NOTE(review): the recorded cell output echoes the CoarseDropout line below,
# which looks like a library warning about its arguments -- confirm against
# the installed albumentations version before changing them.
augmentations = [
    A.Rotate(limit=90, p=1.0),
    A.VerticalFlip(p=1.0),
    # NOTE(review): presumably this resizes the whole image by a factor derived
    # from scale_limit (so output dimensions differ from the input) rather than
    # cropping into it -- TODO confirm in the albumentations documentation.
    A.RandomScale(scale_limit=(0.2, 0.5), p=1.0),  # Zoom in
    A.CoarseDropout(max_holes=8, max_height=16, max_width=16, p=1.0),  # Random erase
    A.GridDropout(ratio=0.5, p=1.0)  # GridMask
]

def apply_augmentation(image, aug):
    """Apply a single augmentation to an image and return the result.

    Args:
        image: Image handed to the transform through the ``image`` keyword.
        aug: Callable that accepts ``image=...`` and returns a mapping with an
            ``'image'`` entry (for example one item of ``augmentations``).

    Returns:
        The value stored under ``'image'`` in the transform's result.
    """
    return aug(image=image)['image']

# Number of images (originals + augmented copies) each class should end up with.
# Kept in one place so the loop condition and the final message cannot drift.
TARGET_COUNT = 1100

# Process every class folder of the source dataset in turn.
for cls in classes:
    cls_path = os.path.join(base_dir, cls)
    # Keep regular files only (a nested folder would make shutil.copy2 fail) and
    # sort them so the round-robin order below does not depend on the arbitrary
    # order returned by os.listdir.
    images = sorted(
        os.path.join(cls_path, name)
        for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )
    label_dir = os.path.join(augmented_dir, cls)
    os.makedirs(label_dir, exist_ok=True)

    # Copy the original images first.
    for img_path in images:
        shutil.copy2(img_path, os.path.join(label_dir, os.path.basename(img_path)))

    # Generate augmented copies until the class holds TARGET_COUNT images.
    current_count = len(images)
    sources = list(images)  # files still considered usable as augmentation input
    index = 0
    while current_count < TARGET_COUNT:
        if not sources:
            # Empty class folder, or every file turned out to be undecodable:
            # fail with a clear message instead of ZeroDivisionError / looping forever.
            raise ValueError(
                f"Kelas '{cls}' tidak memiliki gambar yang dapat dibaca; "
                f"tidak bisa mencapai {TARGET_COUNT} gambar."
            )

        position = index % len(sources)
        img_path = sources[position]  # cycle through the source images in turn
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports an undecodable file by returning None; drop the
            # file from the pool rather than crashing inside cv2.cvtColor.
            print(f"Peringatan: '{img_path}' tidak dapat dibaca sebagai gambar, dilewati.")
            del sources[position]
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Pick one augmentation at random for this image.
        aug = random.choice(augmentations)
        augmented_image = apply_augmentation(image, aug)

        # Save the augmented image; current_count keeps the file name unique
        # within the class folder.
        new_filename = f"aug_{current_count}_{os.path.basename(img_path)}"
        save_path = os.path.join(label_dir, new_filename)
        written = cv2.imwrite(save_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))
        if not written:
            # A False return means nothing was written; counting it anyway would
            # leave the class short of TARGET_COUNT without any indication.
            raise OSError(f"Gagal menyimpan gambar hasil augmentasi: {save_path}")

        current_count += 1
        index += 1

print(f"Augmentasi selesai. Setiap kelas sekarang memiliki {TARGET_COUNT} gambar.")


  from .autonotebook import tqdm as notebook_tqdm
  A.CoarseDropout(max_holes=8, max_height=16, max_width=16, p=1.0),  # Random erase


Augmentasi selesai. Setiap kelas sekarang memiliki 1100 gambar.


Split Dataset Augmentasi

In [3]:
import os
import random
import shutil

# Input: the augmented dataset. Outputs: the train/ and test/ trees of the split.
base_dir = '../../Dataset/Dataset_Nusantara_7_augmented_1100'  # adjust to your dataset path
train_dir = '../../Dataset/Dataset_Nusantara_7_augmented_1100_split/train'
test_dir = '../../Dataset/Dataset_Nusantara_7_augmented_1100_split/test'

# Create both output roots up front; existing folders are left untouched.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Class labels are the sub-folders found directly under the dataset root.
classes = list(
    filter(
        lambda entry: os.path.isdir(os.path.join(base_dir, entry)),
        os.listdir(base_dir),
    )
)

def copy_images(image_paths, dest_dir, label):
    """Copy image files into the ``label`` sub-folder of ``dest_dir``.

    The sub-folder is created when missing (also when ``image_paths`` is
    empty). Each file keeps its base name; ``shutil.copy2`` is used for the
    copy.

    Args:
        image_paths: Iterable of source file paths.
        dest_dir: Root folder of the destination split.
        label: Name of the class sub-folder under ``dest_dir``.
    """
    target_dir = os.path.join(dest_dir, label)
    os.makedirs(target_dir, exist_ok=True)
    for source_path in image_paths:
        file_name = os.path.basename(source_path)
        shutil.copy2(source_path, os.path.join(target_dir, file_name))

# Number of images per class reserved for the test set.
num_test_images = 100

# Fixed seed so that re-running this cell reproduces exactly the same split.
SPLIT_SEED = 42

# NOTE(review): base_dir holds the originals plus the augmented copies written
# by the augmentation cell (named aug_<n>_<original file name>), and the split
# below works per file. An original and its augmented variants can therefore
# end up on opposite sides of the split. Consider splitting the originals first
# and augmenting only the training part.

# Split every class into a test part (num_test_images files) and a train part.
for cls in classes:
    cls_path = os.path.join(base_dir, cls)
    # Sorted so the shuffle starts from the same order on every run, instead of
    # the arbitrary order returned by os.listdir.
    images = sorted(
        os.path.join(cls_path, img)
        for img in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, img))
    )

    # Make sure the class has enough images for the requested test size.
    if len(images) < num_test_images:
        raise ValueError(f"Kelas '{cls}' hanya memiliki {len(images)} gambar, tidak cukup untuk mengambil {num_test_images} gambar untuk test.")

    # Shuffle with a dedicated, per-class seeded generator: the result does not
    # depend on the global random state or on the order of `classes`.
    random.Random(f"{SPLIT_SEED}:{cls}").shuffle(images)

    # Split the data.
    test_imgs = images[:num_test_images]
    train_imgs = images[num_test_images:]

    # Files are only ever added to the split folders, so leftovers from an
    # earlier run with a different split would put the same image into both
    # train and test. Refuse to continue in that case.
    for dest_root, other_side in ((train_dir, test_imgs), (test_dir, train_imgs)):
        dest_cls_dir = os.path.join(dest_root, cls)
        if os.path.isdir(dest_cls_dir):
            clash = set(os.listdir(dest_cls_dir)) & {os.path.basename(p) for p in other_side}
            if clash:
                raise RuntimeError(
                    f"Folder '{dest_cls_dir}' sudah berisi {len(clash)} gambar dari pembagian "
                    f"sebelumnya yang bertentangan dengan pembagian ini; hapus folder split lalu jalankan ulang."
                )

    # Copy the images into their split folders.
    copy_images(train_imgs, train_dir, cls)
    copy_images(test_imgs, test_dir, cls)

    # Log the per-class counts.
    print(f"Kelas: {cls} | Train: {len(train_imgs)} gambar | Test: {len(test_imgs)} gambar")

print(f"\nPembagian dataset selesai: {num_test_images} gambar per kelas untuk test, sisanya untuk train.")


Kelas: JawaBarat_Megamendung | Train: 1000 gambar | Test: 100 gambar
Kelas: Kalimantan_CorakInsang | Train: 1000 gambar | Test: 100 gambar
Kelas: Kalimantan_Dayak | Train: 1000 gambar | Test: 100 gambar
Kelas: Papua_Cendrawasih | Train: 1000 gambar | Test: 100 gambar
Kelas: Solo_Parang | Train: 1000 gambar | Test: 100 gambar
Kelas: Tiongkok_IkatCelup | Train: 1000 gambar | Test: 100 gambar
Kelas: Yogyakarta_Kawung | Train: 1000 gambar | Test: 100 gambar

Pembagian dataset selesai: 100 gambar per kelas untuk test, sisanya untuk train.
