In [1]:
import pathlib
import shutil
from collections import Counter
import random
import numpy as np
from PIL import Image
import cv2

In [3]:
# Configuration
seed = 42
base_dir = pathlib.Path(".")
# Original source directory (the data as it is before oversampling)
data_dir = base_dir.joinpath('data valid merah')
# Alias pointing at the original data
source_dir = data_dir
# Destination directory (the data after oversampling)
output_dir = base_dir.joinpath('data valid merah balanced')

# Seed both the stdlib and the NumPy generators for reproducibility
np.random.seed(seed)
random.seed(seed)

print(
    "=" * 60,
    "OVERSAMPLING KELAS MINORITAS (AUGMENTASI AMAN)",
    "=" * 60,
    sep="\n",
)

# ==============================================================================
# SAFE AUGMENTATION SETTINGS (replaces the previous augmentation entirely)
# ==============================================================================

# Names of the augmentations one of which is picked at random per generated image
AUGMENTATION_CHOICES = ['flip_horizontal', 'rotate_small']
# Upper bound, in degrees, of the random rotation (applied as +/- this value)
MAX_ROTATION_ANGLE = 10

def augment_image_safe(image_path):
    """
    Load one image and return a single randomly chosen "safe" variant of it.

    The variant is either a horizontal mirror or a rotation by a random angle
    drawn uniformly from [-MAX_ROTATION_ANGLE, +MAX_ROTATION_ANGLE] degrees,
    depending on which entry of AUGMENTATION_CHOICES the RNG selects.

    Args:
        image_path: Path (str or path-like) of the image to read with cv2.imread.

    Returns:
        The augmented image array, or None when cv2.imread could not load the file.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        # Unreadable file: let the caller decide what to do.
        return None

    # Exactly one augmentation is applied per call.
    chosen = random.choice(AUGMENTATION_CHOICES)

    if chosen == 'flip_horizontal':
        # flipCode 1 mirrors the image horizontally.
        return cv2.flip(image, 1)

    if chosen != 'rotate_small':
        # Any other configured name leaves the image as loaded.
        return image

    degrees = random.uniform(-MAX_ROTATION_ANGLE, MAX_ROTATION_ANGLE)

    # Rotate about the image centre at scale 1.0, keeping the canvas size.
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, degrees, 1.0)

    # Corners uncovered by the rotation are filled with black (0, 0, 0).
    return cv2.warpAffine(
        image,
        rotation,
        (width, height),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0),
    )

# ==============================================================================
# FUNGSI COUNT DAN OVERSAMPLING (Diperbaiki logika pemanggilan augmentasi)
# ==============================================================================

def count_files_per_class(directory, patterns=('*.png',)):
    """
    Count the image files in each class sub-folder of a dataset directory.

    Every immediate sub-directory of ``directory`` is treated as one class;
    entries that are not directories are ignored.

    Args:
        directory: Dataset root (str or path-like) holding one folder per class.
        patterns: Glob pattern, or iterable of glob patterns, selecting the
            files that count as samples. Defaults to PNG files only; pass
            e.g. ``('*.png', '*.jpg')`` to include JPEG files as well.

    Returns:
        dict mapping class folder name -> number of matching files.
    """
    directory = pathlib.Path(directory)

    # Accept a single pattern string as a convenience.
    if isinstance(patterns, str):
        patterns = (patterns,)

    class_counts = {}
    for class_folder in directory.iterdir():
        if not class_folder.is_dir():
            continue

        # A set keeps a file from being counted twice when patterns overlap;
        # is_file() keeps a sub-folder whose name matches a pattern out of the count.
        matched = {
            path
            for pattern in patterns
            for path in class_folder.glob(pattern)
            if path.is_file()
        }
        class_counts[class_folder.name] = len(matched)

    return class_counts

def oversample_minority_classes(source_dir, output_dir, target_count=None):
    """
    Build a class-balanced copy of an image dataset using safe augmentation.

    Each class folder of ``source_dir`` is copied into ``output_dir``; classes
    with fewer than ``target_count`` PNG files are topped up with images
    produced by ``augment_image_safe`` from randomly chosen originals.

    An existing ``output_dir`` is deleted and recreated.

    Args:
        source_dir: Dataset root containing one sub-folder per class.
        output_dir: Destination root for the balanced dataset.
        target_count: Desired number of samples per class. When None, the size
            of the largest class is used.

    Returns:
        pathlib.Path of the output directory.

    Raises:
        ValueError: if ``output_dir`` is the same as, contains, or lies inside
            ``source_dir``, or if ``target_count`` is None and no class folder
            exists to derive it from.
        OSError: if an augmented image cannot be written.
    """
    source_dir = pathlib.Path(source_dir)
    output_dir = pathlib.Path(output_dir)

    # The output folder is wiped below, so it must never overlap the source:
    # otherwise the original data would be deleted or the output folder
    # would itself be picked up as a class.
    resolved_source = source_dir.resolve()
    resolved_output = output_dir.resolve()
    if (
        resolved_source == resolved_output
        or resolved_output in resolved_source.parents
        or resolved_source in resolved_output.parents
    ):
        raise ValueError(
            f"output_dir ({output_dir}) must not overlap source_dir ({source_dir})"
        )

    class_counts = count_files_per_class(source_dir)

    print("\nDistribusi Data Original:")
    for class_name, count in sorted(class_counts.items()):
        print(f"  Kelas {class_name}: {count} samples")

    if target_count is None:
        if not class_counts:
            raise ValueError(f"No class folders found in {source_dir}")
        target_count = max(class_counts.values())

    print(f"\nTarget samples per class: {target_count}")

    if output_dir.exists():
        print(f"\nMenghapus folder output lama...")
        shutil.rmtree(output_dir)

    output_dir.mkdir(parents=True, exist_ok=True)

    total_created = 0

    # Sorted traversal: directory listing order is filesystem-dependent, and
    # the seeded RNG only gives reproducible picks from a stable ordering.
    for class_folder in sorted(source_dir.iterdir()):
        if not class_folder.is_dir():
            continue

        class_name = class_folder.name
        original_files = sorted(
            path for path in class_folder.glob('*.png') if path.is_file()
        )
        current_count = len(original_files)

        print(f"\nProcessing class {class_name}:")

        output_class_dir = output_dir / class_name
        output_class_dir.mkdir(parents=True, exist_ok=True)

        # Copy every original file into the new output folder
        for original in original_files:
            shutil.copy2(str(original), str(output_class_dir / original.name))

        needed_samples = target_count - current_count

        if needed_samples <= 0:
            print(f"  ✓ Sudah seimbang atau mayoritas. Total: {current_count}")
            continue

        print(f"  Need to add: {needed_samples} samples")

        # --- Oversampling with safe augmentation ---
        # Files that fail to load are dropped from the pool so the loop always
        # terminates, even for an empty class or a class of unreadable images.
        candidates = list(original_files)
        samples_created = 0

        while samples_created < needed_samples and candidates:
            # Pick a random original as the augmentation source
            source_file = random.choice(candidates)

            augmented_img = augment_image_safe(source_file)
            if augmented_img is None:
                candidates.remove(source_file)
                print(f"    ⚠ Could not read {source_file.name}; excluded from augmentation")
                continue

            # Derive a new file name; bump the suffix rather than overwrite a
            # file that already exists in the output folder.
            suffix = samples_created
            output_path = output_class_dir / f"{source_file.stem}_aug{suffix}.png"
            while output_path.exists():
                suffix += 1
                output_path = output_class_dir / f"{source_file.stem}_aug{suffix}.png"

            # cv2.imwrite reports failure through its return value
            if not cv2.imwrite(str(output_path), augmented_img):
                raise OSError(f"Failed to write augmented image: {output_path}")

            samples_created += 1

            if samples_created % 50 == 0:
                print(f"    Created {samples_created}/{needed_samples} samples...")

        if samples_created < needed_samples:
            print(
                f"  ⚠ Only {samples_created}/{needed_samples} samples created: "
                "no readable source images left"
            )

        total_created += samples_created
        print(f"  ✓ Created {samples_created} augmented samples")
        print(f"  Total in class: {current_count + samples_created}")

    print("\n" + "="*60)
    print("OVERSAMPLING COMPLETED!")
    print("="*60)

    # Verify the final result
    print("\nDistribusi Data Setelah Oversampling:")
    balanced_counts = count_files_per_class(output_dir)
    for class_name, count in sorted(balanced_counts.items()):
        print(f"  Kelas {class_name}: {count} samples")

    print(f"\nTotal augmented samples created: {total_created}")
    return output_dir

# ==============================================================================
# JALANKAN PROSES
# ==============================================================================

try:
    print("Starting oversampling process...")
    # Per-class target passed to the oversampler: 366 samples. In the recorded
    # run output this equals the size of the largest class (class 2).
    balanced_dir = oversample_minority_classes(source_dir, output_dir, 366)

    print(f"\n✅ Oversampling berhasil! Data balanced tersimpan di: {balanced_dir}")

except Exception as err:
    # Top-level boundary of the notebook cell: report the failure with its
    # full traceback instead of letting the exception propagate.
    print(f"\n❌ Error: {err}")
    import traceback
    traceback.print_exc()

OVERSAMPLING KELAS MINORITAS (AUGMENTASI AMAN)
Starting oversampling process...

Distribusi Data Original:
  Kelas 1: 183 samples
  Kelas 2: 366 samples
  Kelas 3: 231 samples
  Kelas 4: 91 samples

Target samples per class: 366

Menghapus folder output lama...

Processing class 1:
  Need to add: 183 samples
    Created 50/183 samples...
    Created 100/183 samples...
    Created 150/183 samples...
  ✓ Created 183 augmented samples
  Total in class: 366

Processing class 2:
  ✓ Sudah seimbang atau mayoritas. Total: 366

Processing class 3:
  Need to add: 135 samples
    Created 50/135 samples...
    Created 100/135 samples...
  ✓ Created 135 augmented samples
  Total in class: 366

Processing class 4:
  Need to add: 275 samples
    Created 50/275 samples...
    Created 100/275 samples...
    Created 150/275 samples...
    Created 200/275 samples...
    Created 250/275 samples...
  ✓ Created 275 augmented samples
  Total in class: 366

OVERSAMPLING COMPLETED!

Distribusi Data Setelah Ov