In [2]:
import os
import zipfile
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

# ========== [1] Extract the ZIP archive ==========
zip_path = "dataset.zip"
extract_path = "dataset_mentah"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# ========== [2] Label mapping ==========
# Keys are source folders relative to extract_path; values are the class
# folder names used in the output dataset. Both "SEHAT" folders are merged
# into the single "sapi_sehat" class.
# NOTE(review): "MULUT/SAKIT" is mapped to "sapi_sakit_pmk_mata"; the source
# folder name suggests "sapi_sakit_pmk_mulut" may have been intended. Confirm
# before renaming, since downstream code may rely on the current folder name.
source_mapping = {
    "KUKU/SAKIT": "sapi_sakit_pmk_kuku",
    "KUKU/SEHAT": "sapi_sehat",
    "MULUT/SAKIT": "sapi_sakit_pmk_mata",
    "MULUT/SEHAT": "sapi_sehat"
}

# ========== [3] Create the target folder structure ==========
base_target_dir = "dataset"
splits = ['train', 'val', 'test']
# sorted() gives a stable label order; iterating a bare set of strings follows
# hash order, which can differ from one interpreter run to the next.
labels = sorted(set(source_mapping.values()))

# One directory per (split, label) pair, e.g. dataset/train/sapi_sehat.
for split in splits:
    for label in labels:
        os.makedirs(os.path.join(base_target_dir, split, label), exist_ok=True)

# ========== [4] Split & copy helpers ==========
def _collision_free_path(src_path, dst_dir):
    """Return a path inside *dst_dir* that will not clobber a different image.

    The plain basename is used when it is free or already holds identical
    content (so re-running the script stays idempotent). Otherwise the name is
    prefixed with the two innermost source folder names, and a numeric suffix
    is appended if even that is taken by different content.
    """
    import filecmp  # local import keeps this block self-contained

    fname = os.path.basename(src_path)
    candidate = os.path.join(dst_dir, fname)
    if not os.path.exists(candidate) or filecmp.cmp(src_path, candidate, shallow=False):
        return candidate

    src_dir = os.path.dirname(os.path.abspath(src_path))
    folder_parts = (os.path.basename(os.path.dirname(src_dir)), os.path.basename(src_dir))
    prefix = "_".join(part for part in folder_parts if part) or "dup"
    stem, ext = os.path.splitext(fname)

    candidate = os.path.join(dst_dir, f"{prefix}_{fname}")
    counter = 1
    while os.path.exists(candidate) and not filecmp.cmp(src_path, candidate, shallow=False):
        candidate = os.path.join(dst_dir, f"{prefix}_{stem}_{counter}{ext}")
        counter += 1
    return candidate


def split_and_copy(images, label_name, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
    """Split *images* into train/val/test and copy them under ``base_target_dir``.

    Args:
        images: Paths of the image files to distribute.
        label_name: Class folder name; files are copied to
            ``<base_target_dir>/<split>/<label_name>/``.
        train_ratio: Fraction of *images* that goes to the train split.
        val_ratio: Relative weight of the validation split within the
            remainder left after the train split.
        test_ratio: Relative weight of the test split within that remainder.

    Both splits use ``random_state=42``, so a given input order always yields
    the same partition. Inputs too small to be split are handled without
    calling ``train_test_split`` (which rejects empty partitions): an empty
    list is skipped with a warning, a single image goes to train, and a
    single leftover image goes to val.
    """
    import warnings  # local import keeps this block self-contained

    images = list(images)
    if not images:
        warnings.warn(f"No images to split for label '{label_name}'; nothing copied.")
        return

    if len(images) < 2:
        train_files, temp_files = images, []
    else:
        train_files, temp_files = train_test_split(
            images, train_size=train_ratio, random_state=42
        )

    if len(temp_files) < 2:
        # Too few leftovers to divide further: keep them all for validation.
        val_files, test_files = temp_files, []
    else:
        # test_size is expressed relative to the remainder, not the full set.
        val_files, test_files = train_test_split(
            temp_files,
            test_size=test_ratio / (val_ratio + test_ratio),
            random_state=42,
        )

    for split, files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
        dst_dir = os.path.join(base_target_dir, split, label_name)
        # Do not rely on the caller having pre-created the folder tree.
        os.makedirs(dst_dir, exist_ok=True)
        for img_path in files:
            # Several source folders can feed the same label, so equal
            # basenames must not overwrite each other.
            shutil.copy2(img_path, _collision_free_path(img_path, dst_dir))

# ========== [5] Process all images ==========
for src_rel_path, label in source_mapping.items():
    full_path = os.path.join(extract_path, src_rel_path)
    # os.listdir() returns entries in arbitrary order; sorting them makes the
    # seeded split in split_and_copy() reproducible across machines and runs.
    # Only files with a known image extension are kept (case-insensitive).
    image_paths = [os.path.join(full_path, fname) for fname in sorted(os.listdir(full_path))
                   if fname.lower().endswith(('.jpg', '.jpeg', '.png'))]
    split_and_copy(image_paths, label)

print("✅ Dataset selesai diproses dan disimpan di:", base_target_dir)

✅ Dataset selesai diproses dan disimpan di: dataset
