In [5]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split
# Original (unsplit) dataset directory
DATASET_DIR = "/workspaces/Image-Classification-for-Cat-Dog-and-Tiger/submission/data"

# Output root for the split; one sub-folder per subset
BASE_DIR = "../submission"
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(BASE_DIR, subset) for subset in ("train", "val", "test")
)

# Fungsi untuk membagi dataset
def split_data():
    """Split every class folder of DATASET_DIR into train/val/test copies.

    Each sub-directory of ``DATASET_DIR`` is treated as one class. Its files
    are split with two chained ``train_test_split`` calls (30% held out, then
    33% of the hold-out used for test) and copied into
    ``TRAIN_DIR/<class>``, ``VAL_DIR/<class>`` and ``TEST_DIR/<class>``.

    The three split folders are emptied before copying so repeated runs start
    from a clean state. Nothing else under ``BASE_DIR`` is touched.

    Returns:
        None. The result is the copied files on disk.
    """
    split_dirs = (TRAIN_DIR, VAL_DIR, TEST_DIR)

    # Reset only the split folders instead of removing BASE_DIR as a whole:
    # BASE_DIR may hold unrelated content (depending on the working directory
    # it can even be the parent of DATASET_DIR), which must not be deleted.
    for split_dir in split_dirs:
        if os.path.exists(split_dir):
            shutil.rmtree(split_dir)
        os.makedirs(split_dir)

    for class_name in sorted(os.listdir(DATASET_DIR)):
        class_path = os.path.join(DATASET_DIR, class_name)
        if not os.path.isdir(class_path):
            continue

        # Sorted, regular files only. os.listdir order is arbitrary, so
        # sorting is what makes random_state=42 yield the same split on every
        # run; train_test_split already shuffles, no extra shuffle is needed.
        images = sorted(
            entry for entry in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, entry))
        )

        train_files, temp_files = train_test_split(images, test_size=0.3, random_state=42)
        # 0.33 of the 30% hold-out goes to test (about 10% of the class);
        # the remainder (about 20%) is the validation set.
        val_files, test_files = train_test_split(temp_files, test_size=0.33, random_state=42)

        # Create the class folder inside each subset and copy its files
        for subset_files, split_dir in zip((train_files, val_files, test_files), split_dirs):
            class_split_dir = os.path.join(split_dir, class_name)
            os.makedirs(class_split_dir, exist_ok=True)
            for file_name in subset_files:
                shutil.copy(
                    os.path.join(class_path, file_name),
                    os.path.join(class_split_dir, file_name),
                )

# Run the dataset split
split_data()

In [9]:
import os

def count_images(directory):
    """Print the total file count and the per-class file counts of ``directory``.

    Every sub-directory of ``directory`` is treated as one class; only regular
    files directly inside a class folder are counted. Entries of ``directory``
    that are not folders are ignored. Nothing is returned.
    """
    class_counts = {}

    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if not os.path.isdir(entry_path):  # only class folders are inspected
            continue
        class_counts[entry] = sum(
            1
            for name in os.listdir(entry_path)
            if os.path.isfile(os.path.join(entry_path, name))
        )

    print("Total Images:", sum(class_counts.values()))
    for class_name, count in class_counts.items():
        print(f"{class_name}: {count} images")

# Replace with the path of the directory to inspect.
# This is a relative path, so it is resolved against the current working directory.
DIRECTORY_PATH = "submission/data"

count_images(DIRECTORY_PATH)


Total Images: 15000
tigers: 5000 images
dogs: 5000 images
cats: 5000 images


In [11]:
import os
import shutil
import random

# Original (unsplit) dataset directory
DATASET_DIR = "/workspaces/Image-Classification-for-Cat-Dog-and-Tiger/submission/data"

# Output root for the split; one sub-folder per subset
BASE_DIR = "./split_dataset"
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(BASE_DIR, subset) for subset in ("train", "val", "test")
)

# Target number of images per class in each subset (70% / 20% / 10%)
TRAIN_COUNT, VAL_COUNT, TEST_COUNT = 3500, 1000, 500

def split_data(dataset_dir=None, base_dir=None, train_count=None,
               val_count=None, test_count=None, seed=42):
    """Copy a fixed number of images per class into train/val/test folders.

    Each sub-directory of the dataset directory is treated as one class. Its
    files are shuffled and the first ``train_count`` go to train, the next
    ``val_count`` to val and the next ``test_count`` to test. A class with
    fewer files than the three counts combined is skipped with a warning.

    Args:
        dataset_dir: Source directory; defaults to ``DATASET_DIR``.
        base_dir: Output root. When given, the subsets are written to
            ``<base_dir>/train``, ``/val`` and ``/test``; when omitted the
            module-level ``TRAIN_DIR``, ``VAL_DIR`` and ``TEST_DIR`` are used.
        train_count: Files per class for train; defaults to ``TRAIN_COUNT``.
        val_count: Files per class for val; defaults to ``VAL_COUNT``.
        test_count: Files per class for test; defaults to ``TEST_COUNT``.
        seed: Seed of the shuffle, so the same input gives the same split on
            every run. Pass ``None`` for a different split each time.

    Returns:
        None. The result is the copied files on disk plus a printed summary.
    """
    if dataset_dir is None:
        dataset_dir = DATASET_DIR
    if base_dir is None:
        split_dirs = (TRAIN_DIR, VAL_DIR, TEST_DIR)
    else:
        split_dirs = tuple(
            os.path.join(base_dir, subset) for subset in ("train", "val", "test")
        )
    if train_count is None:
        train_count = TRAIN_COUNT
    if val_count is None:
        val_count = VAL_COUNT
    if test_count is None:
        test_count = TEST_COUNT
    required = train_count + val_count + test_count

    # Reset only the three split folders rather than the whole output root,
    # so unrelated content (possibly the source data itself) is never deleted.
    for split_dir in split_dirs:
        if os.path.exists(split_dir):
            shutil.rmtree(split_dir)
        os.makedirs(split_dir)

    # Private generator: reproducible without touching the global random state.
    rng = random.Random(seed)
    skipped = []

    # Sorted iteration keeps the sequence of shuffles identical between runs.
    for class_name in sorted(os.listdir(dataset_dir)):
        class_path = os.path.join(dataset_dir, class_name)
        if not os.path.isdir(class_path):
            continue  # skip anything that is not a class folder

        # Regular files only (shutil.copy cannot copy a directory), sorted
        # because os.listdir returns entries in arbitrary order.
        images = sorted(
            entry for entry in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, entry))
        )

        # Make sure the class has enough images for all three subsets
        total_images = len(images)
        if total_images < required:
            print(f"Warning: Tidak cukup gambar di kelas {class_name} (Hanya {total_images} tersedia)")
            skipped.append(class_name)
            continue

        rng.shuffle(images)

        # Consecutive, non-overlapping slices of the shuffled list
        train_files = images[:train_count]
        val_files = images[train_count:train_count + val_count]
        test_files = images[train_count + val_count:required]

        # Create the class folder inside each subset and copy its files
        for subset_files, split_dir in zip((train_files, val_files, test_files), split_dirs):
            class_split_dir = os.path.join(split_dir, class_name)
            os.makedirs(class_split_dir, exist_ok=True)
            for file_name in subset_files:
                shutil.copy(
                    os.path.join(class_path, file_name),
                    os.path.join(class_split_dir, file_name),
                )

    if skipped:
        # Do not claim success when some classes were left out of the split.
        print(f"Dataset dibagi, tetapi {len(skipped)} kelas dilewati: {', '.join(skipped)}")
    else:
        denominator = required or 1  # avoids ZeroDivisionError if all counts are 0
        ratio = " - ".join(
            f"{100 * count / denominator:.0f}%"
            for count in (train_count, val_count, test_count)
        )
        print(f"Dataset berhasil dibagi sesuai rasio {ratio}!")

# Run the dataset split
split_data()


Dataset berhasil dibagi sesuai rasio 70% - 20% - 10%!
