In [None]:
import os
import yaml
import pandas as pd
import matplotlib.pyplot as plt

def analyze_dataset_labels_with_yaml(base_path, yaml_file='data.yaml'):
    """
    Report how many label lines each class has in the train, test and valid splits.

    Every non-blank line of every ``.txt`` file under ``<base_path>/<split>/labels``
    is counted under the class ID given by its first whitespace-separated token.
    The counts are printed as two tables (absolute counts, then the share of each
    class that falls in each split) and drawn as a grouped bar chart. Class IDs
    are translated to names through the ``names`` entry of the YAML file.

    Parameters:
    - base_path: Root directory that contains the train, test and valid folders.
    - yaml_file: Path of the YAML file, joined onto ``base_path``.
    """

    # Read the class-ID -> class-name mapping; an empty YAML file yields None.
    with open(os.path.join(base_path, yaml_file), 'r') as file:
        yaml_data = yaml.safe_load(file) or {}

    class_names = yaml_data.get('names', [])
    num_classes = len(class_names)

    folders = ["train", "test", "valid"]

    # class_counts[split][class_id_as_string] -> number of label lines
    class_counts = {folder: {} for folder in folders}

    for folder in folders:
        labels_folder_path = os.path.join(base_path, folder, "labels")
        if not os.path.isdir(labels_folder_path):
            continue
        folder_counts = class_counts[folder]
        for label_file in os.listdir(labels_folder_path):
            # Only label text files are parsed; other files in the folder are ignored.
            if not label_file.lower().endswith(".txt"):
                continue
            with open(os.path.join(labels_folder_path, label_file), 'r') as file:
                for line in file:
                    fields = line.split()
                    if not fields:
                        # Blank line: there is no class token to read.
                        continue
                    class_id = fields[0]
                    folder_counts[class_id] = folder_counts.get(class_id, 0) + 1

    # Gather the per-class numbers once; both tables below are printed from them.
    rows = []
    for class_id in range(num_classes):
        key = str(class_id)
        rows.append((
            class_names[class_id],
            class_counts["train"].get(key, 0),
            class_counts["test"].get(key, 0),
            class_counts["valid"].get(key, 0),
        ))

    # Table 1: absolute counts
    print("Total Data untuk Setiap Kelas:")
    print("{:<25} {:<10} {:<10} {:<10} {:<10}".format(
        "Nama Kelas", "Train", "Test", "Valid", "Total"
    ))
    print("-" * 65)

    total_semua = 0
    for class_name, train_count, test_count, valid_count in rows:
        total_count = train_count + test_count + valid_count
        total_semua += total_count
        print("{:<25} {:<10} {:<10} {:<10} {:<10}".format(
            class_name, train_count, test_count, valid_count, total_count
        ))
    print("-" * 65)
    print("Total Keseluruhan Data: {}".format(total_semua))

    # Table 2: share of each class that sits in each split
    print("\nPersentase Distribusi Data untuk Setiap Kelas:")
    print("{:<25} {:<10} {:<10} {:<10}".format(
        "Nama Kelas", "Train (%)", "Test (%)", "Valid (%)"
    ))
    print("-" * 55)
    for class_name, train_count, test_count, valid_count in rows:
        total_count = train_count + test_count + valid_count
        if total_count > 0:
            train_percent = (train_count / total_count) * 100
            test_percent = (test_count / total_count) * 100
            valid_percent = (valid_count / total_count) * 100
        else:
            train_percent = test_percent = valid_percent = 0
        print("{:<25} {:<10.2f} {:<10.2f} {:<10.2f}".format(
            class_name, train_percent, test_percent, valid_percent
        ))

    # One row per class ID that was seen, one column per split.
    df = pd.DataFrame(class_counts).fillna(0).astype(int)
    if df.empty:
        # Without any rows there is nothing for the bar chart to draw.
        print("Tidak ada data label untuk diplot.")
        return

    def display_name(raw_id):
        # Use the YAML name when the ID is a valid class index, else show the raw ID.
        try:
            index = int(raw_id)
            if 0 <= index < num_classes:
                return class_names[index]
        except (ValueError, IndexError, KeyError):
            pass
        return str(raw_id)

    df.index = [display_name(idx) for idx in df.index]

    # Grouped bar chart
    df.plot(kind='bar', figsize=(14, 7))
    plt.title('Jumlah Data untuk Setiap Kelas di Setiap Folder')
    plt.xlabel('Kelas')
    plt.ylabel('Jumlah Data')
    plt.xticks(rotation=45, ha='right')
    plt.legend(title='Folder', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

In [None]:
import os

def check_labels_and_images(base_path):
    """
    Report images without a label file and label files without an image.

    For each of the train, test and valid folders, a file in ``images`` is
    matched with the file in ``labels`` that has the same name with its extension
    replaced by ``.txt``. A label is matched with any image that shares its name
    regardless of the image's extension. The findings are printed; nothing is
    modified.

    Parameter:
    - base_path: Root directory that contains the train, test and valid folders.
    """
    folders = ["train", "test", "valid"]

    for folder in folders:
        images_folder = os.path.join(base_path, folder, "images")
        labels_folder = os.path.join(base_path, folder, "labels")

        # A split without both sub-folders cannot be checked; warn and move on.
        if not (os.path.isdir(images_folder) and os.path.isdir(labels_folder)):
            print(f"Warning: Folder images atau labels tidak ditemukan di {os.path.join(base_path, folder)}")
            continue

        image_files = os.listdir(images_folder)
        label_files = os.listdir(labels_folder)

        label_names = set(label_files)
        # Names without extension, so a label matches .jpg, .png, .jpeg, ... alike.
        image_stems = {os.path.splitext(name)[0] for name in image_files}

        # Sorted so the report is the same on every run.
        missing_labels = sorted(
            name for name in image_files
            if os.path.splitext(name)[0] + ".txt" not in label_names
        )
        missing_images = sorted(
            name for name in label_files
            if os.path.splitext(name)[0] not in image_stems
        )

        # Print the result for this split
        print(f"Folder {folder}:")
        if missing_labels:
            print(f"  - {len(missing_labels)} file gambar tidak memiliki file label:")
            for file in missing_labels:
                print(f"    - {file}")
        else:
            print("  - Semua file gambar memiliki file label.")

        if missing_images:
            print(f"  - {len(missing_images)} file label tidak memiliki file gambar:")
            for file in missing_images:
                print(f"    - {file}")
        else:
            print("  - Semua file label memiliki file gambar.")
        print("-" * 40)

In [None]:
import os
from collections import defaultdict
import filecmp

def extract_base_name(file_name):
    """
    Return the base name of a file without the ``_jpg`` marker and ``.rf.<hash>`` part.

    Everything from the first ``.`` onwards is discarded, then every ``_jpg``
    occurrence is removed from what is left.

    Examples:
    - leaf_spot214.rf.8bf5924ff9b331a07aa870740e6ad39d.jpg → leaf_spot214
    - leaf_spot214_jpg.rf.0ab37a08b63b9917d4204c9f920d9c43.jpg → leaf_spot214
    """
    # Cutting at the first dot already drops ".rf.<hash>.<ext>", so no separate
    # ".rf" handling is needed afterwards.
    stem = file_name.partition(".")[0]
    return stem.replace("_jpg", "")

def remove_duplicate_files_labels(base_path):
    """
    Delete images (and their labels) that duplicate another image's base name and label.

    Images from the train, test and valid folders are grouped by
    ``extract_base_name``. Within a group, two entries count as duplicates only
    when their label files have identical content; the later entry is then
    deleted together with its label. Entries that share a base name but have
    different labels are only reported with a warning.

    Parameter:
    - base_path: Root directory that contains the train, test and valid folders.
    """
    folders = ["train", "test", "valid"]
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    base_name_to_files = defaultdict(list)

    for folder in folders:
        images_folder_path = os.path.join(base_path, folder, "images")
        labels_folder_path = os.path.join(base_path, folder, "labels")
        if not (os.path.exists(images_folder_path) and os.path.exists(labels_folder_path)):
            print(f"Warning: Folder images atau labels tidak ditemukan di {os.path.join(base_path, folder)}")
            continue

        # Sorted so that "the first file is kept" means the same file on every run.
        for file_name in sorted(os.listdir(images_folder_path)):
            if not file_name.lower().endswith(image_extensions):
                continue
            # Swap only the real extension; this also covers upper-case extensions
            # and names that contain ".jpg"/".png" somewhere in the middle.
            label_file_name = os.path.splitext(file_name)[0] + ".txt"
            label_path = os.path.join(labels_folder_path, label_file_name)
            if os.path.exists(label_path):
                base_name = extract_base_name(file_name)
                base_name_to_files[base_name].append((folder, file_name, label_path))
            else:
                print(f"Warning: File label tidak ditemukan untuk {folder}/images/{file_name}")

    for base_name, files in base_name_to_files.items():
        if len(files) < 2:
            continue

        files_to_delete = []
        marked = set()  # positions in `files` already scheduled for deletion
        for i, (folder1, file1, label_path1) in enumerate(files):
            if i in marked:
                # Already a duplicate of an earlier entry; nothing new to learn from it.
                continue
            for j in range(i + 1, len(files)):
                if j in marked:
                    continue
                folder2, file2, label_path2 = files[j]

                # shallow=False forces a comparison of the file contents.
                if filecmp.cmp(label_path1, label_path2, shallow=False):
                    print(f"File berikut memiliki nama dasar dan label yang sama ({base_name}):")
                    print(f"- {folder1}/images/{file1}")
                    print(f"- {folder2}/images/{file2}")

                    # Keep the earlier entry, schedule the later one exactly once.
                    marked.add(j)
                    files_to_delete.append(files[j])
                    print("-" * 40)
                else:
                    print(f"Warning: File dengan nama dasar yang sama ({base_name}) memiliki label yang berbeda:")
                    print(f"- {folder1}/images/{file1}")
                    print(f"- {folder2}/images/{file2}")
                    print("-" * 40)

        for folder_to_delete, file_to_delete, label_path in files_to_delete:
            image_path = os.path.join(base_path, folder_to_delete, "images", file_to_delete)
            if os.path.exists(image_path):
                os.remove(image_path)
                print(f"- Dihapus: {folder_to_delete}/images/{file_to_delete}")
            else:
                print(f"- File gambar tidak ditemukan: {folder_to_delete}/images/{file_to_delete}")
            if os.path.exists(label_path):
                os.remove(label_path)
                print(f"- Dihapus: {folder_to_delete}/labels/{os.path.basename(label_path)}")
            else:
                print(f"- File label tidak ditemukan: {folder_to_delete}/labels/{os.path.basename(label_path)}")
            print("-" * 40)

    print("Proses penghapusan duplikat selesai.")

In [None]:
import os
import yaml
from typing import Dict

def normalize_name(name: str) -> str:
    """Return ``name`` lower-cased, with every ``-`` and ``_`` turned into a space."""
    lowered = name.lower()
    # One translation pass maps both separator characters to a space.
    return lowered.translate(str.maketrans("-_", "  "))

def combined_data_yaml(source_folder: str, target_folder: str) -> None:
    """
    Merge the class names of the source ``data.yaml`` into the target ``data.yaml``.

    The target's names are normalized with ``normalize_name`` and kept in their
    existing order; each source name whose normalized form is not yet present is
    appended. The target file is then overwritten with its ``train``/``val``/``test``
    entries (those that exist), the merged ``names`` list and the matching ``nc``.
    Any other key of the target file is not written back.

    Parameters:
    - source_folder: Folder holding the ``data.yaml`` whose names are added.
    - target_folder: Folder holding the ``data.yaml`` that is rewritten in place.
    """
    source_yaml = os.path.join(source_folder, "data.yaml")
    target_yaml = os.path.join(target_folder, "data.yaml")

    # safe_load: these files are plain data, so no Python object construction is needed.
    with open(source_yaml, "r") as file:
        source_data = yaml.safe_load(file)

    with open(target_yaml, "r") as file:
        target_data = yaml.safe_load(file)

    normalized_combined = [normalize_name(name) for name in target_data["names"]]

    for name in source_data["names"]:
        normalized_name = normalize_name(name)
        if normalized_name not in normalized_combined:
            normalized_combined.append(normalized_name)

    # Copy only the split entries the target actually defines (some datasets have no test split).
    combined_data = {
        key: target_data[key]
        for key in ("train", "val", "test")
        if key in target_data
    }
    combined_data["names"] = normalized_combined
    combined_data["nc"] = len(normalized_combined)

    with open(target_yaml, "w") as file:
        yaml.dump(combined_data, file)

# return dictionary of names
def dictionary_yaml_old_to_new(source_folder: str, target_folder_with_new_data_yaml: str) -> Dict[int, int]:
    """
    Build the mapping from source class indices to target class indices.

    Names from both ``data.yaml`` files are compared after ``normalize_name``, so
    the lookup does not depend on the target file already holding normalized names.

    Parameters:
    - source_folder: Folder holding the source ``data.yaml``.
    - target_folder_with_new_data_yaml: Folder holding the (already merged) target ``data.yaml``.

    Returns:
        ``{source_index: target_index}`` for every source class, or an empty dict
        (after printing a message) as soon as one source name has no match in the
        target.
    """
    source_yaml = os.path.join(source_folder, "data.yaml")
    target_yaml = os.path.join(target_folder_with_new_data_yaml, "data.yaml")

    with open(source_yaml, "r") as file:
        source_data = yaml.safe_load(file)

    with open(target_yaml, "r") as file:
        target_data = yaml.safe_load(file)

    # normalized target name -> its first index in the target list
    target_index = {}
    for index, name in enumerate(target_data["names"]):
        target_index.setdefault(normalize_name(name), index)

    dictionary = {}
    for i, name in enumerate(source_data["names"]):
        normalized_name = normalize_name(name)
        if normalized_name not in target_index:
            # ERROR: callers treat the empty dict as "mapping could not be built"
            print(f"{name} not found in target data.yaml")
            return {}
        dictionary[i] = target_index[normalized_name]

    return dictionary

def move_file(source_file: str, target_folder: str) -> None:
    """
    Move ``source_file`` into ``target_folder``, keeping its file name.

    Parameters:
    - source_file: Path of the file to move.
    - target_folder: Existing directory that receives the file.

    Raises:
        FileExistsError: If a file with the same name already exists in ``target_folder``.
    """
    # Local import so the block does not depend on the cell's import list.
    import shutil

    target_file = os.path.join(target_folder, os.path.basename(source_file))
    if os.path.exists(target_file):
        # ERROR: never overwrite an existing file in the target
        raise FileExistsError("File already exists in target folder")

    # shutil.move falls back to copy + delete when source and target are on
    # different filesystems, where a plain os.rename fails.
    shutil.move(source_file, target_file)

def move_label(source_label: str, target_folder: str, mapping: Dict[int, int]) -> None:
    """
    Write a copy of ``source_label`` into ``target_folder`` with remapped class IDs.

    The first token of every non-blank line is read as an integer class ID and
    replaced by ``mapping[class_id]``; the remaining tokens are kept and re-joined
    with single spaces. Blank lines are dropped. The source file itself is left
    in place.

    Parameters:
    - source_label: Path of the label file to read.
    - target_folder: Existing directory that receives the remapped file.
    - mapping: Old class ID -> new class ID.

    Raises:
        FileExistsError: If a file with the same name already exists in ``target_folder``.
        KeyError: If a line uses a class ID that is not a key of ``mapping``.
    """
    target_label = os.path.join(target_folder, os.path.basename(source_label))
    if os.path.exists(target_label):
        # ERROR: never overwrite an existing label in the target
        raise FileExistsError("File already exists in target folder")

    # Remap everything before the target is created, so a bad line cannot leave
    # a half-written file behind that would block a later retry.
    remapped = []
    with open(source_label, "r") as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue
            fields[0] = str(mapping[int(fields[0])])
            remapped.append(" ".join(fields) + "\n")

    with open(target_label, "w") as file:
        file.writelines(remapped)

def combine_dataset(source_folder: str, target_folder: str) -> None:
    """
    Merge the source dataset into the target dataset.

    The class names of both ``data.yaml`` files are merged first
    (``combined_data_yaml``) and the old-to-new class index mapping is derived
    from the result (``dictionary_yaml_old_to_new``). Then, for each of the train,
    valid and test splits present in the source, every file in ``images`` is moved
    to the target and its label is written to the target with remapped class IDs.

    Parameters:
    - source_folder: Dataset whose files are merged into the target.
    - target_folder: Dataset that receives the files; its ``data.yaml`` is rewritten.

    Raises:
        FileExistsError: If an image or label name already exists in the target
            (raised by ``move_file`` / ``move_label``).
    """
    # The YAML merge and the mapping do not depend on the split, so build them once.
    combined_data_yaml(source_folder, target_folder)
    mapping = dictionary_yaml_old_to_new(source_folder, target_folder)
    if mapping == {}:  # ERROR: a source class could not be mapped
        return

    for main_folder in ["train", "valid", "test"]:
        source_images_folder = os.path.join(source_folder, main_folder, "images")
        target_images_folder = os.path.join(target_folder, main_folder, "images")

        source_labels_folder = os.path.join(source_folder, main_folder, "labels")
        target_labels_folder = os.path.join(target_folder, main_folder, "labels")

        if not os.path.isdir(source_images_folder):
            # The source has no such split; nothing to merge for it.
            continue

        os.makedirs(target_images_folder, exist_ok=True)
        os.makedirs(target_labels_folder, exist_ok=True)

        for source_image in os.listdir(source_images_folder):
            # Same name with the extension swapped, whatever the image type is.
            source_label = os.path.splitext(source_image)[0] + ".txt"
            source_label_path = os.path.join(source_labels_folder, source_label)
            source_image_path = os.path.join(source_images_folder, source_image)

            # Handle the label first: it leaves the source untouched, so a missing
            # or invalid label stops the run before the image has been moved away.
            move_label(source_label_path, target_labels_folder, mapping)
            move_file(source_image_path, target_images_folder)

In [9]:
import os
from PIL import Image
import imagehash
from collections import defaultdict

def remove_duplicate_images(base_path):
    """
    Remove images whose average hash equals that of another image, plus their labels.

    All images in the train, test and valid folders are hashed with
    ``imagehash.average_hash`` and grouped by hash value across the three folders.
    In each group the first file is kept and every other file is deleted together
    with the ``.txt`` label of the same name.

    NOTE(review): files are matched by hash equality only, not by comparing pixel
    data; confirm that is strict enough before running this on data that
    cannot be restored.

    Args:
        base_path (str): Path to the dataset directory containing train, test, and valid folders

    Returns:
        None
    """
    folders = ["train", "test", "valid"]
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    def calculate_image_hash(image_path):
        """
        Return the average hash of the image, or None if it cannot be processed.
        """
        try:
            # The context manager releases the file handle before any deletion happens.
            with Image.open(image_path) as img:
                return imagehash.average_hash(img)
        except Exception as e:
            # Best effort: an unreadable file is reported and left out of the comparison.
            print(f"Gagal memproses {image_path}: {e}")
            return None

    def load_image_hashes_from_folder(folder_path):
        """
        Hash every image in the folder and return ``{file_name: hash}``.
        """
        image_hashes = {}
        # Sorted so that the file kept from each duplicate group is the same on every run.
        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.lower().endswith(image_extensions):
                continue
            img_hash = calculate_image_hash(os.path.join(folder_path, file_name))
            if img_hash is not None:
                image_hashes[file_name] = img_hash
        return image_hashes

    # Hash the images of every split that exists
    all_image_hashes = {}
    for folder in folders:
        images_folder_path = os.path.join(base_path, folder, "images")
        if os.path.exists(images_folder_path):
            all_image_hashes[folder] = load_image_hashes_from_folder(images_folder_path)

    # Group files by hash across all splits
    hash_to_files = defaultdict(list)
    for folder in folders:
        for file_name, img_hash in all_image_hashes.get(folder, {}).items():
            hash_to_files[img_hash].append((folder, file_name))

    # Delete everything but the first file of each group
    for files in hash_to_files.values():
        if len(files) < 2:
            continue

        print("Gambar berikut memiliki hash yang sama:")
        for folder, file_name in files:
            print(f"- {folder}/images/{file_name}")

        for folder_to_delete, file_to_delete in files[1:]:
            image_path = os.path.join(base_path, folder_to_delete, "images", file_to_delete)
            label_path = os.path.join(base_path, folder_to_delete, "labels",
                                      os.path.splitext(file_to_delete)[0] + ".txt")

            if os.path.exists(image_path):
                os.remove(image_path)
                print(f"- Dihapus: {folder_to_delete}/images/{file_to_delete}")

            if os.path.exists(label_path):
                os.remove(label_path)
                print(f"- Dihapus: {folder_to_delete}/labels/{os.path.basename(label_path)}")

        print("-" * 40)

    print("Proses penghapusan duplikat selesai.")

In [8]:
import os
from PIL import Image
import imagehash
from collections import defaultdict

def check_duplicate_images(base_path):
    """
    Find and count images that share an average hash, without deleting anything.

    All images in the train, test and valid folders are hashed with
    ``imagehash.average_hash`` and grouped by hash value across the three folders.
    In each group the first file is reported as the original and the rest as
    duplicates.

    Args:
        base_path (str): Path to the dataset directory containing train, test, and valid folders

    Returns:
        tuple: ``(total_duplicates, duplicate_groups)`` where ``duplicate_groups``
        is a list of dicts with the keys ``'original'`` (one path string) and
        ``'duplicates'`` (list of path strings).
    """
    folders = ["train", "test", "valid"]
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    total_duplicates = 0
    duplicate_groups = []

    def calculate_image_hash(image_path):
        """
        Return the average hash of the image, or None if it cannot be processed.
        """
        try:
            # The context manager closes the file as soon as the hash is computed.
            with Image.open(image_path) as img:
                return imagehash.average_hash(img)
        except Exception as e:
            # Best effort: an unreadable file is reported and left out of the comparison.
            print(f"Gagal memproses {image_path}: {e}")
            return None

    def load_image_hashes_from_folder(folder_path):
        """
        Hash every image in the folder and return ``{file_name: hash}``.
        """
        image_hashes = {}
        # Sorted so that the file reported as 'original' is the same on every run.
        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.lower().endswith(image_extensions):
                continue
            img_hash = calculate_image_hash(os.path.join(folder_path, file_name))
            if img_hash is not None:
                image_hashes[file_name] = img_hash
        return image_hashes

    # Hash the images of every split that exists
    all_image_hashes = {}
    for folder in folders:
        images_folder_path = os.path.join(base_path, folder, "images")
        if os.path.exists(images_folder_path):
            all_image_hashes[folder] = load_image_hashes_from_folder(images_folder_path)

    # Group files by hash across all splits
    hash_to_files = defaultdict(list)
    for folder in folders:
        for file_name, img_hash in all_image_hashes.get(folder, {}).items():
            hash_to_files[img_hash].append((folder, file_name))

    # Report and count each group with more than one file
    for files in hash_to_files.values():
        if len(files) < 2:
            continue

        print("\nGrup gambar duplikat ditemukan:")
        for folder, file_name in files:
            print(f"- {folder}/images/{file_name}")

        # Every file except the first one counts as a duplicate.
        num_duplicates = len(files) - 1
        total_duplicates += num_duplicates

        paths = [f"{folder}/images/{file_name}" for folder, file_name in files]
        duplicate_groups.append({
            'original': paths[0],
            'duplicates': paths[1:]
        })

        print(f"Jumlah duplikat dalam grup ini: {num_duplicates}")
        print("-" * 40)

    print("\nRingkasan:")
    print(f"Total gambar duplikat yang ditemukan: {total_duplicates}")
    # One entry was appended per group above, so the list length is the group count.
    print(f"Total grup duplikat: {len(duplicate_groups)}")

    return total_duplicates, duplicate_groups

In [None]:
import os
import yaml
import pandas as pd
import matplotlib.pyplot as plt

def analyze_dataset_labels_for_classification(base_path, yaml_file='data.yaml'):
    """
    Report how many label files each class has, using one class per file.

    For every ``.txt`` file under ``<base_path>/<split>/labels`` only the first
    line is read; its first whitespace-separated token is taken as the class ID of
    that file. Files whose first line is blank are skipped. The counts are printed
    as two tables (absolute counts, then the share of each class that falls in each
    split) and drawn as a grouped bar chart. Class IDs are translated to names
    through the ``names`` entry of the YAML file.

    Parameters:
    - base_path: Root directory that contains the train, test and valid folders.
    - yaml_file: Path of the YAML file, joined onto ``base_path``.
    """

    # Read the class-ID -> class-name mapping; an empty YAML file yields None.
    with open(os.path.join(base_path, yaml_file), 'r') as file:
        yaml_data = yaml.safe_load(file) or {}

    class_names = yaml_data.get('names', [])
    num_classes = len(class_names)

    folders = ["train", "test", "valid"]

    # Start every known class at 0 so it shows up even when it never occurs.
    class_counts = {folder: {str(i): 0 for i in range(num_classes)} for folder in folders}

    for folder in folders:
        labels_folder_path = os.path.join(base_path, folder, "labels")
        if not os.path.isdir(labels_folder_path):
            continue
        folder_counts = class_counts[folder]
        for label_file in os.listdir(labels_folder_path):
            # Only label text files are parsed; other files in the folder are ignored.
            if not label_file.lower().endswith(".txt"):
                continue
            with open(os.path.join(labels_folder_path, label_file), 'r') as file:
                # Only the first line matters for classification.
                line = file.readline().strip()
            if not line:
                continue  # empty file or blank first line
            class_id = line.split()[0]
            # .get() keeps an ID that is not listed in the YAML from raising KeyError.
            folder_counts[class_id] = folder_counts.get(class_id, 0) + 1

    # Gather the per-class numbers once; both tables below are printed from them.
    rows = []
    for i in range(num_classes):
        key = str(i)
        rows.append((
            class_names[i],
            class_counts["train"].get(key, 0),
            class_counts["test"].get(key, 0),
            class_counts["valid"].get(key, 0),
        ))

    # Table 1: absolute counts
    print("Total Data untuk Setiap Kelas:")
    print("{:<25} {:<10} {:<10} {:<10} {:<10}".format(
        "Nama Kelas", "Train", "Test", "Valid", "Total"
    ))
    print("-" * 65)

    total_semua = 0
    for class_name, train_count, test_count, valid_count in rows:
        total_count = train_count + test_count + valid_count
        total_semua += total_count
        print("{:<25} {:<10} {:<10} {:<10} {:<10}".format(
            class_name, train_count, test_count, valid_count, total_count
        ))
    print("-" * 65)
    print("Total Keseluruhan Data: {}".format(total_semua))

    # Table 2: share of each class that sits in each split
    print("\nPersentase Distribusi Data untuk Setiap Kelas:")
    print("{:<25} {:<10} {:<10} {:<10}".format(
        "Nama Kelas", "Train (%)", "Test (%)", "Valid (%)"
    ))
    print("-" * 55)
    for class_name, train_count, test_count, valid_count in rows:
        total_count = train_count + test_count + valid_count
        if total_count > 0:
            train_percent = (train_count / total_count) * 100
            test_percent = (test_count / total_count) * 100
            valid_percent = (valid_count / total_count) * 100
        else:
            train_percent = test_percent = valid_percent = 0
        print("{:<25} {:<10.2f} {:<10.2f} {:<10.2f}".format(
            class_name, train_percent, test_percent, valid_percent
        ))

    # One row per class ID, one column per split.
    df = pd.DataFrame(class_counts).fillna(0).astype(int)
    if df.empty:
        # Without any rows there is nothing for the bar chart to draw.
        print("Tidak ada data label untuk diplot.")
        return

    def display_name(raw_id):
        # Use the YAML name when the ID is a valid class index, else show the raw ID.
        try:
            index = int(raw_id)
            if 0 <= index < num_classes:
                return class_names[index]
        except (ValueError, IndexError, KeyError):
            pass
        return str(raw_id)

    df.index = [display_name(idx) for idx in df.index]

    # Grouped bar chart
    df.plot(kind='bar', figsize=(14, 7))
    plt.title('Jumlah Data untuk Setiap Kelas di Setiap Folder')
    plt.xlabel('Kelas')
    plt.ylabel('Jumlah Data')
    plt.xticks(rotation=45, ha='right')
    plt.legend(title='Folder', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

In [None]:
import os
from typing import Dict

def change_label_class(base_path: str, mappingNumber: Dict[int, int]) -> None:
    """
    Rewrite the class IDs of every label file in the train, valid and test splits.

    In each ``.txt`` file under ``<base_path>/<split>/labels`` the first token of
    every non-blank line is read as an integer class ID. IDs that are keys of
    ``mappingNumber`` are replaced by the mapped value, all others are kept. The
    remaining tokens are preserved and re-joined with single spaces; blank lines
    are dropped. Splits without a ``labels`` folder are skipped.

    Parameters:
    - base_path: Root directory that contains the train, valid and test folders.
    - mappingNumber: Old class ID -> new class ID.

    Raises:
        ValueError: If the first token of a line is not an integer. The file that
            contains the line is left unmodified.
    """
    for folder in ["train", "valid", "test"]:
        labels_folder_path = os.path.join(base_path, folder, "labels")
        if not os.path.isdir(labels_folder_path):
            continue

        for label_name in os.listdir(labels_folder_path):
            # Only label text files are rewritten; other files in the folder are left alone.
            if not label_name.lower().endswith(".txt"):
                continue
            file_path = os.path.join(labels_folder_path, label_name)

            with open(file_path, "r") as f:
                lines = f.readlines()

            # Build the whole new content before opening for writing: opening with
            # "w" truncates the file, so a parse error after that would lose data.
            rewritten = []
            for line in lines:
                fields = line.split()
                if not fields:
                    continue
                class_number = int(fields[0])
                fields[0] = str(mappingNumber.get(class_number, class_number))
                rewritten.append(" ".join(fields) + "\n")

            with open(file_path, "w") as f:
                f.writelines(rewritten)

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in strawberry-2 to yolov11::   4%|▎         | 107885/2938620 [01:13<32:00, 1474.01it/s]


KeyboardInterrupt: 

In [None]:
def distribusi_dataset(base_path: str, split_ratios: tuple):
    