In [1]:
import os
import shutil
import numpy as np
from tqdm import tqdm

def split_dataset(source_dir, destination_dir, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1, seed=42):
    """Copy the files under ``source_dir`` into Training/Validation/Testing trees.

    Every directory below ``source_dir`` that directly contains files is split
    on its own: its files are shuffled and copied (the originals are left in
    place) into ``destination_dir/Training``, ``destination_dir/Validation``
    and ``destination_dir/Testing``, keeping the same relative sub-path.

    Args:
        source_dir: Root of the directory tree to split.
        destination_dir: Root under which the three split folders are created.
        train_ratio: Fraction of each directory's files copied to Training.
        val_ratio: Fraction of each directory's files copied to Validation.
        test_ratio: Fraction for Testing. Only used in the sum check; the
            Testing set always receives whatever Training and Validation
            did not take.
        seed: Seed for NumPy's global random generator.

    Raises:
        AssertionError: If the three ratios do not sum to 1 (within 1e-9).
    """
    np.random.seed(seed)  # Seed the global NumPy RNG so the shuffles are repeatable
    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not exactly 1.0
    # in binary floating point and would fail a strict equality check.
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) <= 1e-9, "Le proporzioni devono sommare al 100%."

    # Create the destination folders for training, validation and testing
    train_dir = os.path.join(destination_dir, 'Training')
    val_dir = os.path.join(destination_dir, 'Validation')
    test_dir = os.path.join(destination_dir, 'Testing')
    for split_root in (train_dir, val_dir, test_dir):
        os.makedirs(split_root, exist_ok=True)

    # Used to keep the walk out of the output tree when it lives inside source_dir
    destination_abs = os.path.abspath(destination_dir)

    for subdir, dirs, files in os.walk(source_dir):
        # Prune the destination tree and fix the visiting order. os.walk returns
        # names in arbitrary filesystem order, which would make the split depend
        # on the machine even with a fixed seed.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.abspath(os.path.join(subdir, d)) != destination_abs
        )
        if not files:
            continue

        # Number of files per split; the small epsilon keeps float noise such as
        # 100 * 0.29 == 28.999999999999996 from truncating one file too few.
        num_files = len(files)
        num_train = int(num_files * train_ratio + 1e-9)
        num_val = int(num_files * val_ratio + 1e-9)

        # Shuffle a sorted copy so the result depends only on the seed and the names
        shuffled_files = np.random.permutation(sorted(files))

        # Mirror the source sub-path below each split root
        rel_path = os.path.relpath(subdir, source_dir)
        assignments = (
            (os.path.join(train_dir, rel_path), shuffled_files[:num_train]),
            (os.path.join(val_dir, rel_path), shuffled_files[num_train:num_train + num_val]),
            # The remainder goes to the test set, absorbing any rounding leftovers
            (os.path.join(test_dir, rel_path), shuffled_files[num_train + num_val:]),
        )

        # Copy (not move) each file into its split folder, preserving metadata
        for target_dir, chosen_files in assignments:
            os.makedirs(target_dir, exist_ok=True)
            for f in chosen_files:
                shutil.copy2(os.path.join(subdir, f), os.path.join(target_dir, f))

        print(f"Processati {len(files)} files in {subdir}")

# Input tree to split, and the output root that receives the split copies
source_directory = 'Dataset'
destination_directory = 'DividedDataset'

# Run the split with the function's default ratios and seed
split_dataset(source_dir=source_directory, destination_dir=destination_directory)

Processati 1 files in Dataset
Processati 3 files in Dataset\Non-Target
Processati 6 files in Dataset\Non-Target\Ambient Noise
Processati 7 files in Dataset\Non-Target\Arnoux’s Beaked Whale
Processati 60 files in Dataset\Non-Target\Atlantic Spotted Dolphin
Processati 31 files in Dataset\Non-Target\Bearded Seal
Processati 54 files in Dataset\Non-Target\Beluga, White Whale
Processati 9 files in Dataset\Non-Target\Blue whale
Processati 4 files in Dataset\Non-Target\Bocaccio
Processati 28 files in Dataset\Non-Target\Bottlenose Dolphin
Processati 52 files in Dataset\Non-Target\Bowhead Whale
Processati 64 files in Dataset\Non-Target\Clymene Dolphin
Processati 54 files in Dataset\Non-Target\Common Dolphin
Processati 5 files in Dataset\Non-Target\Cuvier's beaked whale
Processati 4 files in Dataset\Non-Target\Dugong
Processati 7 files in Dataset\Non-Target\Dwarf Sperm Whale
Processati 62 files in Dataset\Non-Target\False Killer Whale
Processati 60 files in Dataset\Non-Target\Fin, Finback Whale
P