In [None]:
import os
import shutil
from tqdm import tqdm

# Merge the per-split class folders under `source_root` into one flat folder
# per class under `target_root`, renaming every clip to file<N>.wav.
source_root = "data_preprocessed"
target_root = "Data"

splits = ["training", "validation", "testing"]
classes = ["real", "fake"]

# Create target folders
for cls in classes:
    os.makedirs(os.path.join(target_root, cls), exist_ok=True)

# Next number to hand out per class. The counter is shared by all splits so
# that names never collide inside a class folder.
file_id = {"real": 1, "fake": 1}

for split in splits:
    for cls in classes:
        source_folder = os.path.join(source_root, split, cls)
        target_folder = os.path.join(target_root, cls)

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # original-name -> file<N>.wav mapping identical from run to run.
        entries = sorted(os.listdir(source_folder))

        for filename in tqdm(entries, desc=f"{split}/{cls}"):
            # Compare the extension case-insensitively so that clips saved
            # as ".WAV" are not silently left out of the merged dataset.
            if not filename.lower().endswith(".wav"):
                continue

            src_path = os.path.join(source_folder, filename)
            new_filename = f"file{file_id[cls]}.wav"
            dest_path = os.path.join(target_folder, new_filename)

            # Copy and rename (copy2 also carries over file metadata)
            shutil.copy2(src_path, dest_path)
            file_id[cls] += 1

print(f"\nMerged and renamed all files into '{target_root}/real' and '{target_root}/fake'")


In [None]:
import os
import shutil
import random

# Input: one folder per class, located under `base_dir`.
base_dir = 'Data'
real_dir, fake_dir = (os.path.join(base_dir, label) for label in ('real', 'fake'))

# Output: <output_base>/<split>/<class>/
output_base = 'Data_Split'
splits = ['training', 'validation', 'testing']
split_ratios = [0.7, 0.15, 0.15]  # fractions for train / val / test, in that order

# Build the full output directory tree before any file is copied.
for split in splits:
    split_dir = os.path.join(output_base, split)
    for cls in ('real', 'fake'):
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

# Function to shuffle and split
def shuffle_and_split(class_dir, class_name, seed=None, ratios=None, output_dir=None):
    """Shuffle the files of one class and copy them into the three splits.

    Files are copied (not moved) to
    ``<output_dir>/<split>/<class_name>/<filename>`` where ``<split>`` is
    ``training``, ``validation`` or ``testing``.

    Args:
        class_dir: Directory holding the files of a single class.
        class_name: Sub-folder name used under every split directory.
        seed: Optional seed for a private random generator. With a seed the
            split is reproducible; with ``None`` the module-level ``random``
            state is used, as before.
        ratios: Sequence whose first two items are the training and
            validation fractions. Defaults to the module-level
            ``split_ratios``.
        output_dir: Root of the split tree. Defaults to the module-level
            ``output_base``.

    Note:
        Files left in the destination folders by an earlier run are not
        removed, so re-running with a different shuffle can leave the same
        clip in more than one split. Clear the output tree first if needed.
    """
    ratios = split_ratios if ratios is None else ratios
    output_dir = output_base if output_dir is None else output_dir

    # Keep regular files only (copy2 cannot copy a directory) and sort them:
    # os.listdir() order is arbitrary, so without sorting even a fixed seed
    # would not give the same split on every machine.
    files = sorted(
        entry for entry in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, entry))
    )

    if seed is None:
        random.shuffle(files)
    else:
        # A private generator leaves the global random state untouched.
        random.Random(seed).shuffle(files)

    total = len(files)
    train_end = int(total * ratios[0])
    val_end = train_end + int(total * ratios[1])

    # Both boundaries are truncated, so the testing split absorbs whatever
    # remainder is left after training and validation.
    split_files = {
        'training': files[:train_end],
        'validation': files[train_end:val_end],
        'testing': files[val_end:]
    }

    for split, split_filenames in split_files.items():
        split_dir = os.path.join(output_dir, split, class_name)
        # Create the destination here so the function does not depend on
        # the directory tree having been prepared elsewhere.
        os.makedirs(split_dir, exist_ok=True)
        for filename in split_filenames:
            src = os.path.join(class_dir, filename)
            dst = os.path.join(split_dir, filename)
            shutil.copy2(src, dst)

# Run the split once per class directory, real first and then fake.
for class_dir, class_name in ((real_dir, 'real'), (fake_dir, 'fake')):
    shuffle_and_split(class_dir, class_name)

print("Dataset has been shuffled and split into training/validation/testing.")
