In [5]:
import os
from pydub import AudioSegment
import random
from pydub.playback import play
from pydub.effects import speedup
import shutil
import numpy as np
from sklearn.model_selection import train_test_split
import glob

import xml.etree.ElementTree as ET


In [6]:
## separate the data into train and test, then save them in separate folders

# Define the folders
folders = ['music_wav', 'speech_wav']

# Define the root directory where the folders are located
GTZAN_Source_Folder = "/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/GTZAN"
CCMixter_Source_Folder = "/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus"


# distination folder 
destination_folder = "/Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data"

# Define the directories for the train and test sets
train_directory = os.path.join(destination_folder, 'train')
test_directory = os.path.join(destination_folder, 'test')

# Create the train and test directories if they do not exist
os.makedirs(train_directory, exist_ok=True)
os.makedirs(test_directory, exist_ok=True)

# Define the test size
test_size = 0.2  # 20% for testing, adjust as needed


In [7]:
############# GTZAN Dataset#############
# the GTZAN has sampleRate 22050Hz Mono 16-bit audio files in .wav format

def resample_audio(file_path, target_sample_rate=44100):
    audio = AudioSegment.from_file(file_path)
    return audio.set_frame_rate(target_sample_rate)



# Create a set to keep track of copied files
copied_files = set()

# Loop over the folders and split the files into train and test sets
for folder in folders:
    # Get the full paths to the source folder, and the train and test target folders
    source_folder = os.path.join(GTZAN_Source_Folder, folder)
    train_folder = os.path.join(train_directory, folder)
    test_folder = os.path.join(test_directory, folder)

    # Get a list of all files in the source folder
    files = [f for f in os.listdir(source_folder) if f not in copied_files]

    # Split the files into train and test sets
    train_files, test_files = train_test_split(files, test_size=test_size)

    # Create the train and test folders if they do not exist
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # resample then Copy the train files to the train folder
    for file in train_files:
        resampled_audio = resample_audio(os.path.join(source_folder, file))
        resampled_audio.export(os.path.join(train_folder, file), format="wav")
        copied_files.add(file)

    # resample then Copy the test files to the test folder
    for file in test_files:
        resampled_audio = resample_audio(os.path.join(source_folder, file))
        resampled_audio.export(os.path.join(test_folder, file), format="wav")
        copied_files.add(file)
        
        
        
        
        # shutil.copy(os.path.join(source_folder, file), os.path.join(test_folder, file))
        # copied_files.add(file)


In [8]:
### CCmixter Dataset ###
# the CCMixter has  sampleRate 44100 Hz Stereo 

def convert_to_mono(file_path):
    audio = AudioSegment.from_file(file_path)
    return audio.set_channels(1)


def rename_files_in_dir(main_dir):
    for subdir in os.listdir(main_dir):
        subdir_path = os.path.join(main_dir, subdir)
        if os.path.isdir(subdir_path):  # check if it's a directory
            for filename in os.listdir(subdir_path):
                if filename.endswith('.wav'):
                    subdir_name = subdir.replace(' ', '_')  # replace spaces with underscores
                    if 'mix.wav' in filename:
                        new_name = f'{subdir_name}-mix.wav'
                    elif 'source-01.wav' in filename:
                        new_name = f'{subdir_name}-music.wav'
                    elif 'source-02.wav' in filename:
                        new_name = f'{subdir_name}-voice.wav'
                    else:
                        continue  # Skip files not matching the pattern
                    src = os.path.join(subdir_path, filename)  # Full path to the file (old name)
                    dst = os.path.join(subdir_path, new_name)  # Full path to the file (new name)
                    os.rename(src, dst)  # Rename the file


# copy files to the train and test folders after splitting
main_dir = CCMixter_Source_Folder
train_music = "/Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/train/music_wav"
train_speech = "/Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/train/speech_wav"
test_music = "/Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/test/music_wav"
test_speech = "/Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/test/speech_wav"


def copy_files_to_train_test_folders(main_dir, train_music, train_speech, test_music, test_speech):
    # Iterate over subdirectories
    music_files = []
    speech_files = []
    for subdir in os.listdir(main_dir):
        subdir_path = os.path.join(main_dir, subdir)
        print(subdir_path)
        if os.path.isdir(subdir_path):  # check if it's a directory
            files = [f for f in os.listdir(subdir_path) if f.endswith('.wav')]
            print("files", files)
            # append music file if the file end with _music.wav
            music_files.extend([os.path.join(subdir_path, f) for f in files if f.endswith('_music.wav')])

            
            # append speech file if the file end with _speech.wav
            speech_files.extend([os.path.join(subdir_path, f) for f in files if f.endswith('_speech.wav')])


            print("music_files", music_files)
            print("speech_files", speech_files)
            
    # split the files into train and test sets
    train_music_files, test_music_files = train_test_split(music_files, test_size=test_size)
    train_speech_files, test_speech_files = train_test_split(speech_files, test_size=test_size)
        
        
    # shuffle the files
    random.shuffle(train_music_files)
    random.shuffle(test_music_files)
    
    # convert to mono then save the files in the train and test folders
    for file in train_music_files:
        convert_to_mono(file).export(os.path.join(train_music, os.path.basename(file)), format="wav")

    for file in test_music_files:
        convert_to_mono(file).export(os.path.join(test_music, os.path.basename(file)), format="wav")
        
    for file in train_speech_files:
        convert_to_mono(file).export(os.path.join(train_speech, os.path.basename(file)), format="wav")
        
    for file in test_speech_files:
        convert_to_mono(file).export(os.path.join(test_speech, os.path.basename(file)), format="wav")        
                
copy_files_to_train_test_folders(main_dir, train_music, train_speech, test_music, test_speech)

/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus/tmray_-_Forget_It_-_Demo
files ['tmray_-_Forget_It_-_Demo-mix.wav', 'tmray_-_Forget_It_-_Demo_music.wav', 'tmray_-_Forget_It_-_Demo_speech.wav']
music_files ['/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus/tmray_-_Forget_It_-_Demo/tmray_-_Forget_It_-_Demo_music.wav']
speech_files ['/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus/tmray_-_Forget_It_-_Demo/tmray_-_Forget_It_-_Demo_speech.wav']
/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus/stellarartwars_-_Amy_Winehouse_Blues_(stems)
files ['stellarartwars_-_Amy_Winehouse_Blues_(stems)_music.wav', 'stellarartwars_-_Amy_Winehouse_Blues_(stems)-mix.wav', 'stellarartwars_-_Amy_Winehouse_Blues_(stems)_speech.wav']
music_files ['/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus/tmray_-_Forget_It_-_Demo/tmray_-_Forg

In [9]:
## preform data augmentation/Users/zainhazzouri/projects/Bachelor_Thesis_R2UNet_res2net/Raw_Data/ccmixter_corpus/stellarartwars_-_Orange_(I_m_In_The_Mood_For_Trancing_Mix)/stellarartwars_-_Orange_(I_m_In_The_Mood_For_Trancing_Mix)_speech.wav

# Root directory where the train folders are located
train_directory = "/Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/train"



for folder in folders: # folders = ['music_wav', 'speech_wav']
    # Full path to the input folder
    input_folder = os.path.join(train_directory, folder)

    # Get a list of all the audio files in the input folder
    input_files = glob.glob(os.path.join(input_folder, '*.wav'))

    for i, input_file in enumerate(input_files):
        audio = AudioSegment.from_wav(input_file)

        # Change volume
        gain = random.uniform(-10, 10)  # Random gain between -10dB and 10dB
        audio = audio + gain
        audio.export(os.path.join(input_folder, f'augmented_{i}_gain.wav'), format='wav')

        # Pitch Shifting
        semitone_shift = random.uniform(-3, 3)  # Random shift between -3 and +3 semitones
        pitch_shifted_audio = audio._spawn(audio.raw_data, overrides={
            "frame_rate": int(audio.frame_rate * (2 ** (semitone_shift / 12.0)))
        }).set_frame_rate(audio.frame_rate)
        pitch_shifted_audio.export(os.path.join(input_folder, f'augmented_{i}_pitchshift.wav'), format='wav')

        # Noise Injection
        noise_level = random.uniform(0.001, 0.005)  # Random noise level
        noise = np.random.normal(0, noise_level, len(audio.get_array_of_samples()))
        noise_audio = AudioSegment(
            noise.tobytes(),
            frame_rate=audio.frame_rate,
            sample_width=audio.sample_width,
            channels=audio.channels
        )
        audio_with_noise = audio.overlay(noise_audio)
        audio_with_noise.export(os.path.join(input_folder, f'augmented_{i}_noise.wav'), format='wav')
        
        # Time Stretching
        stretch_factor = random.uniform(0.8, 1.2)  # Random stretch factor between 0.8 and 1.2
        stretched_audio = audio._spawn(audio.raw_data, overrides={
            "frame_rate": int(audio.frame_rate / stretch_factor)
        }).set_frame_rate(audio.frame_rate)
        stretched_audio.export(os.path.join(input_folder, f'augmented_{i}_stretch.wav'), format='wav')

        # Time Shifting
        shift_ms = random.uniform(-500, 500)  # Random shift between -500ms and +500ms
        if shift_ms > 0:
            audio_shifted = AudioSegment.silent(duration=shift_ms) + audio
        else:
            audio_shifted = audio[-shift_ms:]
        audio_shifted.export(os.path.join(input_folder, f'augmented_{i}_timeshift.wav'), format='wav')

    print(f'Augmented audio files saved in {input_folder}')

Augmented audio files saved in /Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/train/music_wav
Augmented audio files saved in /Users/zainhazzouri/projects/Datapreprocessed/Bachelor_thesis_data/train/speech_wav


3.7.1
