In [9]:
import os
from pydub import AudioSegment
import random
from pydub.playback import play
from pydub.effects import speedup
import shutil
import numpy as np
from sklearn.model_selection import train_test_split
import glob



In [5]:
# Input and output folders used by the mixing cell below.
# These were empty strings, which makes os.makedirs() / os.listdir() in the mixing cell
# raise FileNotFoundError. output_Mix is restored from the path printed in that cell's
# recorded output; the music/speech folder names are taken from the folder list used by
# the train/test split cell.
# NOTE(review): confirm music_wav / speech_wav are the intended input folders.
kaggle_root = "/Users/zainhazzouri/projects/Bachelor_Thesis/Data/Kaggle"
music_folder = os.path.join(kaggle_root, "music_wav")
speech_folder = os.path.join(kaggle_root, "speech_wav")
output_Mix = os.path.join(kaggle_root, "Mix_wav")


In [6]:
## Create a mix of music and speech
#
# Pairs the k-th music file with the k-th speech file, overlays the speech on top of the
# music and writes the result to output_Mix as mixed_<k>.wav.

# Make sure output folder exists
os.makedirs(output_Mix, exist_ok=True)

# os.listdir() returns entries in arbitrary order, so both lists are sorted: without this
# the music/speech pairing (and therefore the content of mixed_<k>.wav) can differ
# between runs and machines.
music_files = sorted(f for f in os.listdir(music_folder) if f.endswith('.wav'))
speech_files = sorted(f for f in os.listdir(speech_folder) if f.endswith('.wav'))

# zip() stops at the shorter list; say so instead of dropping the extra files silently.
if len(music_files) != len(speech_files):
    print(f'Warning: {len(music_files)} music files vs {len(speech_files)} speech files; '
          f'only the first {min(len(music_files), len(speech_files))} pairs are mixed')

for i, (music_file, speech_file) in enumerate(zip(music_files, speech_files)):
    music_path = os.path.join(music_folder, music_file)
    speech_path = os.path.join(speech_folder, speech_file)

    music = AudioSegment.from_wav(music_path)
    speech = AudioSegment.from_wav(speech_path)

    # Mix the two audio files (adjust the volumes if needed).
    # overlay() keeps the length of the segment it is called on, so the mix is as long
    # as the music clip and any speech beyond that point is cut off.
    mixed = music.overlay(speech)

    # Save the mixed audio to the output folder
    output_path = os.path.join(output_Mix, f'mixed_{i}.wav')
    mixed.export(output_path, format='wav')

print(f'Mixed audio files saved in {output_Mix}')


Mixed audio files saved in /Users/zainhazzouri/projects/Bachelor_Thesis/Data/Kaggle/Mix_wav


In [None]:
## separate the data into train and test, then save them in separate folders
#
# For every class folder under root_directory, the .wav files are split into a train and
# a test subset and MOVED (not copied) to <root>/train/<folder> and <root>/test/<folder>.
# NOTE(review): the Mix_wav files are built from the music and speech files, yet every
# folder is split independently here, so a mix in the test set can contain a music or
# speech clip that ended up in the training set - confirm this is acceptable.

# Define the folders
folders = ['music_wav', 'speech_wav', 'Mix_wav']

# Define the root directory where the folders are located
root_directory = "/Users/zainhazzouri/projects/Bachelor_Thesis/Data/Kaggle"

# Define the directories for the train and test sets
train_directory = os.path.join(root_directory, 'train')
test_directory = os.path.join(root_directory, 'test')

# Create the train and test directories if they do not exist
os.makedirs(train_directory, exist_ok=True)
os.makedirs(test_directory, exist_ok=True)

# Define the test size
test_size = 0.2  # 20% for testing, adjust as needed

# Fixed seed so the same files land in train/test on every run
split_seed = 42

# Loop over the folders and split the files into train and test sets
for folder in folders:
    # Get the full paths to the source folder, and the train and test target folders
    source_folder = os.path.join(root_directory, folder)
    train_folder = os.path.join(train_directory, folder)
    test_folder = os.path.join(test_directory, folder)

    # Only audio files take part in the split: a bare os.listdir() also returns hidden
    # files (e.g. .DS_Store) and sub-directories. Sorting makes the seeded split
    # independent of the arbitrary listing order.
    files = sorted(f for f in os.listdir(source_folder) if f.endswith('.wav'))

    # train_test_split raises ValueError when it cannot build a non-empty train set,
    # which is exactly what happens when this cell is re-run after the files were moved.
    if len(files) < 2:
        print(f'Skipping {source_folder}: {len(files)} .wav file(s) found, nothing to split')
        continue

    # Split the files into train and test sets
    train_files, test_files = train_test_split(
        files, test_size=test_size, random_state=split_seed
    )

    # Create the train and test folders if they do not exist
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Move the train files to the train folder
    for file in train_files:
        shutil.move(os.path.join(source_folder, file), os.path.join(train_folder, file))

    # Move the test files to the test folder
    for file in test_files:
        shutil.move(os.path.join(source_folder, file), os.path.join(test_folder, file))


In [10]:
## perform data augmentation
#
# For every original .wav file in each training folder, up to five augmented variants
# are written next to it: gain, pitch, noise, stretch and (with 50% probability) reverse.
# Every variant is derived from the ORIGINAL clip. Previously each step reused the
# output of the step before it, so e.g. the "_noise" file also carried the gain and
# speed changes.

# Root directory where the train folders are located
# NOTE(review): the split cell writes its train set to <Kaggle root>/train
# (".../Data/Kaggle/train"), whereas this path is ".../Data/train" - confirm the folders
# were moved here, otherwise the glob below matches nothing.
root_directory = "/Users/zainhazzouri/projects/Bachelor_Thesis/Data/train"

noise_folder = "/Users/zainhazzouri/projects/Bachelor_Thesis/Data/Kaggle/Noise"
noise_files = sorted(glob.glob(os.path.join(noise_folder, '*.wav')))
if not noise_files:
    # random.choice() on an empty list would fail later with an unhelpful IndexError
    raise FileNotFoundError(f'No .wav noise files found in {noise_folder}')

# Defined here as well so this cell does not rely on the split cell having been run
folders = ['music_wav', 'speech_wav', 'Mix_wav']

# Private, seeded generator: reproducible augmentation without touching the global
# random state
augmentation_rng = random.Random(42)


def shift_pitch(segment, factor):
    """Return `segment` with its pitch scaled by `factor`.

    The raw samples are re-labelled with a frame rate of `frame_rate * factor` and then
    resampled back to the original frame rate, so the file keeps its sample rate.
    pydub has no pitch-only shifter: the duration changes by 1 / factor as a side effect.
    """
    relabelled = segment._spawn(
        segment.raw_data,
        overrides={'frame_rate': int(segment.frame_rate * factor)},
    )
    return relabelled.set_frame_rate(segment.frame_rate)


for folder in folders:
    # Full path to the input folder
    input_folder = os.path.join(root_directory, folder)

    # Original audio files only: files written by a previous run of this cell are
    # excluded so that re-running does not augment the augmentations. Sorted so that
    # index i always refers to the same source file.
    input_files = sorted(
        path for path in glob.glob(os.path.join(input_folder, '*.wav'))
        if not os.path.basename(path).startswith('augmented_')
    )

    for i, input_file in enumerate(input_files):
        audio = AudioSegment.from_wav(input_file)

        # Change volume
        gain = augmentation_rng.uniform(-10, 10)  # Random gain between -10dB and 10dB
        louder_or_quieter = audio + gain
        louder_or_quieter.export(os.path.join(input_folder, f'augmented_{i}_gain.wav'), format='wav')

        # Change pitch
        # pydub's speedup() was used here before; it shortens audio without changing
        # pitch and its chunk arithmetic is not valid for factors <= ~1.
        pitch_factor = augmentation_rng.uniform(0.9, 1.1)  # Random pitch factor between 0.9 and 1.1
        pitched = shift_pitch(audio, pitch_factor)
        pitched.export(os.path.join(input_folder, f'augmented_{i}_pitch.wav'), format='wav')

        # Add background noise
        noise_file = augmentation_rng.choice(noise_files)
        noise = AudioSegment.from_wav(noise_file)
        noise = noise - 20  # Reduce noise volume
        # loop=True repeats the noise until the end of the clip; without it a noise file
        # shorter than the clip only covers its beginning
        noisy = audio.overlay(noise, loop=True)
        noisy.export(os.path.join(input_folder, f'augmented_{i}_noise.wav'), format='wav')

        # Time stretching
        # NOTE(review): set_frame_rate() resamples - duration and pitch stay the same,
        # only the sample rate stored in the file changes. This acts as a stretch only if
        # the downstream loader ignores the file's sample rate; confirm that is intended.
        stretch_factor = augmentation_rng.uniform(0.9, 1.1)  # Random stretch factor between 0.9 and 1.1
        stretched = audio.set_frame_rate(int(audio.frame_rate * stretch_factor))
        stretched.export(os.path.join(input_folder, f'augmented_{i}_stretch.wav'), format='wav')

        # Reverse audio
        if augmentation_rng.random() < 0.5:  # 50% chance of reversing audio
            reversed_audio = audio.reverse()
            reversed_audio.export(os.path.join(input_folder, f'augmented_{i}_reverse.wav'), format='wav')

    print(f'Augmented audio files saved in {input_folder}')

Augmented audio files saved in /Users/zainhazzouri/projects/Bachelor_Thesis/Data/train/music_wav
Augmented audio files saved in /Users/zainhazzouri/projects/Bachelor_Thesis/Data/train/speech_wav
Augmented audio files saved in /Users/zainhazzouri/projects/Bachelor_Thesis/Data/train/Mix_wav


Silence audio file saved at /Users/zainhazzouri/projects/Bachelor_Thesis/Data/Kaggle/silence_wav/silence.wav
