In [1]:
import os
import numpy as np

import matplotlib.pyplot as plt
import librosa.display
from internal_methods import createShiftSequence, pitchAugmentation, volumeAugmentation, noiseAugmentation
import soundfile as sf
from soundfile import write  # Assuming you have soundfile installed

import shutil
import random



In [3]:
# --- Raw recordings (inputs to the augmentation steps) -----------------------
main_dir: str = "DataGathering"
raw_wake: str = "DataGathering/wake"
raw_talk: str = "DataGathering/background/my_talk"
raw_back: str = "DataGathering/background/back"
# raw_background_dir: str = ""

background_talk_dataset = ""  # create iterator and trimmer at the same time

# --- Augmented copies, one directory per source ------------------------------
wake_augmented: str = "DataGathering/augmented/wake"
back_augmented: str = "DataGathering/augmented/background/back"
talk_augmented: str = "DataGathering/augmented/background/my_talk"

# --- Segments cut from the urban dataset -------------------------------------
urban_audio: str = "urban_audio"

# --- Final two-class layout ---------------------------------------------------
wake_train: str = "data/wake"
back_train: str = "data/background"

# --- Audio settings -----------------------------------------------------------
sr: int = 44100  # sample rate handed to every augmentation / loading call
seconds: int = 2  # segment length used when splitting the urban audio

# Create the whole directory tree up front. os.makedirs builds missing parent
# directories itself, so listing only the leaf directories is enough to get
# "DataGathering", "DataGathering/background", "DataGathering/augmented",
# "DataGathering/augmented/background" and "data" as well.
for _path in (
    raw_wake,
    raw_talk,
    raw_back,
    wake_augmented,
    back_augmented,
    talk_augmented,
    urban_audio,
    wake_train,
    back_train,
):
    os.makedirs(_path, exist_ok=True)
del _path  # keep the notebook namespace free of the loop variable

In [6]:
# Augmentation grids. Judging by the recorded output of this cell, `stop` is
# excluded from the sequence returned by createShiftSequence.

# Coarse grids for the background classes.
pitch_shifts_small = createShiftSequence(start=-1.5, stop=1.6, step=0.5)
volume_shifts_small = createShiftSequence(start=0.8, stop=2.1, step=0.4)

# Fine grids for the wake word.
# NOTE(review): with stop=1.6 excluded the recorded pitch grid runs from -1.6
# to 1.4, i.e. it is not symmetric around 0 - confirm whether +1.6 was meant
# to be included.
pitch_shifts = createShiftSequence(start=-1.6, stop=1.6, step=0.2)
volume_shifts = createShiftSequence(start=0.8, stop=3, step=0.2)

# Passed as `noise_factors` to noiseAugmentation for every class.
noise_shifts = [0.0008, 0.0005, 0.0002]


print("Volume shifts small: ", volume_shifts_small)
print("Volume shifts: ", volume_shifts)

print("Pitch shifts small: ", pitch_shifts_small)
print("Pitch shifts: ", pitch_shifts)

# Print the values as well as the label, like the four lines above.
print("Noise shifts: ", noise_shifts)

Volume shifts small:  [0.8 1.2 1.6 2. ]
Volume shifts:  [0.8 1.  1.2 1.4 1.6 1.8 2.  2.2 2.4 2.6 2.8]
Pitch shifts small:  [-1.5 -1.  -0.5  0.   0.5  1.   1.5]
Pitch shifts:  [-1.6 -1.4 -1.2 -1.  -0.8 -0.6 -0.4 -0.2 -0.   0.2  0.4  0.6  0.8  1.
  1.2  1.4]
Noise Shifts


# Data Augmentation

### Wake-word Audios

In [5]:
# Wake-word augmentation chain: volume -> noise -> pitch.
# The volume pass reads the raw recordings and writes into wake_augmented.
volumeAugmentation(
    input_directory=raw_wake,
    output_directory=wake_augmented,
    volume_shifts=volume_shifts,
    sr=sr,
)

# The noise and pitch passes use wake_augmented as both input and output.
# NOTE(review): presumably each pass adds new files next to the ones it reads,
# so re-running this cell may compound the augmentations - confirm against
# internal_methods.
noiseAugmentation(
    input_directory=wake_augmented,
    output_directory=wake_augmented,
    noise_factors=noise_shifts,
    sr=sr,
)
pitchAugmentation(
    input_directory=wake_augmented,
    output_directory=wake_augmented,
    pitch_shifts=pitch_shifts,
    sr=sr,
)

---

### Background Audios

In [23]:
# Background (non-speech) augmentation chain: volume -> noise -> pitch.
# Uses the coarse "*_small" grids that the shift-definition cell labels
# "for background", matching the background-talk cell and the pitch pass
# below. The previous version passed the wake-word `volume_shifts` grid here.
volumeAugmentation(
    input_directory=raw_back,
    output_directory=back_augmented,
    volume_shifts=volume_shifts_small,
    sr=sr,
)

# The noise and pitch passes use back_augmented as both input and output.
noiseAugmentation(
    input_directory=back_augmented,
    output_directory=back_augmented,
    noise_factors=noise_shifts,
    sr=sr,
)
pitchAugmentation(
    input_directory=back_augmented,
    output_directory=back_augmented,
    pitch_shifts=pitch_shifts_small,
    sr=sr,
)

In [25]:

# Background-talk augmentation chain: volume -> noise -> pitch, using the
# coarse "*_small" grids.
volumeAugmentation(
    input_directory=raw_talk,
    output_directory=talk_augmented,
    volume_shifts=volume_shifts_small,
    sr=sr,
)

# The noise and pitch passes use talk_augmented as both input and output.
noiseAugmentation(
    input_directory=talk_augmented,
    output_directory=talk_augmented,
    noise_factors=noise_shifts,
    sr=sr,
)
pitchAugmentation(
    input_directory=talk_augmented,
    output_directory=talk_augmented,
    pitch_shifts=pitch_shifts_small,
    sr=sr,
)

---

### Urban Audios

In [15]:
def processsUrbanData(
    dataset_path: str = "urban-dataset",
    output_directory: str = "urban_audio",
    n_samples: int = 100,
):
    def list_subfolders(directory):
        subfolders = [f.path for f in os.scandir(directory) if f.is_dir()]

        return subfolders

    def split_audio_by_length(
        audio_path: str,
        output_directory: str,
        segment_duration: int = 2,
        sr: int = 44100,
        segment_counter: int | None = None,
        n_samples: int | None = None,
    ):

        if segment_counter is None:
            segment_counter = 0

        # Load audio data
        y, _ = librosa.load(audio_path, sr=sr)  # Load without specifying sample rate

        # Check audio duration
        duration = librosa.get_duration(y=y, sr=sr)

        if duration < segment_duration:
            return segment_counter

        # Determine number of segments
        num_segments = int(duration / segment_duration)

        # Split audio into segments
        segment_length = int(
            sr * segment_duration
        )  # Convert segment duration to samples

        for sequence_idx in range(num_segments):
            start = sequence_idx * segment_length
            end = start + segment_length
            segment = y[start:end]

            # Create output filename
            file_name, file_ext = os.path.splitext(os.path.basename(audio_path))
            output_file = f"urban_segment_{segment_counter}{file_ext}"
            output_path = os.path.join(output_directory, output_file)

            # Stop iteration when sample count is enough
            if segment_counter == n_samples:
                return -1

            # Update the number of made segments
            segment_counter += 1
            # Save segment
            write(output_path, segment, sr)

        return segment_counter

    subfolders = list_subfolders(dataset_path)
    os.makedirs(output_directory, exist_ok=True)
    segment_counter = 0

    for folder in subfolders:
        # Iterate through each subfolder
        for audio_file in os.scandir(folder):
            # Iterate through each audio file in the subfolder

            # Get the path of the audio file
            audio_path = audio_file.path

            segment_counter = split_audio_by_length(
                audio_path=audio_path,
                output_directory=output_directory,
                segment_duration=seconds,
                sr=sr,
                segment_counter=segment_counter,
                n_samples=n_samples,
            )
            if segment_counter == -1:
                return


# Upper bound on the number of urban segments to cut. The previous version
# passed `urban_sample_count` here, which is only assigned two statements
# further down, so the cell raised NameError on a fresh kernel.
# TODO(review): 100 mirrors processsUrbanData's own default - set this to the
# share of urban clips actually wanted in the background class.
urban_samples_requested = 100

processsUrbanData(
    dataset_path="urban-dataset",
    output_directory=urban_audio,
    n_samples=urban_samples_requested,
)

# Count what is on disk rather than trusting the request: the dataset may
# yield fewer segments than asked for.
urban_sample_count = len(os.listdir(urban_audio))

In [None]:
# Per-class budget: the background class (talk + back + urban) should end up
# with as many samples as the wake-word class.
# NOTE(review): this counts the files already in wake_train, but the assembly
# cell further down is what moves wake clips into wake_train - on a fresh run
# that directory looks empty at this point. Confirm whether wake_augmented was
# the intended source of this count.
n_samples_each = len(os.listdir(wake_train))
n_samples_each = n_samples_each // 3 * 3  # round down to a multiple of three

# Whatever the urban segments do not cover is split between "back" and "talk".
# Clamp at zero: a negative count would later be used as a slice bound and
# select "all but the last k" files instead of none.
background_remaining = max(n_samples_each - urban_sample_count, 0)
back_sample_count = background_remaining // 2
# Give the odd sample (if any) to "talk" so the three parts add up exactly.
talk_sample_count = background_remaining - back_sample_count

# Displayed side by side as a sanity check; the two numbers should match
# unless urban_sample_count alone exceeds n_samples_each.
n_samples_each, sum((talk_sample_count, back_sample_count, urban_sample_count))

### Assemble all files into the `data` directory

In [19]:
def move_to_directory(
    source_dir, destination_dir, n_samples: int | None = None, use_shuffle=True
):

    # List files in the source directory
    files = os.listdir(source_dir)
    if use_shuffle:
        random.shuffle(files)

    if n_samples is not None:
        files = files[:n_samples]

    # Copy each file from the source directory to the destination directory
    for file in files:
        source_file = os.path.join(source_dir, file)
        destination_file = os.path.join(destination_dir, file)
        shutil.move(source_file, destination_file)
        
        

# Wake-word class: take n_samples_each augmented wake clips.
move_to_directory(wake_augmented, wake_train, n_samples=n_samples_each)

# Background class: filled from three sources - urban segments, background
# recordings and background talk - each with its own sample budget.
move_to_directory(urban_audio, back_train, n_samples=urban_sample_count)
move_to_directory(back_augmented, back_train, n_samples=back_sample_count)
move_to_directory(talk_augmented, back_train, n_samples=talk_sample_count)



In [None]:
# print(f"Audio shape: {audio.shape}")
# print(f"Spectrogram shape: {spectrogram.shape}")
# print(f"Normalized spectrogram shape: {normalized_spectrogram.shape}")