In [6]:
import os
# TensorFlow reads this switch from the process environment, so it has to be
# exported (as a string) before `import tensorflow` runs further down; a plain
# Python variable with the same name has no effect on the library.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
import numpy as np
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
import tensorflow as tf
import random
import soundfile as sf

In [57]:
# Record the TensorFlow version used for this run.
print(tf.version.VERSION)

2.18.0


In [28]:
class Rozszerzanie:
    """Configurable augmentation pipeline for audio waveforms.

    The instance keeps a table mapping transform names to its own methods and
    a library of named background-noise signals. ``rozszerz`` walks a list of
    transform configs and applies each one with its configured probability.
    """

    def __init__(self):
        # Dispatch table: config 'name' -> bound transform method.
        self.transforms = {
            'add_white_noise': self.add_white_noise,
            'time_stretch': self.time_stretch,
            'pitch_scale': self.pitch_scale,
            'random_gain': self.random_gain,
            'add_noise': self.add_noise
        }

        # Background sounds available to the 'add_noise' transform, keyed by name.
        self.noise_library = {}

    def add_noise_to_library(self, name, noise_signal):
        """Store ``noise_signal`` under ``name`` so 'add_noise' configs can refer to it."""
        self.noise_library[name] = noise_signal

    def rozszerz(self, signal, sr, configs):
        """Apply the configured transforms, in order, to a copy of ``signal``.

        Args:
            signal: Input waveform (NumPy array); it is copied, not modified.
            sr: Sampling rate, forwarded to the transforms that take it.
            configs: Iterable of dicts with keys:
                'name'   - transform name (a key of ``self.transforms``),
                'p'      - probability of applying it (default 1.0),
                'params' - keyword arguments for the transform (default {}).
                For 'add_noise', ``params['noise_options']`` is a list of
                ``[library_name, weight]`` pairs; one name is drawn according
                to the weights and the remaining params (e.g. ``snr``) are
                passed to ``add_noise``. Without 'noise_options' the
                'add_noise' entry is skipped.

        Returns:
            The augmented waveform.
        """
        rozszerzony = np.copy(signal)
        for config in configs:
            name = config['name']
            p = config.get('p', 1.0)
            # Shallow copy: 'noise_options' is popped below, and doing that on
            # the caller's dict would disable the transform on every later call
            # that reuses the same config.
            params = dict(config.get('params', {}))

            if name == 'add_noise':
                if 'noise_options' not in params:
                    continue
                # Remove the option list so only add_noise's own kwargs remain.
                noise_options = params.pop('noise_options')
                if random.random() >= p:
                    continue

                noise_names = [opt[0] for opt in noise_options]
                weights = [opt[1] for opt in noise_options]
                total_weight = sum(weights)
                if total_weight > 0:
                    weights = [w / total_weight for w in weights]

                selected_name = random.choices(noise_names, weights=weights, k=1)[0]
                # Names that were never registered are skipped silently.
                if selected_name in self.noise_library:
                    noise_signal = self.noise_library[selected_name]
                    rozszerzony = self.add_noise(rozszerzony, noise_signal, **params)
            elif name in self.transforms and random.random() < p:
                transform_func = self.transforms[name]
                rozszerzony = transform_func(rozszerzony, sr, **params)
        return rozszerzony

    def add_white_noise(self, signal, sr, noise_factor):
        """Add Gaussian noise with the signal's own standard deviation, scaled by ``noise_factor``.

        ``sr`` is accepted for a uniform transform signature and is not used.
        """
        noise = np.random.normal(0, signal.std(), signal.size)
        zaszumiony = signal + noise * noise_factor
        return zaszumiony

    def time_stretch(self, signal, sr, stretch_rate, target_duration):
        """Time-stretch ``signal`` and force the result to a fixed length.

        The stretched signal is truncated or zero-padded at the end to
        ``int(sr * target_duration)`` samples.
        """
        stretched_signal = librosa.effects.time_stretch(signal, rate=stretch_rate)
        target_len = int(sr * target_duration)
        if len(stretched_signal) > target_len:
            stretched_signal = stretched_signal[:target_len]
        elif len(stretched_signal) < target_len:
            padding_length = target_len - len(stretched_signal)
            stretched_signal = np.pad(stretched_signal, (0, padding_length), mode='constant')
        return stretched_signal

    def pitch_scale(self, signal, sr, num_semitones):
        """Shift the pitch by ``num_semitones`` (positive raises it, negative lowers it)."""
        return librosa.effects.pitch_shift(signal, sr=sr, n_steps=num_semitones)

    def random_gain(self, signal, sr, min_gain, max_gain):
        """Multiply ``signal`` by a factor drawn uniformly from [min_gain, max_gain].

        ``sr`` is accepted for a uniform transform signature and is not used.
        """
        gain_factor = random.uniform(min_gain, max_gain)
        return signal * gain_factor

    def add_noise(self, signal, noise, snr):
        """Mix ``noise`` into ``signal`` at the requested signal-to-noise ratio.

        Args:
            signal: Waveform to be degraded.
            noise: Background waveform; tiled when shorter than ``signal``,
                otherwise a random segment of matching length is used.
            snr: Target signal-to-noise ratio, interpreted on a decibel scale
                (the power ratio is ``10 ** (snr / 10)``).

        Returns:
            The noisy waveform, rescaled to a peak of 1.0 if the mix exceeds
            that. ``signal`` is returned unchanged when either input is empty
            or the selected noise segment has zero power.
        """
        # Nothing to mix; also avoids a division by zero / max() of an empty array.
        if len(signal) == 0 or len(noise) == 0:
            return signal

        if len(noise) < len(signal):
            # Repeat the noise until it covers the whole signal, then trim.
            repeats = int(np.ceil(len(signal) / len(noise)))
            noise = np.tile(noise, repeats)[:len(signal)]
        else:
            # Noise is at least as long: take a random segment. The +1 is needed
            # because randint's upper bound is exclusive.
            start = np.random.randint(0, len(noise) - len(signal) + 1)
            noise = noise[start:start + len(signal)]

        audio_power = np.sum(signal**2) / len(signal)
        noise_power = np.sum(noise**2) / len(noise)

        if noise_power == 0:
            return signal

        # Scale the noise so that audio_power / scaled_noise_power == 10**(snr/10).
        noise_adjusted = noise * np.sqrt(audio_power / (10**(snr / 10) * noise_power))
        audio_noisy = signal + noise_adjusted

        # Peak-normalise only when the mix would exceed 1.0 in magnitude.
        peak = np.max(np.abs(audio_noisy))
        if peak > 1.0:
            audio_noisy = audio_noisy / peak

        return audio_noisy

In [47]:
# Single-file demo: mix the "doing_the_dishes" background recording into one
# "yes" utterance and play the result.
sr = 16000
audio_augmenter = Rozszerzanie()

# librosa.load returns (samples, rate); only the samples are kept here.
signal = librosa.load(r"C:\Users\Hubert\Desktop\Praca_jupyter\yes\yes_001.wav", sr=sr)[0]
noise = librosa.load(r"C:\Users\Hubert\Desktop\Praca_jupyter\background\doing_the_dishes.wav", sr=sr)[0]
audio_augmenter.add_noise_to_library("zmywanie", noise)

# The SNR is drawn once, when this list is built, so every use of this config
# shares the same value.
audio_augment_config = [
    {
        'name': 'add_noise',
        'p': 1.0,
        'params': {
            'noise_options': [["zmywanie", 1.0]],
            'snr': random.randint(5, 15),
        },
    },
]

augmented_audio = audio_augmenter.rozszerz(signal, sr, audio_augment_config)
Audio(data=augmented_audio, rate=sr)

In [15]:
# Batch step: write an augmented copy of every source recording next to the
# original. Uses `audio_augmenter` and `audio_augment_config` defined in the
# single-file demo cell.
AUGMENTED_SUFFIX = "_augmented"

folders = {
    "yes": r"C:\Users\Hubert\Desktop\Praca_jupyter\yes",
    "hello": r"C:\Users\Hubert\Desktop\Praca_jupyter\hello"
}

for label, folder_path in folders.items():
    # sorted() makes the processing order (and the log below) deterministic.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".wav"):
            continue
        base_name, ext = os.path.splitext(filename)
        # Outputs land in the same folder as the inputs; skip files produced by
        # a previous run so re-executing the cell does not augment them again.
        if base_name.endswith(AUGMENTED_SUFFIX):
            continue
        audio_path = os.path.join(folder_path, filename)
        audio, sr = librosa.load(audio_path, sr=16000)
        augmented_audio = audio_augmenter.rozszerz(audio, sr, audio_augment_config)
        new_filename = f"{base_name}{AUGMENTED_SUFFIX}{ext}"
        output_path = os.path.join(folder_path, new_filename)
        sf.write(output_path, augmented_audio, sr)
        print(f"Zapisano augmentowany plik: {output_path}")

Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\yes\yes_001_augmented.wav
Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\yes\yes_002_augmented.wav
Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\yes\yes_003_augmented.wav
Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\hello\hello_001_augmented.wav
Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\hello\hello_002_augmented.wav
Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\hello\hello_003_augmented.wav
Zapisano augmentowany plik: C:\Users\Hubert\Desktop\Praca_jupyter\hello\hello_004_augmented.wav
