In [2]:
import librosa

import tensorflow as tf
from tensorflow.keras.utils import Sequence

import numpy as np
import os
import glob
from tqdm.auto import tqdm

import IPython.display as ipd

In [6]:
class Signal_Synthesis_Datagen_tf(Sequence):
    """Keras ``Sequence`` that builds (noisy, clean) audio pairs on the fly.

    Every combination of one signal clip and one noise clip is a sample, so the
    dataset holds ``len(signal_nums) * len(noise_paths)`` samples. A flat sample
    index ``idx`` maps to signal ``idx // len(noise_paths)`` and noise
    ``idx % len(noise_paths)``.

    Parameters
    ----------
    noise_dir : str
        Directory walked recursively for ``.wav`` noise clips when no cached
        list of noise paths is available.
    signal_dir : str
        Directory holding the signal clips, named
        ``common_voice_en_<number>.mp3``.
    signal_nums_save : str or None
        Path to a ``.npy`` file with cached signal numbers. It is only read,
        never written; when it is ``None`` or missing, ``signal_dir`` is
        scanned instead.
    num_noise_samples, num_signal_samples : int or None
        Keep only the first N noise paths / signal numbers (``None`` keeps all).
    noise_path_save : str or None
        Path to a ``.npy`` file with cached noise paths. It is only read, never
        written; when it is ``None`` or missing, ``noise_dir`` is walked.
    n_fft, win_length, hop_len : int
        Forwarded to ``librosa.stft`` when ``perform_stft`` is true.
    create_specgram : bool
        Mutually exclusive with ``perform_stft``; otherwise unused by this class.
    perform_stft : bool
        If true, the noisy mixture (not the clean signal) is passed through
        ``librosa.stft``.
    normalize, augment : bool
        Stored on the instance but not used by any method of this class.
    default_sr : int
        Sample rate both clips are loaded at.
    sec : int or float
        Length, in seconds, every sample is cut or padded to.
    batch_size : int
        Number of samples per batch.
    shuffle : bool
        Shuffle the sample order each time ``on_epoch_end`` runs.

    Raises
    ------
    ValueError
        If both ``create_specgram`` and ``perform_stft`` are true.
    """

    def __init__(self, noise_dir, signal_dir, signal_nums_save=None, num_noise_samples=None, num_signal_samples=None, noise_path_save=None,
                 n_fft=400, win_length=400, hop_len=200, create_specgram=False,
                 perform_stft=True, normalize=True, default_sr=16000, sec=6, batch_size=32, shuffle=True, augment=False):
        # Let the Keras base class set up whatever state it needs.
        super().__init__()

        self.noise_dir = noise_dir
        self.signal_dir = signal_dir
        self.signal_nums_save = signal_nums_save
        self.num_noise_samples = num_noise_samples
        self.num_signal_samples = num_signal_samples
        self.noise_path_save = noise_path_save
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_len = hop_len
        self.create_specgram = create_specgram
        self.perform_stft = perform_stft
        self.normalize = normalize
        self.default_sr = default_sr
        self.sec = sec
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augment = augment

        # File-name pieces used by construct_signal_path.
        self.prefix = "common_voice_en_"
        self.suffix = ".mp3"

        if self.create_specgram and self.perform_stft:
            # ValueError is still caught by any existing `except Exception`.
            raise ValueError("Use only one option out of 'create_specgram' and 'perform_stft'")

        # os.path.exists(None) raises TypeError, so check for None first.
        if self.noise_path_save is not None and os.path.exists(self.noise_path_save):
            print("Loading noise from saved file")
            noise_paths = np.load(self.noise_path_save)
        else:
            noise_paths = np.asarray([
                os.path.join(root, name)
                for root, _dirs, files in os.walk(noise_dir)
                for name in files
                if name.endswith(".wav")
            ])

        if self.num_noise_samples is not None:
            self.noise_paths = noise_paths[:self.num_noise_samples]
        else:
            self.noise_paths = noise_paths

        if self.signal_nums_save is not None and os.path.exists(self.signal_nums_save):
            print("Loading nums from npy file")
            self.signal_nums = np.load(self.signal_nums_save)
        else:
            self.signal_nums = self.get_signal_paths(signal_dir)

        if self.num_signal_samples is not None:
            self.signal_nums = self.signal_nums[:self.num_signal_samples]
        print(len(self.signal_nums))

        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild the flat sample index and shuffle it when ``self.shuffle`` is set.

        Called once from ``__init__``; it is also the end-of-epoch hook of the
        Keras ``Sequence`` API.
        """
        self.ids = np.arange(len(self.signal_nums)*len(self.noise_paths))

        if self.shuffle:
            np.random.shuffle(self.ids)

    def get_signal_paths(self, clips_path):
        """Return the clip numbers found in ``clips_path`` as an integer array.

        The number is the fourth ``_``-separated field of the file name with
        its extension removed (``common_voice_en_123.mp3`` -> ``123``).
        Directory entries that do not follow that pattern are skipped instead
        of aborting the scan.
        """
        file_nums = []
        for file in tqdm(os.listdir(clips_path)):
            try:
                num = int(file.split("_")[3].split(".")[0])
            except (IndexError, ValueError):
                # Not a numbered clip (stray file, sub-directory, ...).
                continue
            file_nums.append(num)
        return np.asarray(file_nums)

    def get_noise_from_sound(self, signal, noise, SNR):
        """Rescale ``noise`` so that mixing it with ``signal`` gives ``SNR`` dB.

        Parameters
        ----------
        signal, noise : np.ndarray
            Waveforms; only their RMS values are used.
        SNR : int or float
            Target signal-to-noise ratio in decibels.

        Returns
        -------
        np.ndarray
            The scaled noise. A noise clip with zero RMS is returned unchanged,
            because it cannot be scaled to any level.
        """
        rms_signal = np.sqrt(np.mean(np.square(signal)))
        # SNR_dB = 20 * log10(rms_signal / rms_noise)  ->  solve for rms_noise.
        rms_noise_target = np.sqrt(rms_signal**2/pow(10., SNR/10))

        rms_noise_current = np.sqrt(np.mean(np.square(noise)))
        if rms_noise_current == 0:
            # Avoid a 0/0 division that would fill the mixture with NaNs.
            return noise

        return noise*(rms_noise_target/rms_noise_current)

    def get_mixed_signal(self, signal, noise, default_sr, sec, SNR):
        """Cut or pad both clips to ``default_sr * sec`` samples and mix them.

        The signal is truncated or zero-padded at the end; the noise is
        truncated or repeated end-to-end. The noise is then scaled to the
        requested ``SNR`` and added to the signal.

        Returns
        -------
        (np.ndarray, np.ndarray)
            ``(signal + noise, signal)``, both ``int(default_sr * sec)`` samples long.

        Raises
        ------
        ValueError
            If ``noise`` is empty (there is nothing to repeat).
        """
        sig_length = int(default_sr * sec)

        signal = np.asarray(signal)
        if len(signal) >= sig_length:
            signal = signal[:sig_length]
        else:
            # Single zero-pad: also finishes for an empty clip and keeps the dtype.
            signal = np.pad(signal, (0, sig_length - len(signal)), mode="constant")

        noise = np.asarray(noise)
        if len(noise) == 0:
            raise ValueError("noise clip is empty; cannot mix it with the signal")
        if len(noise) < sig_length:
            # Repeat just often enough to cover the signal (ceil division).
            repeats = -(-sig_length // len(noise))
            noise = np.tile(noise, repeats)
        noise = noise[:sig_length]

        noise = self.get_noise_from_sound(signal, noise, SNR)

        signal_noise = signal+noise
        return signal_noise, signal

    def construct_signal_path(self, signal_id):
        """Return the path of the clip stored at position ``signal_id``.

        Raises
        ------
        FileExistsError
            If the constructed path does not exist.
        """
        file_num = self.signal_nums[signal_id]
        if hasattr(file_num, "item"):
            # Unwrap array scalars to a plain Python number before formatting.
            file_num = file_num.item()
        file_name = self.prefix + str(file_num) + self.suffix
        path = os.path.join(self.signal_dir, file_name)
        if os.path.exists(path):
            return path
        # NOTE(review): FileNotFoundError would describe this better; the type is
        # kept so callers that already catch FileExistsError keep working.
        raise FileExistsError(f"{path}")

    def get_ids(self, idx):
        """Build the ``(noisy, clean)`` pair for flat sample index ``idx``."""
        signal_id, noise_id = divmod(idx, len(self.noise_paths))

        signal_path = self.construct_signal_path(signal_id)
        noise_path = self.noise_paths[noise_id]

        return self.develop_data(signal_path, noise_path)

    def develop_data(self, signal_path, noise_path):
        """Load one signal and one noise clip and mix them at a random SNR.

        The SNR is drawn as ``randint(0, randint(0, 50) + 1)``, i.e. an integer
        in ``[0, 49]`` that is biased towards small values.

        Returns
        -------
        (np.ndarray, np.ndarray)
            ``(noisy, clean)``. When ``perform_stft`` is true, ``noisy`` is the
            STFT of the mixture while ``clean`` stays a waveform.
        """
        SNR = np.random.randint(0, np.random.randint(0, 50)+1)

        noise, _noise_sr = librosa.load(noise_path, sr=self.default_sr)
        signal, _signal_sr = librosa.load(signal_path, sr=self.default_sr)

        signal_noise_add, signal = self.get_mixed_signal(signal, noise, self.default_sr, self.sec, SNR)

        if self.perform_stft:
            # NOTE(review): only the mixture is transformed - confirm the clean
            # target is meant to remain in the time domain.
            signal_noise_add = librosa.stft(signal_noise_add, n_fft=self.n_fft, hop_length=self.hop_len, win_length=self.win_length)

        return signal_noise_add, signal

    def __len__(self):
        """Return the number of batches per epoch; the last one may be partial."""
        total = len(self.signal_nums)*len(self.noise_paths)
        # Integer ceil division: len() must return an int, never a float.
        return (total + self.batch_size - 1)//self.batch_size

    def get_item(self, idx):
        """Return the ``(noisy, clean)`` pair for flat sample index ``idx``."""
        return self.get_ids(idx)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(noisy_batch, clean_batch)`` arrays."""
        indexes = self.ids[index*self.batch_size : (index+1)*self.batch_size]
        signal_noises = []
        signals = []
        for idx in indexes:
            signal_noise, signal = self.get_item(idx)
            signal_noises.append(signal_noise)
            signals.append(signal)

        return np.asarray(signal_noises), np.asarray(signals)

In [7]:
# Dataset locations and the cached index files read by the generator.
noise_dir = "./dataset/UrbanSound8K-Resampled/audio/"
signal_dir = "./dataset/cv-corpus-5.1-2020-06-22-Resampled/en/clips/"
signal_nums_save = "./dataset_loader_files/signal_paths_nums_save.npy"
noise_save_path = "./dataset_loader_files/noise_paths_resampled_save.npy"

# Subset sizes: only the first N entries of each list are used.
num_noise_samples = 100
num_signal_samples = 1000

# STFT geometry: the window spans the whole FFT frame, the hop is a quarter of it.
n_fft = 400
win_length = n_fft
hop_len = n_fft // 4

# Output representation switches (both off here).
create_specgram = False
perform_stft = False

# Audio format: sample rate in Hz and clip length in seconds.
default_sr = 16000
sec = 6

# NOTE(review): this value is never forwarded; the call below passes augment=False.
augment = True

signal_synthesis_dataset = Signal_Synthesis_Datagen_tf(
    noise_dir,
    signal_dir,
    signal_nums_save=signal_nums_save,
    num_noise_samples=num_noise_samples,
    num_signal_samples=num_signal_samples,
    noise_path_save=noise_save_path,
    n_fft=n_fft,
    win_length=win_length,
    hop_len=hop_len,
    create_specgram=create_specgram,
    perform_stft=perform_stft,
    normalize=True,
    default_sr=default_sr,
    sec=sec,
    augment=False,
)

AttributeError: 'Signal_Synthesis_Datagen_tf' object has no attribute 'signal_nums'

In [8]:
# Sanity check: is the cached signal-number .npy file present on disk?
os.path.exists("./dataset_loader_files/signal_paths_nums_save.npy")

True