In [4]:
import os, glob
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import librosa

# Single seed shared by NumPy and TensorFlow so that reruns are reproducible.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Dataset locations. Built with os.path.join so the notebook also runs on
# non-Windows systems (on Windows the resulting strings are unchanged).
_DATASET_ROOT = os.path.join('.', 'datasets', 'UrbanSounds8K')
DATASET_PATH = os.path.join(_DATASET_ROOT, 'resampled_wav_16000')
DATASET_PROCESSED_PATH = os.path.join(_DATASET_ROOT, 'resampled_wav_16000_processed')

# Names of the fold sub-directories, 'fold1' ... 'fold10'.
FOLD_PATHS = np.array(['fold%d' % fold_number for fold_number in range(1, 11)])
folds = FOLD_PATHS  # alias kept for code that still refers to `folds`

In [5]:
def get_audio_segments_and_labels(filenames_list, window_size, sample_rate=16000):

    '''
    Cut every audio file in filenames_list into segments of window_size samples.
    Consecutive windows start half a window apart (50% overlap); a window that
    runs past the end of the file is padded with zeros up to window_size.
    The label is the second '-'-separated field of the file's base name
    (the class id in UrbanSound8K-style names).

    Inputs: <list> filenames_list ; paths of the audio files to load
            <int>  window_size    ; segment length in samples, must be >= 1
            <int>  sample_rate    ; rate the audio is loaded at (default 16000);
                                    None keeps each file's native rate
    Outputs: <list> waves_all  ; 1-D np.array segments, each of length window_size
             <list> labels_all ; label strings, one per segment

    Raises: ValueError if window_size < 1

    '''

    if window_size < 1:
        raise ValueError("window_size must be a positive number of samples, got %r" % (window_size,))

    # Half-window hop; clamped to 1 so the window generator always advances
    # (int(1 * 0.5) == 0 would otherwise never leave the first sample).
    hop = max(1, int(window_size * 0.5))

    ### Private function, generate (start, stop) indices of the overlapping windows
    def _windows(data):
        for start in range(0, len(data), hop):
            yield start, start + window_size

    ### Private function, read the label out of the file name
    def _get_label(filename):
        # Accept both Windows and POSIX separators before taking the base name.
        basename = filename.replace('\\', '/').split('/')[-1]
        return basename.split('-')[1]

    waves_all, labels_all = [], []
    for file in filenames_list:
        label = _get_label(file)
        # Pass the rate explicitly: without it librosa resamples everything to
        # its own 22050 Hz default, which would not match the 16 kHz pipeline.
        waveform, _ = librosa.load(file, sr=sample_rate, mono=True)

        for (start, stop) in _windows(waveform):
            wave = waveform[start:stop]
            missing = window_size - wave.shape[0]
            if missing > 0:
                # Zero-pad the tail in the waveform's own dtype so that padded
                # and unpadded segments keep the same dtype.
                wave = np.concatenate((wave, np.zeros(missing, dtype=wave.dtype)))
            waves_all.append(wave)
            labels_all.append(label)

    return waves_all, labels_all

# # Test code
# Smoke test: segment every .wav file of the first fold.
WINDOW_SIZE = 16000
filenames = glob.glob(os.path.join(DATASET_PATH, FOLD_PATHS[0], "*.wav"))
wavs, labels = get_audio_segments_and_labels(
    filenames,
    WINDOW_SIZE,
)

In [None]:
# len(wavs)
import tensorflow_io as tfio
def get_spectrogram(waveform):
  """Convert a waveform into a dB-scaled mel spectrogram using tensorflow_io.

  The input is cast to float32, passed through tfio.audio.spectrogram, mapped
  onto 64 mel bins between 0 and 6000 Hz (the mel step is told the rate is
  16000), and finally converted to a dB scale with top_db=80.

  Args:
    waveform: audio samples; anything tf.cast can convert to float32.

  Returns:
    The tensor produced by tfio.audio.dbscale.
  """
  samples = tf.cast(waveform, dtype=tf.float32)

  # NOTE(review): window (1024) is larger than nfft (512) — confirm this is
  # intended; an earlier variant used window=512, stride=256.
  spec = tfio.audio.spectrogram(samples, nfft=512, window=1024, stride=512)

  # An earlier variant used mels=128 and fmax=8000.
  mel_spec = tfio.audio.melscale(spec, rate=16000, mels=64, fmin=0, fmax=6000)

  return tfio.audio.dbscale(mel_spec, top_db=80)

# Visual check: draw every segment from index 9000 onwards, one figure each.
tail_segments = wavs[9000:]
for wav in tail_segments:
    plt.plot(wav)
    plt.show()

In [27]:
def preprocesses_and_get_labels(file_list, window_size):
    """Segment the given audio files and return the segments with their labels.

    Delegates to get_audio_segments_and_labels and passes its two results
    through unchanged.

    Args:
        file_list: paths of the audio files to process.
        window_size: segment length in samples.

    Returns:
        Tuple (segments, segment_labels) as produced by
        get_audio_segments_and_labels.
    """
    segments, segment_labels = get_audio_segments_and_labels(file_list, window_size)
    return segments, segment_labels

# test_list, test_labels = preprocesses_and_get_labels(filenames)

In [28]:
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
import numpy as np

# Waveform augmentation chain; every transform is applied with probability 0.5.
# Shift, AddBackgroundNoise and AddShortNoises were considered but are left out.
_augmentation_steps = [
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
]
augment = Compose(_augmentation_steps)

# Augment/transform/perturb the audio data
def augment_list(wave_array):
    """Run the module-level `augment` pipeline over every waveform.

    Args:
        wave_array: iterable of waveforms; each one is passed to `augment`
            as `samples` together with sample_rate=16000.

    Returns:
        List with the augmented waveform for each input, in input order.
    """
    return [augment(samples=samples, sample_rate=16000) for samples in wave_array]
    
# augmented_test_list = augment_list(np.copy(test_list))

In [39]:
def save_dataset(fold_path, dataset_path, datasets_path, window_size):
  """Segment one fold of .wav files and save it as a tf.data dataset.

  Every "*.wav" file under <dataset_path>/<fold_path> is segmented with
  preprocesses_and_get_labels; the (segment, label) pairs are written with
  tf.data.experimental.save to <datasets_path>/<fold_path>.
  Augmentation is currently not applied to the saved data.

  Args:
    fold_path: name of the fold sub-directory (e.g. 'fold1').
    dataset_path: directory that contains the fold sub-directories.
    datasets_path: directory under which the processed fold is saved.
    window_size: segment length in samples.

  Raises:
    FileNotFoundError: if the fold directory contains no .wav files.
  """
  fold_dir = os.path.join(dataset_path, fold_path)
  filenames = glob.glob(os.path.join(fold_dir, "*.wav"))
  if not filenames:
    # Fail loudly instead of saving an empty dataset for a mistyped path.
    raise FileNotFoundError("no .wav files found in %r" % (fold_dir,))

  fold_list, fold_labels = preprocesses_and_get_labels(filenames, window_size)

  # Convert in NumPy before building the dataset. Labels arrive as strings, and
  # calling int() on them inside Dataset.map cannot work because map traces its
  # function with symbolic tensors rather than concrete values.
  segments = np.asarray(fold_list, dtype=np.float32)
  class_ids = np.asarray([int(label) for label in fold_labels], dtype=np.int32)

  ds = tf.data.Dataset.from_tensor_slices((segments, class_ids))

  tf.data.experimental.save(
      ds, os.path.join(datasets_path, fold_path),
      compression=None, shard_func=None, checkpoint_args=None
  )

In [40]:
WINDOW_SIZE = 16000
# Generate the dataset
# os.makedirs instead of a shell "mkdir": no shell involved, works with paths
# containing spaces, creates parent directories, and is safe to re-run.
os.makedirs(DATASET_PROCESSED_PATH, exist_ok=True)
for FOLD_PATH in FOLD_PATHS:
    save_dataset(FOLD_PATH, DATASET_PATH, DATASET_PROCESSED_PATH, WINDOW_SIZE)