In [132]:
import numpy as np
import pandas as pd
import os
import glob
from scipy.io import wavfile
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio
import librosa
import librosa.display
## For plotting
import matplotlib.pyplot as plt
from seaborn import set_style
from scipy.io import wavfile
import math, random
## This sets the seaborn plot style to "darkgrid":
## grid lines drawn on a gray (not white) background
set_style("darkgrid")

In [133]:
###functions

#time_shift
# ----------------------------
  # Shifts the signal to the right by a random fraction of its length (at most
  # shift_limit). Values pushed past the end are 'wrapped around' to the start
  # of the transformed signal.
  # ----------------------------
    
## I still have doubts about whether this turns the dataset into complete nonsense.
def time_shift(aud, shift_limit):
    """Circularly shift a signal by a random number of samples.

    This is a plain module-level function: decorating it with ``@staticmethod``
    outside a class makes the name a staticmethod object, which cannot be
    called directly on Python versions before 3.10.

    Parameters
    ----------
    aud : tuple
        ``(sig, sr)`` pair. ``sig`` must expose ``.shape`` and is rolled with
        ``np.roll`` (assumed 1-D; verify against callers). ``sr`` is passed
        through unchanged.
    shift_limit : float
        Upper bound of the shift, as a fraction of ``sig.shape[0]``.

    Returns
    -------
    tuple
        ``(shifted_sig, sr)`` where ``shifted_sig`` has the same length as
        ``sig``.
    """
    sig, sr = aud
    sig_len = sig.shape[0]
    # random.random() lies in [0, 1), so for a non-negative shift_limit the
    # offset is never negative: samples move toward higher indices and the
    # tail wraps around to the front.
    shift_amt = int(random.random() * shift_limit * sig_len)
    return (np.roll(sig, shift_amt), sr)


## add random noise
def random_noise(aud):
    """Add white Gaussian noise scaled to the peak of the given signal.

    The noise level and length are derived from the ``sig`` that is passed in.
    Previously they came from the notebook-level variable ``original_audio``,
    which made the function fail on a fresh kernel and silently use the wrong
    clip whenever that global was stale.

    Parameters
    ----------
    aud : tuple
        ``(sig, sr)`` pair; ``sig`` is a 1-D sequence of samples.

    Returns
    -------
    tuple
        ``(noisy_sig, sr)`` with ``noisy_sig`` the same length as ``sig``.
    """
    sig, sr = aud
    # Standard deviation of the noise is 1/30 of the largest sample value.
    noise_factor = np.max(sig) / 30
    white_noise = (np.random.randn(len(sig)) * noise_factor).astype(np.float32)
    return sig + white_noise, sr

##time stretch
def time_stretch(aud,strech):
    """Resample a signal to a randomly perturbed sampling rate.

    Parameters
    ----------
    aud : tuple
        ``(sig, sr)`` pair; ``sig`` is cast to float32 before resampling.
    strech : float
        Maximum relative change of the sampling rate. The actual factor is
        drawn uniformly from ``[-strech, +strech)``.

    Returns
    -------
    tuple
        ``(resampled_sig, new_sr)`` where ``new_sr = sr * (1 + factor)``.
    """
    sig, sr = aud
    # 2*rand - 1 is uniform on [-1, 1); scaling by `strech` gives [-strech, +strech)
    stre_perc = strech * (2 * np.random.rand() - 1)
    new_sr = sr * (1 + stre_perc)

    resampled = librosa.core.resample(
        sig.astype(np.float32),
        orig_sr=sr,
        target_sr=new_sr)

    # NOTE(review): the perturbed (generally non-integer) rate is returned, not the
    # original `sr` -- confirm downstream consumers expect a float sampling rate.
    return (resampled, new_sr)

# ----------------------------
  # Augment the Spectrogram by masking out some sections of it in both the frequency
  # dimension (ie. horizontal bars) and the time dimension (vertical bars) to prevent
  # overfitting and to help the model generalise better. The masked sections are
  # replaced with the mean value.
  # ----------------------------
    
def spectro_augment(spec, max_mask_pct=0.08, n_freq_masks=2, n_time_masks=2):
    """Mask random frequency bands and time spans of a spectrogram with its mean.

    This is a plain module-level function: ``@staticmethod`` outside a class
    makes the name a staticmethod object, which is not callable before
    Python 3.10.

    Parameters
    ----------
    spec : numpy.ndarray
        2-D spectrogram, unpacked as ``(n_mels, n_steps)``.
    max_mask_pct : float
        Upper bound of each mask's width, as a fraction of the axis length.
    n_freq_masks : int
        Number of frequency masks to apply.
    n_time_masks : int
        Number of time masks to apply.

    Returns
    -------
    numpy.ndarray
        Augmented spectrogram with the same shape as ``spec``. It never shares
        memory with ``spec``.
    """
    # torch.from_numpy shares memory with `spec`; clone so that no masking
    # implementation can write through to the caller's array.
    aug_spec = torch.from_numpy(spec).clone()
    n_mels, n_steps = aug_spec.shape
    # The masking transforms document `mask_value` as a float.
    mask_value = aug_spec.mean().item()

    freq_mask_param = max_mask_pct * n_mels
    for _ in range(n_freq_masks):
        aug_spec = transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)

    time_mask_param = max_mask_pct * n_steps
    for _ in range(n_time_masks):
        aug_spec = transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)

    return aug_spec.numpy()


In [134]:
## will be used to process augmented audio files
def pad_trunc(aud, max_t):
    """Centre-crop or zero-pad a 2-D signal so it has exactly ``max_t`` columns.

    Adapted from:
    https://towardsdatascience.com/audio-deep-learning-made-simple-sound-classification-step-by-step-cebc936bbe5

    Parameters
    ----------
    aud : tuple
        ``(sig, sr)`` pair; ``sig`` is a 2-D tensor unpacked as
        ``(num_rows, sig_len)``.
    max_t : int
        Target length along the second axis.

    Returns
    -------
    tuple
        ``(sig, sr)``. ``sig`` is returned untouched when it already has
        ``max_t`` columns.
    """
    sig, sr = aud
    num_rows, sig_len = sig.shape
    surplus = sig_len - max_t

    if surplus > 0:
        # Too long: keep the middle max_t columns.
        start = surplus // 2
        sig = sig[:, start:start + max_t]
    elif surplus < 0:
        # Too short: split the missing columns between both ends; when the
        # count is odd the extra zero column goes at the end.
        missing = -surplus
        left = missing // 2
        right = missing - left
        sig = torch.cat(
            (torch.zeros((num_rows, left)), sig, torch.zeros((num_rows, right))),
            1,
        )

    return (sig, sr)

def spectro_gram(aud, n_mels=128, n_fft=600, hop_len=None, top_db=80):
    """Convert a waveform to a mel spectrogram expressed in decibels.

    Parameters
    ----------
    aud : tuple
        ``(sig, sr)`` pair; ``sig`` is the waveform tensor and ``sr`` its
        sampling rate.
    n_mels : int
        Number of mel bands.
    n_fft : int
        FFT size.
    hop_len : int or None
        Hop length between frames; ``None`` leaves the choice to torchaudio's
        default.
    top_db : float
        Dynamic-range cut-off passed to ``AmplitudeToDB``. Defaults to 80, the
        value that used to be hard-coded.

    Returns
    -------
    torch.Tensor
        Mel spectrogram in dB.
    """
    sig, sr = aud

    # spec has shape [channel, n_mels, time], where channel is mono, stereo etc
    to_mel = transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)
    # Convert to decibels
    to_db = transforms.AmplitudeToDB(top_db=top_db)
    return to_db(to_mel(sig))

In [135]:
## Let's apply each augmentation separately: time shift, time stretch, random noise, and spectro_augment.
## Here I will produce 3 datasets for each method and save them for later use.

In [136]:
## Load the pre-computed spectrograms and show their shape.
X = np.load('../../data/processed_data/specgram_db_pad_trunc.npy')
print(X.shape)

## Labels: encode the 'situation' column of the metadata table as an integer class id.
category_to_number = {'brushing': 0, 'food': 1, 'isolation': 2}
df = pd.read_csv("../../data/processed_data/metadata.csv")
df['numerical_situation'] = df['situation'].map(category_to_number)

y = df['numerical_situation'].to_numpy()

(440, 128, 67)


In [137]:
## Three independent frequency/time-masked copies of every spectrogram in X,
## each stored both as a 2-D array and flattened.
ftmask1, ftmask1_flat = [], []
ftmask2, ftmask2_flat = [], []
ftmask3, ftmask3_flat = [], []

mask_runs = (
    (ftmask1, ftmask1_flat),
    (ftmask2, ftmask2_flat),
    (ftmask3, ftmask3_flat),
)
for spec in X:
    for masked, masked_flat in mask_runs:
        aug = spectro_augment(spec, max_mask_pct=0.08, n_freq_masks=2, n_time_masks=2)
        masked.append(aug)
        # Flatten the same draw that was just stored. The third flattened set
        # used to receive a copy of the second draw instead of its own.
        masked_flat.append(aug.flatten())

In [138]:
## Collect the paths of all raw .wav recordings.
data_dir = '../../data/raw_data/zenodo.4008297/'
wav_pattern = os.path.join(data_dir, '*.wav')
# NOTE(review): glob returns files in arbitrary order -- confirm this matches
# the row order of the metadata/labels used with the saved datasets.
data_fps = glob.glob(wav_pattern)

In [139]:
## Three independent random draws of each waveform-level augmentation per clip.
tshift1, tshift2, tshift3 = [], [], []
randnoi1, randnoi2, randnoi3 = [], [], []
tstret1, tstret2, tstret3 = [], [], []

for data_fp in data_fps:
    # sr=None keeps the file's native sampling rate
    original_audio, sr = librosa.load(data_fp, sr=None)
    clip = (original_audio, sr)
    for bucket in (tshift1, tshift2, tshift3):
        bucket.append(time_shift(clip, 0.2))
    for bucket in (randnoi1, randnoi2, randnoi3):
        bucket.append(random_noise(clip))
    for bucket in (tstret1, tstret2, tstret3):
        bucket.append(time_stretch(clip, 0.2))
    

In [140]:
## Time-shift set 1: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_tsh1 = []
spec_pt_flat_tsh1 = []  ## flattened version
for sig, sr in tshift1:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_tsh1.append(mel_2d)
    spec_pt_flat_tsh1.append(mel_2d.flatten())

In [141]:
## Time-shift set 2: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_tsh2 = []
spec_pt_flat_tsh2 = []  ## flattened version
for sig, sr in tshift2:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_tsh2.append(mel_2d)
    spec_pt_flat_tsh2.append(mel_2d.flatten())

In [142]:
## Time-shift set 3: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_tsh3 = []
spec_pt_flat_tsh3 = []  ## flattened version
for sig, sr in tshift3:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_tsh3.append(mel_2d)
    spec_pt_flat_tsh3.append(mel_2d.flatten())

In [143]:
## Random-noise set 1: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_rd1 = []
spec_pt_flat_rd1 = []  ## flattened version
for sig, sr in randnoi1:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_rd1.append(mel_2d)
    spec_pt_flat_rd1.append(mel_2d.flatten())

In [144]:
## Random-noise set 2: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_rd2 = []
spec_pt_flat_rd2 = []  ## flattened version
for sig, sr in randnoi2:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_rd2.append(mel_2d)
    spec_pt_flat_rd2.append(mel_2d.flatten())

In [145]:
## Random-noise set 3: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_rd3 = []
spec_pt_flat_rd3 = []  ## flattened version
for sig, sr in randnoi3:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_rd3.append(mel_2d)
    spec_pt_flat_rd3.append(mel_2d.flatten())

In [146]:
## Time-stretch set 1: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_tst1 = []
spec_pt_flat_tst1 = []  ## flattened version
for sig, sr in tstret1:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_tst1.append(mel_2d)
    spec_pt_flat_tst1.append(mel_2d.flatten())

In [147]:
## Time-stretch set 2: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_tst2 = []
spec_pt_flat_tst2 = []  ## flattened version
for sig, sr in tstret2:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_tst2.append(mel_2d)
    spec_pt_flat_tst2.append(mel_2d.flatten())

In [148]:
## Time-stretch set 3: pad/truncate each clip to 20000 samples, then compute its spectrogram.
spec_pt_tst3 = []
spec_pt_flat_tst3 = []  ## flattened version
for sig, sr in tstret3:
    # pad_trunc needs a 2-D tensor of shape (1, n_samples)
    waveform = torch.tensor(sig).view(1, -1)
    mel_db = spectro_gram(pad_trunc((waveform, sr), 20000))
    # converting the tensor to an array keeps a redundant leading axis; drop it
    mel_2d = np.array(mel_db)[0, :, :]
    spec_pt_tst3.append(mel_2d)
    spec_pt_flat_tst3.append(mel_2d.flatten())

In [149]:
## Persist the three time-shift datasets (2-D and flattened variants).
timeshift_outputs = {
    '../../data/processed_data/spec_pt_timeshift1.npy': spec_pt_tsh1,
    '../../data/processed_data/spec_pt_timeshift1_flatten.npy': spec_pt_flat_tsh1,
    '../../data/processed_data/spec_pt_timeshift2.npy': spec_pt_tsh2,
    '../../data/processed_data/spec_pt_timeshift2_flatten.npy': spec_pt_flat_tsh2,
    '../../data/processed_data/spec_pt_timeshift3.npy': spec_pt_tsh3,
    '../../data/processed_data/spec_pt_timeshift3_flatten.npy': spec_pt_flat_tsh3,
}
for out_path, specs in timeshift_outputs.items():
    np.save(out_path, specs)

In [150]:
## Persist the three random-noise datasets (2-D and flattened variants).
randnoise_outputs = {
    '../../data/processed_data/spec_pt_randnoise1.npy': spec_pt_rd1,
    '../../data/processed_data/spec_pt_randnoise1_flatten.npy': spec_pt_flat_rd1,
    '../../data/processed_data/spec_pt_randnoise2.npy': spec_pt_rd2,
    '../../data/processed_data/spec_pt_randnoise2_flatten.npy': spec_pt_flat_rd2,
    '../../data/processed_data/spec_pt_randnoise3.npy': spec_pt_rd3,
    '../../data/processed_data/spec_pt_randnoise3_flatten.npy': spec_pt_flat_rd3,
}
for out_path, specs in randnoise_outputs.items():
    np.save(out_path, specs)

In [151]:
## Persist the three time-stretch datasets (2-D and flattened variants).
timestretch_outputs = {
    '../../data/processed_data/spec_pt_timestretch1.npy': spec_pt_tst1,
    '../../data/processed_data/spec_pt_timestretch1_flatten.npy': spec_pt_flat_tst1,
    '../../data/processed_data/spec_pt_timestretch2.npy': spec_pt_tst2,
    '../../data/processed_data/spec_pt_timestretch2_flatten.npy': spec_pt_flat_tst2,
    '../../data/processed_data/spec_pt_timestretch3.npy': spec_pt_tst3,
    '../../data/processed_data/spec_pt_timestretch3_flatten.npy': spec_pt_flat_tst3,
}
for out_path, specs in timestretch_outputs.items():
    np.save(out_path, specs)

In [152]:
## Persist the three frequency/time-mask datasets (2-D and flattened variants).
ftmask_outputs = {
    '../../data/processed_data/spec_pt_ftmask1.npy': ftmask1,
    '../../data/processed_data/spec_pt_ftmask1_flatten.npy': ftmask1_flat,
    '../../data/processed_data/spec_pt_ftmask2.npy': ftmask2,
    '../../data/processed_data/spec_pt_ftmask2_flatten.npy': ftmask2_flat,
    '../../data/processed_data/spec_pt_ftmask3.npy': ftmask3,
    '../../data/processed_data/spec_pt_ftmask3_flatten.npy': ftmask3_flat,
}
for out_path, specs in ftmask_outputs.items():
    np.save(out_path, specs)