In [13]:
import import_ipynb
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
# from train_test_split import *

In [9]:
# Load the pre-split features and labels from disk.
# map_location='cpu' keeps every tensor on the CPU so the scaling step can
# convert it to NumPy, even if the file was saved from a GPU session.
x_train = torch.load('data_train.pt', map_location='cpu')
x_test = torch.load('data_test.pt', map_location='cpu')
y_train = torch.load('label_train.pt', map_location='cpu')
y_test = torch.load('label_test.pt', map_location='cpu')

## Scaler

In [15]:
# Feature scaling
def StandardScalling(x_train, x_test):
    """Standardize train and test features with statistics from the training set.

    StandardScaler expects 2-D ``(n_samples, n_features)`` input, so every
    sample is flattened to a 1-D feature vector, scaled per feature, and then
    reshaped back to its original shape.

    Parameters
    ----------
    x_train : array-like of shape (N_train, ...)
        Training samples; anything ``np.asarray`` accepts.
    x_test : array-like of shape (N_test, ...)
        Test samples. Must flatten to the same number of features per sample
        as ``x_train``.

    Returns
    -------
    tuple of np.ndarray
        ``(x_train_scaled, x_test_scaled)``, each with the shape of the
        corresponding input.
    """
    scaler = StandardScaler()

    x_train = np.asarray(x_train)
    x_test = np.asarray(x_test)
    train_shape, test_shape = x_train.shape, x_test.shape

    # Learn mean / variance on the training split only.
    train_flat = x_train.reshape(train_shape[0], -1)
    train_scaled = scaler.fit_transform(train_flat)

    # Re-use the training statistics for the test split. Re-fitting here would
    # scale the two splits inconsistently and leak test-set statistics.
    test_flat = x_test.reshape(test_shape[0], -1)
    test_scaled = scaler.transform(test_flat)

    return train_scaled.reshape(train_shape), test_scaled.reshape(test_shape)


In [16]:
# Scale both splits; StandardScalling returns the scaled arrays in (train, test) order.
test_train, test_test = StandardScalling(x_train,x_test)


In [17]:
# Display the scaled training features returned by StandardScalling.
test_train

array([[[-0.85033411, -0.82655798, -0.87359245, ..., -1.06382492,
         -0.85888235, -0.74856868],
        [-0.85486668, -0.92792723, -1.11349171, ...,  0.88425083,
         -0.12484985, -0.5913921 ],
        [-1.47181326, -1.41053707, -1.29490231, ...,  0.1816313 ,
          1.21383089,  1.88261939],
        ...,
        [-0.55831412, -1.29570119,  0.39607754, ..., -0.60844876,
         -0.87671903, -1.54084755],
        [ 0.81564789, -0.6885231 ,  1.02124348, ..., -0.90104151,
          0.42103862, -0.40299395],
        [-1.81489537, -1.82292977, -0.84070192, ..., -0.36691825,
          0.20829942,  0.89589393]],

       [[-0.705178  , -0.76771564, -0.61581883, ...,  1.0302122 ,
          0.70114849,  0.48647458],
        [ 0.32279839,  0.25930134,  0.4759695 , ...,  0.31767062,
          0.43300528,  0.24294388],
        [ 0.93824599,  1.09958221,  1.03788338, ..., -0.32349481,
         -0.18656375, -0.14057401],
        ...,
        [ 1.6474035 ,  1.22922889, -0.03888185, ..., -

In [18]:
# Display the training features loaded from data_train.pt.
x_train

tensor([[[-5.3326e+02, -5.1674e+02, -5.2158e+02,  ..., -5.0547e+02,
          -4.8210e+02, -4.6951e+02],
         [ 4.1372e+01,  5.1749e+01,  4.6744e+01,  ...,  1.1066e+02,
           7.3169e+01,  5.6484e+01],
         [-6.0397e+01, -6.5991e+01, -6.3002e+01,  ..., -3.5748e+01,
          -7.0745e-01,  2.2494e+01],
         ...,
         [-1.1323e+01, -1.4405e+01, -4.6878e+00,  ..., -1.2625e+01,
          -1.4264e+01, -1.8545e+01],
         [-2.9265e+00, -1.4128e+01, -4.2026e+00,  ..., -1.6311e+01,
          -7.6199e+00, -1.2979e+01],
         [-2.1119e+01, -1.8919e+01, -1.3448e+01,  ..., -1.1872e+01,
          -8.3873e+00, -4.2365e+00]],

        [[-5.1961e+02, -5.1167e+02, -4.9927e+02,  ..., -2.6634e+02,
          -3.0366e+02, -3.2777e+02],
         [ 7.5837e+01,  8.3576e+01,  8.8969e+01,  ...,  8.9499e+01,
           9.3866e+01,  8.6867e+01],
         [-3.1295e+00, -1.9822e+00, -3.4101e+00,  ..., -5.2977e+01,
          -4.8782e+01, -4.7323e+01],
         ...,
         [ 5.5176e+00,  8

## Augmentation

### AWGN - Additive White Gaussian Noise

In [1]:
def awgn_augmentation(waveform, multiples=2, bits=16, snr_min=15, snr_max=30):
    """Create noisy copies of a waveform by adding white Gaussian noise (AWGN).

    One signal-to-noise ratio in decibels is drawn uniformly from the integers
    ``snr_min .. snr_max`` (both inclusive) and shared by all copies. Every
    copy gets its own independent Gaussian noise realisation, rescaled so that
    the signal/noise power ratio of that copy equals the drawn SNR.

    Parameters
    ----------
    waveform : array-like, 1-D
        The clean signal.
    multiples : int, default 2
        Number of noisy copies to generate.
    bits : int, default 16
        Amplitudes are divided by ``2 ** (bits - 1)`` before power is
        measured. The same constant is applied to signal and noise, so it
        cancels in the power ratio; it is kept for interface compatibility.
    snr_min, snr_max : int, default 15 and 30
        Inclusive bounds of the SNR (dB) to sample from.

    Returns
    -------
    np.ndarray of shape (multiples, len(waveform))
        One noisy version of ``waveform`` per row.

    Raises
    ------
    ValueError
        If ``snr_min`` is greater than ``snr_max``.
    """
    if snr_min > snr_max:
        raise ValueError(
            f"snr_min ({snr_min}) must not be greater than snr_max ({snr_max})"
        )

    waveform = np.asarray(waveform)
    wave_len = len(waveform)

    # One standard-normal noise realisation per requested copy.
    noise = np.random.normal(size=(multiples, wave_len))

    # Mean power of the (normalised) signal and of each noise realisation.
    norm_constant = 2.0 ** (bits - 1)
    signal_power = np.sum((waveform / norm_constant) ** 2) / wave_len
    noise_power = np.sum((noise / norm_constant) ** 2, axis=1) / wave_len

    # randint excludes its upper bound, hence the +1 to make snr_max reachable
    # (and to allow snr_min == snr_max).
    snr = np.random.randint(snr_min, snr_max + 1)

    # Target noise power is signal_power * 10 ** (-snr / 10); the amplitude
    # gain per copy is the square root of target power over current power.
    noise_gain = np.sqrt((signal_power / noise_power) * 10 ** (-snr / 10))

    # Broadcast each copy's gain over its samples and add to the clean signal.
    return waveform + noise_gain[:, np.newaxis] * noise

In [2]:
def augment_waveforms(waveforms, features, emotions, multiples):
    """Append MFCC features and labels for AWGN-augmented copies of each waveform.

    For every waveform, ``multiples`` noisy copies are generated with
    ``awgn_augmentation``; the MFCCs of each copy are appended to ``features``
    and the label of the source waveform is appended to the label list.

    Relies on names defined elsewhere in the notebook: ``feature_mfcc``,
    ``sample_rate`` and the list ``augmented_waveforms_temp``, which receives
    the noisy copies of every waveform. They must exist before this is called.

    Parameters
    ----------
    waveforms : sequence
        Waveforms, in the same order as the labels in ``emotions``.
    features : list
        Existing features; extended in place and also returned.
    emotions : object with ``.tolist()``
        Labels of ``waveforms``; converted to a list, which is then extended.
    multiples : int
        Number of noisy copies generated per waveform.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``labels`` is the extended label list.
    """
    # a plain list is cheaper to append to than an array
    labels = emotions.tolist()
    # number of augmented samples appended so far
    n_new = 0

    # idx doubles as the position of the source waveform's label in `labels`
    for idx, wave in enumerate(waveforms):
        noisy_batch = awgn_augmentation(wave, multiples=multiples)

        for noisy in noisy_batch:
            # MFCCs of the noisy copy go after the native data
            features.append(feature_mfcc(noisy, sample_rate=sample_rate))
            # the copy inherits the label of the waveform it came from
            labels.append(labels[idx])
            n_new += 1

            # single-line progress indicator
            print(
                '\r' + f'Processed {idx + 1}/{len(waveforms)} waveforms for {n_new}/{len(waveforms)*multiples} new augmented samples',
                end='',
            )

        # keep the noisy copies around so their shape can be inspected later
        augmented_waveforms_temp.append(noisy_batch)

    return features, labels