# Model 3 
### 3. Data augmentation with raw features 

So far we have worked with only 2,000 samples, which is very little data. Below we describe the process used to generate augmented data.

There are two types of augmentation:
1. Time stretching - time stretching either increases or decreases the length of the file. For time stretching we play the file 30% faster or slower.
2. Pitch shifting - pitch shifting moves the frequencies higher or lower. For pitch shifting we shift up or down by one half-step.


In [1]:
import numpy as np
import pandas as pd
import librosa
import os
from glob import glob
from tqdm import tqdm
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model
import tensorflow as tf
from sklearn.metrics import f1_score



#### Making the DataFrame

In [2]:
# Collect every recording under ./recordings and derive its label from the
# file name; the label is the text before the first underscore
# (e.g. "1_theo_39.wav" -> "1").
# NOTE: glob returns matches in arbitrary, filesystem-dependent order.
paths = './recordings/*'
all_files = list(tqdm(glob(paths)))

# os.path.basename strips the directory part using the platform's own
# separator rules, so the label is parsed correctly even when glob returns
# paths containing backslashes.
data = [[path, os.path.basename(path).split('_')[0]] for path in all_files]
df_audio = pd.DataFrame(data, columns=['path', 'label'])
df_audio.head()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:00<00:00, 2165919.96it/s]


Unnamed: 0,path,label
0,./recordings/1_theo_39.wav,1
1,./recordings/0_jackson_21.wav,0
2,./recordings/9_theo_26.wav,9
3,./recordings/9_jackson_29.wav,9
4,./recordings/8_nicolas_33.wav,8


#### Shuffle the data so that samples with the same label are not passed in sequence

In [3]:
from sklearn.utils import shuffle
# Shuffle the rows of df_audio once, with a fixed seed.
# NOTE(review): df_audio is reassigned in place, so re-running this cell
# shuffles the already-shuffled frame again.
df_audio = shuffle(df_audio, random_state=33)  # do not change the random state

In [4]:
# Sampling rate (Hz) shared by audio loading and by the pitch-shift augmentation.
sample_rate = 22050
def load_wav(x, get_duration=True):
    '''Load an audio file resampled to ``sample_rate`` (22050 Hz).

    Parameters
    ----------
    x : path of the audio file, forwarded to ``librosa.load``.
    get_duration : when True, also compute the clip duration with
        ``librosa.get_duration``.

    Returns
    -------
    ``[samples, duration]`` when ``get_duration`` is true, otherwise only
    ``samples``.
    '''
    # Read the rate from the module-level constant so loading and pitch
    # shifting cannot drift apart, and give the rate returned by librosa its
    # own name so it does not shadow that constant.
    samples, loaded_rate = librosa.load(x, sr=sample_rate)
    if get_duration:
        duration = librosa.get_duration(y=samples, sr=loaded_rate)
        return [samples, duration]
    return samples

def generate_augmented_data(file_path):
    '''Create 9 variants of one recording: 3 time-stretch rates x 3 pitch shifts.

    Parameters
    ----------
    file_path : path of the audio file, loaded through ``load_wav``.

    Returns
    -------
    List of 9 arrays, ordered by stretch rate (0.7, 1, 1.3) and, within each
    rate, by pitch shift in half-steps (-1, 0, 1). The (rate=1, n_steps=0)
    combination is still routed through both librosa effects.
    '''
    samples = load_wav(file_path, get_duration=False)
    augmented_data = []
    for time_value in [0.7, 1, 1.3]:
        # The stretched signal does not depend on the pitch value, so it is
        # computed once per rate instead of once per (rate, pitch) pair.
        time_stretch_data = librosa.effects.time_stretch(samples, rate=time_value)
        for pitch_value in [-1, 0, 1]:
            final_data = librosa.effects.pitch_shift(
                time_stretch_data, sr=sample_rate, n_steps=pitch_value
            )
            augmented_data.append(final_data)
    return augmented_data



In [5]:
# Sanity check: augment the first row of the shuffled frame and show the
# length (in samples) of the last of the 9 variants (rate 1.3, +1 half-step).
temp_path = df_audio.iloc[0].path
aug_temp = generate_augmented_data(temp_path)
len(aug_temp[8])

8180

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the file paths for testing, stratified on the label so each
# label keeps the same share in both parts. Labels are handed over as an
# integer NumPy array; paths stay a pandas Series.
X_train, X_test, y_train, y_test = train_test_split(
    df_audio['path'],
    df_audio['label'].to_numpy(dtype='int'),
    test_size=0.2,
    stratify=df_audio['label'],
    random_state=45,
)

#### Augment the train data, which results in 1600*9 = 14400 data points

In [7]:
agumented_train_data = []
agumented_train_label = []

# X_train (paths) and y_train (labels) are walked in the same positional
# order. Passing total= lets tqdm show a real progress bar and ETA.
for path, label in tqdm(zip(X_train, y_train), total=len(X_train)):
    variants = generate_augmented_data(path)
    agumented_train_data.extend(variants)
    # One label per generated variant; the count is taken from the result
    # rather than hard-coded, so it stays correct if the augmentation grid changes.
    agumented_train_label.extend([label] * len(variants))
agumented_train_label = np.array(agumented_train_label)

1600it [04:38,  5.74it/s]


In [8]:
# Test clips are only loaded (no augmentation is applied to them).
test_data = [load_wav(path, get_duration=False) for path in tqdm(X_test)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [00:04<00:00, 92.09it/s]


### Padding and masking the sequence

In [9]:
# Every sequence is padded or truncated to this many samples
# (17640 samples = 0.8 s at the 22050 Hz loading rate).
max_length = 17640


def _length_mask(sequences, maxlen):
    '''Return a boolean (len(sequences), maxlen) mask: True on real samples, False on padding.

    Sequences are padded and truncated at the end ('post'), so the first
    min(len(seq), maxlen) positions of each row hold real samples.
    '''
    lengths = np.minimum([len(seq) for seq in sequences], maxlen)
    return np.arange(maxlen)[np.newaxis, :] < lengths[:, np.newaxis]


X_train_pad_seq_m3 = tf.keras.utils.pad_sequences(agumented_train_data, maxlen = max_length, padding = 'post',
                                                  dtype = 'float32', truncating = 'post')
X_test_pad_seq_m3 = tf.keras.utils.pad_sequences(test_data, maxlen = max_length, padding = 'post',
                                                 dtype = 'float32', truncating = 'post')

# Masks are derived from the true sequence lengths, not from `padded != 0`:
# a value test also flags genuine zero-valued audio samples as padding.
# (A tf.keras.layers.Masking layer with mask_value=0 would likewise mask by value.)
X_train_mask_m3 = _length_mask(agumented_train_data, max_length)
X_test_mask_m3 = _length_mask(test_data, max_length)

In [10]:
# Display the shapes of the padded train and test arrays.
X_train_pad_seq_m3.shape , X_test_pad_seq_m3.shape

((14400, 17640), (400, 17640))

In [11]:
# Persist the padded sequences, masks and labels so the training cells can
# reload them without repeating the augmentation.
_arrays_to_save = {
    './X_train_pad_seq_m3.npy': X_train_pad_seq_m3,
    './X_test_pad_seq_m3.npy': X_test_pad_seq_m3,
    './X_train_mask_m3.npy': X_train_mask_m3,
    './X_test_mask_m3.npy': X_test_mask_m3,
    './y_test.npy': y_test,
    "./agumented_train_label.npy": agumented_train_label,
}
for _npy_path, _array in _arrays_to_save.items():
    np.save(_npy_path, _array)

In [2]:
# Reload the arrays written by the np.save cell.
max_length = 17640

# Training side: padded sequences, masks and per-variant labels.
X_train_pad_seq_m3 = np.load('./X_train_pad_seq_m3.npy')
X_train_mask_m3 = np.load('X_train_mask_m3.npy')
agumented_train_label = np.load('agumented_train_label.npy')

# Test side: padded sequences, masks and labels.
X_test_pad_seq_m3 = np.load('X_test_pad_seq_m3.npy')
X_test_mask_m3 = np.load('X_test_mask_m3.npy')
y_test = np.load('y_test.npy')

In [3]:
class F1ScoreCB(tf.keras.callbacks.Callback):
    '''Keras callback that prints micro-averaged F1 on train and validation data after each epoch.

    Parameters
    ----------
    train_data, test_data : indexable pairs where element 0 is passed to
        ``self.model.predict`` and element 1 holds the true labels given to
        ``sklearn.metrics.f1_score``.

    Attributes
    ----------
    history : dict with the lists ``'train_f1_score'`` and ``'val_f1_score'``,
        each receiving one rounded score per finished epoch.
    '''

    def __init__(self, train_data, test_data):
        super().__init__()
        self.train_data = train_data
        self.test_data = test_data
        self.history = {'train_f1_score': [], 'val_f1_score': []}

    def _micro_f1(self, data):
        '''Predict on ``data[0]`` and return the micro F1 against ``data[1]``, rounded to 4 decimals.'''
        # The predicted class is the arg-max over the model's last output axis.
        preds = np.argmax(self.model.predict(data[0]), axis = -1)
        return np.round(f1_score(data[1], preds, average='micro'), 4)

    def on_epoch_end(self, epochs, logs = None):
        # `logs` defaults to None instead of a shared mutable dict; it is not read here.
        # NOTE: this runs a full prediction pass over both datasets every epoch.
        train_f1_score = self._micro_f1(self.train_data)
        test_f1_score = self._micro_f1(self.test_data)
        self.history['train_f1_score'].append(train_f1_score)
        self.history['val_f1_score'].append(test_f1_score)

        print(f"train_f1_score: {train_f1_score} - val_f1_score: {test_f1_score}")
        print("########### EPOCH ENDED ##########")

In [4]:
# Two inputs: the padded waveform as a (max_length, 1) sequence and a boolean
# mask with one flag per timestep.
input_layer_m3 = Input(shape=(max_length, 1), name="Input_sequence_layer")
input_mask_layer_m3 = Input(shape=(max_length,), dtype='bool', name='Input_mask_layer')

# The mask is handed to the LSTM at call time.
lstm_layer_m3 = LSTM(units=100)(input_layer_m3, mask=input_mask_layer_m3)
# Flatten is applied to the LSTM output before the 10-way softmax classifier.
flatten_m3 = tf.keras.layers.Flatten(name='Flatten')(lstm_layer_m3)
output_m3 = Dense(10, activation='softmax')(flatten_m3)

model_3 = Model(inputs=[input_layer_m3, input_mask_layer_m3], outputs=output_m3)
model_3.summary()

2022-10-28 23:55:19.952758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-28 23:55:20.668321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9651 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:82:00.0, compute capability: 7.5


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input_sequence_layer (InputLay  [(None, 17640, 1)]  0           []                               
 er)                                                                                              
                                                                                                  
 Input_mask_layer (InputLayer)  [(None, 17640)]      0           []                               
                                                                                                  
 lstm (LSTM)                    (None, 100)          40800       ['Input_sequence_layer[0][0]',   
                                                                  'Input_mask_layer[0][0]']       
                                                                                              

In [6]:
# Model inputs are [padded sequences, masks]; name them once and reuse them
# for fitting, validation and the F1 callback.
train_inputs = [X_train_pad_seq_m3, X_train_mask_m3]
val_inputs = [X_test_pad_seq_m3, X_test_mask_m3]

model_3.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy')

# Stop when val_loss has not improved by at least 0.05 for 2 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.05, patience=2, verbose=1)
f1_callback = F1ScoreCB((train_inputs, agumented_train_label), (val_inputs, y_test))

model_3.fit(
    train_inputs,
    agumented_train_label,
    validation_data=(val_inputs, y_test),
    batch_size=16,
    epochs=10,
    callbacks=[early_stop, f1_callback],
)

Epoch 1/10
train_f1_score: 0.0999 - val_f1_score: 0.1
########### EPOCH ENDED ##########
Epoch 2/10
train_f1_score: 0.0938 - val_f1_score: 0.0875
########### EPOCH ENDED ##########
Epoch 3/10
train_f1_score: 0.1 - val_f1_score: 0.1
########### EPOCH ENDED ##########
Epoch 3: early stopping


<keras.callbacks.History at 0x14ed1956fcd0>