In [None]:
# Installing audiomentations library
!pip install audiomentations

Collecting audiomentations
  Downloading audiomentations-0.23.0-py3-none-any.whl (65 kB)
[?25l[K     |█████                           | 10 kB 18.4 MB/s eta 0:00:01[K     |██████████                      | 20 kB 20.6 MB/s eta 0:00:01[K     |███████████████                 | 30 kB 23.0 MB/s eta 0:00:01[K     |████████████████████            | 40 kB 24.6 MB/s eta 0:00:01[K     |█████████████████████████       | 51 kB 7.9 MB/s eta 0:00:01[K     |██████████████████████████████  | 61 kB 9.1 MB/s eta 0:00:01[K     |████████████████████████████████| 65 kB 3.1 MB/s 
Installing collected packages: audiomentations
Successfully installed audiomentations-0.23.0


In [None]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.resnet import preprocess_input, ResNet50, ResNet101
from keras.applications.densenet import preprocess_input, DenseNet121
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from keras.models import Model, Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Input, Dropout, GlobalAveragePooling2D, Lambda, BatchNormalization, concatenate
from tensorflow.keras.callbacks.experimental import BackupAndRestore
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from keras.models import load_model
from sklearn.decomposition import PCA
import pickle as pkl
from audiomentations import Compose, AddGaussianSNR, AddGaussianNoise, TimeStretch, PitchShift
from sklearn.preprocessing import MultiLabelBinarizer

Data Loading, Splitting and Saving

In [None]:
# Read the CSV listing every audio chunk path together with its class label
statFeatures = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures_Nocall_Labels.csv")

# Drop every row whose 'VGGCustStat' label is 'Nocall'
nocallIndex = statFeatures.index[statFeatures['VGGCustStat'] == 'Nocall']
statData = statFeatures.drop(nocallIndex)

# Keep only the first 325952 audio paths and their class labels
# (325952 = 5093 * 64, i.e. a whole number of 64-item batches)
statData1 = statData['AudioPath'].iloc[:325952]
Stat_labels = statData['VGGCustStat'].iloc[:325952]

print(f"Statistical Data Shape after Nocall Removal = {statData1.shape}")

Statistical Data Shape after Nocall Removal = (325952,)


In [None]:
# Put the audio-path and class-label columns side by side and convert the result into a
# 2-column numpy array (column 0 = audio path, column 1 = class label)
pathsAndLabels = pd.concat((statData1, Stat_labels), axis="columns")
data = np.array(pathsAndLabels)
print(data.shape)

(325952, 2)


Feature Extraction for Noise Augmented Data

In [None]:
# Builds the audiomentations pipeline (gaussian SNR noise, gaussian noise, time stretch)
def augmentation():
  """Create the augmentation pipeline used for the audio chunks.

  Returns:
    An audiomentations `Compose` holding, in this order, `AddGaussianSNR`,
    `AddGaussianNoise` and `TimeStretch`, each constructed with p=0.5.
  """
  gaussianSnr = AddGaussianSNR(max_SNR=0.05, p=0.5)
  gaussianNoise = AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5)
  timeStretch = TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5)
  return Compose([gaussianSnr, gaussianNoise, timeStretch])

In [None]:
# Build the augmentation pipeline once; `augment` is the callable that is later handed to
# customBatchGenerator and invoked as augment(samples=..., sample_rate=...)
augment = augmentation()

In [None]:
# Custom batch generator: loads `batch_size` audio chunks at a time, applies the noise augmentation,
# converts each augmented chunk into a 3-channel uint8 mel-spectrogram image and yields the images
# together with their class labels
def _toUint8Rgb(melDb):
  """Turn a 2-D mel-spectrogram (dB) into a 3-channel uint8 image scaled to [0, 255].

  The spectrogram is replicated over three channels, standardised (zero mean, unit std) and
  then min-max scaled. A (near-)constant input, whose standardised range is <= 1e-6, yields
  an all-zero image.
  """
  rgb = np.stack([melDb, melDb, melDb], axis=-1)
  standardized = (rgb - rgb.mean()) / (rgb.std() + 1e-6)
  low, high = standardized.min(), standardized.max()
  if (high - low) <= 1e-6:
    return np.zeros_like(standardized, dtype=np.uint8)
  # Float values in [0, 255] are truncated by the uint8 cast
  return (255 * (standardized - low) / (high - low)).astype(np.uint8)


def customBatchGenerator(data, augment, batch_size=64):
  """Yield (images, labels) batches of augmented mel-spectrograms.

  Args:
    data: indexable rows where row[0] is an audio file path and row[1] its class label.
    augment: callable invoked as augment(samples=..., sample_rate=...) returning the
      augmented samples.
    batch_size: number of chunks per yielded batch (default 64).

  Yields:
    batchX: uint8 array of shape (batch_size, 128, 431, 3).
    batchY: array of shape (batch_size, 1) with dtype 'U32' holding the class labels.

  Only full batches are produced; the trailing len(data) % batch_size rows are skipped.

  Raises:
    ValueError: if batch_size is not positive.
  """
  if batch_size <= 0:
    raise ValueError("batch_size must be a positive integer")

  sampleRate = 44100
  for i in range(len(data) // batch_size):
    batchX = np.zeros((batch_size, 128, 431, 3), dtype='uint8')
    batchY = np.zeros((batch_size, 1), dtype='U32')

    # Processing `batch_size` chunk audios at a time
    for slot, j in enumerate(range(i * batch_size, (i + 1) * batch_size)):
      # Loading chunk audio
      samples, _ = librosa.load(data[j][0], sr=sampleRate)

      # Adding noise to loaded chunk audio
      augmented = augment(samples=samples, sample_rate=sampleRate)

      # Generating the mel-spectrogram from the AUGMENTED samples (previously the raw samples
      # were used, so the augmentation never reached the saved features).
      # NOTE(review): assumes the augmented clip still produces 431 frames - confirm that the
      # TimeStretch transform keeps the clip length unchanged.
      mel = librosa.feature.melspectrogram(y=augmented, sr=sampleRate, n_mels=128, n_fft=1024,
                                           hop_length=512, fmin=20, fmax=16000)
      mel_scale = librosa.power_to_db(mel)

      # Converting mel-spectrogram into a 3-channel image scaled to [0, 255]
      batchX[slot] = _toUint8Rgb(mel_scale)
      batchY[slot] = data[j][1]
    yield batchX, batchY

In [None]:
# Create the generator that turns `data` into batches of noise-augmented mel-spectrograms
trainBatchGenerator = customBatchGenerator(data, augment)

# Walk through the generator; every batch is written to Drive as one features .npy file and
# one labels .npy file, both suffixed with the running batch number
augmentRoot = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/"

i = 0
for batchFeatures, batchLabels in trainBatchGenerator:
  np.save(augmentRoot + "Augmented Data/aug_vis_feat_" + str(i) + ".npy", batchFeatures)
  np.save(augmentRoot + "Labels/aug_vis_labels_" + str(i) + ".npy", batchLabels)
  i = i + 1

Deep Feature Extraction for Augmented Data


In [None]:
# Generator that reads the saved feature / label .npy files pairwise, in index order
def customBatchLoaderGenerator(featureFiles, labelFiles, batchSize):
  """Yield (features, labels) arrays loaded from matching .npy files.

  Args:
    featureFiles: indexable collection of feature file paths.
    labelFiles: indexable collection of label file paths, aligned with featureFiles.
    batchSize: number of file pairs to load (indices 0 .. batchSize - 1).

  Yields:
    Tuple (np.load(featureFiles[k]), np.load(labelFiles[k])) for each index k.
  """
  for fileIndex in range(batchSize):
    yield np.load(featureFiles[fileIndex]), np.load(labelFiles[fileIndex])

In [None]:
# Directories holding the augmented visual features and their labels
augmentedDataDir = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/Augmented Data/"
augmentedLabelDir = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/Labels/"

# The directory listings only determine how many entries there are; each entry is then
# overwritten with the full path of the file carrying the matching numeric suffix, so that
# position k in both lists refers to batch k
featureFilePaths = os.listdir(augmentedDataDir)
labelFilePaths = os.listdir(augmentedLabelDir)

for i, _ in enumerate(featureFilePaths):
  featureFilePaths[i] = f"{augmentedDataDir}aug_vis_feat_{i}.npy"
  labelFilePaths[i] = f"{augmentedLabelDir}aug_vis_labels_{i}.npy"

print(f"Total Files = {len(featureFilePaths)}")

Total Files = 5093


ResNet50 (128, 431, 3)

In [None]:
# ResNet50 Model with pretrained ImageNet weights to extract deep features

# Input Layer: one 128 x 431 RGB mel-spectrogram image per sample
input_layer = Input(shape = (128, 431, 3))
# Preprocess Layer
# The ResNet preprocessing is referenced through its module on purpose: the bare name
# `preprocess_input` is imported three times at the top of the notebook (vgg16, resnet, densenet)
# and the last import wins, so the bare name resolves to the DenseNet variant.
preprocess = Lambda(lambda x: keras.applications.resnet.preprocess_input(x), name='preprocess')(input_layer)
# Pretrained convolutional base without the classification head
layer_1 = ResNet50(include_top=False, weights="imagenet", input_shape=(128, 431, 3))(preprocess)
# Global Average Pooling Layer: collapses the spatial dimensions into one feature vector per sample
average = GlobalAveragePooling2D()(layer_1)

resnetModel = Model(inputs=input_layer, outputs=average)
resnetModel.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 431, 3)]     0         
                                                                 
 preprocess (Lambda)         (None, 128, 431, 3)       0         
                                                                 
 resnet50 (Functional)       (None, 4, 14, 2048)       23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
_________________________________

In [None]:
length = len(featureFilePaths)    # Variable to hold total number of files

# Calling custom generator to load files one by one
trainBatchGenerator = customBatchLoaderGenerator(featureFilePaths, labelFilePaths, length)

# Extracting deep features file by file. The per-file results are collected in lists and stacked
# once at the end: stacking inside the loop re-copies everything gathered so far on every
# iteration, and the former zero-filled pre-allocation was overwritten on the first iteration.
featureBatches = []
labelBatches = []
for gendata in trainBatchGenerator:
  featureBatches.append(resnetModel.predict(gendata[0]))
  labelBatches.append(gendata[1])

if featureBatches:
  features = np.vstack(featureBatches)
  labels = np.vstack(labelBatches)
else:
  # No files available: keep empty arrays with the expected column counts
  features = np.zeros((0, 2048))
  labels = np.zeros((0, 1), dtype='U32')

In [None]:
# Write the ResNet50 deep features and the matching labels to Drive as .npy files
deepFeatureDir = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/Deep Features_Augment/"
np.save(deepFeatureDir + "ResNet50_augmentFeatures_128.npy", features)
np.save(deepFeatureDir + "ResNet50_augmentLabels_128.npy", labels)

ResNet101 (128, 431, 3)

In [None]:
# ResNet101 Model with pretrained ImageNet weights to extract deep features

# Input Layer: one 128 x 431 RGB mel-spectrogram image per sample
input_layer = Input(shape = (128, 431, 3))
# Preprocess Layer
# The ResNet preprocessing is referenced through its module on purpose: the bare name
# `preprocess_input` is imported three times at the top of the notebook (vgg16, resnet, densenet)
# and the last import wins, so the bare name resolves to the DenseNet variant.
preprocess = Lambda(lambda x: keras.applications.resnet.preprocess_input(x), name='preprocess')(input_layer)
# Pretrained convolutional base without the classification head
layer_1 = ResNet101(include_top=False, weights="imagenet", input_shape=(128, 431, 3))(preprocess)
# Global Average Pooling Layer: collapses the spatial dimensions into one feature vector per sample
average = GlobalAveragePooling2D()(layer_1)

resnetModel = Model(inputs=input_layer, outputs=average)
resnetModel.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 431, 3)]     0         
                                                                 
 preprocess (Lambda)         (None, 128, 431, 3)       0         
                                                                 
 resnet101 (Functional)      (None, 4, 14, 2048)       42658176  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
Total params: 42,658,176
Trainable params: 42,552,832
Non-trainable params: 105,344
_______________________________

In [None]:
length = len(featureFilePaths)    # Variable to hold total number of files

# Calling custom generator to load files one by one
trainBatchGenerator = customBatchLoaderGenerator(featureFilePaths, labelFilePaths, length)

# Extracting deep features file by file. The per-file results are collected in lists and stacked
# once at the end: stacking inside the loop re-copies everything gathered so far on every
# iteration, and the former zero-filled pre-allocation was overwritten on the first iteration.
featureBatches = []
labelBatches = []
for gendata in trainBatchGenerator:
  featureBatches.append(resnetModel.predict(gendata[0]))
  labelBatches.append(gendata[1])

if featureBatches:
  features = np.vstack(featureBatches)
  labels = np.vstack(labelBatches)
else:
  # No files available: keep empty arrays with the expected column counts
  features = np.zeros((0, 2048))
  labels = np.zeros((0, 1), dtype='U32')

In [None]:
# Write the ResNet101 deep features to Drive as a .npy file (only the features are saved here)
deepFeatureDir = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/Deep Features_Augment/"
np.save(deepFeatureDir + "ResNet101_augmentFeatures_128.npy", features)

VGG16 (128, 431, 3)

In [None]:
# VGG16 Model with pretrained ImageNet weights to extract deep features

# Input Layer: one 128 x 431 RGB mel-spectrogram image per sample
input_layer = Input(shape = (128, 431, 3))
# Preprocess Layer
# The VGG16 preprocessing is referenced through its module on purpose: the bare name
# `preprocess_input` is imported three times at the top of the notebook (vgg16, resnet, densenet)
# and the last import wins, so the bare name resolves to the DenseNet variant.
preprocess = Lambda(lambda x: keras.applications.vgg16.preprocess_input(x), name='preprocess')(input_layer)
# Pretrained convolutional base without the classification head
layer_1 = VGG16(include_top=False, weights="imagenet", input_shape=(128, 431, 3))(preprocess)
# Global Average Pooling Layer: collapses the spatial dimensions into one feature vector per sample
average = GlobalAveragePooling2D()(layer_1)

vggModel = Model(inputs=input_layer, outputs=average)
vggModel.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 431, 3)]     0         
                                                                 
 preprocess (Lambda)         (None, 128, 431, 3)       0         
                                                                 
 vgg16 (Functional)          (None, 4, 13, 512)        14714688  
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
__________________________________________

In [None]:
length = len(featureFilePaths)    # Variable to hold total number of files

# Calling custom generator to load files one by one
trainBatchGenerator = customBatchLoaderGenerator(featureFilePaths, labelFilePaths, length)

# Extracting deep features file by file. Each generator item is a (features, labels) pair, so
# only item [0] is fed to the model (previously the whole pair was passed to predict).
# The per-file results are collected in lists and stacked once at the end instead of being
# re-stacked on every iteration.
featureBatches = []
labelBatches = []
for gendata in trainBatchGenerator:
  featureBatches.append(vggModel.predict(gendata[0]))
  labelBatches.append(gendata[1])

if featureBatches:
  features = np.vstack(featureBatches)
  labels = np.vstack(labelBatches)
else:
  # No files available: keep empty arrays with the expected column counts
  features = np.zeros((0, 512))
  labels = np.zeros((0, 1), dtype='U32')

In [None]:
# Write the VGG16 deep features to Drive as a .npy file (only the features are saved here)
deepFeatureDir = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/Deep Features_Augment/"
np.save(deepFeatureDir + "VGG16_augmentFeatures_128.npy", features)

DenseNet121 (128, 431, 3)

In [None]:
# DenseNet121 backbone (pretrained ImageNet weights, no classification head) used to extract deep features

# Shape of one input sample: a 128 x 431 mel-spectrogram image with 3 channels
melImageShape = (128, 431, 3)

# Input Layer
input_layer = Input(shape=melImageShape)
# Preprocess Layer: DenseNet preprocessing, referenced through its module so the cell does not
# depend on which `preprocess_input` import came last
preprocess = Lambda(lambda x: keras.applications.densenet.preprocess_input(x), name='preprocess')(input_layer)
# Pretrained convolutional base
layer_1 = DenseNet121(include_top=False, weights="imagenet", input_shape=melImageShape)(preprocess)
# Global Average Pooling Layer: collapses the spatial dimensions into one feature vector per sample
average = GlobalAveragePooling2D()(layer_1)

densenetModel = Model(inputs=input_layer, outputs=average)
densenetModel.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 431, 3)]     0         
                                                                 
 preprocess (Lambda)         (None, 128, 431, 3)       0         
                                                                 
 densenet121 (Functional)    (None, 4, 13, 1024)       7037504   
                                                                 
 global_average_pooling2d (G  (None, 1024)             0         
 lobalAveragePooling2D)                                          
                                                                 
Total params: 7,037,504
Trainable params: 6,953,856
Non-trainable params: 83,648
______________________________

In [None]:
length = len(featureFilePaths)    # Variable to hold total number of files

# Calling custom generator to load files one by one
trainBatchGenerator = customBatchLoaderGenerator(featureFilePaths, labelFilePaths, length)

# Extracting deep features file by file. Each generator item is a (features, labels) pair, so
# only item [0] is fed to the model (previously the whole pair was passed to predict).
# The per-file results are collected in lists and stacked once at the end instead of being
# re-stacked on every iteration.
featureBatches = []
labelBatches = []
for gendata in trainBatchGenerator:
  featureBatches.append(densenetModel.predict(gendata[0]))
  labelBatches.append(gendata[1])

if featureBatches:
  features = np.vstack(featureBatches)
  labels = np.vstack(labelBatches)
else:
  # No files available: keep empty arrays with the expected column counts
  features = np.zeros((0, 1024))
  labels = np.zeros((0, 1), dtype='U32')

In [None]:
# Write the DenseNet121 deep features to Drive as a .npy file (only the features are saved here)
deepFeatureDir = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data Augmentation/Deep Features_Augment/"
np.save(deepFeatureDir + "DenseNet121_augmentFeatures_128.npy", features)