In [84]:
import numpy as np
import os
import glob
import pandas as pd
import random
from scipy.ndimage import zoom
from tensorflow import keras
from tensorflow import keras
import tensorflow as tf
from keras import Sequential
from sklearn.model_selection import train_test_split
from keras.layers import Activation,Dense,Dropout
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [3]:
# Mount Google Drive at /content/drive; every data and output path used later in
# this notebook lives under that mount point (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Data Extraction**

In [6]:
####Spectrogram Resizing Function#######
def spec_resize(X, target_width=500):
  """Resample a spectrogram along its last axis to a fixed number of columns.

  Parameters
  ----------
  X : numpy.ndarray
      3-D array whose leading axis is dropped on return and whose last axis
      is the one being resampled.  Presumably shaped (1, n_bins, n_frames);
      confirm against the stored .npy files.
  target_width : int, optional
      Length of the last axis after resampling.  Defaults to 500, the value
      that was previously hard-coded.

  Returns
  -------
  numpy.ndarray
      The 2-D slice ``X[0]`` of the resampled array.
  """
  # Zoom factors of 1 leave the first two axes as they are; only the last axis is rescaled.
  X = zoom(X, (1, 1, target_width / X.shape[2]))
  return X[0]

In [39]:
#######Read CSV File#########
def read_csv(filename):
    """Read a comma-separated file into a ``{first_field: second_field}`` dict.

    Each line is stripped and split on ','; the first field becomes the key
    and the second field the value (any further fields are ignored).  The
    header row is NOT treated specially, so it ends up in the result like any
    other row.  Lines with fewer than two fields (for example blank lines)
    are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    dict
        Mapping of first column to second column; a later duplicate key
        overwrites an earlier one.
    """
    id_label = {}
    with open(filename, 'r') as fid:
        for line in fid:  # e.g. '176787-5-0-27.wav,engine_idling\n'
            tokens = line.strip().split(',')  # ['176787-5-0-27.wav', 'engine_idling']
            if len(tokens) < 2:
                # Blank or malformed line: nothing to map.
                continue
            id_label[tokens[0]] = tokens[1]
    return id_label

# **Test & Training Data Split**

In [85]:
def _load_shuffled_examples(train_directory, file_names, spec_label, map_label_to_int, rng):
    """Load and resize the named spectrograms, then shuffle them together with their labels."""
    examples = [
        (spec_resize(np.load(os.path.join(train_directory, name))),
         map_label_to_int[spec_label[name]])
        for name in file_names
    ]
    # Shuffle (spectrogram, label) pairs jointly so features and targets stay aligned.
    rng.shuffle(examples)
    specs = np.array([spec for spec, _ in examples])
    # Column vector of integer class ids, shape (n_examples, 1); also valid when empty.
    labels = np.array([label for _, label in examples], dtype=int).reshape(len(examples), 1)
    return specs, labels


def train_test_data_split(train_directory, csv_file, train_per_class=50, seed=None):
    """Split the annotated spectrograms into per-class-balanced train and test sets.

    The class list is taken from the unique values of the CSV's ``label``
    column (in order of first appearance) instead of a hard-coded set of ten
    names, so the number of classes returned always matches the classes that
    actually contribute examples.  Files of each class are shuffled once; the
    first ``train_per_class`` go to the training set and the rest to the test
    set.  Both sets are then shuffled as a whole.

    Parameters
    ----------
    train_directory : str
        Directory holding the files named in the first CSV column; each is
        read with ``np.load`` and passed through ``spec_resize``.
    csv_file : str
        Annotation CSV.  Must have a ``label`` column; the first two fields
        of every row are used as (file name, label).
    train_per_class : int, optional
        Number of files per class placed in the training set (default 50,
        the previously hard-coded value).
    seed : int or None, optional
        When given, a private ``random.Random(seed)`` drives all shuffling so
        the split is reproducible.  ``None`` (default) uses the module-level
        ``random`` state, as before.

    Returns
    -------
    tuple
        ``(train_specs, test_specs, train_labels, test_labels, n_classes)``;
        the label arrays are integer column vectors of shape ``(n, 1)``.
    """
    Data = pd.read_csv(csv_file)
    # File name -> label text for every CSV row (the header row is included but
    # ignored below, because its "label" value is not a real class name).
    spec_label = read_csv(csv_file)
    audio_labels = list(Data['label'].unique())
    no_audio_classes = len(audio_labels)
    map_label_to_int = {label: index for index, label in enumerate(audio_labels)}

    rng = random.Random(seed) if seed is not None else random

    # Group file names by class, skipping rows whose label is not a known class.
    files_by_label = {label: [] for label in audio_labels}
    for spec_name, label in spec_label.items():
        if label in files_by_label:
            files_by_label[label].append(spec_name)

    train_files, test_files = [], []
    for label in audio_labels:
        names = files_by_label[label]
        rng.shuffle(names)  # one shuffle per class is enough for a random split
        train_files.extend(names[:train_per_class])
        test_files.extend(names[train_per_class:])

    spec_lst_train_data, pred_train_data = _load_shuffled_examples(
        train_directory, train_files, spec_label, map_label_to_int, rng)
    spec_lst_test_data, pred_test_data = _load_shuffled_examples(
        train_directory, test_files, spec_label, map_label_to_int, rng)

    return spec_lst_train_data, spec_lst_test_data, pred_train_data, pred_test_data, no_audio_classes

# **CNN Model**

In [86]:
def Conv2D(no_classes,input_data):
    """Build and compile the 2-D convolutional classifier.

    Architecture: layer normalisation over axis 2 (the layer is *named*
    'batch_norm' but is a ``LayerNormalization``), four
    conv -> max-pool -> dropout stages, one extra max-pool, flatten, dropout,
    a 64-unit ReLU dense layer with L2 activity regularisation, and a softmax
    output with ``no_classes`` units.

    Parameters
    ----------
    no_classes : int
        Number of units in the softmax output layer.
    input_data : tuple
        Shape of one input sample, passed to ``keras.Input``.

    Returns
    -------
    The compiled Keras model (Adam optimiser, categorical cross-entropy loss,
    accuracy metric).
    """
    # One row per stage: (conv name, pool name, dropout name, filters, kernel size).
    conv_stages = (
        ('conv2d_relu_1', 'max_pool_2d_1', 'dropout_1', 8, (7,7)),
        ('conv2d_relu_2', 'max_pool_2d_2', 'dropout_2', 16, (5,5)),
        ('conv2d_relu_3', 'max_pool_2d_3', 'dropout_3', 16, (3,3)),
        ('conv2d_relu_4', 'max_pool_2d_4', 'dropout_4', 32, (3,3)),
    )

    inputs = keras.Input(shape = input_data)
    features = keras.layers.LayerNormalization(axis=2, name='batch_norm')(inputs)

    for conv_name, pool_name, drop_name, n_filters, kernel in conv_stages:
        features = keras.layers.Conv2D(
            n_filters, kernel_size=kernel, activation='relu',
            padding='same', name=conv_name)(features)
        features = keras.layers.MaxPooling2D(
            pool_size=(2,2), padding='same', name=pool_name)(features)
        features = keras.layers.Dropout(rate=0.2, name=drop_name)(features)

    # Classification head: one more down-sampling step, then flatten and two dense layers.
    features = keras.layers.MaxPooling2D(pool_size=(2,2), padding='same', name='max_pool_2d_5')(features)
    features = keras.layers.Flatten(name='flatten')(features)
    features = keras.layers.Dropout(rate=0.2, name='dropout_5')(features)
    features = keras.layers.Dense(
        64, activation='relu',
        activity_regularizer=tf.keras.regularizers.l2(0.001),
        name='dense')(features)
    output_layer = keras.layers.Dense(no_classes, activation='softmax', name='softmax')(features)

    cnn_model = tf.keras.Model(inputs=inputs, outputs=output_layer, name='2d_convolution')
    cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return cnn_model

# **Model Training**

In [87]:
def cnn_training(spec_train_data,label_train_data,spec_test_data,label_test_data,no_audio_classes,no_epochs):
    """Train the CNN on the training split and evaluate it on the test split.

    Labels are one-hot encoded with ``no_audio_classes`` columns and a
    trailing channel axis of size 1 is added to both spectrogram arrays
    before they reach the network.

    Parameters
    ----------
    spec_train_data, spec_test_data : numpy.ndarray
        Spectrogram batches indexed as (samples, rows, cols).
    label_train_data, label_test_data : array-like of int
        Integer class ids for the corresponding batch.
    no_audio_classes : int
        Number of classes (width of the one-hot encoding and of the softmax).
    no_epochs : int
        Number of training epochs passed to ``fit``.

    Returns
    -------
    The fitted Keras model.
    """
    def add_channel_axis(batch):
        # (samples, rows, cols) -> (samples, rows, cols, 1)
        samples, rows, cols = batch.shape[0], batch.shape[1], batch.shape[2]
        return batch.reshape(samples, rows, cols, 1)

    train_targets = keras.utils.to_categorical(label_train_data, no_audio_classes)
    train_inputs = add_channel_axis(spec_train_data)

    test_targets = keras.utils.to_categorical(label_test_data, no_audio_classes)
    test_inputs = add_channel_axis(spec_test_data)

    # Per-sample shape (rows, cols, 1) defines the network's input layer.
    sample_shape = tuple(train_inputs[0].shape)
    classifier = Conv2D(no_audio_classes, sample_shape)

    # validation_split=0.2 asks fit to hold back part of the training data for validation.
    fit_options = dict(
        validation_split = 0.2,
        verbose=2,
        epochs=no_epochs,
        shuffle = True,
        batch_size=32,
    )
    classifier.fit(x=train_inputs, y=train_targets, **fit_options)

    classifier.evaluate(test_inputs, test_targets)

    return classifier

In [11]:
def save_model(model,filename):
    """Persist ``model`` at ``filename`` by delegating to ``model.save``; returns None."""
    model.save(filename)

In [12]:
def load_model(filename):
    """Return the Keras model stored at ``filename`` (via ``keras.models.load_model``)."""
    return keras.models.load_model(filename)

# **Evaluation: Accuracy of the Model**

In [13]:
# d1: directory of training spectrogram files; d2: annotation CSV (file name, label).
d1 = "/content/drive/MyDrive/EE603_Assignment/train"
d2 = "/content/drive/MyDrive/EE603_Assignment/annotations.csv"
# Build the train/test split; NUM_CLASSES is the number of distinct labels in the CSV.
X_train_data,X_test_data, Y_train_data,Y_test_data,NUM_CLASSES = train_test_data_split(d1,d2)
# NOTE(review): this prints the *training* feature shape next to the *test* label
# shape — confirm that pairing is what was meant.
print(X_train_data.shape,Y_test_data.shape)

(500, 128, 500) (500, 1)


In [14]:
# Number of training epochs handed to cnn_training (and from there to Model.fit).
epochs = 200
# Train on the training split and evaluate on the test split; `m` is the fitted model.
m= cnn_training(X_train_data,Y_train_data,X_test_data,Y_test_data,NUM_CLASSES,epochs)


Epoch 1/200
13/13 - 32s - loss: 2.4076 - accuracy: 0.1025 - val_loss: 2.2752 - val_accuracy: 0.1800 - 32s/epoch - 2s/step
Epoch 2/200
13/13 - 30s - loss: 2.2367 - accuracy: 0.1650 - val_loss: 2.2395 - val_accuracy: 0.1900 - 30s/epoch - 2s/step
Epoch 3/200
13/13 - 30s - loss: 2.0872 - accuracy: 0.2400 - val_loss: 2.0929 - val_accuracy: 0.3700 - 30s/epoch - 2s/step
Epoch 4/200
13/13 - 32s - loss: 1.9149 - accuracy: 0.3175 - val_loss: 1.9432 - val_accuracy: 0.4100 - 32s/epoch - 2s/step
Epoch 5/200
13/13 - 30s - loss: 1.7281 - accuracy: 0.4425 - val_loss: 1.7703 - val_accuracy: 0.4100 - 30s/epoch - 2s/step
Epoch 6/200
13/13 - 30s - loss: 1.5972 - accuracy: 0.4825 - val_loss: 1.7323 - val_accuracy: 0.4700 - 30s/epoch - 2s/step
Epoch 7/200
13/13 - 30s - loss: 1.4336 - accuracy: 0.5550 - val_loss: 1.6073 - val_accuracy: 0.5200 - 30s/epoch - 2s/step
Epoch 8/200
13/13 - 30s - loss: 1.3108 - accuracy: 0.6275 - val_loss: 1.5703 - val_accuracy: 0.5300 - 30s/epoch - 2s/step
Epoch 9/200
13/13 - 30s 

In [15]:
# Persist the trained model and the exact train/test arrays it was fitted and
# evaluated on, so later cells can reload them without re-running the split.
save_model(m,"/content/drive/MyDrive/EE603_Assignment/my_audio_model")
# np.save appends the ".npy" extension to these names.
np.save("/content/drive/MyDrive/EE603_Assignment/X_train_data",X_train_data)
np.save("/content/drive/MyDrive/EE603_Assignment/X_test_data",X_test_data)
np.save("/content/drive/MyDrive/EE603_Assignment/Y_train_data",Y_train_data)
np.save("/content/drive/MyDrive/EE603_Assignment/Y_test_data",Y_test_data)

In [88]:
df = pd.read_csv("/content/drive/MyDrive/EE603_Assignment/annotations.csv")
# Distinct class names, in the order pandas' unique() returns them.
Labels = list(df['label'].unique())
NUM_CLASSES = len(Labels)
Labels_int = list(range(NUM_CLASSES))
# Forward (label name -> class index) and reverse (class index -> label name) lookups.
map_char_to_int = {name: index for index, name in enumerate(Labels)}
map_int_to_char = {index: name for index, name in enumerate(Labels)}

In [17]:
# Reload the model saved earlier (rebinding `m`) and print its layer-by-layer summary.
m= load_model("/content/drive/MyDrive/EE603_Assignment/my_audio_model")
m.summary()

Model: "2d_convolution"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 500, 1)]     0         
                                                                 
 batch_norm (LayerNormalizat  (None, 128, 500, 1)      1000      
 ion)                                                            
                                                                 
 conv2d_relu_1 (Conv2D)      (None, 128, 500, 8)       400       
                                                                 
 max_pool_2d_1 (MaxPooling2D  (None, 64, 250, 8)       0         
 )                                                               
                                                                 
 dropout_1 (Dropout)         (None, 64, 250, 8)        0         
                                                                 
 conv2d_relu_2 (Conv2D)      (None, 64, 250, 16)    

# **Prediction on the Test Set**

In [89]:
def map(Y_pred,map_char_to_int):
  """Translate integer class predictions into their label names.

  NOTE(review): this function shadows Python's built-in ``map`` for every
  cell that runs after it; the name is kept only for backward compatibility.

  Parameters
  ----------
  Y_pred : array-like of int
      Predicted class indices.  Any shape is accepted; values are read in
      flattened (row-major) order, so an (n, 1) column works as well as (n,).
  map_char_to_int : dict
      Label name -> class index.

  Returns
  -------
  list
      Label name for each prediction, in order.  Predictions whose index has
      no entry in ``map_char_to_int`` are skipped, so the result can be
      shorter than the input.
  """
  # Invert the lookup once instead of scanning the dict for every prediction.
  # setdefault keeps the first name seen for an index, matching a first-match scan.
  int_to_char = {}
  for name, index in map_char_to_int.items():
    int_to_char.setdefault(index, name)

  Y_pred_char = []
  for index in np.asarray(Y_pred).ravel().tolist():
    if index in int_to_char:
      Y_pred_char.append(int_to_char[index])

  return Y_pred_char

In [90]:
def map_rev(Y_truth_label,map_int_to_char):
  """Translate label names back into their integer class indices.

  Parameters
  ----------
  Y_truth_label : sequence
      Label names, indexable by position.
  map_int_to_char : dict
      Class index -> label name.

  Returns
  -------
  list
      For each label, the first key of ``map_int_to_char`` whose value equals
      it.  Labels with no matching value are skipped, so the result can be
      shorter than the input.
  """
  not_found = object()  # sentinel, so a legitimate key such as 0 or None is never mistaken for "no match"
  Y_truth_int = []

  for position in range(len(Y_truth_label)):
    first_match = next(
        (index for index, name in map_int_to_char.items()
         if name == Y_truth_label[position]),
        not_found)
    if first_match is not not_found:
      Y_truth_int.append(first_match)

  return Y_truth_int

In [75]:
####################### PREDICTION ON TEST SET ####################################

directory = "/content/drive/MyDrive/EE603_Assignment/test"
lis_dir = {}         # file name -> predicted label name
test_pred_int = []   # predicted class index per file, in directory-listing order
# os.listdir on a str path already yields str names, so no fsencode/fsdecode
# round trip is needed.
for fil in os.listdir(directory):
  X = spec_resize(np.load(os.path.join(directory, fil)))
  # The batch holds a single spectrogram, so argmax yields a 1-element array.
  # Index it explicitly: calling int() on a 1-element array is deprecated in
  # recent NumPy releases.
  label = int(np.argmax(m.predict(np.array([X])), axis=1)[0])
  test_pred_int.append(label)
  # Reverse lookup built next to map_char_to_int; an index with no known label
  # is left out of lis_dir, as before.
  if label in map_int_to_char:
    lis_dir[fil] = map_int_to_char[label]

In [78]:
# Show the file-name -> predicted-label mapping filled in by the prediction loop.
lis_dir
#test_pred_int

{'MLSP_test1071.npy': 'Vehicle_horn_and_car_horn_and_honking',
 'MLSP_test1163322.npy': 'Crying_and_sobbing',
 'MLSP_test1194176.npy': 'Microwave_oven',
 'MLSP_test1134284.npy': 'Crying_and_sobbing',
 'MLSP_test1116162.npy': 'Meow',
 'MLSP_test1192345.npy': 'Meow',
 'MLSP_test1053175.npy': 'Microwave_oven',
 'MLSP_test1328457.npy': 'Walk_and_footsteps',
 'MLSP_test1186185.npy': 'Meow',
 'MLSP_test113179.npy': 'Walk_and_footsteps',
 'MLSP_test1077269.npy': 'Microwave_oven',
 'MLSP_test1516233.npy': 'Microwave_oven',
 'MLSP_test1498106.npy': 'Bark',
 'MLSP_test2154380.npy': 'Walk_and_footsteps',
 'MLSP_test1457299.npy': 'Microwave_oven',
 'MLSP_test1362223.npy': 'Microwave_oven',
 'MLSP_test1443477.npy': 'Microwave_oven',
 'MLSP_test208923.npy': 'Siren',
 'MLSP_test1373407.npy': 'Microwave_oven',
 'MLSP_test140846.npy': 'Shatter',
 'MLSP_test1379148.npy': 'Shatter',
 'MLSP_test1357311.npy': 'Shatter',
 'MLSP_test1404304.npy': 'Bark',
 'MLSP_test1451403.npy': 'Walk_and_footsteps',
 'MLSP_

# **Final Test CSV File**

In [79]:
########################## Converting to .csv ##################################

import csv
header_name = ['FileId','Prediction']
# newline='' hands line-ending control to the csv module, and lineterminator='\n'
# makes the header and the data rows end the same way (previously the header was
# written with '\r\n' by DictWriter while the rows were written with '\n' by hand).
with open("/content/drive/MyDrive/EE603_Assignment/test.csv",'w', newline='') as f:
  dw = csv.DictWriter(f, fieldnames=header_name, lineterminator='\n')
  dw.writeheader()
  # Route every row through the writer so quoting/escaping is handled consistently.
  for key, prediction in lis_dir.items():
    dw.writerow({'FileId': key, 'Prediction': prediction})

In [80]:
# Read the prediction CSV back as a sanity check (rebinds `df`) and print it.
df = pd.read_csv("/content/drive/MyDrive/EE603_Assignment/test.csv")
#np.save("/content/drive/MyDrive/EE603_Assignment/test",)
print(df)

                  FileId                             Prediction
0      MLSP_test1071.npy  Vehicle_horn_and_car_horn_and_honking
1   MLSP_test1163322.npy                     Crying_and_sobbing
2   MLSP_test1194176.npy                         Microwave_oven
3   MLSP_test1134284.npy                     Crying_and_sobbing
4   MLSP_test1116162.npy                                   Meow
..                   ...                                    ...
95  MLSP_test2085405.npy                                   Bark
96  MLSP_test2062232.npy                                  Siren
97  MLSP_test2072214.npy                                  Siren
98  MLSP_test2077439.npy                                  Siren
99  MLSP_test2082223.npy                                  Siren

[100 rows x 2 columns]


# **Accuracy, Precision, Recall, F1 Score, Confusion Matrix**

In [91]:
####Calculated Accuracy, Precision, Recall etc. for "DUMMY TEST DATA"######

# NOTE(review): both paths point at the *predictions* file, so the "ground
# truth" below is the model's own output and every metric computed from it is
# trivially perfect.  Point gnd_truth_csv_file at the real annotation file
# before trusting the reported numbers.
gnd_truth_csv_file = "/content/drive/MyDrive/EE603_Assignment/test.csv"
test_csv_file = "/content/drive/MyDrive/EE603_Assignment/test.csv"

id_label_gnd = read_csv(gnd_truth_csv_file)
id_label_test = read_csv(test_csv_file)

header_name = ['Fname','Label']
# newline='' plus lineterminator='\n' keeps header and rows on the same line ending.
with open("/content/drive/MyDrive/EE603_Assignment/gnd_truth.csv",'w', newline='') as f:
  dw = csv.DictWriter(f, fieldnames=header_name, lineterminator='\n')
  dw.writeheader()
  for key in id_label_test.keys():
    # read_csv keeps the header row of test.csv as an ordinary entry; it is not
    # a file, and looking it up in a real ground-truth file would raise KeyError.
    if key == 'FileId':
      continue
    dw.writerow({'Fname': key, 'Label': id_label_gnd[key]})

In [82]:
# Load the ground-truth CSV written above and convert its label names to the
# integer class ids used by the model.
df = pd.read_csv("/content/drive/MyDrive/EE603_Assignment/gnd_truth.csv")
gnd_truth_label = list(df.Label)
#print(gnd_truth_label)
# map_rev drops labels that are not values of map_int_to_char, so the result
# may be shorter than gnd_truth_label.
gnd_truth_int = map_rev(gnd_truth_label, map_int_to_char)
#print(gnd_truth_int)

In [83]:
# Accuracy takes no averaging argument; the other scores are macro-averaged over classes.
print('Accuracy: %.3f' % accuracy_score(gnd_truth_int, test_pred_int))
for report_format, score_fn in (
    ('Precision: %.3f', precision_score),
    ('Recall: %.3f', recall_score),
    ('F1 Score: %.3f', f1_score),
):
  print(report_format % score_fn(gnd_truth_int, test_pred_int, average="macro"))
print('Confusion Matrix: ')
print(confusion_matrix(gnd_truth_int, test_pred_int))

Accuracy: 1.000
Precision: 1.000
Recall: 1.000
F1 Score: 1.000
Confusion Matrix: 
[[ 9  0  0  0  0  0  0  0  0  0]
 [ 0 11  0  0  0  0  0  0  0  0]
 [ 0  0  9  0  0  0  0  0  0  0]
 [ 0  0  0 13  0  0  0  0  0  0]
 [ 0  0  0  0  1  0  0  0  0  0]
 [ 0  0  0  0  0 10  0  0  0  0]
 [ 0  0  0  0  0  0 16  0  0  0]
 [ 0  0  0  0  0  0  0 11  0  0]
 [ 0  0  0  0  0  0  0  0  6  0]
 [ 0  0  0  0  0  0  0  0  0 14]]
