In [0]:
'''
This Notebook provides implementation how to build Neural network based model for Audio Classification. 

A. download urban, flicker dataset and iterate over it to get the MFCC values 
B. Mix urban/flicker audio with noise and get MFCC values
C. Use these generated MFCC values for model building
 
'''

In [0]:
import urllib.request
urllib.request.urlretrieve ("https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz","a.tar.gz")
import tarfile
tar = tarfile.open("a.tar.gz")
tar.extractall()
tar.close()

In [0]:
!wget https://os.unil.cloud.switch.ch/fma/fma_small.zip
!unzip fma_small.zip

In [0]:
!wget https://groups.csail.mit.edu/sls/downloads/flickraudio/downloads/flickr_audio.tar.gz
!gunzip -d flickr_audio.tar.gz
!tar -xvf flickr_audio.tar

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import math

from tqdm import tqdm
from librosa import display
import librosa
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [0]:
def extract_mfcc_scaled_features(file_name):
   
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T,axis=0)
        
    except Exception as e:
        print("Error encountered while parsing file: ", file)
        return None 
     
    return mfccsscaled

In [0]:
max_pad_len = 174

def extract_features_with_padding(file_name):
   
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return None 
     
    return mfccs

def extract_mfcc_features_with_padding(audio, sample_rate):
    try:
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        
    except Exception as e:
        print("Error encountered while parsing file: ")
        return None 
     
    return mfccs

In [0]:
#given a signal, noise (audio) and desired SNR, this gives the noise (scaled version of noise input) that gives the desired SNR

def get_noise_from_sound(signal,noise,SNR):
    RMS_s=np.sqrt(np.mean(signal**2))
    #required RMS of noise
    RMS_n=np.sqrt(RMS_s**2/(pow(10,SNR/20)))
    #current RMS of noise
    RMS_n_current=np.sqrt(np.mean(noise**2))

    noise=noise*(RMS_n/RMS_n_current)
    return noise

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
#extarct samples of original flickr audio
import random
flicker_features= []
urban_features=[]
music_features=[]
original_audio = pd.DataFrame()
crackle_noise_files = ['Crackle_02.wav', 'Crackling_Fireplace.wav', 'Tape_Noise_02.wav']
electric_noise_files= ['Air_FX_01.wav', 'electriccurrent.wav', 'EMOTOR.wav', 'ESPARK1.wav', 'fire.wav', 'Gate_Filtered_01(130BPM).wav', 'ha.wav', 'hi-tensionpower.wav', 'Juno_60_Raw_b.wav']
other_noise_files = ['Noise_09.wav', 'Noise_Hit_01.wav', 'Perc_Hit_06.wav']
white_pink_brown_noise_files = ['brown.wav', 'pink.wav', 'white.wav', 'noise.wav']
spear_noise_files = ['f16noiseR2_16.wav', 'factoryR1_16.wav', 'pinkR5_16.wav', 'volvoR1_16.wav']

Files=os.listdir('/content/flickr_audio/wavs')
n=0
for file in Files:
  n = n +1
  if n%1000 ==0:
    print('processed records', n) 
  
  if n> 3000:
    break;
  
  signal, sample_rate = librosa.load('/content/flickr_audio/wavs/'+ str(file), res_type='kaiser_fast') 
  
  noise_file= str(random.choice(spear_noise_files))
  noise_sample, sr1 = librosa.load('/content/Spear_Noise/'+str(noise_file), res_type='kaiser_fast')
  
  if(len(noise_sample) > len(signal)):
      noise_sample=noise_sample[0:len(signal)]

  if(len(noise_sample) < len(signal)):
      signal=signal[0:len(noise_sample)]

  mixed_noise_signal = get_noise_from_sound(signal,noise_sample,SNR=10)

  trim_signal = librosa.effects.trim(mixed_noise_signal)
  total_duration = 4*sample_rate
  split_signal = trim_signal[0] 
  if len(trim_signal[0]) > total_duration:
    split_signal=trim_signal[0][0:total_duration]

  aud_feature = extract_mfcc_features_with_padding(split_signal, sample_rate)
  if aud_feature is not None:
    class_label = "spear_noise"
    flicker_features.append([aud_feature, class_label])

print('Processing done for Flicker Sound with', len(flicker_features))

inner_n=0
outer_n=0

folders=os.listdir('/content/UrbanSound8K/audio')
for folder in folders:
  if outer_n > 3000:
    break;
  if outer_n%1000 ==0:
    print('processed records', outer_n)
  if folder != '.DS_Store' :
    files = os.listdir('/content/UrbanSound8K/audio/'+ str(folder))
    inner_n = 0
    for file in files:
      if file != '.DS_Store':
        inner_n = inner_n +1
        outer_n = outer_n+1
        if inner_n > 350:
          break;
        try:
          signal, sample_rate = librosa.load('/content/UrbanSound8K/audio/'+ str(folder)+'/'+str(file) , res_type='kaiser_fast') 

          noise_file= str(random.choice(spear_noise_files))
          noise_sample, sr1 = librosa.load('/content/Spear_Noise/'+str(noise_file), res_type='kaiser_fast')

          if(len(noise_sample) > len(signal)):
              noise_sample=noise_sample[0:len(signal)]

          if(len(noise_sample) < len(signal)):
              signal=signal[0:len(noise_sample)]

          mixed_noise_signal = get_noise_from_sound(signal,noise_sample,SNR=10)

          trim_signal = librosa.effects.trim(mixed_noise_signal)
          total_duration = 4*sample_rate
          split_signal = trim_signal[0] 
          if len(trim_signal[0]) > total_duration:
            split_signal=trim_signal[0][0:total_duration]

          aud_feature = extract_mfcc_features_with_padding(split_signal, sample_rate)
          if aud_feature is not None:
            class_label = "spear_noise"
            urban_features.append([aud_feature, class_label])
        except Exception as e:
          print("Error encountered while parsing file: ", file)

print('Processing done for Urban Sound with ', len(urban_features))


inner_n=0
outer_n=0

folders=os.listdir('/content/fma_small')
for folder in folders:
  if outer_n > 3000:
    break;
  if outer_n%1000 ==0:
    print('processed records', outer_n)
  if folder != '.DS_Store' and folder != 'README.txt' and folder != 'checksums' :
    files = os.listdir('/content/fma_small/'+ str(folder))
    inner_n = 0

    for file in files:
      if file != '.DS_Store' and file != 'README.txt' and file != 'checksums':
        inner_n = inner_n +1
        outer_n = outer_n+1
        if inner_n > 100:
          break;
        try:
          signal, sample_rate = librosa.load('/content/fma_small/'+ str(folder)+'/'+str(file) , res_type='kaiser_fast') 

          noise_file= str(random.choice(spear_noise_files))
          print(outer_n , ' : InnerN-> ', inner_n, ' : Folder-> ', folder, ' : File:', file)
          noise_sample, sr1 = librosa.load('/content/Spear_Noise/'+str(noise_file), res_type='kaiser_fast')

          if(len(noise_sample) > len(signal)):
              noise_sample=noise_sample[0:len(signal)]

          if(len(noise_sample) < len(signal)):
              signal=signal[0:len(noise_sample)]

          mixed_noise_signal = get_noise_from_sound(signal,noise_sample,SNR=10)

          trim_signal = librosa.effects.trim(mixed_noise_signal)
          total_duration = 4*sample_rate
          split_signal = trim_signal[0] 
          if len(trim_signal[0]) > total_duration:
            split_signal=trim_signal[0][0:total_duration]

          aud_feature = extract_mfcc_features_with_padding(split_signal, sample_rate)
          if aud_feature is not None:
            class_label = "spear_noise"
            music_features.append([aud_feature, class_label])
        except Exception as e:
          print("Error encountered while parsing file: ", file)

print(len(flicker_features))
print(len(urban_features))
print(len(music_features))

temp = np.concatenate((flicker_features, urban_features), axis=0)
final_noise_feature = np.concatenate((temp, music_features), axis=0)

print(len(final_noise_feature))

featuresdf = pd.DataFrame(final_noise_feature, columns=['feature','class_label'])
spear_noise_X = np.array(featuresdf.feature.tolist())
spear_noise_y = np.array(featuresdf.class_label.tolist())

np.save('/content/drive/My Drive/Audio_Quality_Analysis/data/spear_noise_6K_np_array_Y' , spear_noise_y)
np.save('/content/drive/My Drive/Audio_Quality_Analysis/data/spear_noise_6K_np_array_X' , spear_noise_X)


In [0]:
creckling_noise_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/creckling_noise_9K_np_array_X.npy', allow_pickle=True)
creckling_noise_X = creckling_noise_X[:4000]
creckling_noise_y = np.full((len(creckling_noise_X)), 'noise')
print('creckling_noise len: ', len(creckling_noise_X))

electric_noise_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/electric_noise_9K_np_array_X.npy', allow_pickle=True)
electric_noise_X = electric_noise_X[:4000]
electric_noise_y = np.full((len(electric_noise_X)), 'noise')
print('electric_noise_ len: ', len(electric_noise_X))

other_noise_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/other_noise_9K_np_array_X.npy', allow_pickle=True)
other_noise_X = other_noise_X[:4000]
other_noise_y = np.full((len(other_noise_X)), 'noise')
print('other_noise_ len: ', len(other_noise_X))

real_noise_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/real_noise_9K_np_array_X.npy', allow_pickle=True)
real_noise_X = real_noise_X[:4000]
real_noise_y = np.full((len(real_noise_X)), 'noise')
print('real_noise_ len: ', len(real_noise_X))

flicker_sound_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/flicker_sound_15K_np_array_X.npy', allow_pickle=True)
flicker_sound_X = flicker_sound_X[:9000]
flicker_sound_y = np.full((len(flicker_sound_X)), 'good')
print('flicker_sound_ len: ', len(flicker_sound_X))

music_sound_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/music_sound_8K_np_array_X.npy', allow_pickle=True)
music_sound_y = np.full((len(music_sound_X)), 'good')
print('music_sound_ len: ', len(music_sound_X))

spear_noise_X = np.array(featuresdf.feature.tolist())
spear_noise_y = np.full((len(spear_noise_X)), 'noise') 
print('spear_noise_ len: ', len(spear_noise_X))


creckling_noise len:  4000
electric_noise_ len:  4000
other_noise_ len:  4000
real_noise_ len:  4000
flicker_sound_ len:  9000
music_sound_ len:  7997
spear_noise_ len:  6023


"\nurban_sound_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/result_np_array_X_padding_0410.npy', allow_pickle=True)\nurban_sound_y = np.full((len(urban_sound_X)), 'urban_sound')\nprint('urban_sound_ len: ', len(urban_sound_X))\n"

In [0]:
final_X = np.concatenate((creckling_noise_X, electric_noise_X, other_noise_X, real_noise_X, flicker_sound_X, music_sound_X, spear_noise_X ), axis=0)
final_y = np.concatenate((creckling_noise_y, electric_noise_y, other_noise_y, real_noise_y, flicker_sound_y, music_sound_y, spear_noise_y ), axis=0)

print(len(final_X))
print(len(final_y))

39020
39020


In [0]:
# Encode the classification labels
from sklearn.preprocessing import LabelEncoder
import pickle
from keras.utils import to_categorical
le = LabelEncoder()
yy = to_categorical(le.fit_transform(final_y))

with open('/content/drive/My Drive/Audio_Quality_Analysis/model/labelEncoderFile_With_good_noise_LE_04182020_v1.pkl', 'wb') as fid:
    pickle.dump(le, fid)

In [0]:
print(final_X.shape)
print(yy.shape)

(39020, 40, 174)
(39020, 2)


In [0]:
# split the dataset 
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(final_X, yy, test_size=0.2, random_state = 42)
print(x_train.shape)
print(x_test.shape)

(31216, 40, 174)
(7804, 40, 174)


In [0]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

num_rows = 40
num_columns = 174
num_channels = 1

x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

num_labels = yy.shape[1]
filter_size = 2

# Construct model 
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
model.summary()

# Calculate pre-training accuracy 
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 39, 173, 16)       80        
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 19, 86, 16)        0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 19, 86, 16)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 18, 85, 32)        2080      
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 9, 42, 32)         0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 9, 42, 32)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 8, 41, 64)        

In [0]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

num_epochs = 50
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_mlp.hdf5', verbose=1, save_best_only=True)
start = datetime.now()

# model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)

model.save("/content/drive/My Drive/Audio_Quality_Analysis/model/SoundClassification_model_With_good_noise_V1_04182020_v1.h5")
model.save_weights("/content/drive/My Drive/Audio_Quality_Analysis/model/SoundClassification_Weight_With_good_noise_V1_04182020_v1.h5")

Train on 31216 samples, validate on 7804 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Training completed in time:  1:14:26.269676


In [0]:
# Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.9941068887710571
Testing Accuracy:  0.985007643699646


In [0]:
from keras import models
model = models.load_model('/content/drive/My Drive/Audio_Quality_Analysis/model/SoundClassification_model_With_all_sound_V1_04162020.h5')
#model = tf.keras.models.load_model('/content/drive/My Drive/Image_Quality_Analysis/model/ImageQuality_TL_GD_V1_0408.h5',custom_objects={'KerasLayer':hub.KerasLayer})
model.load_weights('/content/drive/My Drive/Audio_Quality_Analysis/model/SoundClassification_Weight_With_all_sound_V1_04162020.h5')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 39, 173, 16)       80        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 19, 86, 16)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 19, 86, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 85, 32)        2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 9, 42, 32)         0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 9, 42, 32)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 41, 64)        

In [0]:
predict = model1.predict(x_test)
predict=np.round(predict,decimals=5)
result = map(lambda v : np.argmax(v), predict)
prediction_result = np.array(list(result))
print(prediction_result)

actual=np.round(y_test,decimals=5)
actualresult = map(lambda v : np.argmax(v), actual)
actualresult_result = np.array(list(actualresult))
print(actualresult_result)

classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n",confusion_matrix)

print("Accuracy Score: ", accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", recall_score(actualresult_result, prediction_result, average='weighted'))

[6 4 1 ... 1 6 5]
[6 4 1 ... 1 6 5]
Classification report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1789
           1       1.00      1.00      1.00      1839
           2       1.00      1.00      1.00      1821
           3       0.98      0.90      0.94      1557
           4       1.00      1.00      1.00      1792
           5       1.00      1.00      1.00      1905
           6       0.92      0.98      0.95      1770

    accuracy                           0.99     12473
   macro avg       0.99      0.98      0.98     12473
weighted avg       0.99      0.99      0.98     12473

Confusion matrix: 
 [[1789    0    0    0    0    0    0]
 [   0 1839    0    0    0    0    0]
 [   0    0 1818    0    0    0    3]
 [   1    0    2 1406    0    1  147]
 [   0    0    0    0 1791    0    1]
 [   0    0    0    0    0 1905    0]
 [   0    8    0   22    0    2 1738]]
Accuracy Score:  0.9850076164515353
F1 Score:  0.984947

In [0]:
# With only good sound
X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/result_np_array_X_padding_0410.npy', allow_pickle=True)
y = np.full((len(X)), 'urban_sound')

with open('/content/drive/My Drive/Audio_Quality_Analysis/model/labelEncoderFile_With_all_sound_LE_04162020.pkl', 'rb') as fid:
    le_loaded = pickle.load(fid)
yy1 = to_categorical(le_loaded.transform(y))

X = np.expand_dims(X, axis=3)
x_train1, x_test1, y_train1, y_test1 = train_test_split(X, yy1, test_size=0.9, random_state = 42)

predict = model.predict(x_test1)
predict=np.round(predict,decimals=5)
result = map(lambda v : np.argmax(v), predict)
prediction_result = np.array(list(result))
print(prediction_result)

actual=np.round(y_test1,decimals=5)
actualresult = map(lambda v : np.argmax(v), actual)
actualresult_result = np.array(list(actualresult))
print(actualresult_result)
#actualresult_result = np.full((len(y_test1)), 0)

classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n",confusion_matrix)

print("Accuracy Score: ", accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", recall_score(actualresult_result, prediction_result, average='weighted'))

[6 6 6 ... 6 6 6]
[6 6 6 ... 6 6 6]
Classification report: 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       1.00      0.99      1.00      7859

    accuracy                           0.99      7859
   macro avg       0.20      0.20      0.20      7859
weighted avg       1.00      0.99      1.00      7859

Confusion matrix: 
 [[   0    0    0    0    0]
 [   0    0    0    0    0]
 [   0    0    0    0    0]
 [   0    0    0    0    0]
 [   2   17   23    7 7810]]
Accuracy Score:  0.9937651100648938
F1 Score:  0.9968728061778033
Precision Score:  1.0
Recall Score:  0.9937651100648938


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [0]:
import pickle
flicker_sound_X = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/flicker_sound_15K_np_array_X.npy', allow_pickle=True)
flicker_sound_X = flicker_sound_X[9001:len(flicker_sound_X)]
flicker_sound_y = np.full((len(flicker_sound_X)), 'flicker_sound')
print('flicker_sound_ len: ', len(flicker_sound_X))

with open('/content/drive/My Drive/Audio_Quality_Analysis/model/labelEncoderFile_With_all_sound_LE_04162020.pkl', 'rb') as fid:
    le_loaded = pickle.load(fid)

yy1 = to_categorical(le_loaded.transform(flicker_sound_y))

flicker_sound_X = np.expand_dims(flicker_sound_X, axis=3)

x_train1, x_test1, y_train1, y_test1 = train_test_split(flicker_sound_X, yy1, test_size=0.9, random_state = 42)

predict = model.predict(x_test1)
predict=np.round(predict,decimals=5)
result = map(lambda v : np.argmax(v), predict)
prediction_result = np.array(list(result))
print(prediction_result)

actual=np.round(y_test1,decimals=5)
actualresult = map(lambda v : np.argmax(v), actual)
actualresult_result = np.array(list(actualresult))
print(actualresult_result)

classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n",confusion_matrix)

print("Accuracy Score: ", accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", recall_score(actualresult_result, prediction_result, average='weighted'))

flicker_sound_ len:  5998
[2 2 2 ... 2 2 2]
[2 2 2 ... 2 2 2]
Classification report: 
               precision    recall  f1-score   support

           2       1.00      1.00      1.00      5399
           3       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0

    accuracy                           1.00      5399
   macro avg       0.25      0.25      0.25      5399
weighted avg       1.00      1.00      1.00      5399

Confusion matrix: 
 [[5389    5    1    4]
 [   0    0    0    0]
 [   0    0    0    0]
 [   0    0    0    0]]
Accuracy Score:  0.9981478051491017
F1 Score:  0.9990730441230996
Precision Score:  1.0
Recall Score:  0.9981478051491017


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [0]:
import os
features= []
folders = os.listdir('/content/fma_small')
n=0 ;
prcessed_folders=['checksums', 'README.txt', '111','104','027','085','095','091','153','073','062','117','119','140','149','086','101','089','066','084','012','017','070','004','114','053','010','106','040','009','049','130','087','064','050','078','045','002','019','128','005','098','043','103','065','011','032','116','154','148','006','151','034','024','021','097','123','137','099','120','082','033','125','115','008','122','074','113','100','127','139','077','067','129','039','069','041','042','150','143','022','013','075','133','029','044','155','134','023','079','142','124','083','118','141','081','057','018','132','136','072','144','108','145','090','001']
for folder in folders:
  try:
    files = os.listdir('/content/fma_small/'+ str(folder))
    print('Processing Start for: ', folder)
    for file in files:
        if n>100:
          break;
        n = n+1
        try:
          audio, sample_rate = librosa.load('/content/fma_small/'+ str(folder)+'/'+str(file), sr=22050, res_type='kaiser_fast') 
          trim_signal = librosa.effects.trim(audio)
          total_duration = 4*sample_rate
          split_signal = trim_signal[0] 
          if len(trim_signal[0]) > total_duration:
            split_signal=trim_signal[0][0:total_duration]
          
          aud_feature = extract_mfcc_features_with_padding(split_signal, sample_rate)
          if aud_feature is not None:
            class_label = "music_sound"
            features.append([aud_feature, class_label])
        except Exception as e:
          print("Error encountered while parsing file: ", file)
  except Exception as e:
        print("Error encountered while parsing folder: ", folder)


featuresdf_music = pd.DataFrame(features, columns=['feature','class_label'])
music_sound_X = np.array(featuresdf_music.feature.tolist())
music_sound_y = np.array(featuresdf_music.class_label.tolist())
print(len(music_sound_X))
print(music_sound_X.shape)

with open('/content/drive/My Drive/Audio_Quality_Analysis/model/labelEncoderFile_With_all_sound_LE_04162020.pkl', 'rb') as fid:
    le_loaded = pickle.load(fid)

yy1 = to_categorical(le_loaded.transform(music_sound_y))

music_sound_X = np.expand_dims(music_sound_X, axis=3)

x_train1, x_test1, y_train1, y_test1 = train_test_split(music_sound_X, yy1, test_size=0.9, random_state = 42)

predict = model.predict(x_test1)
predict=np.round(predict,decimals=5)
result = map(lambda v : np.argmax(v), predict)
prediction_result = np.array(list(result))
print(prediction_result)

actual=np.round(y_test1,decimals=5)
actualresult = map(lambda v : np.argmax(v), actual)
actualresult_result = np.array(list(actualresult))
print(actualresult_result)

classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n",confusion_matrix)

print("Accuracy Score: ", accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", recall_score(actualresult_result, prediction_result, average='weighted'))

[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 3 3 3 3 3 3 3 6 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
Classification report: 
               precision    recall  f1-score   support

           3       1.00      0.98      0.99        91
           6       0.00      0.00      0.00         0

    accuracy                           0.98        91
   macro avg       0.50      0.49      0.49        91
weighted avg       1.00      0.98      0.99        91

Confusion matrix: 
 [[89  2]
 [ 0  0]]
Accuracy Score:  0.978021978021978
F1 Score:  0.9888888888888888
Precision Score:  1.0
Recall Score:  0.978021978021978


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [0]:
#extarct samples of original flickr audio
import random

flicker_features= []
urban_features=[]
music_features=[]
original_audio = pd.DataFrame()
crackle_noise_files = ['Crackle_02.wav', 'Crackling_Fireplace.wav', 'Tape_Noise_02.wav']
electric_noise_files= ['Air_FX_01.wav', 'electriccurrent.wav', 'EMOTOR.wav', 'ESPARK1.wav', 'fire.wav', 'Gate_Filtered_01(130BPM).wav', 'ha.wav', 'hi-tensionpower.wav', 'Juno_60_Raw_b.wav']
other_noise_files = ['Noise_09.wav', 'Noise_Hit_01.wav', 'Perc_Hit_06.wav']
white_pink_brown_noise_files = ['brown.wav', 'pink.wav', 'white.wav', 'noise.wav']

inner_n=0
outer_n=0

folders=os.listdir('/content/UrbanSound8K/audio')
for folder in folders:
  if outer_n > 30:
    break;
  if outer_n%1000 ==0:
    print('processed records', outer_n)
  if folder != '.DS_Store' :
    files = os.listdir('/content/UrbanSound8K/audio/'+ str(folder))
    inner_n = 0
    for file in files:
      if file != '.DS_Store':
        inner_n = inner_n +1
        outer_n = outer_n+1
        if inner_n > 3:
          break;
        try:
          signal, sample_rate = librosa.load('/content/UrbanSound8K/audio/'+ str(folder)+'/'+str(file) , res_type='kaiser_fast') 
          
          noise_file= str(random.choice(white_pink_brown_noise_files))
          print('Selected noise file: ', noise_file)
          noise_sample, sr1 = librosa.load('/content/Noise/white_pink_brown_noise/'+str(noise_file), res_type='kaiser_fast')
          print('after loading noise signal')
          if(len(noise_sample) > len(signal)):
              noise_sample=noise_sample[0:len(signal)]

          if(len(noise_sample) < len(signal)):
              signal=signal[0:len(noise_sample)]

          mixed_noise_signal = get_noise_from_sound(signal,noise_sample,SNR=10)

          trim_signal = librosa.effects.trim(mixed_noise_signal)
          total_duration = 4*sample_rate
          split_signal = trim_signal[0] 
          if len(trim_signal[0]) > total_duration:
            split_signal=trim_signal[0][0:total_duration]

          aud_feature = extract_mfcc_features_with_padding(split_signal, sample_rate)
          if aud_feature is not None:
            class_label = "real_noise"
            urban_features.append([aud_feature, class_label])
        except Exception as e:
          print("Error encountered while parsing file: ", file)

print('Processing done for Urban Sound with ', len(urban_features))
inner_n=0
outer_n=0

folders=os.listdir('/content/fma_small')
for folder in folders:
  if outer_n > 30:
    break;
  if outer_n%1000 ==0:
    print('processed records', outer_n)
  if folder != '.DS_Store' and folder != 'README.txt' and folder != 'checksums' :
    files = os.listdir('/content/fma_small/'+ str(folder))
    inner_n = 0

    for file in files:
      if file != '.DS_Store' and file != 'README.txt' and file != 'checksums':
        inner_n = inner_n +1
        outer_n = outer_n+1
        if inner_n > 2:
          break;
        try:
          signal, sample_rate = librosa.load('/content/fma_small/'+ str(folder)+'/'+str(file) , res_type='kaiser_fast') 

          noise_file= str(random.choice(white_pink_brown_noise_files))
          print(outer_n , ' : InnerN-> ', inner_n, ' : Folder-> ', folder, ' : File:', file)
          noise_sample, sr1 = librosa.load('/content/Noise/white_pink_brown_noise/'+str(noise_file), res_type='kaiser_fast')

          if(len(noise_sample) > len(signal)):
              noise_sample=noise_sample[0:len(signal)]

          if(len(noise_sample) < len(signal)):
              signal=signal[0:len(noise_sample)]

          mixed_noise_signal = get_noise_from_sound(signal,noise_sample,SNR=10)

          trim_signal = librosa.effects.trim(mixed_noise_signal)
          total_duration = 4*sample_rate
          split_signal = trim_signal[0] 
          if len(trim_signal[0]) > total_duration:
            split_signal=trim_signal[0][0:total_duration]

          aud_feature = extract_mfcc_features_with_padding(split_signal, sample_rate)
          if aud_feature is not None:
            class_label = "real_noise"
            music_features.append([aud_feature, class_label])
        except Exception as e:
          print("Error encountered while parsing file: ", file)

print(len(flicker_features))
print(len(urban_features))
print(len(music_features))

#temp = np.concatenate((flicker_features, urban_features), axis=0)
final_noise_feature = np.concatenate((urban_features, music_features), axis=0)

print(len(final_noise_feature))

featuresdf = pd.DataFrame(final_noise_feature, columns=['feature','class_label'])
real_noise_X = np.array(featuresdf.feature.tolist())
real_noise_y = np.array(featuresdf.class_label.tolist())

with open('/content/drive/My Drive/Audio_Quality_Analysis/model/labelEncoderFile_With_all_sound_LE_04162020.pkl', 'rb') as fid:
    le_loaded = pickle.load(fid)

yy1 = to_categorical(le_loaded.transform(real_noise_y))

real_noise_X = np.expand_dims(real_noise_X, axis=3)

x_train1, x_test1, y_train1, y_test1 = train_test_split(real_noise_X, yy1, test_size=0.9, random_state = 42)

predict = model.predict(x_test1)
predict=np.round(predict,decimals=5)
result = map(lambda v : np.argmax(v), predict)
prediction_result = np.array(list(result))
print(prediction_result)

actual=np.round(y_test1,decimals=5)
actualresult = map(lambda v : np.argmax(v), actual)
actualresult_result = np.array(list(actualresult))
print(actualresult_result)

classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n",confusion_matrix)

print("Accuracy Score: ", accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", recall_score(actualresult_result, prediction_result, average='weighted'))

46
[5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5]
[5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5]
Classification report: 
               precision    recall  f1-score   support

           5       1.00      1.00      1.00        42

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42

Confusion matrix: 
 [[42]]
Accuracy Score:  1.0
F1 Score:  1.0
Precision Score:  1.0
Recall Score:  1.0


In [0]:
from sklearn.model_selection import train_test_split
noise_x = np.load('/content/drive/My Drive/Audio_Quality_Analysis/data/noise_result_np_array_X1.npy', allow_pickle=True)
noise_y = np.full((len(noise_x)), 'real_noise')
#le = LabelEncoder()
yy1 = to_categorical(le.fit_transform(noise_y))

noise_x = np.expand_dims(noise_x, axis=3)

x_train1, x_test1, y_train1, y_test1 = train_test_split(noise_x, yy1, test_size=0.9, random_state = 42)

predict = model.predict(x_test1)
predict=np.round(predict,decimals=5)
result = map(lambda v : np.argmax(v), predict)
prediction_result = np.array(list(result))
print(prediction_result)

actual=np.round(y_test1,decimals=5)
actualresult = map(lambda v : np.argmax(v), actual)
actualresult_result = np.array(list(actualresult))
print(actualresult_result)

classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n",confusion_matrix)

print("Accuracy Score: ", accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", recall_score(actualresult_result, prediction_result, average='weighted'))

In [0]:
actualresult_result = np.full((len(y_test1)), 1)
print(actualresult_result)
print(len(actualresult_result))