In [1]:
import glob
import math
import os
import pickle

import librosa
import numpy as np
import scipy

In [None]:
#Convert to pcm16
def convert_urban_pcm24_to_pcm16():
    """Write a PCM_16 copy of every UrbanSound8K clip, fold by fold.

    For each of the ten ``fold<N>`` folders the destination folder is created,
    every ``.wav`` file in the source folder is converted with
    ``subtype='PCM_16'`` and the destination path is recorded. A running count
    is printed after each file; the full list of written paths and its length
    are printed once at the end.

    ``create_directory`` and ``convert_wav`` are helpers that are not defined
    in the visible part of this notebook.
    """
    fold_numbers = range(1, 11)
    src_dir = ['D:/Thesis/UrbanSound8K/audio/fold{:d}'.format(n) for n in fold_numbers]
    dst_dir = ['D:/Thesis/UrbanSound8K-16bit/audio/fold{:d}'.format(n) for n in fold_numbers]

    converted_wav_paths = []
    for source_fold, target_fold in zip(src_dir, dst_dir):
        create_directory(target_fold)
        # Only .wav entries are converted; anything else in the fold is ignored.
        wav_names = [name for name in os.listdir(source_fold) if name.endswith('.wav')]
        for wav_name in wav_names:
            src_wav = os.path.join(source_fold, wav_name)
            dst_wav = os.path.join(target_fold, wav_name)
            convert_wav(src_wav, dst_wav, subtype='PCM_16')
            converted_wav_paths.append(dst_wav)
            print('converted count:', len(converted_wav_paths))
    print(converted_wav_paths, len(converted_wav_paths))


def arange_urban_sound_file_by_class():
    """Copy the UrbanSound8K clips into one sub-folder per sound class.

    The class id is the second ``-``-separated field of the file name (e.g.
    ``100852-0-0-0.wav`` -> class ``0``); it indexes ``CLASSES`` to pick the
    destination folder, whose name has spaces replaced by underscores.

    ``create_directory`` and ``copy_file`` are helpers that are not defined in
    the visible part of this notebook.
    """
    # NOTE(review): clips are read from the original "UrbanSound8K" tree while
    # the destination sits under "UrbanSound8K-16bit" -- confirm that copying
    # the unconverted files is intended.
    src_paths = ["D:/Thesis/UrbanSound8K/audio/fold{:d}".format(i+1) for i in range(10)]
    dst_dir = 'D:/Thesis/UrbanSound8K-16bit/audio-classified'
    CLASSES = [
        'air conditioner',
        'car horn',
        'children playing',
        'dog bark',
        'drilling',
        'engine idling',
        'gun shot',
        'jackhammer',
        'siren',
        'street music']
    CLASSES_STRIPED = [c.replace(' ', '_') for c in CLASSES]
    for src in src_paths:
        for fn in glob.glob(os.path.join(src, "*.wav")):
            # basename() handles whichever separator the platform uses; the
            # previous split on '\\' only isolated the file name on Windows.
            name = os.path.basename(fn)
            lbl = int(name.split('-')[1])
            dst = '{dir}/{label}'.format(dir=dst_dir, label=CLASSES_STRIPED[lbl])
            create_directory(dst)
            copy_file(fn, '{dst}/{name}'.format(dst=dst, name=name))


In [None]:
# One-off dataset preparation: write the PCM_16 copies, then arrange the clips
# into per-class folders.
convert_urban_pcm24_to_pcm16()
arange_urban_sound_file_by_class()

In [2]:
# Manual preparation expected before the cells below are run:
#   - sort the clips into per-class folders and convert them to 16-bit PCM
#   - add the chimp sounds in a folder labelled "chimp"
#   - add the background ("Bg") sounds
# Root folder of the classified dataset.
DATASET_16BIT_PATH = "D:/Thesis/UrbanSound8K-16bit/audio-classified"
#DATASET_16BIT_PATH = "D:/Thesis/Keras/Attempt5/subset"

In [3]:
def getListOfFiles(dirpath):
    """Return the paths of all files below ``dirpath``, searching recursively.

    Entries are visited in ``os.listdir`` order; the files of a sub-directory
    are inserted at the position where that sub-directory was encountered.
    Directories themselves are never part of the result.
    """
    collected = []
    for name in os.listdir(dirpath):
        candidate = os.path.join(dirpath, name)
        if not os.path.isdir(candidate):
            collected.append(candidate)
            continue
        # Descend into the sub-directory and splice its files in here.
        collected.extend(getListOfFiles(candidate))
    return collected

def urban_labels(Y, fpaths):
    """Append the class id encoded in each file name of ``fpaths`` to ``Y``.

    The class id is the second ``-``-separated field of the file name, e.g.
    ``.../dog_bark/13230-3-0-22.wav`` -> ``3``.

    Args:
        Y: existing labels (array or array-like) to extend.
        fpaths: iterable of file paths whose names follow the
            ``<id>-<class>-...`` pattern.

    Returns:
        The flattened result of ``np.append(Y, ids)``. When ``fpaths`` is
        empty, ``Y`` is returned unchanged.

    Raises:
        IndexError: a file name contains no ``-`` separator.
        ValueError: the class field is not an integer.
    """
    # Parse all ids first and append once: np.append copies the whole array on
    # every call, so appending inside the loop is quadratic in the file count.
    new_labels = [int(os.path.split(p)[-1].split('-')[1]) for p in fpaths]
    if not new_labels:
        return Y
    return np.append(Y, new_labels)

def load_sound_files(file_paths, sr=11025, n_mfcc=40):
    """Load one audio file and reduce it to a vector of time-averaged MFCCs.

    Args:
        file_paths: path of a single audio file, passed straight to
            ``librosa.load`` (the plural name is kept for existing callers).
        sr: sample rate the audio is resampled to while loading.
        n_mfcc: number of MFCC coefficients to compute. The network defined
            later in this notebook expects the default of 40.

    Returns:
        The mean over ``axis=0`` of the transposed MFCC matrix, i.e. one
        average value per coefficient.
    """
    signal, sample_rate = librosa.load(file_paths, sr=sr, res_type='kaiser_fast')
    mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Averaging over the frames yields a fixed-size feature vector regardless
    # of the clip length.
    return np.mean(mfcc_frames.T, axis=0)

In [4]:
# Index every file found below the dataset root.
filepaths = np.asarray(getListOfFiles(DATASET_16BIT_PATH))
print("No of entries in our Dataset: ", filepaths.shape)

# Start from an empty array and let urban_labels() append one class id per file.
Y = np.empty(0)
labels = urban_labels(Y, filepaths)
print("Labels : ",labels.shape)

# Persist both arrays with pickle (the files are binary despite the .txt suffix).
for pickle_name, payload in (("filepaths.txt", filepaths), ("labels.txt", labels)):
    with open(pickle_name, "wb") as fp:
        pickle.dump(payload, fp)

No of entries in our Dataset:  (11708,)
Labels :  (11708,)


In [5]:
# Compute the MFCC feature vector of every indexed file; every 100th path is
# printed as a progress marker.
raw = []
for i, clip_path in enumerate(filepaths):
    raw.append(load_sound_files(clip_path))
    if i % 100 == 0:
        print("Raw Sound loaded for :", clip_path)

#np.save("Attempt5/raw",np.asarray(raw))
#del raw

Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\100852-0-0-0.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\13230-0-0-22.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\146714-0-0-41.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\162103-0-0-14.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\177726-0-0-15.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\178686-0-0-43.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\189982-0-0-20.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\204240-0-0-23.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_conditioner\57320-0-0-5.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\air_cond

Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\engine_idling\62567-5-0-1.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\engine_idling\94710-5-0-1.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\gun_shot\145206-6-2-0.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\gun_shot\159710-6-0-0.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\gun_shot\197320-6-9-0.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\jackhammer\103074-7-4-6.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\jackhammer\105029-7-3-2.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\jackhammer\14772-7-2-0.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\jackhammer\165039-7-12-0.wav
Raw Sound loaded for : D:/Thesis/UrbanSound8K-16bit/audio-classified\jackhammer\177537-7-0-1.wav
Raw Sound loaded for : D:/Thesis

FileNotFoundError: [Errno 2] No such file or directory: 'Attempt5/raw.npy'

In [6]:
# np.save() does not create missing folders (an earlier save into "Attempt5/"
# ended in FileNotFoundError), so make sure the cache folder exists first.
os.makedirs("Attempt5-NN", exist_ok=True)
np.save("Attempt5-NN/raw",np.asarray(raw))
#del raw
# Reload from disk so `raw` is the saved ndarray rather than the Python list.
raw = np.load("Attempt5-NN/raw.npy")

In [8]:
from sklearn.model_selection import train_test_split
# Fixed seed: without it every kernel restart produces a different 80/20 split,
# so the metrics reported further down could not be reproduced.
XTrain,XTest,YTrain,YTest=train_test_split(raw,labels,test_size=0.2,random_state=42)

from keras.utils import to_categorical

# One-hot encode the class ids. 12 classes: the ten UrbanSound8K categories
# (0-9) plus chimp (10) and background (11).
YTrain = to_categorical(YTrain, num_classes=12)
YTest = to_categorical(YTest, num_classes=12)

# The features are now held by XTrain / XTest; drop the extra reference.
del raw

In [9]:
import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, GlobalAveragePooling2D
from keras import optimizers
from keras.utils import np_utils
from sklearn import metrics 


In [13]:
# copied
# Fully connected classifier: three Dense(256) + ReLU + Dropout(0.5) stages
# followed by a 12-way softmax output layer.
try_model = Sequential()

# Only the first Dense layer declares the input shape (40 values per sample).
for layer_kwargs in ({'input_shape': (40,)}, {}, {}):
    try_model.add(Dense(256, **layer_kwargs))
    try_model.add(Activation('relu'))
    try_model.add(Dropout(0.5))

try_model.add(Dense(12))
try_model.add(Activation('softmax'))

try_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Print the per-layer overview (output shapes and parameter counts).
try_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 256)               10496     
_________________________________________________________________
activation_5 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               65792     
_________________________________________________________________
activation_6 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               65792     
__________

In [15]:
# Train for 200 epochs with mini-batches of 32 samples.
# NOTE(review): the held-out test split doubles as the validation data here.
try_model.fit(
    np.array(XTrain),
    np.array(YTrain),
    batch_size=32,
    epochs=200,
    validation_data=(np.array(XTest), np.array(YTest)),
)

Train on 9366 samples, validate on 2342 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200


Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200


Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200


Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x277cbf52128>

In [16]:
# evaluate() yields [loss, accuracy] because the model was compiled with
# metrics=['accuracy'].

# Training split
train_evaluate = try_model.evaluate(
    x=np.array(XTrain),
    y=np.array(YTrain),
    batch_size=32,
)
print(train_evaluate)

# Test split
test_evaluate = try_model.evaluate(
    x=np.array(XTest),
    y=np.array(YTest),
    batch_size=32,
)
print(test_evaluate)

[0.2650230877831683, 0.9221652786675208]
[0.39296177146681144, 0.8787361229209183]


In [17]:
#voila

In [18]:
from sklearn.metrics import classification_report
import numpy as np

#Test Set Metrics
Y_test = np.argmax(YTest, axis=1) # Convert one-hot to index
# Sequential.predict_classes() no longer exists in newer Keras releases; the
# argmax over the softmax output gives the same class indices on any version.
y_pred = np.argmax(try_model.predict(XTest), axis=-1)
print(classification_report(Y_test, y_pred))

             precision    recall  f1-score   support

          0       0.80      0.89      0.84       206
          1       0.99      0.86      0.92        99
          2       0.61      0.79      0.69       189
          3       0.84      0.68      0.75       191
          4       0.90      0.85      0.88       188
          5       0.95      0.92      0.93       200
          6       0.97      0.66      0.79        92
          7       0.88      0.98      0.93       213
          8       0.94      0.91      0.93       171
          9       0.76      0.75      0.76       199
         10       0.99      0.99      0.99       289
         11       0.99      1.00      1.00       305

avg / total       0.89      0.88      0.88      2342



In [19]:
#Train Set Metrics
Y_train = np.argmax(YTrain, axis=1) # Convert one-hot to index
# Sequential.predict_classes() no longer exists in newer Keras releases; the
# argmax over the softmax output gives the same class indices on any version.
y_pred = np.argmax(try_model.predict(XTrain), axis=-1)
print(classification_report(Y_train, y_pred))

             precision    recall  f1-score   support

          0       0.85      0.94      0.89       794
          1       1.00      0.95      0.97       330
          2       0.69      0.85      0.76       811
          3       0.95      0.80      0.87       809
          4       0.95      0.90      0.93       812
          5       0.99      0.95      0.97       800
          6       0.97      0.71      0.82       282
          7       0.89      0.99      0.94       787
          8       0.96      0.96      0.96       758
          9       0.89      0.82      0.86       801
         10       1.00      1.00      1.00      1203
         11       1.00      1.00      1.00      1179

avg / total       0.93      0.92      0.92      9366



In [21]:
#testing on other samples
import IPython.display as ipd

# Clips used for the inference check. Commented entries are currently skipped.
inference_clips = [
    'Attempt5-NN/Chimp_inference.wav',
    # 'Attempt5/Chimp_inference2.wav',
    # 'Attempt5/Chimp_inference3.wav',
    'Attempt5-NN/Chimpanzee_Sound_Effect_1.wav',
    'Attempt5-NN/Chimpanzee_Sound_Effect_2.wav',
    # 'Attempt5/Chimpanzee_Sound_Effect_3.wav',
    'Attempt5-NN/Chimpanzee_Sound_Effect_4.wav',
]

# For every clip: store its MFCC feature vector and render an audio player.
inf = []
for clip in inference_clips:
    inf.append(load_sound_files(clip))
    ipd.display(ipd.Audio(clip))


In [24]:
# Sequential.predict_classes() no longer exists in newer Keras releases; the
# argmax over the softmax output gives the same class indices on any version.
inf_pred = np.argmax(try_model.predict(np.array(inf)), axis=-1)
print(inf_pred)

[10 10  3 10]


### Conclusion 
  The 100% precision and recall scores for the chimp calls raise questions about overfitting. <br>
  The model has overfit and does not generalize well to chimp calls that come in other input formats.<br>
  In the example above, the model predicted some chimp calls (class 10) as dog barks (class 3). <br>
  Solution: the dataset needs to be standardized further. <br>
  Adding a few files with different sample rates gave the same overfitting effect. <br>
  Working: appended the chimp files with 334 low-bitrate files and 174 YouTube files of 4 seconds length.


In [23]:
# Raw softmax output for the inference clips: one row per clip, one column per
# class id.
inf_pred = try_model.predict(np.array(inf))
inf_pred

array([[0.0000000e+00, 0.0000000e+00, 1.3622347e-25, 5.9886154e-29,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        7.8717925e-30, 0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [2.6856870e-12, 2.4414741e-11, 7.9392940e-03, 6.4363959e-04,
        6.4986314e-09, 1.3281738e-11, 1.3543891e-07, 6.4560377e-19,
        4.0771301e-06, 9.0443208e-09, 9.9141288e-01, 7.7706753e-18],
       [2.5656612e-03, 1.4101674e-03, 1.9903398e-01, 3.5718465e-01,
        4.6280599e-03, 8.6301500e-03, 8.1848070e-02, 1.0121330e-05,
        1.7681420e-01, 5.0936669e-02, 1.1629251e-01, 6.4575009e-04],
       [2.5963948e-14, 1.9519315e-13, 2.1272654e-05, 9.2184364e-06,
        1.4755312e-11, 1.6721545e-12, 6.5447653e-10, 3.7647406e-22,
        1.1074900e-06, 2.9868255e-10, 9.9996841e-01, 5.3742082e-18]],
      dtype=float32)

0 = air_conditioner
1 = car_horn
2 = children_playing
3 = dog_bark
4 = drilling
5 = engine_idling
6 = gun_shot
7 = jackhammer
8 = siren
9 = street_music
10 = chimp
11 = background

In [None]:
#Raspberry pi