In [27]:
import os
import pandas as pd

# Folder holding the dataset's CSV metadata (machine-specific absolute path).
data_dir='C:/Users/ashwin.s/Documents/Urban Sound Classification/Data' 
# Training metadata. save_features later indexes column 0 of each row as the
# clip's file stem and column 1 as its label.
train = pd.read_csv(os.path.join(data_dir, 'train_1.csv'))

# Relative frequency of every class; being the cell's last expression, this
# is the value the notebook displays.
train.Class.value_counts(normalize=True)

# Disabled debugging aid: convert to a raw array and peek at the first row.
#train=train.values
#print(train[0],train.shape[0])

jackhammer          0.122907
engine_idling       0.114811
siren               0.111684
drilling            0.110396
dog_bark            0.110396
air_conditioner     0.110396
children_playing    0.110396
street_music        0.110396
car_horn            0.056302
gun_shot            0.042318
Name: Class, dtype: float64

In [28]:
import os
import glob
import librosa
import numpy as np
import math

def windows(data, window_size):
    """Yield ``(start, end)`` index pairs of half-overlapping windows over ``data``.

    Consecutive windows are offset by half a window. The final pairs may run
    past ``len(data)``; callers are expected to discard windows whose slice
    is shorter than ``window_size``.

    Parameters
    ----------
    data : sized sequence
        Only ``len(data)`` is used.
    window_size : int
        Window length in samples; must be positive.

    Yields
    ------
    tuple of (int, int)
        Start (inclusive) and end (exclusive) indices, always integers so
        they can be used directly as slice bounds.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive (the loop could never advance).
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive, got %r" % (window_size,))

    index = 0
    while True:
        # Floor of index * window_size / 2: the same position the float
        # accumulator reached after int() truncation, but computed exactly
        # and kept as an int (true division would produce floats).
        start = (index * window_size) // 2
        if start >= len(data):
            break
        yield start, start + window_size
        index += 1
        
def extract_features(filename, bands = 60, frames = 41):
    """Turn one audio file into a stack of log-mel / delta feature "images".

    The clip is cut into half-overlapping windows of ``512 * (frames - 1)``
    samples (512 matches librosa's default hop length, so each window gives
    exactly ``frames`` spectrogram columns). Every full window becomes a
    ``(bands, frames)`` log-scaled mel spectrogram plus its delta.

    Parameters
    ----------
    filename : str
        Path of the audio file to load with ``librosa.load``.
    bands : int
        Number of mel bands (spectrogram rows).
    frames : int
        Number of time frames per window (spectrogram columns).

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n_windows, bands, frames, 2)``. Channel 0
        is the log-mel spectrogram in dB, channel 1 its delta along time.
        ``n_windows`` is 0 when the file yields no full window.

    Raises
    ------
    ValueError
        If a window's spectrogram does not have shape ``(bands, frames)``.
    """
    window_size = 512 * (frames - 1)
    sound_clip, sample_rate = librosa.load(filename)

    # A clip with no samples has no windows; returning here also avoids a
    # division by zero in the short-clip handling below.
    if len(sound_clip) == 0:
        return np.empty((0, bands, frames, 2))

    if len(sound_clip) < window_size:
        # Clip shorter than one window: reload at an integer multiple of the
        # sample rate so it contains at least window_size samples. The
        # spectrogram below still assumes the default rate, so the clip is
        # effectively slowed down rather than padded.
        stretch = math.ceil(window_size / len(sound_clip))
        sound_clip, sample_rate = librosa.load(filename, sr=sample_rate * stretch)

    log_specgrams = []
    for start, end in windows(sound_clip, window_size):
        # int() keeps this working whether windows() yields ints or floats.
        signal = sound_clip[int(start):int(end)]
        if len(signal) != window_size:
            continue  # trailing partial window
        # Keyword `y=`: newer librosa releases reject a positional signal.
        melspec = librosa.feature.melspectrogram(y=signal, n_mels=bands)
        if melspec.shape != (bands, frames):
            raise ValueError(
                "expected a (%d, %d) mel spectrogram, got %r"
                % (bands, frames, melspec.shape))
        # melspectrogram returns a power spectrogram by default, so the
        # matching dB conversion is power_to_db (not amplitude_to_db).
        log_specgrams.append(librosa.power_to_db(melspec))

    if not log_specgrams:
        return np.empty((0, bands, frames, 2))

    # Keep each spectrogram in its native (bands, frames) layout. Flattening
    # the transpose and reshaping back to (bands, frames) would scramble the
    # time/frequency axes and make the delta meaningless.
    log_specgrams = np.asarray(log_specgrams, dtype=np.float64)
    deltas = np.stack([librosa.feature.delta(spec) for spec in log_specgrams])

    return np.stack((log_specgrams, deltas), axis=-1)

In [29]:
import numpy as np

def save_features(df,data_dir):
    """Extract features for every clip listed in ``df`` and pair them with labels.

    Parameters
    ----------
    df : 2-D array-like
        Row ``i`` is read as ``df[i][0]`` (file stem, ``.wav`` is appended)
        and ``df[i][1]`` (class label). ``df.shape[0]`` gives the row count.
    data_dir : str
        Directory containing the ``.wav`` files.

    Returns
    -------
    fea : numpy.ndarray
        Array of shape ``(total_windows, 60, 41, 2)``: the per-clip outputs
        of ``extract_features`` stacked in row order.
    labels : list
        One label per window, aligned with the first axis of ``fea``.
    """
    # Seed with an empty array so the result is well-formed (and float64)
    # even when df has no rows or no clip yields a window.
    chunks = [np.empty((0,60,41,2))]
    labels = []

    for i in range(df.shape[0]):
        row = df[i]
        filename = os.path.join(data_dir, str(row[0]) + '.wav')
        feature = extract_features(filename)

        chunks.append(feature)
        # Every window of a clip inherits the clip's label.
        labels.extend([row[1]] * feature.shape[0])

    # Concatenate once at the end; concatenating inside the loop would copy
    # the whole accumulated array again for every file.
    return np.concatenate(chunks, axis=0), labels
   

In [30]:
import pickle

# Dataset root and the folder holding the training .wav clips
# (machine-specific absolute paths).
data_dir='C:/Users/ashwin.s/Documents/Urban Sound Classification/Data' 
dir1 ='C:/Users/ashwin.s/Documents/Urban Sound Classification/Data/Train'

# Hand the raw row array to save_features without rebinding `train`:
# `train = train.values` would make this cell fail with AttributeError the
# second time it is run, because the array has no `.values`.
cnnX,cnnY=save_features(train.values,dir1)
print(cnnX.shape,len(cnnY))

# Cache the extracted features and their per-window labels so later cells
# (or a fresh kernel) can reload them instead of re-reading every clip.
with open('cnnX.pickle', 'wb') as handle:
    pickle.dump(cnnX, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('cnnY.pickle', 'wb') as handle:
    pickle.dump(cnnY, handle, protocol=pickle.HIGHEST_PROTOCOL)


(33850, 60, 41, 2) 33850


In [1]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
import pickle

# Reload the cached training features and per-window string labels.
# NOTE: pickle.load executes arbitrary code from the file; only load
# pickles this notebook produced itself.
with open('cnnX.pickle','rb') as handle:
     cnnX=pickle.load(handle)
with open('cnnY.pickle','rb') as handle:
     cnnY=pickle.load(handle)


# Encoder that turns the string labels into integer class codes; kept in
# `lb` so the fitted mapping stays available after this cell.
lb=LabelEncoder();

# Replace the string labels with their one-hot encoding (one column per
# class), the target format for the categorical cross-entropy loss.
cnnY=np_utils.to_categorical(lb.fit_transform(cnnY))
# Sanity check: first one-hot row, label matrix shape, feature tensor shape.
print(cnnY[0],cnnY.shape,cnnX.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.] (33850, 10) (33850, 60, 41, 2)


In [2]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score

def build_model(num_classes=None, input_shape=(60, 41, 2)):
    """Build and compile the CNN used to classify feature windows.

    Architecture: two blocks of (two 1x1 convolutions, ReLU, 2x2 max-pooling,
    50% dropout) with 48 and 96 filters, then a 256-unit dense layer with
    ReLU and dropout, then a softmax output layer.

    Parameters
    ----------
    num_classes : int, optional
        Number of output classes. When omitted it falls back to
        ``cnnY.shape[1]``, the width of the notebook's global one-hot label
        matrix, which is what the function always used before.
    input_shape : tuple of int
        Shape of one input sample; defaults to 60x41 frames with 2 channels.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy, the Adam optimiser
        and an accuracy metric.
    """
    if num_classes is None:
        # Backward-compatible default: read the class count from the global
        # label matrix prepared by the data-loading cell.
        num_classes = cnnY.shape[1]

    model = Sequential()
    # input: 60x41 data frames with 2 channels => (60,41,2) tensors by default

    # filters of size 1x1
    f_size = 1

    # first block has 48 convolution filters
    # NOTE(review): the two convolutions in each block have no activation
    # between them - confirm this is intended.
    model.add(Convolution2D(48, f_size, strides=f_size, kernel_initializer='normal', padding='same', input_shape=input_shape))
    model.add(Convolution2D(48, f_size, strides=f_size, kernel_initializer='normal', padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    # next block has 96 convolution filters
    model.add(Convolution2D(96, f_size, strides=f_size, kernel_initializer='normal', padding='same'))
    model.add(Convolution2D(96, f_size, strides=f_size, kernel_initializer='normal', padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    # flatten output into a single dimension
    # Keras will do shape inference automatically
    model.add(Flatten())

    # then a fully connected NN layer
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    # finally, an output layer with one node per class
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # use the Adam optimiser
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=adam)

    return model

In [3]:
model = build_model()

# Stop training once val_loss has failed to improve for `patience` epochs in
# a row, so we do not keep fitting long after the model starts to overfit.
earlystop = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto')

# Fit the model, holding out 3% of the samples for validation. The callback
# only takes effect when it is passed to fit(); without `callbacks=` the
# EarlyStopping object above is never consulted and all epochs run.
# NOTE(review): validation_split takes the held-out samples from the end of
# the arrays, i.e. windows of the last clips processed - confirm that is an
# acceptable validation set.
print("Training model...")
history=model.fit(cnnX, cnnY, batch_size=20, epochs=150,validation_split=0.03, callbacks=[earlystop])

Training model...
Train on 32834 samples, validate on 1016 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
  420/32834 [..............................] - ETA: 5:05 - loss: 0.7809 - acc: 0.7095

KeyboardInterrupt: 

In [5]:
from matplotlib import pyplot as plt

# Training vs. validation loss per epoch, read from the object returned by
# model.fit. Requires `history` to exist, i.e. the training cell must have
# run to completion (an interrupted fit never assigns it).
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# Legend entries follow the order of the two plot() calls above.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

NameError: name 'history' is not defined

In [6]:
def test_features(df,data_dir):#labels
    fea=[]
    id=[]
    
    for i in range (df.shape[0]):
        filename=os.path.join(data_dir,str(df[i][0])+'.wav')
    
        feature = extract_features(filename)
        
        fea.append(feature)
        id.append(df[i])

    return fea,id

In [9]:
import pickle

# Folder holding the test .wav clips (machine-specific absolute path).
dir_test='C:/Users/ashwin.s/Documents/Urban Sound Classification/Data/Test'

# Test metadata; `data_dir` must already be defined by an earlier cell.
test = pd.read_csv(os.path.join(data_dir, 'test_data.csv'))
test=test.values

# cnnX_test: one feature array per clip. cnnY_test: despite the name, these
# are the CSV rows returned by test_features (used later for clip IDs), not
# labels.
cnnX_test,cnnY_test=test_features(test,dir_test)
print(len(cnnX_test) , cnnX_test[0].shape , len(cnnY_test))


# Cache both lists so prediction can be re-run without re-reading the audio.
with open('cnnX_test.pickle', 'wb') as handle:
     pickle.dump(cnnX_test, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
with open('cnnY_test.pickle', 'wb') as handle:
     pickle.dump(cnnY_test, handle, protocol=pickle.HIGHEST_PROTOCOL)

KeyboardInterrupt: 

In [8]:
import numpy as np

# Reload the cached per-clip test features and the matching CSV rows.
# NOTE: pickle.load executes arbitrary code from the file; only load
# pickles this notebook produced itself.
with open('cnnX_test.pickle','rb') as handle:
    cnnX_test=pickle.load(handle)
with open('cnnY_test.pickle','rb') as handle:
    cnnY_test=pickle.load(handle)

# Clip-level prediction by majority vote over the clip's windows.
labels=[]
id=[]  # shadows the builtin id(); name kept because the export cell reads it
for i, feature in enumerate(cnnX_test):
    if feature.shape[0]:
        # Score all windows of the clip in a single predict() call instead
        # of one call per window; the per-window argmax is the same.
        scores = model.predict(feature)
        votes = np.bincount(scores.argmax(axis=-1), minlength=scores.shape[1])
    else:
        # A clip with no windows gets an all-zero tally, so argmax falls
        # back to class 0 exactly as before.
        votes = np.zeros((10,))
    # Ties resolve to the lowest class index (argmax picks the first max).
    labels.append(votes.argmax(axis=-1))
    id.append(cnnY_test[i][0])

print(len(labels) , labels[1])

3297 3


In [9]:
# Lookup table from predicted class index to class name. The names are
# listed alphabetically; presumably this mirrors the integer codes the label
# encoder assigned during training - verify against the fitted encoder.
d = dict(enumerate([
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]))


In [12]:
import pandas as pd

# Translate every predicted class index into its class name via `d`.
resultL = [d[class_index] for class_index in labels]

# Two-column result table: predicted class name and the clip's ID.
temp2 = pd.DataFrame({'Class': resultL, 'ID': id})

# Write the predictions without the DataFrame index column.
temp2.to_csv('C:/Users/ashwin.s/Documents/Urban Sound Classification/Data/result_CNN.csv',index=False)