In [1]:
# retrieve the preprocessed data from previous notebook

%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r le

In [2]:
import numpy as np

def extract_features(file_name):
   
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return None 
     
    return mfccs

In [3]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the full UrbanSound dataset 
fulldatasetpath = 'sample audio'

metadata = pd.read_csv('esc50.csv')

features = []

# Iterate through each sound file and extract the features 
for index, row in metadata.iterrows():
    
    file_name = os.path.join(os.path.abspath(fulldatasetpath),str(row["filename"]))
    
    class_label = row["category"]
    data = extract_features(file_name)
    
    features.append([data, class_label])

# Convert into a Panda dataframe 
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files') 

Finished feature extraction from  2000  files


In [4]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Convert features and corresponding classification labels into numpy arrays
X = np.array(featuresdf.feature.tolist())
y = np.array(featuresdf.class_label.tolist())

# Encode the classification labels
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y)) 

# split the dataset 
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)

Using TensorFlow backend.


#### The following is the CNN model. 1st input layer: (40,216,1), activation: Relu, Kernal filter zixe: (2x2), dropout: 20%, output layer: 50 nodes because there are 50 classes. last layer will be dense layer with softmax activation function.

In [5]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

num_rows = 40
num_columns = 216
num_channels = 1

x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

num_labels = yy.shape[1]
filter_size = 2

# Construct model 
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax')) 

In [6]:
# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam') 

In [7]:
# Display model architecture summary 
model.summary()

# Calculate pre-training accuracy 
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 39, 215, 16)       80        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 19, 107, 16)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 19, 107, 16)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 106, 32)       2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 9, 53, 32)         0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 9, 53, 32)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 52, 64)        

##  Training

In [20]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

#num_epochs = 40
#num_batch_size = 128 
#test acc: 58.2%

num_epochs = 50
num_batch_size = 128

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_cnn.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 1600 samples, validate on 400 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 2.59283, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 2.59283 to 2.56055, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 3/50

Epoch 00003: val_loss improved from 2.56055 to 2.44581, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 4/50

Epoch 00004: val_loss improved from 2.44581 to 2.44319, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 5/50

Epoch 00005: val_loss did not improve from 2.44319
Epoch 6/50

Epoch 00006: val_loss did not improve from 2.44319
Epoch 7/50

Epoch 00007: val_loss did not improve from 2.44319
Epoch 8/50

Epoch 00008: val_loss did not improve from 2.44319
Epoch 9/50

Epoch 00009: val_loss did not improve from 2.44319
Epoch 10/50

Epoch 00010: val_loss did not improve from 2.44319
Epoch 11/50

Epoch 00011: val_loss did not improve from 2.44319
Epoch 12/50

E


Epoch 00041: val_loss did not improve from 2.44319
Epoch 42/50

Epoch 00042: val_loss did not improve from 2.44319
Epoch 43/50

Epoch 00043: val_loss did not improve from 2.44319
Epoch 44/50

Epoch 00044: val_loss did not improve from 2.44319
Epoch 45/50

Epoch 00045: val_loss did not improve from 2.44319
Epoch 46/50

Epoch 00046: val_loss did not improve from 2.44319
Epoch 47/50

Epoch 00047: val_loss did not improve from 2.44319
Epoch 48/50

Epoch 00048: val_loss did not improve from 2.44319
Epoch 49/50

Epoch 00049: val_loss did not improve from 2.44319
Epoch 50/50

Epoch 00050: val_loss did not improve from 2.44319
Training completed in time:  0:06:16.276303


In [21]:
# Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.9893749952316284
Testing Accuracy:  0.5849999785423279


In [22]:
# function for prediction on new wav files
def print_prediction(file_name):
    prediction_feature = extract_features(file_name) 
    prediction_feature = prediction_feature.reshape(1, num_rows, num_columns, num_channels)

    predicted_vector = model.predict_classes(prediction_feature)
    predicted_class = le.inverse_transform(predicted_vector) 
    print("The predicted class is:", predicted_class[0], '\n') 

    predicted_proba_vector = model.predict_proba(prediction_feature) 
    predicted_proba = predicted_proba_vector[0]
    for i in range(len(predicted_proba)): 
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f') )

## Validation

In [30]:
filename = 'validation audio/siren.wav'
print_prediction(filename) 
# need a wav file with (40,2016,1) for testing

ValueError: cannot reshape array of size 6920 into shape (1,40,216,1)