In [1]:
import os
import json

import numpy as np
import pandas


from keras.models import Sequential
from keras.layers import Dense, Conv2D, BatchNormalization, Activation, InputLayer, LeakyReLU, Reshape, Flatten
from keras.layers.pooling import MaxPooling1D, MaxPooling2D, AveragePooling1D, AveragePooling2D

from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint

from keras.optimizers import Adam

import librosa

Using TensorFlow backend.


In [2]:
def load_index(fold_id, key='train', n=None):
    """Read one split of a fold's experiment index and return it shuffled.

    Parameters
    ----------
    fold_id : int
        Fold number; selects ``experiment_<fold_id, zero-padded to 2>.json``
        under ``DESTDIR``.
    key : str
        Top-level key of the JSON document to return (``'train'`` by default).
    n : int or None
        If given, keep only the first ``n`` entries of the shuffled order.

    Returns
    -------
    list
        Entries stored under ``key``, in a random order drawn from NumPy's
        global random state.
    """
    index_path = os.path.join(DESTDIR, 'experiment_{:02d}.json'.format(fold_id))

    with open(index_path, 'r') as fdesc:
        entries = json.load(fdesc)[key]

    # Draw the full permutation first, then truncate: subsampling is a
    # prefix of the shuffled order.
    order = np.random.permutation(len(entries))
    if n is not None:
        order = order[:n]

    return [entries[position] for position in order]

In [3]:
def preprocess(X):
    """Square ``X`` elementwise and convert the result to decibels.

    The reference level is the maximum of the squared input (``np.max``) and
    the output is limited to an 80 dB range below that peak (``top_db=80``).

    ``librosa.logamplitude`` was deprecated in librosa 0.5 and removed in 0.6
    in favour of ``librosa.power_to_db``.  The newer function is used when it
    is available, and the original call is kept as a fallback for older
    librosa installations.

    Parameters
    ----------
    X : np.ndarray
        Feature array (presumably magnitudes, since it is squared before the
        dB conversion -- TODO confirm against the feature extraction code).

    Returns
    -------
    np.ndarray
        Array with the same shape as ``X``, in dB relative to the peak.
    """
    power = X**2

    if hasattr(librosa, 'power_to_db'):
        return librosa.power_to_db(power, ref=np.max, top_db=80)

    # librosa < 0.5: only the legacy name/keyword exists.
    return librosa.logamplitude(power, ref_power=np.max, top_db=80)


def load_data(index, n_harm=1):
    """Load preprocessed features and class labels for every entry of ``index``.

    For each entry, ``<DESTDIR>/features/<filename>.npz`` is opened, the first
    ``n_harm`` slices along the leading axis of its ``'C'`` array are passed
    through ``preprocess``, and the entry's ``'classID'`` is recorded.

    Parameters
    ----------
    index : iterable of dict
        Entries with at least ``'filename'`` and ``'classID'`` keys.
    n_harm : int or None
        Number of leading slices of ``'C'`` to keep.  When ``None``, the size
        of the leading axis of the first file's ``'C'`` is used for that file
        and every file after it.

    Returns
    -------
    X : np.ndarray
        Stacked features with axes 1 and 3 swapped, so the ``n_harm`` axis
        ends up last.
    Y : np.ndarray
        Class IDs, one per entry, in the same order as ``index``.
    """
    features, labels = [], []

    for entry in index:
        npz_path = os.path.join(DESTDIR, 'features', '{}.npz'.format(entry['filename']))
        with np.load(npz_path) as data:
            harmonics = data['C']
            if n_harm is None:
                # Resolved once from the first file, then reused for the rest.
                n_harm = harmonics.shape[0]
            features.append(preprocess(harmonics[:n_harm]))
            labels.append(entry['classID'])

    # Stacking gives (item, n_harm, a, b); swapping axes 1 and 3 yields
    # (item, b, a, n_harm).
    X = np.swapaxes(np.asarray(features), 3, 1)
    Y = np.asarray(labels)

    return X, Y

In [4]:
# Root directory holding the experiment_XX.json index files and the
# 'features/' folder read by load_index / load_data.  Set the
# URBANSOUND8K_DIR environment variable to run on another machine; the
# original location remains the default.
DESTDIR = os.environ.get('URBANSOUND8K_DIR', '/home/bmcfee/working/UrbanSound8K/')

In [5]:
# Fold to run; load_index turns this into the file name 'experiment_01.json'.
FOLD_ID = 1

In [6]:
# load_index shuffles with NumPy's global random state.  Seed it before the
# first draw so the shuffled order (and anything downstream that depends on
# sample order, such as the validation split taken by model.fit) is the same
# on every Restart & Run All.
np.random.seed(0)

# Shuffled list of training entries for the selected fold.
index = load_index(FOLD_ID)

In [7]:
# Entries stored under the 'test' key of the same fold file (also returned in
# shuffled order by load_index).
test_index = load_index(FOLD_ID, key='test')

In [75]:
# How many leading slices of each file's 'C' array load_data keeps
# (presumably the number of harmonics -- TODO confirm).  Also embedded in the
# checkpoint file name, so each setting gets its own weights file.
N_HARM = 1

In [76]:
# Training features and integer class IDs; the last axis of X has the size of
# the retained leading 'C' axis (at most N_HARM).
X, Y = load_data(index, n_harm=N_HARM)

In [77]:
# Test features and integer class IDs, loaded with the same N_HARM setting as
# the training data so both share the same layout.
Xt, Yt = load_data(test_index, n_harm=N_HARM)

In [78]:
# Small convolutional classifier built layer by layer.  Several layers read
# model.output_shape while the model is under construction, so the order of
# the statements below matters.
model = Sequential()

# Input matches one sample of X (everything but the batch axis), followed by
# batch normalisation applied directly to the input.
model.add(InputLayer(input_shape=X.shape[1:], name='input'))
model.add(BatchNormalization())

# Block 1: 16 filters.  The third argument works out to (5 - X.shape[3]) * 5,
# i.e. it shrinks as the last axis of X grows (20 when X.shape[3] == 1, 5 when
# it is 4).
# NOTE(review): the positional arguments look like the Keras 1 signature
# (nb_filter, nb_row, nb_col) -- confirm against the installed Keras version.
model.add(Conv2D(16, 5, (4 - X.shape[3] + 1) * 5, bias=False))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

model.add(MaxPooling2D(pool_size=(5, 5)))

# Block 2: 32 filters, 5 x 5.
model.add(Conv2D(32, 5, 5,  bias=False))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

# A second pooling stage was tried here and is currently disabled.
#model.add(MaxPooling2D(pool_size=(5, 5)))

# Block 3: 64 filters; the last kernel dimension is set to the full current
# size of output axis 2, so that axis is consumed by this layer.
model.add(Conv2D(64, 3, model.output_shape[2], bias=False))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))


# Average over the whole remaining extent of output axis 1, then flatten for
# the classifier.
model.add(AveragePooling2D(pool_size=(model.output_shape[1], 1)))
model.add(Flatten())

# 10-way softmax output (labels are one-hot encoded with 10 classes for
# training).
model.add(Dense(10, activation='softmax'))



In [79]:
# Adam with its default settings, categorical cross-entropy loss, and accuracy
# reported as an additional metric.
model.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])

In [80]:
# Checkpoint file, one per N_HARM setting.  Built from DESTDIR instead of
# repeating the absolute directory, so the data location is configured in a
# single place.
weight_fn = os.path.join(DESTDIR, 'models_{}.hdf5'.format(N_HARM))

In [81]:
# Train for at most 30 epochs on one-hot labels (10 classes), holding out 25%
# of the training data for validation.  Training stops early once val_loss has
# not improved for 10 epochs, and ModelCheckpoint writes weights to weight_fn
# only when the monitored quantity improves (save_best_only=True).
history = model.fit(X, to_categorical(Y, nb_classes=10),
                    callbacks=[EarlyStopping('val_loss', patience=10),
                               ModelCheckpoint(weight_fn, save_best_only=True)],
                    validation_split=0.25, batch_size=32, shuffle=True,
                    nb_epoch=30)

Train on 5894 samples, validate on 1965 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [82]:
# Restore the checkpoint saved during training (best epoch only, per
# save_best_only=True) so evaluation does not use the final-epoch weights.
model.load_weights(weight_fn)

In [83]:
print(N_HARM)
# Pin the one-hot width to 10, matching the encoding used for training and the
# 10-unit softmax output.  Without it the width is inferred from the largest
# label present in Yt, which would break if that class were missing from the
# test fold.  Returns [loss, accuracy].
model.evaluate(Xt, to_categorical(Yt, nb_classes=10))

1


[1.3314544711462548, 0.70103092797160282]

In [74]:
# NOTE(review): the output displayed under this cell (N_HARM printed as 2)
# comes from an earlier execution, In [74], made before N_HARM was set to 1 in
# In [75].  Re-running the cell as-is evaluates whatever `model`, `Xt`, `Yt`
# and `N_HARM` currently hold, so it will not reproduce that output.
print(N_HARM)
# One-hot width pinned to 10 to match the training labels and the 10-unit
# softmax output, instead of being inferred from the largest label in Yt.
model.evaluate(Xt, to_categorical(Yt, nb_classes=10))

2


[2.1324297761862492, 0.65063001165958068]

In [64]:
# NOTE(review): the output displayed under this cell (N_HARM printed as 3)
# comes from an earlier execution, In [64], made before N_HARM was set to 1 in
# In [75].  Re-running the cell as-is evaluates whatever `model`, `Xt`, `Yt`
# and `N_HARM` currently hold, so it will not reproduce that output.
print(N_HARM)
# One-hot width pinned to 10 to match the training labels and the 10-unit
# softmax output, instead of being inferred from the largest label in Yt.
model.evaluate(Xt, to_categorical(Yt, nb_classes=10))

3


[1.8588614084193524, 0.65979381470609233]

In [55]:
# NOTE(review): the output displayed under this cell (N_HARM printed as 4)
# comes from an earlier execution, In [55], made before N_HARM was set to 1 in
# In [75].  Re-running the cell as-is evaluates whatever `model`, `Xt`, `Yt`
# and `N_HARM` currently hold, so it will not reproduce that output.
print(N_HARM)
# One-hot width pinned to 10 to match the training labels and the 10-unit
# softmax output, instead of being inferred from the largest label in Yt.
model.evaluate(Xt, to_categorical(Yt, nb_classes=10))

4


[1.9130964667663268, 0.70332187871616181]