In [1]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
from matplotlib.pyplot import specgram
import math
from random import shuffle
%matplotlib inline
# Global figure styling: start from the ggplot theme, then override fonts and
# text sizes for every figure drawn afterwards.
plt.style.use('ggplot')

plt.rcParams.update({
    # fonts
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    # axes
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 14,
    # ticks, legend and figure title
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

In [2]:
def appendSounds(data, labels, fp, target_sr=10000, n_mels=60, hop_length=506, slice_size=10):
    """Load one audio file and append its mel-spectrogram slices to `data`/`labels`.

    The file is read with soundfile, mixed down to mono, resampled to
    `target_sr`, and converted to a mel spectrogram. The spectrogram is then cut
    into consecutive, non-overlapping windows of `slice_size` frames; each window
    is flattened to a 1-D vector of `n_mels * slice_size` values and appended to
    `data`, with the file's class number appended to `labels`. A trailing
    partial window (fewer than `slice_size` frames) is dropped.

    Parameters
    ----------
    data : list
        Output list; one flattened spectrogram slice is appended per window.
    labels : list
        Output list; the integer class number is appended once per window.
    fp : str
        Path to the audio file. The class number is taken from the second
        "-"-separated field of the file name (e.g. ``fold1/123-4-0-0.wav`` -> 4).
    target_sr : int, optional
        Sampling rate every file is resampled to.
    n_mels : int, optional
        Number of mel bands.
    hop_length : int, optional
        Hop length (in samples) between spectrogram frames.
    slice_size : int, optional
        Number of spectrogram frames per emitted slice.

    Raises
    ------
    IndexError, ValueError
        If the file name does not contain an integer class id as its second
        "-"-separated field.
    """
    samples, sr = sf.read(fp)
    sound = np.asarray(samples)
    # soundfile returns multi-channel audio as (frames, channels), whereas
    # librosa expects (channels, frames), so transpose. For 1-D (mono) input
    # the transpose is a no-op.
    sound = librosa.core.to_mono(np.transpose(sound))
    # Resample so every waveform has the same sampling rate. Keyword arguments
    # are used because newer librosa releases no longer accept them positionally.
    sound = librosa.core.resample(sound, orig_sr=sr, target_sr=target_sr)
    # Class number comes from the file name only, so the result does not depend
    # on how many directories precede it or on the path separator.
    classNumber = int(os.path.basename(fp).split("-")[1])
    # Mel spectrogram has shape (n_mels, frames).
    mel = librosa.feature.melspectrogram(y=sound, sr=target_sr, n_mels=n_mels,
                                         hop_length=hop_length)
    # Emit every complete window of `slice_size` frames.
    for start in range(0, mel.shape[1] - slice_size + 1, slice_size):
        data.append(mel[:, start:start + slice_size].flatten())
        labels.append(classNumber)

In [3]:
from multiprocessing import Process, Lock, Pipe,Event
import time
from keras import utils



# Per-split accumulators: feature vectors and their class labels for the
# training, tuning and test splits. Each starts out as its own empty list.
trainData = []
trainLabels = []
tuneData = []
tuneLabels = []
testData = []
testLabels = []
def add(x,c):
    """Worker entry point: featurize every file matching a glob and send the result.

    Parameters
    ----------
    x : str
        Glob pattern selecting the audio files to process (e.g. "fold1/*.wav").
    c : connection object
        Anything with a ``send`` method (here: one end of a multiprocessing
        Pipe). It receives a single list of ``(features, label)`` pairs, which
        is empty when the pattern matches no files.
    """
    tempdata, templabels = [], []
    # sorted(): glob order is filesystem-dependent; sorting makes the sample
    # order reproducible between runs.
    for path in sorted(glob.glob(x)):
        appendSounds(tempdata, templabels, path)
    # list(...): on Python 3 zip() returns a lazy iterator, which cannot be
    # pickled for transfer through the pipe. A concrete list works everywhere.
    c.send(list(zip(tempdata, templabels)))
        
# Wall-clock start, used to report the total loading time at the end.
ts = time.time()

if __name__ == '__main__':
    # Launch one worker process per fold directory ("fold1" .. "fold10"). Each
    # worker featurizes its fold and sends the (features, label) pairs back
    # through its own pipe.
    threads = []
    connections = []
    # NOTE(review): `lock` is not used by anything visible here; kept in case
    # another cell references it.
    lock = Lock()
    for x in range(1, 11):
        parentPipe, childPipe = Pipe()
        worker = Process(target=add, args=("fold" + str(x) + "/*.wav", childPipe))
        worker.start()
        threads.append(worker)
        connections.append(parentPipe)

    # Collect results in fold order: folds 1-8 go to the training split, folds
    # 9-10 to the test split. The tune split is not populated.
    for fold, (worker, conn) in enumerate(zip(threads, connections), 1):
        pairs = list(conn.recv())
        # Join only after receiving: a worker blocks in send() until its data
        # has been read, so joining first could deadlock.
        worker.join()
        if not pairs:
            # Fold produced no samples (e.g. no matching files); zip(*[])
            # cannot be unpacked into two names, so skip it.
            continue
        tD, tL = zip(*pairs)
        if fold <= 8:
            trainData += tD
            trainLabels += tL
        else:
            testData += tD
            testLabels += tL

    # Stack features into arrays and one-hot encode the labels (10 classes).
    trainData, trainLabels = np.array(trainData), np.array(utils.to_categorical(trainLabels, num_classes=10))
    tuneData, tuneLabels = np.array(tuneData), np.array(utils.to_categorical(tuneLabels, num_classes=10))
    testData, testLabels = np.array(testData), np.array(utils.to_categorical(testLabels, num_classes=10))

    # Single-argument print(...) produces the same text on Python 2 and 3.
    print("%s %s" % ("I'm done, time was:", time.time() - ts))

Using TensorFlow backend.


I'm done, time was: 649.290421963


In [4]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
# Fully connected classifier: three ReLU hidden layers followed by a 10-way
# softmax over the classes.
net = Sequential()
# The input width is taken from the data instead of being hard-coded, so the
# first layer always matches the length of the flattened spectrogram slices.
net.add(Dense(1000, activation='relu', input_dim=trainData.shape[1]))
net.add(Dense(600, activation='relu'))
net.add(Dense(170, activation='relu'))
net.add(Dense(10, activation='softmax'))

net.compile(loss='categorical_crossentropy',
           optimizer = SGD(lr=0.0003),
           metrics = ['accuracy'])
net.fit(trainData,trainLabels, epochs = 100, batch_size=16, verbose=2)
# With metrics=['accuracy'], evaluate() returns [loss, accuracy] on the test split.
score=net.evaluate(testData,testLabels, batch_size=16)
# Single-argument print(...) behaves the same on Python 2 and 3.
print(score)

Epoch 1/100
55s - loss: 2.4641 - acc: 0.4174
Epoch 2/100
49s - loss: 1.8108 - acc: 0.5266
Epoch 3/100
49s - loss: 1.6093 - acc: 0.5665
Epoch 4/100
49s - loss: 1.4734 - acc: 0.5972
Epoch 5/100
49s - loss: 1.3860 - acc: 0.6163
Epoch 6/100
49s - loss: 1.3199 - acc: 0.6310
Epoch 7/100
50s - loss: 1.2561 - acc: 0.6415
Epoch 8/100
49s - loss: 1.2178 - acc: 0.6501
Epoch 9/100
49s - loss: 1.1709 - acc: 0.6598
Epoch 10/100
49s - loss: 1.1654 - acc: 0.6624
Epoch 11/100
49s - loss: 1.1324 - acc: 0.6688
Epoch 12/100
49s - loss: 1.1141 - acc: 0.6729
Epoch 13/100
49s - loss: 1.0580 - acc: 0.6866
Epoch 14/100
49s - loss: 1.0504 - acc: 0.6881
Epoch 15/100
49s - loss: 1.0505 - acc: 0.6875
Epoch 16/100
49s - loss: 0.9988 - acc: 0.7009
Epoch 17/100
49s - loss: 0.9871 - acc: 0.7041
Epoch 18/100
48s - loss: 0.9789 - acc: 0.7064
Epoch 19/100
49s - loss: 0.9519 - acc: 0.7137
Epoch 20/100
49s - loss: 0.9248 - acc: 0.7199
Epoch 21/100
49s - loss: 0.9289 - acc: 0.7197
Epoch 22/100
49s - loss: 0.9797 - acc: 0.71

In [69]:
# Show the value returned by net.evaluate(...) in the training cell.
# NOTE(review): this cell carries an out-of-order execution count, so the
# recorded output may come from a different run than the cells above; re-run
# the notebook top to bottom to confirm.
print(score)

[2.0773913470494461, 0.4402383750109779]
