In [1]:
data_dir = 'Training'

In [2]:
import librosa as lp
import matplotlib.pyplot as plt
import numpy as np
import os
import subprocess
import shutil
from scikits.talkbox import lpc


def convert_to_lpc(filename,number_of_coefficients):
    wave, sr = lp.load(filename, mono=True, sr=None)
    lpc_signal=lpc(wave,number_of_coefficients)
    lpcc_signal=lpcc(lpc_signal[0],lpc_signal[1])
    return lpcc_signal
                    

def lpcc(seq, err_term, order=None):
    if order is None:
        order = len(seq) - 1
    lpcc_coeffs = [np.log(err_term), -seq[0]]
    for n in xrange(2, order + 1):
        # Use order + 1 as upper bound for the last iteration
        upbound = (order + 1 if n > order else n)
        lpcc_coef = -sum(i * lpcc_coeffs[i] * seq[n - i - 1]
                         for i in xrange(1, upbound)) * 1. / upbound
        lpcc_coef -= seq[n - 1] if n <= len(seq) else 0
        lpcc_coeffs.append(lpcc_coef)
    return lpcc_coeffs


def run_preprocess(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
#             print(directory)
            lpcc_data = []
            npy_file = directory + '_' + 'lpcc' + '_'+ '.npy'
            file_path = os.path.join(subdir, directory)
            print(file_path)
            for filename in os.listdir(file_path):
                lpcc_data.append(convert_to_lpc(file_path+"\\"+filename,24))

            np.save(os.path.join(subdir, directory, npy_file), np.asarray(lpcc_data))
            print(np.asarray(lpcc_data).shape)
        break
            


In [None]:
import shutil

def rename_npy(root, length, split):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            npy_file = directory + '_' + length + '_' + split + '.npy'
            new_npy_file = directory + '_' + 'mfcc' + '_' + str(512) + '_' + length + '_' + split + '.npy'
            if os.path.isfile(os.path.join(subdir, directory, npy_file)):
                shutil.move(os.path.join(subdir, directory, npy_file), os.path.join(subdir, directory, new_npy_file))
        break

In [58]:
def load_features(root):
    lpcc_data=[]
    lpcc_label = []
    for subdir, dirs, files in os.walk(root):
        count=0
        for directory in dirs:
            npy_file = directory + '_' + 'lpcc' + '_' + '.npy'
            if(count==0):
                lpcc_data= np.load(os.path.join(subdir, directory, npy_file))
                lpcc_label=lpcc_data.shape[0]*[directory.split('.')[0]]
            else:
                lpcc=np.load(os.path.join(subdir, directory, npy_file))
                lpcc_data=np.vstack((lpcc_data,lpcc))
                lpcc_label += lpcc.shape[0] * [directory.split('.')[0]]
            count+=1
        break
    return lpcc_data, lpcc_label

In [None]:
def cleanup_split(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            shutil.rmtree(os.path.join(subdir, directory, "split"), ignore_errors = True)
        break

In [None]:
import glob

def cleanup_merged(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            for f in glob.glob(os.path.join(subdir, directory, "*_merged*.*")):
                os.remove(f)
        break

def cleanup_npy(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            for f in glob.glob(os.path.join(subdir, directory, "*.npy")):
                os.remove(f)
        break

In [None]:
def rename_samples(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            shutil.move(os.path.join(subdir, directory), os.path.join(subdir, directory.split("_")[0]))
        break

def remove_extra_samples(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            name_o = directory.split(".") 
            if len(name_o) == 2 and int(name_o[1]) > 10:
                shutil.rmtree(os.path.join(subdir, directory))
        break
    
def distribute_samples(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            if len(directory.split(".")) != 1:
                continue
            for d_subdir, d_dirs, d_files in os.walk(os.path.join(root, directory)):
                for i, sample in enumerate(d_files):
                    os.makedirs(os.path.join(subdir, directory + "." + str(i + 1)))
                    shutil.move(os.path.join(subdir, directory, sample), \
                                os.path.join(subdir, directory + "." + str(i + 1), sample))
                break
        remove_extra_samples(root)
        break

In [3]:
# cleanup_split(data_dir)
# cleanup_npy(data_dir)
# rename_samples(data_dir)
# distribute_samples(data_dir)
run_preprocess(data_dir)

Training\1
(292L, 25L)
Training\2
(102L, 25L)
Training\3
(109L, 25L)
Training\4
(108L, 25L)
Training\5
(97L, 25L)
Training\6
(111L, 25L)
Training\7
(81L, 25L)


In [76]:
from IPython.core.debugger import set_trace
X, y = load_features(data_dir)
X=np.asarray(X)
y=np.asarray(y)
X = X.reshape(X.shape[0], X.shape[1],1,1)

(900L, 25L)


In [77]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=64)

In [78]:
print(X.shape)
print(y_train.shape)


(900L, 25L, 1L, 1L)
(630L,)


In [72]:
import keras
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

l_enc = LabelEncoder()
l_enc.fit(y_train)
y_train_enc = l_enc.transform(y_train)
y_train_norm = np_utils.to_categorical(y_train_enc)

l_enc.fit(y_test)
y_test_enc = l_enc.transform(y_test)
y_test_norm = np_utils.to_categorical(y_test_enc)

In [None]:
# del X, y, y_train_enc, y_test_enc, y_train, y_test
X_train = X_train[:, :64, :, :]
X_test = X_test[:, :64, :, :]

In [81]:
print(1)
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.models import Sequential

model = Sequential()
model.add(Conv2D(32, kernel_size=(2, 2), activation='relu',
                 input_shape=(25,1,1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Activation('relu'))
model.add(Dropout(0.25))

model.add(Conv2D(32, kernel_size=(2, 2), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dense())
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(len(y_test_norm[0])))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
ctr = 1

1


TypeError: __init__() takes at least 2 arguments (1 given)

In [None]:
from keras.utils import plot_model
plot_model(model, to_file='mfcc_model.png', show_shapes=True)

In [80]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 24, 0, 32)         160       
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 0, 32)         0         
_________________________________________________________________
activation_15 (Activation)   (None, 12, 0, 32)         0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 12, 0, 32)         0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 12, 0, 32)         4128      
_________________________________________________________________
activation_16 (Activation)   (None, 12, 0, 32)         0         
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 6, 0, 32)          0         
__________

In [55]:
train_result = model.fit(np.array(X_train), y_train_norm,
          batch_size=16,
          epochs=30,
          verbose=1,
          shuffle = True,
         validation_data=(np.array(X_test), y_test_norm))

ValueError: Error when checking input: expected conv2d_6_input to have 4 dimensions, but got array with shape (630L, 25L, 1L)

In [None]:
model.save_weights('mfcc_model_weights_' + str(audio_len) + '_' + str(window_size) + '-' + str(n_mfcc) + '_' + str(ctr) + '.h5')
ctr += 1

In [None]:
import matplotlib.pyplot as plt

plt.style.use('dark_background')

plt.plot(train_result.history['acc'], color="#5599FF")
plt.plot(train_result.history['val_acc'], color="#55FF99")
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()