Steps to Perform for MFCC Transfer Learning
Step 1: Load the audio files and convert them to MFCC features

In [4]:
import librosa as lp
import matplotlib.pyplot as plt
import numpy as np
import os
import subprocess
import shutil

def run_preprocess(root, length, split):
    """Extract and cache MFCC features for each immediate sub-directory of ``root``.

    For every directory ``<d>`` directly under ``root``:

    1. skip it when ``<d>/<d>_mfcc_64_<length>_<split>.npy`` already exists;
    2. run ``./preprocess <root>/<d> <length> <split>`` when
       ``<d>/split/<split>`` is missing;
    3. compute a 64-coefficient MFCC matrix for every file found in
       ``<d>/split/<split>/wav`` and zero-pad / truncate it to 128 frames;
    4. save the stacked ``(n_files, 64, 128)`` array as the ``.npy`` cache and
       delete the ``<d>/split`` working directory.

    Args:
        root: Directory whose immediate sub-directories are processed.
        length: String forwarded to ``./preprocess`` and embedded in the cache
            file name (its exact meaning is defined by that script).
        split: String forwarded to ``./preprocess``; also the name of the
            sub-directory of ``split/`` that is read.

    Returns:
        None. Results are written to disk; the directory name and the saved
        array shape are printed for each processed directory.
    """
    n_mfcc, n_frames = 64, 128

    # Only the first os.walk() entry is needed: the direct children of root.
    # A missing root yields nothing, which is treated as "no directories".
    subdir, dirs, _ = next(os.walk(root), (root, [], []))

    for directory in dirs:
        print(directory)
        book_dir = os.path.join(subdir, directory)
        npy_file = directory + '_' + 'mfcc' + '_' + '64' + '_' + length + '_' + split + '.npy'
        npy_path = os.path.join(book_dir, npy_file)
        if os.path.isfile(npy_path):
            # Features were already extracted on a previous run.
            continue

        if not os.path.isdir(os.path.join(book_dir, "split", split)):
            subprocess.call(["./preprocess", book_dir, length, split])

        file_path = os.path.join(book_dir, "split", split, "wav")
        mfcc_data = []
        for filename in os.listdir(file_path):
            y, sr = lp.load(os.path.join(file_path, filename))
            # Use the sample rate that load() reports for `y`; a hard-coded
            # rate that differs from it would misplace the mel filterbank.
            mfcc = lp.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

            # Force a fixed width: drop extra frames, zero-pad short clips.
            mfcc = mfcc[:, :n_frames]
            missing = n_frames - mfcc.shape[1]
            if missing > 0:
                mfcc = np.pad(mfcc, pad_width=((0, 0), (0, missing)), mode='constant')
            mfcc_data.append(mfcc)

        # reshape() is a no-op for non-empty input and keeps an empty result
        # three-dimensional so it can still be concatenated later.
        features = np.asarray(mfcc_data).reshape((-1, n_mfcc, n_frames))
        np.save(npy_path, features)
        print(features.shape)
        shutil.rmtree(os.path.join(book_dir, "split"), ignore_errors=True)

In [5]:
import shutil

def rename_npy(root, length, split):
    """Rename legacy feature files to the ``mfcc_15`` naming scheme.

    For each immediate sub-directory ``<d>`` of ``root``, the file
    ``<d>/<d>_<length>_<split>.npy`` is moved to
    ``<d>/<d>_mfcc_15_<length>_<split>.npy`` if it exists. Directories
    without such a file are left alone.

    Args:
        root: Directory whose immediate sub-directories are inspected.
        length: String used in the file name.
        split: String used in the file name.
    """
    # First os.walk() entry == direct children of root (nothing if root is missing).
    top, children, _ = next(os.walk(root), (root, [], []))
    tail = '_' + length + '_' + split + '.npy'

    for name in children:
        folder = os.path.join(top, name)
        old_path = os.path.join(folder, name + tail)
        if not os.path.isfile(old_path):
            continue
        new_path = os.path.join(folder, name + '_' + 'mfcc' + '_' + '15' + tail)
        shutil.move(old_path, new_path)

In [6]:
def load_features(root, length, split):
    """Load the cached MFCC arrays of every immediate sub-directory of ``root``.

    Each directory ``<d>`` must contain ``<d>_mfcc_64_<length>_<split>.npy``
    holding an array of shape ``(n, 64, 128)``.

    Args:
        root: Directory whose immediate sub-directories are read.
        length: String used in the cache file name.
        split: String used in the cache file name.

    Returns:
        A tuple ``(mfcc_data, mfcc_label)``: ``mfcc_data`` is a float64 array
        of shape ``(total, 64, 128)`` with all directories stacked along
        axis 0, and ``mfcc_label`` is a list of ``total`` directory names, one
        per row of ``mfcc_data``.

    Raises:
        FileNotFoundError: (from ``np.load``) when a directory has no cache file.
        ValueError: (from ``np.concatenate``) when a cached array does not
            have trailing shape ``(64, 128)``.
    """
    # The empty float64 seed fixes the dtype and the shape of the result even
    # when there is nothing to load.
    chunks = [np.zeros((0, 64, 128))]
    mfcc_label = []

    # Only the first os.walk() entry is needed: the direct children of root.
    subdir, dirs, _ = next(os.walk(root), (root, [], []))
    for directory in dirs:
        npy_file = directory + '_' + 'mfcc' + '_' + '64' + '_' + length + '_' + split + '.npy'
        mfcc = np.load(os.path.join(subdir, directory, npy_file))
        chunks.append(mfcc)
        mfcc_label += mfcc.shape[0] * [directory]

    # Concatenate once at the end; doing it inside the loop re-copies the
    # whole accumulated array for every directory.
    return np.concatenate(chunks), mfcc_label

In [7]:
def cleanup_split(root):
    """Remove the ``split`` working directory from each immediate sub-directory of ``root``.

    Deletion errors (including a ``split`` directory that does not exist) are
    ignored.

    Args:
        root: Directory whose immediate sub-directories are cleaned.
    """
    # First os.walk() entry == direct children of root (nothing if root is missing).
    top, children, _ = next(os.walk(root), (root, [], []))
    for child in children:
        work_dir = os.path.join(top, child, "split")
        shutil.rmtree(work_dir, ignore_errors=True)

In [8]:
# Optional: uncomment to delete leftover "split" working directories first.
# cleanup_split('audio-train-full')
# Build the per-directory MFCC cache files; directories that already have
# one are skipped. Each directory name is printed as it is visited.
run_preprocess('audio-train-full', "4200", "3")

young_folks_history_american_revolution_1503_librivox_64kb_mp3
youngwomansguide_1501_librivox_64kb_mp3
a_country_doctor_1504_librivox_64kb_mp3
a_dreamers_tales_dm_1501_librivox_64kb_mp3
adventuresbobwhite_1505_librivox_64kb_mp3
aequanimitas_1412_librivox_64kb_mp3
agnesgreyversion3_1501_librivox_64kb_mp3
airplaneflyinghandbookvol3_1409_librivox_64kb_mp3
alondonlife_1412_librivox_64kb_mp3
ancient_modern_celebrated_freethinkers_1503_librivox_64kb_mp3
aprendizdeconspirador_1412_librivox_64kb_mp3
aristotles_masterpiece_1506_librivox_64kb_mp3
artofdivinecontentment_1512_librivox_64kb_mp3
battle-pieces_aspects_war_1501_librivox_64kb_mp3
blackriders_1410_librivox_64kb_mp3
boatsoftheglencarrig_1411_librivox_64kb_mp3
bookofgoodcounsels_1506_librivox_64kb_mp3
briefe_paulus_auswahl_1506_librivox_64kb_mp3
britishsubject_president_1505_librivox_64kb_mp3
canti_leopardi_1512_librivox_64kb_mp3
lifeofcarltonparker_1409_librivox_64kb_mp3
littlebrothertothebear_1602_librivox_64kb_mp3
childs_garden_of_vers

In [9]:
# Load every cached MFCC array together with its directory-name label.
X, y = load_features('audio-train-full', "4200", "3")

# Append a trailing channel axis of size 1, as expected by Conv2D.
n_samples, n_coeffs, n_frames = X.shape
X = X.reshape((n_samples, n_coeffs, n_frames, 1))

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=64,
)

In [11]:
import keras
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# Fit ONE encoder on the complete label vocabulary. Re-fitting on the test
# labels could assign a different integer to the same class whenever the two
# sets do not contain exactly the same classes.
l_enc = LabelEncoder()
l_enc.fit(list(y_train) + list(y_test))
num_classes = len(l_enc.classes_)

# Pass num_classes explicitly so both one-hot matrices have the same width,
# even if the highest-numbered class is absent from one of the sets.
y_train_enc = l_enc.transform(y_train)
y_train_norm = np_utils.to_categorical(y_train_enc, num_classes=num_classes)

y_test_enc = l_enc.transform(y_test)
y_test_norm = np_utils.to_categorical(y_test_enc, num_classes=num_classes)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [12]:
# del X, y, y_train_enc, y_test_enc, y_train, y_test

In [15]:
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.models import Sequential

# Width of the softmax layer: one unit per one-hot column of the targets,
# instead of a hard-coded class count.
num_outputs = y_train_norm.shape[1]

model = Sequential()

# Block 1: two 3x3 convolutions with 8 filters, 2x2 max-pooling, dropout.
model.add(Conv2D(8, (3, 3), padding='same',
                 input_shape=(64, 128, 1)))
model.add(Activation('relu'))
model.add(Conv2D(8, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Block 2: two 3x3 convolutions with 16 filters, 2x2 max-pooling.
model.add(Conv2D(16, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(16, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head. The ReLU after Flatten is kept so the layer stack stays
# identical to the one previously saved weights were produced with.
model.add(Flatten())
model.add(Activation('relu'))
model.add(Dense(num_outputs))
model.add(Activation('softmax'))

# initiate RMSprop optimizer (canonical class name rather than the
# lowercase alias, which is not available in every Keras release)
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [None]:
# Disabled scaffold for resuming from saved weights and checkpointing after
# every epoch; kept for reference.
# epoch = 1
# while epoch <= 30:
# model.load_weights('mfcc_model_weights_6030_3-5.h5')
# np.asarray() passes the existing arrays through without duplicating them;
# np.array() would make a full copy of each multi-gigabyte feature tensor.
model.fit(np.asarray(X_train), y_train_norm,
          batch_size=32,
          epochs=5,
          verbose=1,
          shuffle=True,
          validation_data=(np.asarray(X_test), y_test_norm))
#     model.save_weights('spect_model_weights_' + str(epoch) + '.h5')
#     epoch += 1

Train on 98942 samples, validate on 42405 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [18]:
# Persist the trained weights (HDF5) so they can be reloaded later with
# model.load_weights() on a model with the same architecture.
model.save_weights('mfcc_model_weights_4200_3-1.h5')