### Spectrogram based Transfer Learning

In [59]:
window_size = 10
audio_len = 60
audio_len_nn = 150
data_dir = 'audio-train-transfer'
n_samples = 112

#### Data cleanup

In [2]:
import matplotlib.pyplot as plt
import os
import subprocess

def run_preprocess(root, length, split):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            subprocess.call(["./preprocess_transfer", os.path.join(subdir, directory), length, split])
        break

In [29]:
import numpy as np
import librosa as lp
from scikits.talkbox import lpc


def convert_to_lpc(filename,number_of_coefficients):
    wave, sr = lp.load(filename, mono=True, sr=None)
    lpc_signal=lpc(wave,number_of_coefficients)
#     lpcc_signal=lpcc(lpc_signal[0],lpc_signal[1])
    return np.hstack((lpc_signal[0],lpc_signal[1],lpc_signal[2]))

def load_features(root, split):
    lpcc_data = []
    lpcc_label = []
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            file_path = os.path.join(subdir, directory, "split", split, "wav")
            if len(directory.split(".")) != 2:
                continue
            for filename in os.listdir(file_path):
                lpcc = convert_to_lpc(os.path.join(file_path, filename), 49)
                lpcc_data.append(lpcc)
                lpcc_label.append(directory.split('.')[0])
        break
    return lpcc_data, lpcc_label

In [5]:
import shutil
import glob

def cleanup_split(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            shutil.rmtree(os.path.join(subdir, directory, "split"), ignore_errors = True)
        break

def cleanup_merged(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            for f in glob.glob(os.path.join(subdir, directory, "*.wav")):
                os.remove(f)
        break

In [5]:
def distribute_samples(root, folder):
    for subdir, dirs, files in os.walk(os.path.join(root, folder)):
        for i, sample in enumerate(files):
            os.makedirs(os.path.join(root, folder + "." + str(i + 1)))
            shutil.move(os.path.join(root, folder, sample), os.path.join(root, folder + "." + str(i + 1), sample))
        break
        shutil.rmtree(os.path.join(root, folder))
# distribute_samples(data_dir, "Oprah")

In [60]:
# cleanup_merged(data_dir)
cleanup_split(data_dir)
run_preprocess(data_dir, str(audio_len), str(window_size))

In [61]:
X, y = load_features(data_dir, str(window_size))

#### Build the model

In [22]:
import keras
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.models import Sequential

model = Sequential()
model.add(Conv2D(32, kernel_size=(2, 2), activation='relu',
                 input_shape=(10, 10, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Activation('relu'))
model.add(Dropout(0.25))

model.add(Conv2D(32, kernel_size=(2, 2), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(n_samples, activation='softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

#### Load pretrained model weights

In [23]:
# model.load_weights('neural-net-weights/NN_Weights_1203_3/spect_model_weights_1203_3_4.h5')
model.load_weights('lpc_model_weights_' + str(audio_len_nn) + '_' + str(window_size) + '-' + str(1) + '.h5')

In [24]:
from keras.models import Model

transfer_model = Model(inputs = model.input, outputs=model.get_layer('flatten_3').output)

#### Load spectrograms as matrices

In [62]:
X = np.dstack(X)

X = np.swapaxes(X, 0, 1)
X = np.swapaxes(X, 0, 2)
X = np.swapaxes(X, 1, 2)
# print X_1.shape
# print X_2.shape
# print X_3.shape
# print X_4.shape

X = X.reshape(X.shape[0], 10, -1, 1)
# print X.shape

# X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)

In [63]:
import numpy as np

X_SVM = []
for sample in range(len(X)):
    x_exp = np.expand_dims(X[sample], axis = 0)
    transfer_features = transfer_model.predict(x_exp)
    X_SVM.append(transfer_features)

In [64]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_SVM, y, test_size=0.25, random_state=42)

#### Encode the inputs

In [65]:
import keras
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

l_enc = LabelEncoder()
l_enc.fit(y_train)
y_train_norm = l_enc.transform(y_train)

l_enc.fit(y_test)
y_test_norm = l_enc.transform(y_test)

#### Remove unnecessary variables from memory (To clear memory)

In [66]:
from sklearn import svm

# model_SVM = svm.SVC(kernel='rbf', class_weight='balanced', C = 10.0, gamma = 0.00001)
model_SVM = svm.SVC(kernel='linear', class_weight='balanced')
X_train_SVM = np.array(X_train).reshape(len(X_train), -1)

In [67]:
model_SVM.fit(X_train_SVM, y_train_norm)

SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [68]:
from sklearn.metrics import accuracy_score

X_test = np.array(X_test).reshape(len(X_test), -1)
accuracy_score(y_test_norm, model_SVM.predict(X_test))

0.71999999999999997

In [54]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

X_test_SVM = np.array(X_test).reshape(len(X_test), -1)

C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.25, random_state=64)
grid = GridSearchCV(svm.SVC(kernel='linear', class_weight='balanced'), param_grid=param_grid, cv=cv)
grid.fit(np.concatenate((X_train_SVM,X_test_SVM), axis = 0), np.concatenate((y_train_norm, y_test_norm)))

GridSearchCV(cv=StratifiedShuffleSplit(n_splits=5, random_state=64, test_size=0.25,
            train_size=None),
       error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': array([  1.00000e-02,   1.00000e-01,   1.00000e+00,   1.00000e+01,
         1.00000e+02,   1.00000e+03,   1.00000e+04,   1.00000e+05,
         1.00000e+06,   1.00000e+07,   1.00000e+08,   1.00000e+09,
         1.00000e+10]), 'gamma': array([  1.00000e-09,   1.00000e-08,   1.00000e-07,   1.00000e-06,
         1.00000e-05,   1.00000e-04,   1.00000e-03,   1.00000e-02,
         1.00000e-01,   1.00000e+00,   1.00000e+01,   1.00000e+02,
         1.00000e+03])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       

In [55]:
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'C': 0.10000000000000001, 'gamma': 1.0000000000000001e-09} with a score of 0.62
