### Spectrogram based Transfer Learning

In [36]:
n_mfcc = 64
n_mfcc_width = 432
window_size = 10
audio_len = 20
audio_len_nn = 150
data_dir = 'audio-train-transfer'
n_samples = 112

#### Data cleanup

In [3]:
import matplotlib.pyplot as plt
import os
import subprocess

def run_preprocess(root, length, split):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            subprocess.call(["./preprocess_transfer", os.path.join(subdir, directory), length, split])
        break

In [None]:
import numpy as np
import librosa as lp
from scikits.talkbox import lpc


def convert_to_lpc(filename,number_of_coefficients):
    wave, sr = lp.load(filename, mono=True, sr=16000)
    lpc_signal=lpc(wave,number_of_coefficients)
#     lpcc_signal=lpcc(lpc_signal[0],lpc_signal[1])
    return np.hstack((lpc_signal[0], lpc_signal[1], lpc_signal[2]))

def load_features(root, split):
    lpcc_data = []
    mfcc_data = []
    label = []
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            file_path = os.path.join(subdir, directory, "split", split, "wav")
            if len(directory.split(".")) != 2:
                continue
            for filename in os.listdir(file_path):
                lpcc = convert_to_lpc(os.path.join(file_path, filename), 49)
                lpcc_data.append(lpcc)
                
                y, sr = lp.load(os.path.join(file_path, filename))
                mfcc = lp.feature.mfcc(y = y, sr = 16000, n_mfcc = n_mfcc)
                if mfcc.size == 0:
                    continue
                mfcc = np.pad(mfcc, pad_width=((0, 0), (0, n_mfcc_width)), mode='constant')
                if mfcc.shape != (n_mfcc, n_mfcc_width):
                    mfcc = mfcc[:, :n_mfcc_width]
                mfcc_data.append(mfcc)
                
                label.append(directory.split('.')[0])
        break
    return lpcc_data, mfcc_data, label

In [5]:
import shutil
import glob

def cleanup_split(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            shutil.rmtree(os.path.join(subdir, directory, "split"), ignore_errors = True)
        break

def cleanup_merged(root):
    for subdir, dirs, files in os.walk(root):
        for directory in dirs:
            for f in glob.glob(os.path.join(subdir, directory, "*.wav")):
                os.remove(f)
        break

In [6]:
def distribute_samples(root, folder):
    for subdir, dirs, files in os.walk(os.path.join(root, folder)):
        for i, sample in enumerate(files):
            os.makedirs(os.path.join(root, folder + "." + str(i + 1)))
            shutil.move(os.path.join(root, folder, sample), os.path.join(root, folder + "." + str(i + 1), sample))
        break
        shutil.rmtree(os.path.join(root, folder))
# distribute_samples(data_dir, "Oprah")

In [37]:
# cleanup_merged(data_dir)
cleanup_split(data_dir)
run_preprocess(data_dir, str(audio_len), str(window_size))

In [38]:
X_lp, X_mf, y = load_features(data_dir, str(window_size))

BillGates.1
BillGates.2
BillGates.3
BillGates.4
BillGates.5
Blaha.1
Blaha.2
Blaha.3
Blaha.4
Christen.1
Christen.2
Christen.3
Christen.4
Christen.5
Christen.6
Christen.7
Christen.8
Christen.9
Clinton.1
Clinton.2
Clinton.3
Clinton.4
Clinton.5
Jamie.1
Jamie.2
Jamie.3
Jamie.4
Jamie.5
Jamie.6
minutephysics.1
minutephysics.2
minutephysics.3
minutephysics.4
minutephysics.5
minutephysics.6
Obama.1
Obama.2
Obama.3
Obama.5
OChem.1
OChem.2
OChem.3
OChem.4
OChem.5
OChem.6
OChem.7
OChem.8
Oprah.1
Oprah.2
Oprah.3
Oprah.4
Oprah.5
Oprah.6
Oprah.7
Patrick.1
Patrick.10
Patrick.2
Patrick.3
Patrick.4
Patrick.5
Patrick.6
Patrick.7
Patrick.8
Patrick.9
Sriraj.1
Sriraj.2
Sriraj.3
Sriraj.4
Sriraj.5
Sriraj.6
standupmaths.1
standupmaths.2
standupmaths.3
standupmaths.4
standupmaths.5
standupmaths.6
standupmaths.7
standupmaths.8
Sudeep.1
Sudeep.2
Sudeep.3
Sudeep.4
Trudeau.1
Trudeau.2
Trudeau.3
Trudeau.4
Trudeau.5
Trump.1
Trump.2
Trump.3
Trump.4
Tushar.1
Tushar.2
Tushar.3
Tushar.4
Tushar.5
Tushar.6
Tushar.7
Upendra

#### Build the model

In [15]:
import keras
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from keras.models import Model, Sequential

model_lp = Sequential()
model_lp.add(Conv2D(32, kernel_size=(2, 2), activation='relu',
                 input_shape=(10, 10, 1)))
model_lp.add(MaxPooling2D(pool_size=(2, 2)))
model_lp.add(Activation('relu'))
model_lp.add(Dropout(0.25))

model_lp.add(Conv2D(32, kernel_size=(2, 2), padding='same'))
model_lp.add(Activation('relu'))
model_lp.add(MaxPooling2D(pool_size=(2, 2)))
model_lp.add(Dense(64))
model_lp.add(Dropout(0.25))
model_lp.add(Flatten())

input_lp = Input(shape=(10, 10, 1))
layer_lp = model_lp(input_lp)

model_mf = Sequential()
model_mf.add(Conv2D(32, kernel_size=(2, 2), activation='relu',
                 input_shape=(n_mfcc, n_mfcc_width, 1)))
model_mf.add(MaxPooling2D(pool_size=(2, 2)))
model_mf.add(Activation('relu'))
model_mf.add(Dropout(0.25))

model_mf.add(Conv2D(32, kernel_size=(2, 2), padding='same'))
model_mf.add(Activation('relu'))
model_mf.add(MaxPooling2D(pool_size=(2, 2)))
model_mf.add(Dense(n_mfcc/2, activation='relu'))
model_mf.add(Dropout(0.25))
model_mf.add(Flatten())

input_mf = Input(shape=(n_mfcc, n_mfcc_width, 1))
layer_mf = model_mf(input_mf)

merged = keras.layers.concatenate([layer_lp, layer_mf])
output = Dense(n_samples, activation='softmax')(merged)

model = Model(inputs=[input_lp, input_mf], outputs=output)

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [16]:
print model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 10, 10, 1)    0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 64, 432, 1)   0                                            
__________________________________________________________________________________________________
sequential_3 (Sequential)       (None, 256)          6400        input_3[0][0]                    
__________________________________________________________________________________________________
sequential_4 (Sequential)       (None, 51360)        5344        input_4[0][0]                    
__________________________________________________________________________________________________
concatenat

#### Load pretrained model weights

In [46]:
# model.load_weights('neural-net-weights/NN_Weights_1203_3/spect_model_weights_1203_3_4.h5')
model.load_weights('hybrid_model_weights_' + str(audio_len_nn) + '_' + str(window_size) + '-' + str(4) + '.h5')

In [47]:
from keras.models import Model

transfer_model = Model(inputs = model.input, outputs=model.get_layer('concatenate_1').output)

#### Load spectrograms as matrices

In [39]:
X_lp = np.array(X_lp)
X_mf = np.array(X_mf)

X_lp = X_lp.reshape(X_lp.shape[0], 10, -1, 1)
X_mf = X_mf.reshape(X_mf.shape[0], X_mf.shape[1], X_mf.shape[2], 1)

In [48]:
import numpy as np

X_SVM = []
for sample in range(len(X_lp)):
    X_lp_exp = np.expand_dims(X_lp[sample], axis = 0)
    X_mf_exp = np.expand_dims(X_mf[sample], axis = 0)
    transfer_features = transfer_model.predict([X_lp_exp, X_mf_exp])
    X_SVM.append(transfer_features)

In [49]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_SVM, y, test_size=0.25, random_state=42)

#### Encode the inputs

In [50]:
import keras
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

l_enc = LabelEncoder()
l_enc.fit(y_train)
y_train_norm = l_enc.transform(y_train)

l_enc.fit(y_test)
y_test_norm = l_enc.transform(y_test)

#### Remove unnecessary variables from memory (To clear memory)

In [51]:
from sklearn import svm

# model_SVM = svm.SVC(kernel='rbf', class_weight='balanced', C = 10.0, gamma = 0.00001)
model_SVM = svm.SVC(kernel='linear', class_weight='balanced')
X_train_SVM = np.array(X_train).reshape(len(X_train), -1)

In [52]:
model_SVM.fit(X_train_SVM, y_train_norm)

SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [53]:
from sklearn.metrics import accuracy_score

X_test = np.array(X_test).reshape(len(X_test), -1)
accuracy_score(y_test_norm, model_SVM.predict(X_test))

0.5714285714285714

In [34]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

X_test_SVM = np.array(X_test).reshape(len(X_test), -1)

C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.25, random_state=64)
grid = GridSearchCV(svm.SVC(kernel='linear', class_weight='balanced'), param_grid=param_grid, cv=cv)
grid.fit(np.concatenate((X_train_SVM,X_test_SVM), axis = 0), np.concatenate((y_train_norm, y_test_norm)))

In [35]:
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))