### Spectrogram-based Transfer Learning

#### Data cleanup

In [1]:
import matplotlib.pyplot as plt
import os
import subprocess

def run_preprocess(root, length, split):
    """Run the external ``./preprocess`` tool once per immediate subdirectory of *root*.

    Args:
        root: Directory whose direct child directories are processed.
        length: Forwarded verbatim as the tool's second command-line argument
            (so it must be a string).
        split: Forwarded verbatim as the tool's third command-line argument
            (so it must be a string).

    The tool's exit status is not inspected. ``./preprocess`` is resolved
    relative to the current working directory.
    """
    # The first entry yielded by os.walk() describes *root* itself; deeper
    # levels are deliberately never visited.
    top_level = next(os.walk(root), None)
    if top_level is None:
        # os.walk() yields nothing when *root* cannot be listed.
        return
    parent, child_dirs, _ = top_level
    for child in child_dirs:
        target = os.path.join(parent, child)
        subprocess.call(["./preprocess", target, length, split])

In [2]:
def load_features(root, split):
    """Load spectrogram images and their labels from the folders under *root*.

    For every immediate subdirectory ``<name>`` of *root*, each file in
    ``<root>/<name>/split/<split>/spect`` is read with ``plt.imread`` and
    labelled with ``<name>``.

    Args:
        root: Directory containing one subdirectory per label.
        split: Name of the folder below ``split/`` to read from.

    Returns:
        A ``(images, labels)`` tuple of two parallel lists: the arrays
        returned by ``plt.imread`` and the corresponding directory names.
    """
    images = []
    labels = []
    # Only the first os.walk() entry (the listing of *root* itself) is used.
    top_level = next(os.walk(root), None)
    if top_level is None:
        return images, labels
    parent, label_dirs, _ = top_level
    for label in label_dirs:
        spect_dir = os.path.join(parent, label, "split", split, "spect")
        for entry in os.listdir(spect_dir):
            image = plt.imread(os.path.join(spect_dir, entry))
            images.append(image)
            labels.append(label)
    return images, labels

In [3]:
# Invoke ./preprocess on every immediate subdirectory of the transfer dataset.
# NOTE(review): "3600" and "30" are passed through as the tool's length and
# split arguments; their exact meaning/units are defined by ./preprocess —
# confirm there. "30" must match the split name used when loading features.
run_preprocess('audio-train-transfer', "3600", "30")

#### Build the model

In [18]:
import keras
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.models import Sequential

# Rebuild the CNN whose weights are loaded in the next cell. The layer stack
# must stay exactly as saved, otherwise load_weights() cannot map the weights.
model = Sequential()

# Convolution block 1: 8 filters on 513x800 RGB spectrogram images.
model.add(Conv2D(8, (3, 3), padding='same',
                 input_shape=(513, 800, 3)))
model.add(Activation('relu'))
model.add(Conv2D(8, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Convolution block 2: 16 filters.
model.add(Conv2D(16, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(16, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
#model.add(Dropout(0.25))

# Classifier head: the flattened feature map feeds a 7-way softmax.
model.add(Flatten())
#model.add(Dense(10))
model.add(Activation('relu'))
#model.add(Dropout(0.5))
model.add(Dense(7))
model.add(Activation('softmax'))

# Instantiate the RMSprop optimizer. The class name `RMSprop` is used rather
# than the lowercase `rmsprop` alias, which only exists in old Keras releases.
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

# Compile so the model is usable for training/evaluation as well as inference.
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

#### Load pretrained model weights

In [19]:
# Load previously saved weights into the model built above; the layer stack
# has to match the one the HDF5 file was saved from.
# NOTE(review): presumably produced by the original spectrogram classifier
# training run — confirm where spect_model_weights.h5 comes from.
model.load_weights('spect_model_weights.h5')

In [21]:
from keras.layers import Flatten
from keras.models import Model

# Truncate the CNN at its Flatten layer so it outputs the flattened
# convolutional feature map instead of class probabilities.
# The layer is located by type rather than by name: Keras auto-numbers unnamed
# layers per session ("flatten_1", "flatten_2", ...), so a hard-coded
# 'flatten_3' only resolves if the model happened to be built three times.
flatten_layer = next(layer for layer in model.layers if isinstance(layer, Flatten))
transfer_model = Model(inputs=model.input, outputs=flatten_layer.output)

#### Load spectrograms as matrices

In [34]:
# X: list of spectrogram image arrays; y: parallel list of label strings taken
# from the subdirectory names of 'audio-train-transfer'.
# NOTE(review): the "30" split folder is presumably created by the
# preprocessing step above — confirm against ./preprocess.
X, y = load_features('audio-train-transfer', "30")

In [37]:
import numpy as np

# Push every spectrogram through the truncated CNN, one image at a time, and
# keep the resulting feature arrays. expand_dims adds the leading batch axis
# that predict() expects, so each entry of X_SVM has a batch dimension of 1.
# Images are processed individually rather than as one big batch so the whole
# dataset never has to be stacked into a single array.
X_SVM = [transfer_model.predict(np.expand_dims(image, axis=0)) for image in X]

In [38]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the extracted feature arrays (and their labels) for testing.
# random_state fixes the shuffle so the split is repeatable for a given input
# order.
X_train, X_test, y_train, y_test = train_test_split(X_SVM, y, test_size=0.25, random_state=42)

#### One-hot encode the labels

In [39]:
import keras
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# Fit ONE encoder on the labels of both splits so that a given class name maps
# to the same integer id in train and test. Refitting on y_test alone would
# silently renumber the classes whenever a split is missing a class.
l_enc = LabelEncoder()
l_enc.fit(list(y_train) + list(y_test))
n_classes = len(l_enc.classes_)

# Passing num_classes explicitly keeps both one-hot matrices the same width
# even if the highest class id does not occur in one of the splits.
y_train_enc = l_enc.transform(y_train)
y_train_norm = np_utils.to_categorical(y_train_enc, num_classes=n_classes)

y_test_enc = l_enc.transform(y_test)
y_test_norm = np_utils.to_categorical(y_test_enc, num_classes=n_classes)

#### Free memory by deleting variables that are no longer needed

In [40]:
# del X, y, X_SVM, y_train_enc, y_test_enc, y_train, y_test

In [41]:
from sklearn import svm

# Collapse each training sample's feature array into a single row, giving a
# 2-D (n_samples, n_features) matrix for scikit-learn.
X_train_SVM = np.array(X_train).reshape(len(X_train), -1)

# RBF-kernel SVM; 'balanced' re-weights classes inversely to their frequency.
model_SVM = svm.SVC(class_weight='balanced', kernel='rbf')

In [46]:
# SVC needs integer class ids, not one-hot rows: take the position of the 1 in
# each row. The ndim guard makes the cell safe to re-run — once the labels are
# already 1-D class ids they are left untouched instead of raising.
y_train_norm = np.asarray(y_train_norm)
if y_train_norm.ndim == 2:
    y_train_norm = y_train_norm.argmax(axis=1)
X_train_SVM.shape

(699, 399168)

In [47]:
# Train the SVM on the flattened CNN features. y_train_norm holds integer
# class indices at this point (converted from one-hot in the previous cell).
model_SVM.fit(X_train_SVM, y_train_norm)

SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [54]:
from sklearn.metrics import accuracy_score

# Convert the one-hot test labels to integer class ids. The ndim guard keeps
# the cell re-runnable: labels that are already 1-D are left as they are.
y_test_norm = np.asarray(y_test_norm)
if y_test_norm.ndim == 2:
    y_test_norm = y_test_norm.argmax(axis=1)

# Flatten each test sample's feature array into one row, matching the layout
# the SVM was trained on, then report the fraction of correct predictions.
X_test = np.array(X_test).reshape(len(X_test), -1)
accuracy_score(y_test_norm, model_SVM.predict(X_test))

0.2264957264957265