# Train Local Feature Extractors

Train a set of CNNs, each handling inputs (segments of mel-spectrograms) at a different scale. These CNN models will be used as *local feature extractors* in the music genre tagger.

In this project we trained CNNs at 4 different scales. Their segment lengths are 30, 60, 120 and 240. The structures of all 4 CNNs are very similar to the model we used in the "multi-level CNN" part, except that the models here contain only 4 conv layers. The "vertical" pooling sizes of the 4 layers are `[2, 3, 2, 2]`. The horizontal pooling sizes are set according to the segment length:

Segment Length | Pooling sizes
- | -
30 | `[2, 2, 2, 2]`
60 | `[3, 2, 2, 2]`
120 | `[3, 3, 2, 2]`
240 | `[4, 4, 3, 2]`

The data loading and model training processes for these 4 models are separated into 4 sections below. To train a model, run all cells under its section and use the cells in the **"Helper Cells"** section for saving/loading model parameters.

In [None]:
import os    
# Configure Theano through the THEANO_FLAGS environment variable. This is set
# before `import theano` so the flags are already in place when Theano is
# imported: float32 as the default float type, the CUDA device, and verbose
# exception messages.
os.environ['THEANO_FLAGS'] = "floatX=float32,device=cuda,exception_verbosity=high"
import theano

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras.layers import Input, Dense, merge, Flatten, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import GlobalAveragePooling2D
from keras.models import Model
from keras.utils.visualize_util import plot
from keras.models import load_model
import matplotlib.pyplot as plt
import pickle

## Define model

The function below provides a "template" for defining the CNN models used as local feature extractors. All 4 CNN models are identical except for the input size and the horizontal pooling sizes.

In [None]:
def gen_model_3(seg_length, pool_sizes_hori, n_classes=10):
    '''
    Build the 4-conv-layer CNN used as a local feature extractor for one scale.

    Every conv stage is: 32 filters of size 3x3 ('same' border mode,
    'he_normal' init) -> ELU -> max pooling -> dropout(0.2). The last conv
    stage has no dropout. The conv stack is followed by Flatten, a 64-unit
    ReLU dense layer with dropout(0.2) and a softmax output layer.

    Conv layers are named 'conv<i>_<seg_length>' (i = 1..4) so their weights
    can later be loaded by layer name.

    Args:
        seg_length: size of the last input axis; the model input shape is
            (1, 96, seg_length).
            NOTE(review): presumably (channels, mel bins, time frames) in
            channels-first ordering -- confirm against the dataset.
        pool_sizes_hori: sequence of 4 pooling sizes, one per conv layer,
            applied along the last (horizontal) axis.
        n_classes: number of units of the softmax output layer (default 10).

    Returns:
        An uncompiled keras Model.

    Raises:
        ValueError: if pool_sizes_hori does not contain exactly one entry
            per conv layer.
    '''
    # Pooling sizes along the vertical (96-sized) axis are the same for all scales.
    pool_sizes_vert = (2, 3, 2, 2)

    # Validate before building anything: a short list would otherwise fail
    # with an IndexError halfway through, and extra entries would be ignored.
    pool_sizes_hori = list(pool_sizes_hori)
    if len(pool_sizes_hori) != len(pool_sizes_vert):
        raise ValueError(
            'pool_sizes_hori must have {} entries (one per conv layer), got {}'
            .format(len(pool_sizes_vert), len(pool_sizes_hori)))

    # input
    x = Input(shape=(1, 96, seg_length))

    # conv stages 1..4
    n_layers = len(pool_sizes_vert)
    feat = x
    for idx, (pool_v, pool_h) in enumerate(zip(pool_sizes_vert, pool_sizes_hori), start=1):
        feat = Convolution2D(32, 3, 3, border_mode='same', init='he_normal',
                             name='conv{}_{}'.format(idx, seg_length))(feat)
        feat = keras.layers.advanced_activations.ELU(alpha=1.0)(feat)
        feat = MaxPooling2D(pool_size=(pool_v, pool_h))(feat)
        # No dropout after the last conv stage (dropout is applied after the dense layer instead).
        if idx < n_layers:
            feat = Dropout(0.2)(feat)

    # Flatten the output of the last conv layer (conv4)
    flat = Flatten()(feat)

    # Dense layer # 1
    dense1 = Dense(64, activation='relu')(flat)
    dense1 = Dropout(0.2)(dense1)

    # output layer
    out = Dense(n_classes, activation='softmax')(dense1)

    # define model
    model = Model(input=x, output=out)

    return model

## Helper Cells

Cells in this section are named "helper cells" because functions such as save/load model, save/load model weights, and save/load model training histories are all implemented here. 

Before / after training of each model, run cells below for saving / loading the data as needed.

In [None]:
### Save Model (for continue training) ###
# Create the target directory on first use so a finished training run is not
# lost because the folder is missing.
if not os.path.exists('./models/'):
    os.mkdir('./models/')
# Writes the whole model to the HDF5 file ./models/cnn_<seg_length>.h5
model.save('./models/cnn_{}.h5'.format(seg_length))
# del model  # deletes the existing model

In [None]:
### Load previously trained model ###
# model = load_model('./models/cnn_{}.h5'.format(seg_length))

In [None]:
### Save Training History (for plotting) ###
import datetime
import pickle

# Create the log directory on first use; open() below would otherwise fail
# when ./train_logs/ does not exist yet.
if not os.path.exists('./train_logs/'):
    os.mkdir('./train_logs/')

# Timestamp in the file name keeps histories of successive runs apart.
now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
with open('./train_logs/train_his_{}_{}'.format(seg_length, now), 'wb') as file_pi:
    # model_his is the object returned by the last fit() call; only its
    # `history` attribute is pickled.
    pickle.dump(model_his.history, file_pi)

In [None]:
### Save Model weights (for future transfer) ###
# Make sure the output folder is there, then write the weights of the current
# `model` to a file keyed by the current seg_length.
weights_dir = './weights/'
if not os.path.exists(weights_dir):
    os.mkdir(weights_dir)
model.save_weights('./weights/local_cnn_4_{}.h5'.format(seg_length))

In [None]:
### Load Model weights ###
# The file name matches the one written by the "Save Model weights" cell
# ('local_cnn_4_<seg_length>.h5'). No directory is created here: when loading,
# a missing ./weights/ folder means there is nothing to load.
# by_name=True loads weights into layers by matching layer names.
model.load_weights('./weights/local_cnn_4_{}.h5'.format(seg_length), by_name=True)

## Train Model with seg_length 30

In [None]:
# Model parameters
seg_length = 30
pool_sizes_hori = [2, 2, 2, 2] # sizes of pooling layers (in horizontal direction)

# Load Training Data
# Labels are stored as class indices; they are expanded to one-hot rows with
# 10 columns (one per class) by setting column `label` of each row to 1.
X_train = np.load('./dataset/X_train_seg{}.npy'.format(seg_length))
Y_train_pre = np.load('./dataset/Y_train_seg{}.npy'.format(seg_length)).astype(int)
Y_train = np.zeros((Y_train_pre.shape[0], 10))
Y_train[np.arange(Y_train_pre.shape[0]), Y_train_pre] = 1

# Load Test Data (same one-hot expansion as for the training labels)
X_test = np.load('./dataset/X_test_seg{}.npy'.format(seg_length))
Y_test_pre = np.load('./dataset/Y_test_seg{}.npy'.format(seg_length)).astype(int)
Y_test = np.zeros((Y_test_pre.shape[0], 10))
Y_test[np.arange(Y_test_pre.shape[0]), Y_test_pre] = 1

# Generate model
model_30 = gen_model_3(seg_length, pool_sizes_hori)
# The cells in the "Helper Cells" section (save/load model and weights) operate
# on the name `model`, as do the other training sections; bind it to this
# model too so the helpers act on the model built here.
model = model_30

# # Load Model Weights
# model_30.load_weights('./weights/local_cnn_{}.h5'.format(seg_length), by_name=True)

# Compile model
model_30.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Fit the seg_length-30 model. The test split is passed as validation data,
# and the object returned by fit() is kept in `model_his` for the
# "Save Training History" helper cell.
model_his = model_30.fit(
    X_train,
    Y_train,
    batch_size=256,
    nb_epoch=200,
    validation_data=(X_test, Y_test),
)

## Train Model with seg_length 60

In [None]:
# Scale-specific settings
seg_length = 60
# NOTE(review): the table in the introduction lists [3, 2, 2, 2] for segment
# length 60, while this cell uses the same sizes as the seg_length 120
# section. Confirm which one is intended before relying on either.
pool_sizes_hori = [3, 3, 2, 2]  # horizontal pooling size of each conv layer

# Training split: features, integer labels, and labels expanded to one-hot
# rows (10 columns, one per class).
X_train = np.load('./dataset/X_train_seg{}.npy'.format(seg_length))
Y_train_pre = np.load('./dataset/Y_train_seg{}.npy'.format(seg_length)).astype(int)
Y_train = np.zeros((len(Y_train_pre), 10))
Y_train[np.arange(len(Y_train_pre)), Y_train_pre] = 1

# Test split, prepared the same way
X_test = np.load('./dataset/X_test_seg{}.npy'.format(seg_length))
Y_test_pre = np.load('./dataset/Y_test_seg{}.npy'.format(seg_length)).astype(int)
Y_test = np.zeros((len(Y_test_pre), 10))
Y_test[np.arange(len(Y_test_pre)), Y_test_pre] = 1

# Build the network for this scale and compile it for training
model = gen_model_3(seg_length, pool_sizes_hori)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the seg_length-60 model. The test split is passed as validation data,
# and the object returned by fit() is kept in `model_his` for the
# "Save Training History" helper cell.
model_his = model.fit(
    X_train,
    Y_train,
    batch_size=256,
    nb_epoch=300,
    validation_data=(X_test, Y_test),
)

## Train Model with seg_length 120

In [None]:
# Scale-specific settings
seg_length = 120
pool_sizes_hori = [3, 3, 2, 2]  # horizontal pooling size of each conv layer

# Training split: features, integer labels, and labels expanded to one-hot
# rows (10 columns, one per class).
X_train = np.load('./dataset/X_train_seg{}.npy'.format(seg_length))
Y_train_pre = np.load('./dataset/Y_train_seg{}.npy'.format(seg_length)).astype(int)
Y_train = np.zeros((len(Y_train_pre), 10))
Y_train[np.arange(len(Y_train_pre)), Y_train_pre] = 1

# Test split, prepared the same way
X_test = np.load('./dataset/X_test_seg{}.npy'.format(seg_length))
Y_test_pre = np.load('./dataset/Y_test_seg{}.npy'.format(seg_length)).astype(int)
Y_test = np.zeros((len(Y_test_pre), 10))
Y_test[np.arange(len(Y_test_pre)), Y_test_pre] = 1

# Build the network for this scale
model = gen_model_3(seg_length, pool_sizes_hori)

# Alternative: resume from a model saved by the "Save Model" helper cell
# model = load_model('./models/cnn_{}.h5'.format(seg_length))

# Compile it for training
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the seg_length-120 model. The test split is passed as validation data,
# and the object returned by fit() is kept in `model_his` for the
# "Save Training History" helper cell.
model_his = model.fit(
    X_train,
    Y_train,
    batch_size=256,
    nb_epoch=400,
    validation_data=(X_test, Y_test),
)

## Train Model with seg_length 240

In [None]:
# Scale-specific settings
seg_length = 240
pool_sizes_hori = [4, 4, 3, 2]  # horizontal pooling size of each conv layer

# Training split: features, integer labels, and labels expanded to one-hot
# rows (10 columns, one per class).
X_train = np.load('./dataset/X_train_seg{}.npy'.format(seg_length))
Y_train_pre = np.load('./dataset/Y_train_seg{}.npy'.format(seg_length)).astype(int)
Y_train = np.zeros((len(Y_train_pre), 10))
Y_train[np.arange(len(Y_train_pre)), Y_train_pre] = 1

# Test split, prepared the same way
X_test = np.load('./dataset/X_test_seg{}.npy'.format(seg_length))
Y_test_pre = np.load('./dataset/Y_test_seg{}.npy'.format(seg_length)).astype(int)
Y_test = np.zeros((len(Y_test_pre), 10))
Y_test[np.arange(len(Y_test_pre)), Y_test_pre] = 1

# Build the network for this scale and compile it for training
model = gen_model_3(seg_length, pool_sizes_hori)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the seg_length-240 model. The test split is passed as validation data,
# and the object returned by fit() is kept in `model_his` for the
# "Save Training History" helper cell.
model_his = model.fit(
    X_train,
    Y_train,
    batch_size=256,
    nb_epoch=400,
    validation_data=(X_test, Y_test),
)