# 5 Convolutional Layers + 2 Fully-connected Layers

In this notebook, the entire neural network, consisting of 5 convolutional layers and 2 fully-connected layers, is defined using Keras.

Before running this notebook, **you must first run "`Convert GTZAN to npy.ipynb`" to prepare the data.**

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras.layers import Input, Dense, merge, Flatten, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import GlobalAveragePooling2D
from keras.models import Model
from kapre.time_frequency import Melspectrogram
from keras.utils.visualize_util import plot

Using Theano backend.


In [2]:
# PARAMETERS
# Audio geometry: the clip length is spelled out as rate x duration instead of a bare product.
SAMPLE_RATE = 12000  # same value that is passed as `sr` to the Melspectrogram layer
CLIP_SECONDS = 29    # presumably the clip duration in seconds -- TODO confirm against the conversion notebook
input_length = SAMPLE_RATE * CLIP_SECONDS  # length of "vectorized" music clip

# File locations (relative to the notebook's working directory)
PATH_WEIGHTS = './weights/weights_layer4_theano.hdf5'  # Path to pre-trained weights
PATH_X = './dataset/X.npy'  # array loaded as X
PATH_Y = './dataset/Y.npy'  # array loaded as Y_pre (labels before one-hot encoding)

## Load Dataset

In [3]:
# Load Dataset
# X is the model input array and Y_pre the per-sample labels; Y_pre is used below as a
# column index, so it must contain integer class ids.
X, Y_pre = np.load(PATH_X), np.load(PATH_Y)

# Encode Y_pre to one-hot(Y)
# The row count is derived from the label array instead of being hard-coded to 1000, so
# the cell keeps working if the dataset size changes. The 10 columns match the 10-unit
# softmax output of the model.
NUM_CLASSES = 10
num_samples = Y_pre.shape[0]
Y = np.zeros((num_samples, NUM_CLASSES))
Y[np.arange(num_samples), Y_pre] = 1

# Split Dataset (90% train + 10% test)
# A fixed random_state makes the split (and therefore every reported metric) reproducible
# under Restart Kernel -> Run All.
SPLIT_SEED = 42
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=SPLIT_SEED)

## Define Model

In [3]:
def _conv_block(tensor, index, pool_size):
    """Apply one frozen convolution stage and return its pooled map and global average.

    A stage is Convolution2D(32 filters, 3x3, 'same') -> BatchNormalization -> ELU ->
    MaxPooling2D, with GlobalAveragePooling2D applied to the pooled map.

    Layers are created in the same order as in the original hand-unrolled code.
    The two weight-bearing layers are named explicitly with the names a fresh kernel
    auto-generates (``convolution2d_1`` / ``batchnormalization_1`` are visible in the
    model summary output). Auto-generated names keep counting up when this cell is
    re-run in the same kernel, and ``load_weights(..., by_name=True)`` only fills
    layers whose names match, so without explicit names a re-run would leave these
    frozen layers at their random initialisation.

    Args:
        tensor: Keras tensor fed into the convolution.
        index: 1-based stage number, used to build the layer names.
        pool_size: (rows, cols) tuple passed to MaxPooling2D.

    Returns:
        Tuple ``(pooled, averaged)``: the max-pooled feature map (input to the next
        stage) and its global average (one value per filter).
    """
    stage = Convolution2D(32, 3, 3, border_mode='same', init='he_normal',
                          trainable=False,
                          name='convolution2d_%d' % index)(tensor)
    stage = BatchNormalization(axis=1, mode=2, trainable=False,
                               name='batchnormalization_%d' % index)(stage)
    stage = keras.layers.advanced_activations.ELU(alpha=1.0)(stage)
    stage = MaxPooling2D(pool_size=pool_size)(stage)
    return stage, GlobalAveragePooling2D()(stage)


# input: one channel of raw audio samples
x = Input(shape=(1, input_length))

# Fixed (non-trainable) mel-spectrogram front end computed inside the network
melgram = Melspectrogram(n_dft=512, n_hop=256, power_melgram=2.0,
                         trainable_kernel=False,
                         trainable_fb=False,
                         return_decibel_melgram=True,
                         sr=12000, n_mels=96,
                         fmin=0.0, fmax=6000,
                         name='melgram')(x)

# 5 conv stages; only the pooling window differs from stage to stage.
# conv1..conv5 / avg1..avg5 are kept as module-level names for any later cell that uses them.
conv1, avg1 = _conv_block(melgram, 1, (2, 4))
conv2, avg2 = _conv_block(conv1, 2, (3, 4))
conv3, avg3 = _conv_block(conv2, 3, (2, 5))
conv4, avg4 = _conv_block(conv3, 4, (2, 4))
conv5, avg5 = _conv_block(conv4, 5, (4, 4))

# Concatenate 5 intermediate outputs (5 stages x 32 filters = 160 features)
concatenated = merge([avg1, avg2, avg3, avg4, avg5], mode='concat', concat_axis=1)

# Fully-connected & dropout layers
# Dense layers also get the fresh-kernel names so that re-running this cell does not
# change which layers load_weights(by_name=True) can match.
dense1 = Dense(32, input_shape=[160], activation='relu', name='dense_1')(concatenated)
dr1 = Dropout(0.2)(dense1)
out = Dense(10, input_shape=[32], activation='softmax', name='dense_2')(dr1)

## Build Model

In [4]:
# Build Model
# Tie the raw-audio input tensor `x` to the softmax output tensor `out`.
model = Model(output=out, input=x)

In [5]:
# Print the per-layer table (output shape, parameter count, connections)
model.summary()

# Write a diagram of the layer graph to an image file
STRUCTURE_PNG = 'model.png'
plot(model, to_file=STRUCTURE_PNG)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 1, 348000)     0                                            
____________________________________________________________________________________________________
melgram (Melspectrogram)         (None, 1, 96, 1360)   287840      input_1[0][0]                    
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 32, 96, 1360)  320         melgram[0][0]                    
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 32, 96, 1360)  128         convolution2d_1[0][0]            
___________________________________________________________________________________________

In [None]:
# Load pre-trained weights.
# by_name=True matches layers by name, so only layers whose names appear in the
# weight file receive values.
model.load_weights(filepath=PATH_WEIGHTS, by_name=True)

In [None]:
# Compile model: categorical cross-entropy over the one-hot labels, Adam optimizer,
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Train Model

In [None]:
# Train Model
# NOTE(review): the held-out test split is also passed as validation data here and is
# evaluated on again afterwards -- confirm this is intended.
N_EPOCHS = 1500
model_his = model.fit(
    x=X_train,
    y=Y_train,
    nb_epoch=N_EPOCHS,
    validation_data=(X_test, Y_test),
)

## Evaluate Model

In [None]:
# Evaluate Model on the held-out split; the result is kept in `model_eval`.
model_eval = model.evaluate(x=X_test, y=Y_test)