## Paralleling Recurrent Convolutional Neural Network

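This notebook implements the Paralleling Recurrent Convolutional Neural Network (PRCNN) presented in Feng et al., "Music genre classification with paralleling recurrent convolutional neural network." A convolutional branch and a bidirectional-GRU branch process the same spectrogram in parallel, and their outputs are concatenated before a softmax classifier.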

In [10]:
import numpy as np

import keras
from keras.models import Model
from keras.layers import Input, Dense, Bidirectional, Dropout, Activation, GRU
from keras.layers import Conv2D, concatenate, MaxPooling2D, Flatten, Lambda
from keras import backend as K

from keras.optimizers import Adam, RMSprop

from sklearn.metrics import classification_report

## Retrieve all of the features and labels

In [3]:
# Open the .npz archive holding the train/test arrays. np.load returns a lazy
# NpzFile for .npz archives, so each array is only read when indexed by key.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code -- only load archives from a trusted source, and drop
# the flag if the stored arrays are plain numeric (TODO confirm).
npzfile = np.load('../data/melspects_128.npz', allow_pickle=True)

In [4]:
# Pull the train/test features and labels out of the loaded archive,
# in the same order as the names on the left-hand side.
X_train, y_train, X_test, y_test = (
    npzfile[key] for key in ('X_train', 'y_train', 'X_test', 'y_test')
)

In [5]:
# Sanity-check the dimensions of the training features: (n_samples, rows, columns).
# Presumably rows are mel bands and columns are time frames -- TODO confirm.
X_train.shape

(900, 128, 640)

## Building the Neural Net

In [6]:
def PRCNN(X_shape, nb_classes):
    """Build and compile the paralleling recurrent convolutional network (PRCNN).

    Two branches read the same input tensor in parallel:

    * a CNN branch made of five ``Conv2D`` (1x3 kernel, ReLU, valid padding)
      + ``MaxPooling2D`` stages whose final output is flattened, and
    * a recurrent branch that max-pools the input by (1, 2), drops the channel
      axis and feeds the result to a bidirectional ``GRU(128)`` whose forward
      and backward outputs are concatenated.

    The two feature vectors are concatenated and passed to a softmax layer.

    Parameters
    ----------
    X_shape : tuple
        Shape of a single input sample, including a trailing channel axis of
        size 1 (the recurrent branch squeezes that axis away).
    nb_classes : int
        Number of output classes, i.e. units of the softmax layer.

    Returns
    -------
    keras.models.Model
        Model compiled with categorical cross-entropy loss, the RMSprop
        optimizer (learning rate 0.0005) and the accuracy metric.

    Notes
    -----
    The layer summary is printed as a side effect.
    """
    # Input
    inputs = Input(shape=X_shape)

    # CNN block: (number of filters, square pool size) of each conv -> pool stage.
    cnn_stages = [(16, 2), (32, 2), (64, 2), (128, 4), (64, 4)]

    features = inputs
    for nb_filters, pool in cnn_stages:
        features = Conv2D(filters=nb_filters, kernel_size=(1, 3), strides=1,
                          padding='valid', activation='relu')(features)
        features = MaxPooling2D((pool, pool), strides=(pool, pool))(features)

    flatten1 = Flatten()(features)

    # BGRU-RNN block: halve the second spatial axis before the recurrent layer.
    pool21 = MaxPooling2D((1, 2), strides=(1, 2))(inputs)

    # Remove channel axis so we can pass into Bidirectional GRU layer
    squeezed = Lambda(lambda t: K.squeeze(t, axis=-1))(pool21)

    # Bidirectional GRU
    # NOTE(review): a Keras recurrent layer iterates over axis 1 of its input and
    # treats axis 2 as features, so the GRU steps over the first spatial axis of
    # the input -- confirm that this is the intended (time) axis.
    bigru_rnn = Bidirectional(GRU(128), merge_mode='concat')(squeezed)

    # Concat Output
    concat = concatenate([flatten1, bigru_rnn])

    # Softmax Output
    output = Dense(nb_classes, activation='softmax')(concat)

    model = Model(inputs=inputs, outputs=[output])

    # `learning_rate` replaces the deprecated `lr` argument name.
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=0.0005),
                  metrics=['accuracy'])

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would emit a stray "None").
    model.summary()
    return model

In [7]:
# Training hyper-parameters
nb_epochs = 25
batch_size = 16

# One output unit per distinct label found in the training set
nb_classes = len(np.unique(y_train))

# One-hot encode the labels: row i of the identity matrix encodes class i
# (assumes y_train is a 1-D array of integer ids in [0, nb_classes) -- TODO confirm)
y_train_one_hot = np.eye(nb_classes)[y_train]

# The convolution layers need a trailing channel axis
X_train_expanded = X_train[..., np.newaxis]
X_test_expanded = X_test[..., np.newaxis]

# Per-sample input shape (everything but the leading sample axis)
X_shape = X_train_expanded.shape[1:]

model = PRCNN(X_shape, nb_classes)

# NOTE(review): no random seed is set in the visible cells, so results will
# differ from run to run.
# Fit data to model
history = model.fit(
    X_train_expanded,
    y_train_one_hot,
    batch_size=batch_size,
    epochs=nb_epochs,
)

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 640, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 128, 638, 16) 64          input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 64, 319, 16)  0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 64, 317, 32)  1568        max_pooling2d_1[0][0]            
____________________________________________________________________________________________

In [13]:
# Per-class softmax scores for every test sample
preds = model.predict(X_test_expanded)

# Predicted label = index of the highest-scoring class in each row
y_pred = preds.argmax(axis=1)

In [14]:
# Per-class precision / recall / F1 on the held-out test set.
# Some classes are never predicted, which makes their precision undefined;
# zero_division=0 reports those as 0.0 explicitly instead of emitting an
# UndefinedMetricWarning. The reported numbers are unchanged.
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.23      0.67      0.34        12
           1       0.75      0.92      0.83        13
           2       0.00      0.00      0.00        10
           3       0.15      1.00      0.26         5
           4       0.00      0.00      0.00         6
           5       1.00      0.07      0.12        15
           6       0.58      0.70      0.64        10
           7       0.00      0.00      0.00         9
           8       0.00      0.00      0.00        14
           9       0.00      0.00      0.00         6

    accuracy                           0.33       100
   macro avg       0.27      0.34      0.22       100
weighted avg       0.34      0.33      0.24       100



  _warn_prf(average, modifier, msg_start, len(result))
