In [31]:
import warnings
import numpy as np
import os
import itertools
import sys
import matplotlib.pyplot as plt

from collections import OrderedDict
from sklearn.metrics import confusion_matrix
from copy import copy

import tensorflow as tf

from tensorflow.keras import Input, Model
from tensorflow.keras import tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

from spektral.datasets import mnist
from spektral.layers import GraphConv
from spektral.layers.ops import sp_matrix_to_sp_tensor

tf.compat.v1.disable_eager_execution()

# Parameters
l2_reg = 5e-4         # Regularization rate for l2
learning_rate = 1e-3  # Learning rate for SGD
batch_size = 32       # Batch size
epochs = 1000       # Number of training epochs
es_patience = 10      # Patience fot early stopping


In [None]:
# data generator
class GTZANGenerator(Sequence):
    def __init__(self, X, y, batch_size=BATCH_SIZE, is_test = False):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.is_test = is_test
    
    def __len__(self):
        return int(np.ceil(len(self.X)/self.batch_size))
    
    def __getitem__(self, index):
        # Get batch indexes
        signals = self.X[index*self.batch_size:(index+1)*self.batch_size]

        # Apply data augmentation
        if not self.is_test:
            pass
            #signals = self.__augment(signals)
            
        return signals, self.y[index*self.batch_size:(index+1)*self.batch_size]
    
    def __augment(self, signals, hor_flip = 0.5, random_cutout = 0.5):
        spectrograms =  []
        for s in signals:
            signal = copy(s)
        
            # Perform random cutoout of some frequency/time
            if np.random.rand() < random_cutout:
                lines = np.random.randint(signal.shape[0], size=3)
                cols = np.random.randint(signal.shape[0], size=4)
                signal[lines, :, :] = -80 # dB
                signal[:, cols, :] = -80 # dB

            spectrograms.append(signal)
        return np.array(spectrograms)
    
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.X))
        np.random.shuffle(self.indexes)
        return None

In [32]:
# confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
# majority vote
def majority_vote(scores):
    values, counts = np.unique(scores,return_counts=True)
    ind = np.argmax(counts)
    return values[ind]


In [33]:
# load data
print('============================================================================')
print('Loading dataset...')
print('============================================================================')
X_train = np.load('/home/jaehwlee/Genre_classification/GNN/feature_data/X_train_features.npy')
X_valid = np.load('/home/jaehwlee/Genre_classification/GNN/feature_data/X_valid_features.npy')
X_test = np.load('/home/jaehwlee/Genre_classification/GNN/feature_data/X_test_features.npy')
y_train = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/y_train.npy')
y_valid = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/y_valid.npy')
y_test = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/y_test.npy')

A = np.ones((128,128))

X_train, X_valid, X_test = X_train[..., None], X_valid[..., None], X_test[..., None]

song_samples = 660000
genres = {'metal': 0, 'disco': 1, 'classical': 2, 'hiphop': 3, 'jazz': 4, 
          'country': 5, 'pop': 6, 'blues': 7, 'reggae': 8, 'rock': 9}

print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)

print(y_train.shape)
print(y_valid.shape)
print(y_test.shape)

print(A.shape)

print('============================================================================')
print('complete!')
print('============================================================================')

Loading dataset...
(12160, 128, 1)
(3040, 128, 1)
(3800, 128, 1)
(12160, 10)
(3040, 10)
(3800, 10)
(128, 128)
complete!


In [34]:
def gcn_model():
    # Model definition
    N = X_train.shape[-2]
    F = X_train.shape[-1]

    n_out = 10

    fltr = GraphConv.preprocess(A)
    X_in = Input(shape=(N, F))
    # Pass A as a fixed tensor, otherwise Keras will complain about inputs of
    # different rank.
    A_in = Input(tensor=sp_matrix_to_sp_tensor(fltr))

    graph_conv = GraphConv(32,
                           activation='elu',
                           kernel_regularizer=l2(l2_reg))([X_in, A_in])
    graph_conv = GraphConv(32,
                           activation='elu',
                           kernel_regularizer=l2(l2_reg))([graph_conv, A_in])
    flatten = Flatten()(graph_conv)
    fc = Dense(512, activation='relu')(flatten)
    output = Dense(n_out, activation='softmax')(fc)

    # Build model
    model = Model(inputs=[X_in, A_in], outputs=output)
    
    return model

In [None]:
train_generator = GTZANGenerator(X_train, y_train)
steps_per_epoch = np.ceil(len(X_train)/batch_size)

validation_generator = GTZANGenerator(X_valid, y_valid)
val_steps = np.ceil(len(X_test)/batch_size)

In [35]:
model = gcn_model()
optimizer = Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['acc'])

model.summary()



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            [(None, 128, 1)]     0                                            
____________________________________________________________________________________

In [36]:
mc = ModelCheckpoint('gtzan_gcn.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True, save_weights_only=True)

rl = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=3, verbose=1, mode='min', min_delta=0.0001, cooldown=2, min_lr=1e-5)
callback_list = [mc,rl]

hist = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=val_steps,
    epochs=150,
    verbose=1,
    callbacks=[mc,rl])

# Evaluate model
print('Evaluating model.')
eval_results = model.evaluate(X_test,
                              y_test,
                              batch_size=batch_size)
print('Done.\n'
      'Test loss: {}\n'
      'Test acc: {}'.format(*eval_results))

Train on 12160 samples, validate on 3040 samples
Epoch 1/1000
Epoch 00001: val_acc improved from -inf to 0.23355, saving model to gtzan_gcn.h5
Epoch 2/1000
Epoch 00002: val_acc improved from 0.23355 to 0.23980, saving model to gtzan_gcn.h5
Epoch 3/1000
Epoch 00003: val_acc did not improve from 0.23980
Epoch 4/1000
Epoch 00004: val_acc improved from 0.23980 to 0.24013, saving model to gtzan_gcn.h5

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0009500000451225787.
Epoch 5/1000
Epoch 00005: val_acc did not improve from 0.24013
Epoch 6/1000
Epoch 00006: val_acc did not improve from 0.24013
Epoch 7/1000
Epoch 00007: val_acc did not improve from 0.24013
Epoch 8/1000
Epoch 00008: val_acc did not improve from 0.24013

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0009025000152178108.
Epoch 9/1000
Epoch 00009: val_acc did not improve from 0.24013
Epoch 10/1000
Epoch 00010: val_acc did not improve from 0.24013
Epoch 11/1000
Epoch 00011: val_acc did not improve from 0.2

Epoch 28/1000
Epoch 00028: val_acc did not improve from 0.24967

Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0006983372120885178.
Epoch 29/1000
Epoch 00029: val_acc did not improve from 0.24967
Epoch 30/1000
Epoch 00030: val_acc did not improve from 0.24967
Epoch 31/1000
Epoch 00031: val_acc did not improve from 0.24967
Epoch 32/1000
Epoch 00032: val_acc did not improve from 0.24967

Epoch 00032: ReduceLROnPlateau reducing learning rate to 0.0006634203542489559.
Epoch 33/1000
Epoch 00033: val_acc did not improve from 0.24967
Epoch 34/1000
Epoch 00034: val_acc did not improve from 0.24967
Epoch 35/1000
Epoch 00035: val_acc did not improve from 0.24967
Epoch 36/1000
Epoch 00036: val_acc did not improve from 0.24967

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.0006302493420662358.
Epoch 37/1000
Epoch 00037: val_acc did not improve from 0.24967
Epoch 38/1000
Epoch 00038: val_acc did not improve from 0.24967
Epoch 39/1000
Epoch 00039: val_acc did not improve fr

Epoch 56/1000
Epoch 00056: val_acc did not improve from 0.24967

Epoch 00056: ReduceLROnPlateau reducing learning rate to 0.0004876748775132.
Epoch 57/1000
Epoch 00057: val_acc did not improve from 0.24967
Epoch 58/1000
Epoch 00058: val_acc did not improve from 0.24967
Epoch 59/1000
Epoch 00059: val_acc did not improve from 0.24967
Epoch 60/1000
Epoch 00060: val_acc did not improve from 0.24967

Epoch 00060: ReduceLROnPlateau reducing learning rate to 0.00046329112810781223.
Epoch 61/1000
Epoch 00061: val_acc did not improve from 0.24967
Epoch 62/1000
Epoch 00062: val_acc did not improve from 0.24967
Epoch 63/1000
Epoch 00063: val_acc did not improve from 0.24967
Epoch 64/1000
Epoch 00064: val_acc did not improve from 0.24967

Epoch 00064: ReduceLROnPlateau reducing learning rate to 0.00044012657308485355.
Epoch 65/1000
Epoch 00065: val_acc did not improve from 0.24967
Epoch 66/1000
Epoch 00066: val_acc did not improve from 0.24967
Epoch 67/1000
Epoch 00067: val_acc did not improve fro

Epoch 84/1000
Epoch 00084: val_acc did not improve from 0.24967

Epoch 00084: ReduceLROnPlateau reducing learning rate to 0.00034056155709549785.
Epoch 85/1000
Epoch 00085: val_acc did not improve from 0.24967
Epoch 86/1000
Epoch 00086: val_acc did not improve from 0.24967
Epoch 87/1000
Epoch 00087: val_acc did not improve from 0.24967
Epoch 88/1000
Epoch 00088: val_acc did not improve from 0.24967

Epoch 00088: ReduceLROnPlateau reducing learning rate to 0.00032353347924072293.
Epoch 89/1000
Epoch 00089: val_acc did not improve from 0.24967
Epoch 90/1000
Epoch 00090: val_acc did not improve from 0.24967
Epoch 91/1000
Epoch 00091: val_acc did not improve from 0.24967
Epoch 92/1000
Epoch 00092: val_acc did not improve from 0.24967

Epoch 00092: ReduceLROnPlateau reducing learning rate to 0.00030735681357327847.
Epoch 93/1000
Epoch 00093: val_acc did not improve from 0.24967
Epoch 94/1000
Epoch 00094: val_acc did not improve from 0.24967
Epoch 95/1000
Epoch 00095: val_acc did not improve

KeyboardInterrupt: 

In [None]:
# plot confuison matrix
preds = np.argmax(model.predict(X_test), axis = 1)
y_orig = np.argmax(y_test, axis = 1)
cm = confusion_matrix(preds, y_orig)

keys = OrderedDict(sorted(genres.items(), key=lambda t:t[1])).keys()

plt.figure(figsize=(10,10))
plot_confusion_matrix(cm, keys, normalize=True)

In [None]:
# majority voting
preds = model.predict(X_test, batch_size=batch_size, verbose=0)

scores_songs = np.split(np.argmax(preds, axis=1), 300)
scores_songs = [majority_vote(scores) for scores in scores_songs]

label = np.split(np.argmax(y_test, axis=1), 300)
label = [majority_vote(l) for l in label]

from sklearn.metrics import accuracy_score

print("majority voting system (acc) = {:.3f}".format(accuracy_score(label, scores_songs)))