In [12]:
import warnings
import numpy as np
import os
import itertools
import sys
import matplotlib.pyplot as plt

from collections import OrderedDict
from sklearn.metrics import confusion_matrix
from copy import copy

import tensorflow as tf

from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Turn off TF2 eager execution through the tf.compat.v1 API before any model is built.
tf.compat.v1.disable_eager_execution()

# Destination .npy files for the per-split feature arrays that the
# feature-export cell writes with np.save.
TRAIN_FEATURES_PATH = '/home/jaehwlee/Genre_classification/GNN/feature_data/X_train_features.npy'
VALID_FEATURES_PATH = '/home/jaehwlee/Genre_classification/GNN/feature_data/X_valid_features.npy'
TEST_FEATURES_PATH = '/home/jaehwlee/Genre_classification/GNN/feature_data/X_test_features.npy'

# Parameters
l2_reg = 5e-4         # L2 regularization rate
learning_rate = 1e-3  # Learning rate for the optimizer
batch_size = 128       # Batch size passed to model.fit / model.evaluate
epochs = 100       # Number of training epochs
es_patience = 10      # Patience for early stopping


In [13]:
# confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a one-line banner and draw the confusion matrix on the current
    matplotlib figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are labelled as true classes and columns as
        predicted classes on the plot.
    classes : sequence
        Tick labels; ``len(classes)`` determines the tick positions.
    normalize : bool, default False
        When True, every row is divided by its own sum before plotting.
        Rows whose sum is zero (a class with no true samples) are drawn as
        all zeros instead of NaN.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap handed to ``plt.imshow``.

    Returns
    -------
    None
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guarded division: empty rows stay at 0.0 rather than producing
        # NaN cells and a divide-by-zero RuntimeWarning.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # The 'd' format only accepts integers; any float matrix (normalized or
    # passed in as float) is rendered with two decimals instead of raising.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are included in it.
    plt.tight_layout()
    
# majority vote
def majority_vote(scores):
    """
    Return the value that occurs most often in `scores`.

    `np.unique` flattens the input and returns its distinct values in sorted
    order, and `argmax` picks the first maximum, so a tie is resolved in
    favour of the smallest value.
    """
    distinct, frequency = np.unique(scores, return_counts=True)
    return distinct[frequency.argmax()]


In [14]:
# load data
rule = '============================================================================'
print(rule)
print('Loading dataset...')
print(rule)

# Model inputs for the train / validation / test splits.
X_train = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/X_train.npy')
X_valid = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/X_valid.npy')
X_test = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/X_test.npy')

# Matching label arrays for the same three splits.
y_train = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/y_train.npy')
y_valid = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/y_valid.npy')
y_test = np.load('/home/jaehwlee/Genre_classification/GNN/mel_data/y_test.npy')

song_samples = 660000

# Genre name -> integer class index (the index is the position in this list).
genres = {
    name: index
    for index, name in enumerate([
        'metal', 'disco', 'classical', 'hiphop', 'jazz',
        'country', 'pop', 'blues', 'reggae', 'rock',
    ])
}

# Report the shape of every array: inputs first, then labels.
for loaded in (X_train, X_valid, X_test, y_train, y_valid, y_test):
    print(loaded.shape)

print(rule)
print('complete!')
print(rule)

Loading dataset...
(12160, 256, 256, 1)
(3040, 256, 256, 1)
(3800, 256, 256, 1)
(12160, 10)
(3040, 10)
(3800, 10)
complete!


In [15]:
def feature_extract_model(input_shape=(256, 256, 1), num_classes=10):
    """
    Build the CNN classifier whose pooled activations are later exported as
    features.

    Architecture: three stages of two 3x3 ReLU convolutions followed by 2x2
    max-pooling (16, 32 and 64 filters), two 3x3 ReLU convolutions with 128
    filters, global average pooling, and a softmax Dense head.

    Parameters
    ----------
    input_shape : tuple, default (256, 256, 1)
        Shape of one input sample, without the batch dimension.
    num_classes : int, default 10
        Number of units in the softmax output layer.

    Returns
    -------
    tensorflow.keras.Model
        The uncompiled model. Its pooling layer is always named
        'global_average_pooling2d'.
    """
    input_mel = Input(shape=input_shape)

    x = input_mel
    # Layers are created in the same order as a hand-unrolled stack, so the
    # auto-generated conv / pool layer names are unaffected by the loop.
    for filters in (16, 32, 64):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)

    # Explicit name: the feature-export cell fetches this layer with
    # get_layer('global_average_pooling2d'). An auto-generated name gains a
    # '_1', '_2', ... suffix whenever the model is rebuilt in the same kernel,
    # which would break that lookup.
    x = GlobalAveragePooling2D(name='global_average_pooling2d')(x)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=input_mel, outputs=out)
    return model

In [None]:
# Build and compile the classifier.
model = feature_extract_model()
validation_data = (X_valid, y_valid)
# Pass the configured learning rate explicitly; with optimizer='adam' the
# `learning_rate` parameter defined in the configuration cell was ignored.
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['acc'])

# Keep only the weights of the epoch with the highest validation accuracy.
mc = ModelCheckpoint('128test.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True, save_weights_only=True)

# Shrink the learning rate by 5% when validation loss stalls for 3 epochs.
rl = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=3, verbose=1, mode='min', min_delta=0.0001, cooldown=2, min_lr=1e-5)
callback_list = [mc, rl]

model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=validation_data,
          epochs=epochs,
          callbacks=callback_list)

# Save the last-epoch model under its own file name. Writing it to
# '128test.h5' would overwrite the best-val_acc checkpoint produced by
# ModelCheckpoint, which the feature-export cell reloads via load_weights.
model.save('128test_final.h5')

# Evaluate model (the in-memory, last-epoch weights).
print('Evaluating model.')
eval_results = model.evaluate(X_test,
                              y_test,
                              batch_size=batch_size)
print('Done.\n'
      'Test loss: {}\n'
      'Test acc: {}'.format(*eval_results))

Train on 12160 samples, validate on 3040 samples
Epoch 1/100
Epoch 00001: val_acc improved from -inf to 0.38125, saving model to 128test.h5
Epoch 2/100
Epoch 00002: val_acc improved from 0.38125 to 0.49737, saving model to 128test.h5
Epoch 3/100
Epoch 00003: val_acc improved from 0.49737 to 0.58257, saving model to 128test.h5
Epoch 4/100
Epoch 00004: val_acc improved from 0.58257 to 0.60033, saving model to 128test.h5
Epoch 5/100
Epoch 00005: val_acc improved from 0.60033 to 0.63750, saving model to 128test.h5
Epoch 6/100
Epoch 00006: val_acc improved from 0.63750 to 0.66217, saving model to 128test.h5
Epoch 7/100
Epoch 00007: val_acc improved from 0.66217 to 0.66513, saving model to 128test.h5

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0009500000451225787.
Epoch 8/100
Epoch 00008: val_acc improved from 0.66513 to 0.68783, saving model to 128test.h5
Epoch 9/100
Epoch 00009: val_acc improved from 0.68783 to 0.70428, saving model to 128test.h5
Epoch 10/100
Epoch 00010: v

Epoch 28/100
Epoch 00028: val_acc improved from 0.74375 to 0.75362, saving model to 128test.h5
Epoch 29/100
Epoch 00029: val_acc did not improve from 0.75362
Epoch 30/100
Epoch 00030: val_acc did not improve from 0.75362
Epoch 31/100
Epoch 00031: val_acc did not improve from 0.75362

Epoch 00031: ReduceLROnPlateau reducing learning rate to 0.0006983372120885178.
Epoch 32/100
Epoch 00032: val_acc improved from 0.75362 to 0.75592, saving model to 128test.h5
Epoch 33/100
Epoch 00033: val_acc did not improve from 0.75592
Epoch 34/100
Epoch 00034: val_acc did not improve from 0.75592
Epoch 35/100
Epoch 00035: val_acc improved from 0.75592 to 0.75888, saving model to 128test.h5

Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.0006634203542489559.
Epoch 36/100
Epoch 00036: val_acc did not improve from 0.75888
Epoch 37/100

In [None]:
# Restore the weights stored in 128test.h5 into the trained classifier.
model.load_weights("128test.h5")

# Find the pooling layer by type instead of by its auto-generated name:
# Keras appends '_1', '_2', ... to default layer names when the model is
# rebuilt in the same kernel, so a fixed-name lookup fails after a re-run.
gap_layer = next(layer for layer in model.layers
                 if isinstance(layer, GlobalAveragePooling2D))

# Truncated model: same input, output taken at the global-average-pooling layer.
backbone_model = Model(inputs=model.input, outputs=gap_layer.output)
X_train_features = backbone_model.predict(X_train)
X_valid_features = backbone_model.predict(X_valid)
X_test_features = backbone_model.predict(X_test)

for features_path, features in ((TRAIN_FEATURES_PATH, X_train_features),
                                (VALID_FEATURES_PATH, X_valid_features),
                                (TEST_FEATURES_PATH, X_test_features)):
    # Create the target directory first so np.save cannot fail on a missing
    # folder after the predictions have already been computed.
    features_dir = os.path.dirname(features_path)
    if features_dir:
        os.makedirs(features_dir, exist_ok=True)
    np.save(features_path, features)

print('save complete')