In [48]:
from __future__ import print_function
from __future__ import absolute_import

import warnings
import numpy as np
import os
import itertools
import sys
import matplotlib.pyplot as plt
from collections import OrderedDict
from sklearn.metrics import confusion_matrix
from copy import copy

import tensorflow as tf

from tensorflow.keras.utils import Sequence
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model, Sequential

from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Activation, AveragePooling2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Input, Lambda, BatchNormalization, Reshape, Permute
from tensorflow.keras.layers import Concatenate, concatenate, Add, add, Multiply, multiply

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.models import load_model

from tensorflow.keras import backend as K

NUM_OF_GENRES = 10
BATCH_SIZE = 128

In [49]:
# data generator
class GTZANGenerator(Sequence):
    def __init__(self, X, y, batch_size=BATCH_SIZE, is_test = False):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.is_test = is_test
    
    def __len__(self):
        return int(np.ceil(len(self.X)/self.batch_size))
    
    def __getitem__(self, index):
        # Get batch indexes
        signals = self.X[index*self.batch_size:(index+1)*self.batch_size]

        # Apply data augmentation
        if not self.is_test:
            pass
            #signals = self.__augment(signals)
            
        return signals, self.y[index*self.batch_size:(index+1)*self.batch_size]
    
    def __augment(self, signals, hor_flip = 0.5, random_cutout = 0.5):
        spectrograms =  []
        for s in signals:
            signal = copy(s)
            
            # Perform horizontal flip
            #if np.random.rand() < hor_flip:
                #signal = np.flip(signal, 1)

            # Perform random cutoout of some frequency/time
            if np.random.rand() < random_cutout:
                lines = np.random.randint(signal.shape[0], size=3)
                cols = np.random.randint(signal.shape[0], size=4)
                signal[lines, :, :] = -80 # dB
                signal[:, cols, :] = -80 # dB

            spectrograms.append(signal)
        return np.array(spectrograms)
    
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.X))
        np.random.shuffle(self.indexes)
        return None

In [50]:
inp = Input( shape=(256,256,1) )
emb = inp
filt = Conv2D( 16, kernel_size=(256,32), strides=(256,8), use_bias=True, activation='relu', padding='same')(emb)
filt = Conv2D( 32, kernel_size=(256,32), strides=(256,8), use_bias=True, activation='relu', padding='same')(emb)
# filt = MaxPooling2D(pool_size=(2,2))(filt)
# filt = Dropout(0.25)(filt)

# filt = Conv2D( 64, kernel_size=(128,16), strides=(128,4), use_bias=True, activation='relu', padding='same')(emb)
# filt = Conv2D( 128, kernel_size=(128,16), strides=(128,4), use_bias=True, activation='relu', padding='same')(emb)
# filt = MaxPooling2D(pool_size=(2,2), strides=(2,2))(filt)
# filt = Dropout(0.25)(filt)

# filt = Conv2D( 256, kernel_size=(64,4), strides=(64,1), use_bias=True, activation='relu', padding='same')(emb)
# filt = Conv2D( 512, kernel_size=(64,4), strides=(64,1), use_bias=True, activation='relu', padding='same')(emb)
# filt = MaxPooling2D(pool_size=(2,2), strides=(2,2))(filt)
# filt = Dropout(0.25)(filt)

# feat = Flatten()(filt)
# oup = Dense( num_genres, activation='softmax')(feat)

model = Model(inp, filt)
model.summary()

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 256, 256, 1)]     0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 1, 32, 32)         262176    
Total params: 262,176
Trainable params: 262,176
Non-trainable params: 0
_________________________________________________________________


In [51]:
def create_model(input_shape, num_genres):
    inp = Input( shape=input_shape )
    emb = inp
    filt = Conv2D( 128, kernel_size=(12,1396), strides=(3,1396), use_bias=True, activation='relu', padding='valid')(emb)
    attn = Conv2D( 128, kernel_size=(12,1396), strides=(3,1396), use_bias=True, activation='sigmoid', padding='valid')(emb)
    
    gated = Multiply()([filt, attn])
    feat = Flatten()(gated)
    oup = Dense( num_genres, activation='softmax')(feat)
    
    model = Model(inp, oup)
    
    return model

In [52]:
# confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
# majority vote
def majority_vote(scores):
    values, counts = np.unique(scores,return_counts=True)
    ind = np.argmax(counts)
    return values[ind]


In [53]:
# load data

print('============================================================================')
print('Loading dataset...')
print('============================================================================')
X_train = np.load('/home/jaehwlee/Genre_classification/GNN/1396mel_data/1396X_train.npy')
X_valid = np.load('/home/jaehwlee/Genre_classification/GNN/1396mel_data/1396X_valid.npy')
X_test = np.load('/home/jaehwlee/Genre_classification/GNN/1396mel_data/1396X_test.npy')
y_train = np.load('/home/jaehwlee/Genre_classification/GNN/1396mel_data/1396y_train.npy')
y_valid = np.load('/home/jaehwlee/Genre_classification/GNN/1396mel_data/1396y_valid.npy')
y_test = np.load('/home/jaehwlee/Genre_classification/GNN/1396mel_data/1396y_test.npy')

# X_train = np.load('/home/jaehwlee/SE-ResNeXt/mel_data/0519X_train.npy')
# X_test =np.load('/home/jaehwlee/SE-ResNeXt/mel_data/0519X_test.npy')
# y_train = np.load('/home/jaehwlee/SE-ResNeXt/mel_data/0519y_train.npy')
# y_test = np.load('/home/jaehwlee/SE-ResNeXt/mel_data/0519y_test.npy')

song_samples = 660000
genres = {'metal': 0, 'disco': 1, 'classical': 2, 'hiphop': 3, 'jazz': 4, 
          'country': 5, 'pop': 6, 'blues': 7, 'reggae': 8, 'rock': 9}

print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)

print(y_train.shape)
print(y_valid.shape)
print(y_test.shape)

print('============================================================================')
print('complete!')
print('============================================================================')

Loading dataset...
(640, 96, 1366, 1)
(160, 96, 1366, 1)
(200, 96, 1366, 1)
(640,)
(160,)
(200,)
complete!


In [54]:
# callback function
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.95,
    patience=3,
    verbose=1,
    mode='min',
    min_delta=0.0001,
    cooldown=2,
    min_lr=1e-5
)

mc = ModelCheckpoint('0707_mel.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True, save_weights_only=True)

rl = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=3, verbose=1, mode='min', min_delta=0.0001, cooldown=2, min_lr=1e-5)
callback_list = [mc,rl]

In [55]:
# data generating
batch_size = BATCH_SIZE
#train_generator = GTZANGenerator(X_train, y_train)
#steps_per_epoch = np.ceil(len(X_train)/batch_size)

#validation_generator = GTZANGenerator(X_valid, y_valid)
#val_steps = np.ceil(len(X_test)/batch_size)

In [56]:
# compile model and summary
model = create_model(X_train[0].shape, NUM_OF_GENRES)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
#model.summary()

ValueError: Negative dimension size caused by subtracting 1396 from 1366 for 'conv2d_19/Conv2D' (op: 'Conv2D') with input shapes: [?,96,1366,1], [12,1396,1,128].

In [None]:
# fit model
# data generating
# batch_size = BATCH_SIZE
# train_generator = GTZANGenerator(X_train, y_train)
# steps_per_epoch = np.ceil(len(X_train)/batch_size)

# validation_generator = GTZANGenerator(X_valid, y_valid)
# val_steps = np.ceil(len(X_test)/batch_size)

# hist = model.fit_generator(
#     train_generator,
#     steps_per_epoch=steps_per_epoch,
#     validation_data=validation_generator,
#     validation_steps=val_steps,
#     epochs=150,
#     verbose=1,
#     callbacks=[mc,rl])

model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100, callbacks=[mc,rl])

model.save('0707_mel.h5')

score = model.evaluate(X_test, y_test, verbose=0)
print("val_loss = {:.3f} and val_acc = {:.3f}".format(score[0], score[1]))

In [None]:
# plot confuison matrix
preds = np.argmax(model.predict(X_test), axis = 1)
y_orig = np.argmax(y_test, axis = 1)
cm = confusion_matrix(preds, y_orig)

keys = OrderedDict(sorted(genres.items(), key=lambda t: t[1])).keys()

plt.figure(figsize=(10,10))
plot_confusion_matrix(cm, keys, normalize=True)

In [None]:
# majority voting
preds = model.predict(X_test, batch_size=BATCH_SIZE, verbose=0)

scores_songs = np.split(np.argmax(preds, axis=1), 300)
scores_songs = [majority_vote(scores) for scores in scores_songs]

label = np.split(np.argmax(y_test, axis=1), 300)
label = [majority_vote(l) for l in label]

from sklearn.metrics import accuracy_score

print("majority voting system (acc) = {:.3f}".format(accuracy_score(label, scores_songs)))