# SKETCHBOOK 2
Using the sine spectrograms dataset to obtain the best architecture

In [None]:
import numpy as np
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
from keras import regularizers
from keras.layers.advanced_activations import LeakyReLU


# definition of the sparsity penalty
# the KL divergence describes the penalty term to be applied to the loss function
def KL(p, p_hat):
    """Return the KL divergence between Bernoulli(p) and Bernoulli(p_hat).

    Computes ``p * log(p / p_hat) + (1 - p) * log((1 - p) / (1 - p_hat))``
    with Keras backend ops.

    ``p_hat`` is clipped to ``[eps, 1 - eps]`` (``eps = K.epsilon()``) first,
    so a value of exactly 0 or 1, or outside the unit interval, cannot cause
    a division by zero or the log of a non-positive number.

    p     -- target value, expected to lie strictly between 0 and 1
    p_hat -- observed value (scalar or backend tensor)
    """
    eps = K.epsilon()
    p_hat = K.clip(p_hat, eps, 1. - eps)
    return (p * K.log(p / p_hat)) + ((1 - p) * K.log((1 - p) / (1 - p_hat)))

class SparseReg(regularizers.Regularizer):
    """Activity regularizer that adds a KL-divergence sparsity penalty.

    The penalty is ``beta * KL(p, p_hat)``, where ``p_hat`` is the mean of
    the tensor the regularizer is called on.

    p     -- target mean activation
    beta  -- weight of the penalty term
    p_hat -- kept only for backward compatibility of the constructor; the
             penalty is always computed from the tensor given to __call__
    """

    def __init__(self, p=0.05, beta=0.1, p_hat=0.0):
        self.p = K.cast_to_floatx(p)
        self.beta = K.cast_to_floatx(beta)
        self.p_hat = K.cast_to_floatx(p_hat)

    def __call__(self, x):
        """Return the sparsity penalty for the activations ``x``."""
        # Mean over every element of x; K.mean without an axis already
        # yields a scalar, so no further reduction is needed.
        # The mean is clipped into (0, 1): outside that interval the logs
        # in KL() are undefined and the loss would become NaN/inf.
        # A local is used so the regularizer does not keep a graph tensor
        # as instance state.
        eps = K.epsilon()
        p_hat = K.clip(K.mean(x), eps, 1. - eps)
        return self.beta * KL(self.p, p_hat)

    def get_config(self):
        """Return the constructor arguments needed to rebuild the regularizer."""
        return {'p': float(self.p),
                'beta': float(self.beta)}

In [None]:
# DATA LOADING AND INITIALISATION
# number of leading samples used for training; the remainder is the validation set
N_TRAIN = 1500

# load the dataset
# The archive is opened in a `with` block so it is closed as soon as the two
# arrays have been read. The second positional argument of np.load is
# `mmap_mode` (not a file mode), so it is now passed by keyword.
with np.load('dataset_test.npz', mmap_mode='r') as dataset:
    data = dataset['spectrograms']
    norms = dataset['norms']
print('Dataset with shape', data.shape)

# reshape the dataset into a 4-dimensional tensor by appending a channel axis of size 1
data = np.reshape(data, (data.shape[0], data.shape[1], data.shape[2], 1))
print('Dataset reshaping in tensor of shape', data.shape)

# split between train and validation set
train_set = data[:N_TRAIN]
validation_set = data[N_TRAIN:]
print('Train batch:',train_set.shape)
print('Validation batch:',validation_set.shape)

In [None]:
# plot spectrograms for check
import librosa
import librosa.display
import matplotlib.pyplot as plt

# a single index for both lookups, so the spectrogram and its norm always match
SAMPLE_INDEX = 900

# Drop the trailing channel axis (assumes each sample holds 128*88 values).
# The shape is passed positionally: the `newshape` keyword of np.reshape is
# deprecated in recent NumPy releases.
test = np.reshape(train_set[SAMPLE_INDEX], (128, 88))
norm = norms[SAMPLE_INDEX]

fig = plt.figure(figsize=[12, 4])
# the sample is multiplied by its stored norm before plotting
librosa.display.specshow(test*norm, sr=44100, y_axis='mel', x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
# save before show(), so the file does not depend on the figure still being
# intact after the backend has displayed it
fig.savefig('Mel spectrogram')
plt.show()

In [None]:
# ARCHITECTURE DEFINITION
# Note: for greedy layer-wise training, pass trainable=False to the layers
# that have already been trained (as done for 'lay1' below).


def _sparse_conv(filters, layer_name, **conv_kwargs):
    """Build a 3x3 'same'-padded Conv2D with the shared regularisation.

    Every call creates fresh regularizer objects: an L2 kernel penalty of
    0.01 and a SparseReg activity penalty with beta=0.5 and p=0.01.
    Extra keyword arguments are forwarded to Conv2D.
    """
    return Conv2D(filters, (3, 3), padding='same',
                  kernel_regularizer=regularizers.l2(0.01),
                  activity_regularizer=SparseReg(beta=5e-1, p=1e-2),
                  name=layer_name, **conv_kwargs)


# input structure
input_img = Input(shape=(128, 776, 1))
print('Input tensor shape:', input_img.shape)

# encoding structure: frozen conv -> 2x2 max-pool -> conv
x = _sparse_conv(256, 'lay1', trainable=False)(input_img)
x = LeakyReLU(1e-1)(x)
x = MaxPooling2D(pool_size=(2, 2), strides=2, padding='same')(x)
x = _sparse_conv(256, 'lay2')(x)
x = LeakyReLU(1e-1)(x)

# Disabled deeper variant, kept here for reference. Each conv below used the
# same SparseReg settings but no L2 kernel penalty, and was followed by
# LeakyReLU(0.1):
#   encoder: 2x2 max-pool -> 'lay3' Conv 512 (3x3) -> 'lay3bis' Conv 1024 (1x1)
#            -> 2x2 max-pool, giving the `encoded` tensor
#   decoder: 'lay4' Conv 1024 (1x1) -> 'lay4bis' Conv 512 (3x3) -> 2x2 upsample
#            -> 'lay5' Conv 256 (3x3) -> 2x2 upsample

# decoding structure: conv -> 2x2 upsample -> single-channel conv
x = _sparse_conv(256, 'lay6')(x)
x = LeakyReLU(1e-1)(x)
x = UpSampling2D(size=(2, 2))(x)
x = _sparse_conv(1, 'lay7')(x)
decoded = LeakyReLU(1e-1)(x)
print('Decoded representation tensor shape:', decoded.shape)


# compile the model
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

In [None]:
# using mnist dataset
from keras.datasets import mnist
import numpy as np

# the labels are not needed, so they are discarded
(x_train, _), (x_test, _) = mnist.load_data()

# cast to float32, divide by 255 and append a channel axis of size 1
# (adapt the target shape if using the `channels_first` image data format)
x_train, x_test = [
    np.reshape(images.astype('float32') / 255., (len(images), 28, 28, 1))
    for images in (x_train, x_test)
]

In [None]:
# Add Gaussian noise to the inputs so the autoencoder cannot simply learn the
# identity function.
# obs: it screws up spectrograms!
noise_factor = 0.3

# Noise is drawn for the training set first, then for the test set; the noisy
# values are clipped back into [0, 1].
x_train_noisy = np.clip(
    x_train + noise_factor * np.random.normal(0.0, 1.0, x_train.shape),
    0., 1.)
x_test_noisy = np.clip(
    x_test + noise_factor * np.random.normal(0.0, 1.0, x_test.shape),
    0., 1.)

In [None]:
# LOAD TRAINED LAYERS
# by_name=True matches weights to layers by layer name, so weights saved from
# a different architecture can be loaded for the layers it has in common.
weights_path = 'specs_leaky_one_cae_weights.h5'
autoencoder.load_weights(weights_path, by_name=True)

In [None]:
# MODEL TRAINING
# The network is trained to reconstruct its own input, so inputs and targets
# are the same array.
# obs: do not use noise injection with spectrograms
hist = autoencoder.fit(
    x=train_set,
    y=train_set,
    batch_size=4,
    epochs=1,
    validation_data=(validation_set, validation_set),
)

In [None]:
# IF USING MNIST
from keras.datasets import mnist
import numpy as np

# only the images are kept; the labels are discarded
(x_train, _), (x_test, _) = mnist.load_data()

# convert to float32 and divide by 255
x_train = x_train.astype('float32')
x_train /= 255.
x_test = x_test.astype('float32')
x_test /= 255.

# append a channel axis of size 1
# (adapt this if using the `channels_first` image data format)
x_train = x_train.reshape((len(x_train), 28, 28, 1))
x_test = x_test.reshape((len(x_test), 28, 28, 1))


In [None]:
# write losses and accuracies in a json file
import json

# The per-epoch values may be NumPy scalars (depending on the Keras version),
# which json cannot serialise; convert them to plain Python floats first.
history = {metric: [float(value) for value in values]
           for metric, values in hist.history.items()}

with open('data.json', 'w') as fp:
    json.dump(history, fp)