In [16]:
# Convolutional autoencoder using 1D convolution

import numpy as np

from keras.layers import Input # define the input shape for the model
from keras.layers import Conv1D, MaxPooling1D, UpSampling1D # for the convnet structure
from keras.models import Model # for the overall definition


from keras.initializers import Constant # bias initialisation
from keras.initializers import TruncatedNormal # kernel initialissation
from keras.layers.advanced_activations import LeakyReLU # activation function (from NSynth)

In [2]:
# DEEP LEARNING PART

# load the training dataset
dataset = np.load('spectrogram_test.npz','r')
data_training = dataset['dataset']
data_training = np.transpose(data_training)
print('Training dataset with shape', data_training.shape)
print('Batch size:', data_training.shape[0])

Training dataset with shape (1145, 500, 128)
Batch size: 1145


In [3]:
# Convolutional autoencoder structure using the Keras Model API
# define input shape
input_img = Input(shape=(500,128))
print('Some information about tensor expected shapes')
print('Input tensor shape:', input_img.shape)

Some information about tensor expected shapes
Input tensor shape: (?, 500, 128)


In [18]:
# define encoder convnet
# obs: 1D convolution implemented
x = Conv1D(filters=128,kernel_size=4,activation=LeakyReLU(),padding='causal',dilation_rate=4,bias_initializer=Constant(0.1),kernel_initializer=TruncatedNormal())(input_img)
x = Conv1D(filters=256,kernel_size=(4),activation=LeakyReLU(),padding='causal',dilation_rate=2,bias_initializer=Constant(0.1),kernel_initializer=TruncatedNormal())(x)
x = MaxPooling1D(pool_size=4,strides=4)(x)
encoded = Conv1D(filters=512,kernel_size=4,activation=LeakyReLU(),padding='causal',bias_initializer=Constant(0.1),kernel_initializer=TruncatedNormal())(x)
print('Encoded representation tensor shape:', encoded.shape)

# define decoder convnet
x = Conv1D(filters=256,kernel_size=4,activation=LeakyReLU(),padding='causal',bias_initializer=Constant(0.1),kernel_initializer=TruncatedNormal())(encoded)
x = UpSampling1D(size=4)(x)
x = Conv1D(filters=128,kernel_size=4,activation=LeakyReLU(),padding='causal',dilation_rate=2,bias_initializer=Constant(0.1),kernel_initializer=TruncatedNormal())(x)
decoded = Conv1D(filters=1,kernel_size=4,activation=LeakyReLU(),padding='causal',dilation_rate=4,bias_initializer=Constant(0.1),kernel_initializer=TruncatedNormal())(x)
print('Decoded representation tensor shape:', decoded.shape)

Encoded representation tensor shape: (?, 125, 512)
Decoded representation tensor shape: (?, 500, 1)


In [13]:
# define overal autoencoder model
cae = Model(inputs=input_img, outputs=decoded)
cae.compile(optimizer='adam', loss='mse')

# check for equal size
# obs: the missing value is the batch_size
if input_img.shape[1:] != decoded.shape[1:]: print('alert: in/out dimension mismatch')

alert: in/out dimension mismatch


In [17]:
# reshape the training set in 4-dimension tensor
data_training = np.reshape(data_training, (len(data_training), data_training.shape[1], data_training.shape[2], 1))
print('Data training reshaping in tensor of shape', data_training.shape)

Data training reshaping in tensor of shape (319, 2584, 128, 1)


In [None]:
# Autoencoder training
cae.fit(data_training,data_training,
        epochs=1,
        batch_size=4, # minibatch of 4 for memory optimisation
        #callbacks=[TensorBoard(log_dir='/tmp/autoencoder')]
       )


# Saving the weights
import h5py
cae.save('cae_dtu_k80_structure1.h5')