In [7]:
import numpy as np
import pandas as pd

import keras
import tensorflow as tf

from keras import layers

# Load the dataset from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
dataset = pd.read_pickle("DATA_diz.pkl")


In [2]:
# List the fields available in the loaded dataset.
dataset.keys()

dict_keys(['song_id', 'song_timeSeries', 'MFCCs', 'Mel_spectrograms', 'sampling_rate', 'label'])

In [3]:
# Genre names keyed by consecutive integer ids, starting at 0 (0 -> 'jazz').
genre_dict = dict(enumerate((
    'jazz',
    'rock',
    'hiphop',
    'metal',
    'pop',
    'disco',
    'blues',
    'classical',
    'country',
    'reggae',
)))

In [4]:
# Build a DataFrame from the dataset dict (one column per key) and preview the first three rows.
df = pd.DataFrame(dataset)
df.head(3)

Unnamed: 0,song_id,song_timeSeries,MFCCs,Mel_spectrograms,sampling_rate,label
0,jazz.00000_10_15,"[-0.011962891, -0.005706787, -0.00390625, -0.0...","[[[-312.6806, -311.4205, -321.67444, -308.2039...","[[[0.024068143, 0.026770458, 0.031402346, 0.07...",22050,jazz
1,jazz.00000_15_20,"[0.007904053, 0.015686035, 0.009796143, 0.0094...","[[[-226.86893, -167.50664, -140.55206, -128.56...","[[[0.010349594, 0.023792297, 0.031287283, 0.04...",22050,jazz
2,jazz.00000_20_25,"[0.050720215, 0.045684814, 0.044433594, 0.0443...","[[[-215.35652, -197.37381, -204.02322, -212.86...","[[[0.0783003, 0.08831336, 0.07720997, 0.071271...",22050,jazz


### Select all the songs of a genre in order to generate music of that genre
Keep only the MFCCs column and use it as the input to the variational autoencoder.

In [5]:
genre = 0  # key into genre_dict; 0 -> jazz

# Start from `dataset` instead of the current `df` so this cell can be re-run:
# once `df` has been narrowed to the MFCCs column it no longer carries the
# 'label' column that the genre filter needs.
all_songs = pd.DataFrame(dataset)
is_genre = all_songs['label'] == genre_dict[genre]

# Keep only the rows of the chosen genre and only the MFCCs column.
df = all_songs.loc[is_genre, ['MFCCs']]

# Shape of a single MFCC sample.
df.iloc[0, 0].shape

(1, 13, 216)

### Convert the dataset into a NumPy array

In [6]:
# Pull the MFCCs column out as a NumPy array.
# The printed shapes show a 1-D array with one entry per row, each entry being
# its own (1, 13, 216) array — not a single stacked 4-D tensor.
# NOTE(review): `df` is rebound from a DataFrame to an ndarray here, so this
# cell cannot be re-run without first re-running the cells that build `df`.
df = np.array(df['MFCCs'])

print(df.shape)
print(df[0].shape)

(593,)
(1, 13, 216)


### Define the Encoder Network

In [8]:
def encoder(input_encoder, latent_dim=2):
    """Build the convolutional encoder of the variational autoencoder.

    Four Conv2D -> BatchNormalization -> LeakyReLU blocks are applied in
    sequence (blocks 2 and 3 use stride 2 and therefore downsample both
    spatial axes). The result is flattened and fed to two parallel dense heads
    that produce the mean and the log-variance of the latent distribution; a
    latent vector is then drawn from them with the ``Sampling`` layer.

    The ``Sampling`` class must already be defined when this function is
    called.

    Args:
        input_encoder: Shape of one input sample, without the batch axis, as
            passed to ``keras.Input(shape=...)``. Conv2D requires three
            dimensions here.
            NOTE(review): the MFCC samples shown earlier in the notebook have
            shape (1, 13, 216); under Keras' default channels-last layout that
            would be read as 1 row, 13 columns and 216 channels. Confirm the
            intended layout (e.g. (13, 216, 1)) at the call site.
        latent_dim: Size of the latent space, i.e. the number of units of the
            ``mean`` and ``log_var`` heads. Defaults to 2.

    Returns:
        A Keras model named ``"encoder"`` whose outputs are the tuple
        ``(mean, log_var, z)``.
    """
    inputs = keras.Input(shape=input_encoder, name='input_layer')

    # (filters, strides) for Block-1 .. Block-4; layer names keep the 1-based
    # block index (conv_1, bn_1, lrelu_1, ...).
    block_specs = [(32, 1), (64, 2), (64, 2), (64, 1)]

    x = inputs
    for idx, (filters, strides) in enumerate(block_specs, start=1):
        x = layers.Conv2D(filters, kernel_size=3, strides=strides, padding='same', name=f'conv_{idx}')(x)
        x = layers.BatchNormalization(name=f'bn_{idx}')(x)
        x = layers.LeakyReLU(name=f'lrelu_{idx}')(x)

    # Final Block: parameters of the latent distribution and a sample from it
    flatten = layers.Flatten()(x)
    mean = layers.Dense(latent_dim, name='mean')(flatten)
    log_var = layers.Dense(latent_dim, name='log_var')(flatten)
    z = Sampling()([mean, log_var])

    # Named `model` so the local does not shadow this function's own name.
    model = tf.keras.Model(inputs, (mean, log_var, z), name="encoder")

    return model

### Define a Sampling layer for the latent variables

In [9]:
class Sampling(layers.Layer):
    """Draws a latent vector z from the distribution described by (z_mean, z_log_var).

    Implements the reparameterization z = z_mean + exp(0.5 * z_log_var) * epsilon,
    where epsilon is random normal noise with the same (batch, dim) shape as z_mean.
    """

    def call(self, inputs):
        mu, log_variance = inputs

        # Dynamic shape of the mean tensor: (batch, dim)
        mu_shape = tf.shape(mu)
        noise = tf.keras.backend.random_normal(shape=(mu_shape[0], mu_shape[1]))

        # exp(0.5 * log_var) is the standard deviation
        std_dev = tf.exp(0.5 * log_variance)
        return mu + std_dev * noise

### Define the Decoder

In [None]:
def decoder(input_decoder, conv_shape=(7, 7, 64), output_activation='sigmoid'):
    """Build the transposed-convolution decoder of the variational autoencoder.

    A dense layer expands the latent vector to ``rows * cols * channels``
    units, which are reshaped to ``conv_shape``. Three
    Conv2DTranspose -> BatchNormalization -> LeakyReLU blocks follow (blocks 2
    and 3 use stride 2), and a final single-filter Conv2DTranspose produces
    the output.

    With ``padding='same'`` the two stride-2 blocks each double the spatial
    size, so the output is ``(4 * rows, 4 * cols, 1)``; the default
    ``conv_shape`` gives ``(28, 28, 1)``.

    NOTE(review): the defaults do not reproduce the (1, 13, 216) MFCC samples
    shown earlier in the notebook. Pass a ``conv_shape`` matching the encoder's
    pre-flatten feature map; for spatial sizes that are not a multiple of 4 an
    extra crop/resize step would still be required — confirm against the
    training code. Likewise the default sigmoid limits outputs to (0, 1),
    while the raw MFCC values previewed above are far outside that range, so
    either the inputs are rescaled elsewhere or another ``output_activation``
    (e.g. ``None``) is needed — confirm.

    Args:
        input_decoder: Shape of the latent input, without the batch axis, as
            passed to ``keras.Input(shape=...)``.
        conv_shape: ``(rows, cols, channels)`` of the feature map the dense
            layer output is reshaped to. Defaults to ``(7, 7, 64)``.
        output_activation: Activation of the final layer. Defaults to
            ``'sigmoid'``.

    Returns:
        A Keras model named ``"Decoder"`` mapping a latent vector to a
        single-channel output.
    """
    rows, cols, channels = conv_shape

    inputs = keras.Input(shape=input_decoder, name='input_layer')
    x = layers.Dense(rows * cols * channels, name='dense_1')(inputs)
    x = layers.Reshape((rows, cols, channels), name='Reshape_Layer')(x)

    # (filters, strides) for Block-1 .. Block-3; layer names keep the 1-based
    # block index (conv_transpose_1, bn_1, lrelu_1, ...).
    block_specs = [(64, 1), (64, 2), (32, 2)]
    for idx, (filters, strides) in enumerate(block_specs, start=1):
        x = layers.Conv2DTranspose(filters, 3, strides=strides, padding='same', name=f'conv_transpose_{idx}')(x)
        x = layers.BatchNormalization(name=f'bn_{idx}')(x)
        x = layers.LeakyReLU(name=f'lrelu_{idx}')(x)

    # Block-4: project back to a single channel
    outputs = layers.Conv2DTranspose(1, 3, 1, padding='same', activation=output_activation, name='conv_transpose_4')(x)
    model = tf.keras.Model(inputs, outputs, name="Decoder")
    return model