In [1]:
import datetime
import os
import random

import keras
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Activation, BatchNormalization, Dense,
                                     Dropout, Flatten, Reshape, Input,
                                     TimeDistributed)
from tensorflow.keras.models import Model

tf.compat.v1.disable_eager_execution()

import midi

In [2]:
%load_ext tensorboard

In [3]:
NUM_EPOCHS = 50    
DO_RATE = 0.1
BN_M = 0.9
BATCH_SIZE = 128

np.random.seed(0)
random.seed(0)

In [None]:
y = np.load('pypianorollSamples.npy').swapaxes(2,3)
y_shape = y.shape

In [6]:
y_train = y[:int(y.shape[0]*0.80)]
y_valid = y[int(y.shape[0]*0.80):y.shape[0]]

y_test_song = np.copy(y[0])

In [7]:
class Sampling(keras.layers.Layer):
    def call(self, inputs):
        mean, log_var = inputs 
        return K.random_normal(tf.shape(log_var), stddev=0.1) * K.exp(log_var / 2) + mean


In [8]:
PARAM_SIZE = 72

In [9]:
# x_in = tf.keras.layers.Input(shape=y_shape[1:])
# z = TimeDistributed(Dense(128, activation='relu'))(x_in)
# z = Reshape((y_shape[1], -1))(z)
# z = TimeDistributed(keras.Sequential([
#                                     Dense(1152, activation='relu'),
#                                     # Dense(512, activation='relu'), 
#                                     Dense(PARAM_SIZE, activation='relu')])
#                                     )(z)
# z = Flatten()(z)
# z = BatchNormalization(momentum=BN_M)(z)
# codings_mean = keras.layers.Dense(PARAM_SIZE)(z) # μ
# codings_log_var = keras.layers.Dense(PARAM_SIZE)(z) # γ
# codings = Sampling()([codings_mean, codings_log_var])
# variational_encoder = keras.Model(inputs=[x_in], outputs=[codings_mean, codings_log_var, codings])

# decoder_inputs = keras.layers.Input(shape=[PARAM_SIZE])
# x = Dense(PARAM_SIZE, activation='relu', name='encoder')(decoder_inputs)
# x = BatchNormalization(momentum=BN_M)(x)
# x = Activation('relu')(x)
# x = Dropout(DO_RATE)(x)
# x = Dense(PARAM_SIZE*4, activation='relu')(x)
# x = Reshape((4, PARAM_SIZE))(x)
# x = TimeDistributed(keras.Sequential([
#                                     Dense(PARAM_SIZE*4, activation='relu'),
#                                     Dense(PARAM_SIZE*16, activation='relu'),
#                                     # Dense(PARAM_SIZE*64, activation='relu'),
#                                     Dense(PARAM_SIZE*128, activation='relu'),
#                                 ]))(x)
# # x = BatchNormalization(momentum=BN_M)(x)
# # x = Activation('relu')(x)
# # x = Dropout(DO_RATE)(x)
# x = Reshape((y_shape[1], PARAM_SIZE, 128))(x)
# x = TimeDistributed(Dense(512, activation='sigmoid'))(x)
# outputs = Reshape((y_shape[1], PARAM_SIZE, 512))(x)
# variational_decoder = keras.Model(inputs=[decoder_inputs], outputs=[x])

In [None]:
x_in = Input(shape=y_shape[1:])
z = TimeDistributed(Dense(256, activation='relu'))(x_in)
z = Reshape((y_shape[1], -1))(z)
z = TimeDistributed(Dense(1024, activation='relu'))(z)
z = TimeDistributed(Dense(512, activation='relu'))(z)
z = Flatten()(z)
z = Dense(1024, activation='relu')(z)
z = Dense(512, activation='relu')(z)
z = BatchNormalization(momentum=BN_M)(z)
# z = Activation('relu')(z)
# z = Dropout(DO_RATE)(z)
codings_mean = Dense(PARAM_SIZE)(z) # μ
codings_log_var = Dense(PARAM_SIZE)(z) # γ
codings = Sampling()([codings_mean, codings_log_var])
variational_encoder = Model(inputs=[x_in], outputs=[codings_mean, codings_log_var, codings])

decoder_inputs = Input(shape=[PARAM_SIZE])
x = Dense(PARAM_SIZE, activation='relu', name= 'encoder')(decoder_inputs)
x = Dense(512, activation='relu')(x)
x = Dense(1024, activation='relu')(x)
x = Dense(2048, activation='relu')(x)
x = Reshape((4,512))(x)
x = TimeDistributed(Dense(1024, activation='relu'))(x)
x = TimeDistributed(Dense(18432, activation='relu'))(x)
x = BatchNormalization(momentum=BN_M)(x)
x = Activation('relu')(x)
x = Dropout(DO_RATE)(x)
x = Reshape((4,72, 256))(x)
x = TimeDistributed(Dense(512, activation='relu'))(x)
outputs = Reshape((y_shape[1], PARAM_SIZE, 512))(x)
variational_decoder = Model(inputs=[decoder_inputs], outputs=[x])

In [11]:
# variational_decoder.summary()
# variational_encoder.summary()
# variational_ae.summary()

In [None]:
_, _, codings = variational_encoder(x_in)
reconstructions = variational_decoder(codings)
variational_ae = keras.Model(inputs=[x_in], outputs=[reconstructions])

latent_loss = -0.5 * K.sum(
1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean),
axis=-1)

variational_ae.add_loss(K.mean(latent_loss) * 10000 / 147456.)
variational_ae.compile(loss="binary_crossentropy", optimizer="rmsprop")

In [13]:
log_dir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir=log_dir),
]

In [None]:
history = variational_ae.fit(
    y_train,
    y_train,
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(y_valid, y_valid),
    callbacks=callbacks,
    verbose=2,
)

loss = history.history["loss"][-1]
print(f"Train Loss: {loss}")

write_dir = 'HistoryVAAuto/'
variational_ae.save('HistoryVAAuto/variational_ae.h5')

In [15]:
# teste = np.random.normal(0.0, 1.0, (PARAM_SIZE))

In [16]:
# import matplotlib.pyplot as plt
# # teste = np.random.normal(0.0, 1.0, (PARAM_SIZE))
# y_song = variational_decoder.predict(teste.reshape(1,PARAM_SIZE))

# for i in range(4):
#     a = np.where(y_song[0][i] < 0.05, y_song[0][i], 1)
#     a = np.where(a > 0.05, a, 0)
#     plt.figure(figsize=(16, 64))
#     plt.imshow(a)

# plt.show()

In [17]:
func = K.function([variational_decoder.get_layer('encoder').input, K.learning_phase()], [variational_decoder.layers[-1].output])
enc = Model(inputs=variational_encoder.input, outputs=variational_encoder.layers[-1].output)

In [28]:
import matplotlib.pyplot as plt
teste = np.random.normal(0.0, 1.0, (PARAM_SIZE))
tresh = 0.06

x_enc = np.squeeze(enc.predict(y))
x_mean = np.mean(x_enc, axis=0)
x_cov = np.cov((x_enc - x_mean).T)
_, s, v = np.linalg.svd(x_cov)
e = np.sqrt(s)

print(f"Means: {x_mean[:6]}")
print(f"Evals: {e[:6]} ")

x_vec = x_mean + np.dot(teste * e, v)
y_song = variational_decoder.predict(x_vec.reshape(1,PARAM_SIZE))
song_binarize = np.where(y_song[0] >= tresh, y_song[0], 0)
song_binarize = np.where(y_song[0] < tresh, y_song[0], 1)

# for i in range(4):
#     plt.figure(figsize=(16, 64))
#     plt.imshow(song_binarize[i])

plt.show()

Means: [ 0.01081835 -0.02783128  0.02302202  0.00344592  0.03728662  0.0241282 ]
Evals: [0.30996126 0.15812452 0.11834104 0.11726648 0.11682753 0.11590689] 


In [19]:
import pypianoroll

def sample2midi(path, sample, resolution):
    all_notes = np.zeros((2048,128), dtype=np.uint8)
    all_notes[:, 24:96] = sample.reshape(2048,72)

    pypianoroll.write(
        path=path, 
        multitrack=pypianoroll.Multitrack(
            resolution=resolution,
            tracks=[
                pypianoroll.BinaryTrack(
                    program=0, is_drum=False, pianoroll=all_notes
                    )
                ]
            )
        )

In [24]:
def make_rand_songs(write_dir, rand_vecs, tresh):
    for i in range(rand_vecs.shape[0]):
        x_rand = rand_vecs[i:i+1]
        y_song = variational_decoder.predict(x_rand.reshape(1,PARAM_SIZE))
        song_binarize = np.where(y_song[0] >= tresh, y_song[0], 0)
        song_binarize = np.where(y_song[0] < tresh, y_song[0], 1)
        sample2midi(write_dir + 'rand' + str(i) + '.mid', song_binarize.swapaxes(2,1), 8)
        for i in range(4):
            plt.figure(figsize=(4, 4))
            plt.imshow(song_binarize[i])
        break
def make_rand_songs_normalized(write_dir, rand_vecs):
    x_enc = np.squeeze(enc.predict(y))

    x_mean = np.mean(x_enc, axis=0)
    x_cov = np.cov((x_enc - x_mean).T)
    _, s, v = np.linalg.svd(x_cov)
    e = np.sqrt(s)

    print(f"Means: {x_mean[:6]}")
    print(f"Evals: {e[:6]} ")

    x_vecs = x_mean + np.dot(rand_vecs * e, v)
    make_rand_songs(write_dir, x_vecs, 0.5)

In [27]:
sample2midi("teste.mid", song_binarize.swapaxes(2,1), 8)
# rand_vecs = np.random.normal(0.0, 1.0, (10, PARAM_SIZE))
# make_rand_songs_normalized("", rand_vecs)