In [1]:
import datetime
import os
import time
import random

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Activation, BatchNormalization, Dense,
                                     Dropout, Flatten, Input, Reshape,
                                     TimeDistributed)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop

# Switch TensorFlow to graph (non-eager) execution for the whole notebook.
# NOTE(review): presumably required by the K.function / K.learning_phase()
# call used later to run the decoder on its own -- confirm before removing.
tf.compat.v1.disable_eager_execution()

In [2]:
# Load the TensorBoard notebook extension so the %tensorboard magic
# (used in the last cell) is available.
%load_ext tensorboard

In [3]:
import midi

In [4]:
# --- Training hyper-parameters ---
NUM_EPOCHS = 250        # number of epochs passed to model.fit
LR = 0.001              # RMSprop learning rate
WRITE_HISTORY = True    # NOTE(review): not referenced by any visible cell
NUM_RAND_SONGS = 10     # how many random latent vectors are sampled
DO_RATE = 0.1           # dropout rate in the decoder; 0 disables dropout
BN_M = 0.9              # BatchNormalization momentum

# --- Data / model dimensions ---
BATCH_SIZE = 256
MAX_LENGTH = 16         # fixed number of samples kept per song
PARAM_SIZE = 120        # width of the latent ('pre_encoder') layer

# --- Reproducibility ---
# Seed every RNG the notebook relies on. Without the TensorFlow seed the
# weight initialisation and dropout masks differ between runs even though
# NumPy and `random` are seeded.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)
tf.compat.v1.set_random_seed(SEED)

In [5]:
# Read the concatenated samples of all songs and the per-song sample counts.
y_samples = np.load('samples.npy')
y_lengths = np.load('lengths.npy')
num_songs = len(y_lengths)

# Allocate one fixed-size slot of MAX_LENGTH samples per song; the trailing
# dimensions and dtype are taken from the loaded samples.
y_shape = (num_songs, MAX_LENGTH, *y_samples.shape[1:])
y_orig = np.zeros(shape=y_shape, dtype=y_samples.dtype)

* y_samples.shape = (_, 96, 96)
* y_lengths.shape = (238,) — each element is how many ticks the corresponding song has
* y_shape = (238, 16, 96, 96)
* y_orig = array of zeros with shape (238, 16, 96, 96)

In [6]:
# Copy each song's samples into its fixed-size slot of y_orig. Songs are
# stored back to back in y_samples; a song shorter than MAX_LENGTH is
# repeated from its start (index taken modulo the song length).
slot_ix = np.arange(MAX_LENGTH)
cur_ix = 0
for i in range(num_songs):
    end_ix = cur_ix + y_lengths[i]
    # Offsets inside the current song, wrapping around at its length.
    y_orig[i] = y_samples[cur_ix + slot_ix % (end_ix - cur_ix)]
    cur_ix = end_ix

In [7]:
# Work on a copy so y_orig keeps the untouched data.
y = y_orig.copy()

# The first 125 songs are used for training, the remainder for validation.
y_train, y_valid = y[:125], y[125:]

# Keep the first song as a reference and write it out as the ground-truth MIDI.
y_test_song = y[0].copy()
midi.samples_to_midi(y_test_song, 'gt.mid', 16)

In [8]:
def _bn_relu_dropout(tensor, per_timestep=False):
    """Apply BatchNormalization -> ReLU -> optional Dropout to `tensor`.

    When `per_timestep` is true the normalization layer is wrapped in
    TimeDistributed. Dropout is only added when DO_RATE > 0.
    """
    norm = BatchNormalization(momentum=BN_M)
    if per_timestep:
        norm = TimeDistributed(norm)
    tensor = norm(tensor)
    tensor = Activation('relu')(tensor)
    if DO_RATE > 0:
        tensor = Dropout(DO_RATE)(tensor)
    return tensor


seq_len, n_rows, n_cols = y_shape[1], y_shape[2], y_shape[3]

# --- Down to the bottleneck: per-step dense layers, then one latent vector ---
x_in = Input(shape=y_shape[1:])
x = Reshape((seq_len, -1))(x_in)
for units in (2000, 200):
    x = TimeDistributed(Dense(units, activation='relu'))(x)
x = Flatten()(x)
x = Dense(1600, activation='relu')(x)
x = Dense(PARAM_SIZE)(x)
x = BatchNormalization(momentum=BN_M, name='pre_encoder')(x)

# --- Back up from the bottleneck ---
# The layer named 'encoder' is the first Dense layer after the 'pre_encoder'
# bottleneck; both names are looked up by later cells, so they must not change.
x = Dense(1600, name='encoder')(x)
x = _bn_relu_dropout(x)
x = Dense(MAX_LENGTH * 200)(x)
x = Reshape((MAX_LENGTH, 200))(x)
x = _bn_relu_dropout(x, per_timestep=True)
x = TimeDistributed(Dense(2000))(x)
x = _bn_relu_dropout(x, per_timestep=True)
x = TimeDistributed(Dense(n_rows * n_cols, activation='sigmoid'))(x)
x = Reshape((seq_len, n_rows, n_cols))(x)

# Autoencoder: the input is also the training target.
model = Model(inputs=x_in, outputs=x)
model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=LR))

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
# Decoder-only callable: takes a tensor fed at the input of the 'encoder'
# layer plus the Keras learning-phase flag, and returns the output of the
# model's last layer.
decoder_in = model.get_layer('encoder').input
decoder_out = model.layers[-1].output
func = K.function([decoder_in, K.learning_phase()], [decoder_out])

# Encoder-only model: maps the model input to the 'pre_encoder' output.
latent_out = model.get_layer('pre_encoder').output
enc = Model(inputs=model.input, outputs=latent_out)

In [10]:
# Draw NUM_RAND_SONGS standard-normal latent vectors of width PARAM_SIZE and
# persist them to 'rand.npy'.
latent_shape = (NUM_RAND_SONGS, PARAM_SIZE)
rand_vecs = np.random.normal(loc=0.0, scale=1.0, size=latent_shape)
np.save('rand.npy', rand_vecs)

In [11]:
def make_rand_songs(write_dir, rand_vecs):
    """Decode each latent vector and write the result as a MIDI file.

    Every row of `rand_vecs` is passed (as a batch of one, learning-phase
    flag 0) through the module-level `func`, and the first item of the
    result is written to `<write_dir>rand<i>.mid`.

    Args:
        write_dir: Path prefix prepended verbatim to the file name, so a
            directory must include its trailing separator.
        rand_vecs: Array whose rows are the latent vectors to decode.
    """
    for i, vec in enumerate(rand_vecs):
        x_rand = vec[np.newaxis]  # keep a leading batch axis of size 1
        y_song = func([x_rand, 0])[0]
        midi.samples_to_midi(y_song[0], f'{write_dir}rand{i}.mid', 16, 0.25)

def make_rand_songs_normalized(write_dir, rand_vecs):
    """Map random vectors onto the encoded data's distribution, then decode.

    Encodes the module-level `y_orig` with `enc`, computes the mean and the
    covariance of the encodings, and uses the SVD of that covariance to
    scale and rotate `rand_vecs` before shifting them by the mean. The
    resulting vectors are handed to `make_rand_songs`.

    Prints the first six mean components and the first six square-rooted
    singular values.

    Args:
        write_dir: Path prefix forwarded to `make_rand_songs`.
        rand_vecs: Array of latent vectors, one per row.
    """
    latent = np.squeeze(enc.predict(y_orig))

    latent_mean = np.mean(latent, axis=0)
    centered = latent - latent_mean
    latent_cov = np.cov(centered.T)

    # Only the singular values and the right-hand factor of the SVD are used.
    _, sing_vals, basis = np.linalg.svd(latent_cov)
    scales = np.sqrt(sing_vals)

    print(f"Means: {latent_mean[:6]}")
    print(f"Evals: {scales[:6]} ")

    # Scale each component, rotate by the SVD factor, then shift by the mean.
    scaled = rand_vecs * scales
    x_vecs = latent_mean + np.dot(scaled, basis)
    make_rand_songs(write_dir, x_vecs)

In [12]:
# Each run logs to its own timestamped sub-directory of "logs".
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs", run_stamp)

tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
callbacks = [tensorboard_cb]

In [13]:
# NOTE(review): everything in this cell is commented out and never runs.
# It is an alternative, Sequential-based construction of the autoencoder
# (separate encoder/decoder stacks, no Dropout layers, plus a rounded-accuracy
# metric). Kept for reference only.
# def create_model():
#     def rounded_accuracy(y_true, y_pred):
#         return tf.keras.metrics.binary_accuracy(
#             tf.round(y_true),
#             tf.round(y_pred),
#         )
    
#     stacked_encoder = tf.keras.models.Sequential([
#         Input(shape=(16,96,96)),
#         Reshape((16, -1)),
#         TimeDistributed(Dense(2000, activation='relu')),
#         TimeDistributed(Dense(200, activation='relu')),
#         Flatten(),
#         Dense(1600, activation='relu'),
#         Dense(PARAM_SIZE),
#         BatchNormalization(momentum=BN_M, name='pre_encoder')
#     ])
#     stacked_decoder = tf.keras.models.Sequential([
#         Dense(1600, name='encoder'),
#         BatchNormalization(momentum=BN_M),
#         Activation('relu'),
#         Dense(MAX_LENGTH * 200),
#         Reshape((MAX_LENGTH, 200)),
#         TimeDistributed(BatchNormalization(momentum=BN_M)),
#         Activation('relu'),
#         TimeDistributed(Dense(2000)),
#         TimeDistributed(BatchNormalization(momentum=BN_M)),
#         Activation('relu'),
#         TimeDistributed(Dense(96 * 96, activation='sigmoid')),
#         Reshape((16,96,96))
#     ])
#     stacked_ae = tf.keras.models.Sequential([stacked_encoder, stacked_decoder])

#     stacked_ae.compile(
#         loss="binary_crossentropy",
#         optimizer= RMSprop(learning_rate=LR),
#         metrics=[rounded_accuracy],
#     )

#     return stacked_encoder, stacked_decoder, stacked_ae

# stacked_encoder, stacked_decoder, model = create_model()

In [14]:
# Train the autoencoder: the input is also the target.
history = model.fit(
    y_train,
    y_train,
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(y_valid, y_valid),
    callbacks=callbacks,
    verbose=2,
)

# Report the training loss of the last epoch.
loss = history.history["loss"][-1]
print(f"Train Loss: {loss}")

# Output directory for the saved model and the generated MIDI files.
# Create it up front so a fresh "Restart & Run All" does not fail on a
# missing directory, and derive the model path from write_dir instead of
# repeating the literal.
write_dir = 'History/'
os.makedirs(write_dir, exist_ok=True)
model.save(os.path.join(write_dir, 'model.h5'))

Train on 125 samples, validate on 113 samples
Epoch 1/250




125/125 - 3s - loss: 0.7160 - val_loss: 0.6530
Epoch 2/250
125/125 - 0s - loss: 0.2119 - val_loss: 0.6078
Epoch 3/250
125/125 - 0s - loss: 0.0696 - val_loss: 0.4969
Epoch 4/250
125/125 - 0s - loss: 0.0715 - val_loss: 0.4553
Epoch 5/250
125/125 - 0s - loss: 0.0478 - val_loss: 0.3783
Epoch 6/250
125/125 - 0s - loss: 0.0350 - val_loss: 0.3641
Epoch 7/250
125/125 - 0s - loss: 0.0304 - val_loss: 0.3242
Epoch 8/250
125/125 - 0s - loss: 0.0278 - val_loss: 0.3126
Epoch 9/250
125/125 - 0s - loss: 0.0259 - val_loss: 0.2616
Epoch 10/250
125/125 - 0s - loss: 0.0245 - val_loss: 0.2552
Epoch 11/250
125/125 - 0s - loss: 0.0234 - val_loss: 0.2043
Epoch 12/250
125/125 - 0s - loss: 0.0224 - val_loss: 0.1974
Epoch 13/250
125/125 - 0s - loss: 0.0217 - val_loss: 0.1612
Epoch 14/250
125/125 - 0s - loss: 0.0210 - val_loss: 0.1476
Epoch 15/250
125/125 - 0s - loss: 0.0204 - val_loss: 0.1255
Epoch 16/250
125/125 - 0s - loss: 0.0200 - val_loss: 0.1122
Epoch 17/250
125/125 - 0s - loss: 0.0195 - val_loss: 0.0957
E

Epoch 138/250
125/125 - 0s - loss: 0.0031 - val_loss: 0.0211
Epoch 139/250
125/125 - 0s - loss: 0.0031 - val_loss: 0.0215
Epoch 140/250
125/125 - 0s - loss: 0.0030 - val_loss: 0.0213
Epoch 141/250
125/125 - 0s - loss: 0.0029 - val_loss: 0.0215
Epoch 142/250
125/125 - 0s - loss: 0.0029 - val_loss: 0.0217
Epoch 143/250
125/125 - 0s - loss: 0.0028 - val_loss: 0.0216
Epoch 144/250
125/125 - 0s - loss: 0.0028 - val_loss: 0.0215
Epoch 145/250
125/125 - 0s - loss: 0.0027 - val_loss: 0.0218
Epoch 146/250
125/125 - 0s - loss: 0.0027 - val_loss: 0.0214
Epoch 147/250
125/125 - 0s - loss: 0.0026 - val_loss: 0.0220
Epoch 148/250
125/125 - 0s - loss: 0.0025 - val_loss: 0.0216
Epoch 149/250
125/125 - 0s - loss: 0.0024 - val_loss: 0.0219
Epoch 150/250
125/125 - 0s - loss: 0.0024 - val_loss: 0.0222
Epoch 151/250
125/125 - 0s - loss: 0.0023 - val_loss: 0.0217
Epoch 152/250
125/125 - 0s - loss: 0.0022 - val_loss: 0.0226
Epoch 153/250
125/125 - 0s - loss: 0.0022 - val_loss: 0.0217
Epoch 154/250
125/125 - 

In [15]:
# Run the reference song (y[0], also written out as 'gt.mid') through the
# trained autoencoder. A batch axis is prepended generically rather than via
# a hard-coded (1, 16, 96, 96) reshape, so this keeps working if MAX_LENGTH
# or the sample dimensions change.
y_song = model.predict(y_test_song[np.newaxis, ...], batch_size=BATCH_SIZE)

# Write the reconstruction, then the songs decoded from the random vectors.
midi.samples_to_midi(y_song[0], write_dir + 'test.mid', 16)
make_rand_songs_normalized(write_dir, rand_vecs)

Means: [-0.04197179  0.06426372 -0.1942337  -0.01409939 -0.0045891  -0.03661692]
Evals: [3.03599259 2.57547879 2.35967081 2.1449569  1.99991735 1.93928994] 


In [16]:
# Open TensorBoard inline on the "logs" directory, the parent of the
# timestamped run directory handed to the TensorBoard callback.
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 45352), started 0:57:32 ago. (Use '!kill 45352' to kill it.)