In [1]:
import datetime
import os
import time
import random

import numpy as np
import tensorflow as tf

from matplotlib import pyplot as plt
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Activation, BatchNormalization, Dense,
                                     Dropout, Flatten, Input, Reshape,
                                     TimeDistributed)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop

# Switch TensorFlow to graph (non-eager) mode for the whole notebook.
# NOTE(review): the K.function(...) / K.learning_phase() call used further down
# presumably relies on graph mode -- confirm before removing this line.
tf.compat.v1.disable_eager_execution()

In [2]:
%load_ext tensorboard

In [5]:
import midi

In [11]:
# --- Training hyperparameters -------------------------------------------------
NUM_EPOCHS = 100       # epochs passed to model.fit
LR = 0.001             # RMSprop learning rate
WRITE_HISTORY = True   # NOTE(review): not referenced anywhere in the visible notebook
NUM_RAND_SONGS = 10    # number of random latent vectors that get decoded
DO_RATE = 0.1          # rate used by every Dropout layer
BN_M = 0.9             # momentum used by every BatchNormalization layer

# --- Data / model dimensions --------------------------------------------------
BATCH_SIZE = 256       # batch size for fit and predict
MAX_LENGTH = 16        # length of the second axis of y_orig (entries per song)
PARAM_SIZE = 120       # width of the latent vector

# --- Reproducibility ----------------------------------------------------------
# Seed every RNG the notebook draws from. NumPy and `random` alone are not
# enough: Keras weight initialisers and Dropout draw from TensorFlow's RNG, so
# the graph-level TF seed must be set too (before the model is built).
SEED = 0
np.random.seed(SEED)
random.seed(SEED)
tf.compat.v1.set_random_seed(SEED)

In [7]:
# Load the flat stack of samples and the per-song sample counts.
y_samples = np.load('samples.npy')
y_lengths = np.load('lengths.npy')
num_songs = len(y_lengths)

# One fixed-length slot of MAX_LENGTH samples per song; filled in the next cell.
y_shape = (num_songs, MAX_LENGTH, *y_samples.shape[1:])
y_orig = np.zeros(shape=y_shape, dtype=y_samples.dtype)

* y_samples = (_, 96, 96)
* y_lengths = (238,)
* y_shape = (238, 16, 96, 96)
* y_orig = array of zeros with shape (238, 16, 96, 96)

In [8]:
# Copy each song's samples into its fixed-length slot in y_orig. Songs are
# stored back to back in y_samples; a song shorter than MAX_LENGTH is repeated
# cyclically (index modulo its length) until the slot is full.
song_start = 0
for song_ix in range(num_songs):
    song_len = y_lengths[song_ix]
    wrapped_offsets = np.arange(MAX_LENGTH) % song_len
    y_orig[song_ix] = y_samples[song_start + wrapped_offsets]
    song_start = song_start + song_len

In [9]:
# Work on a copy so y_orig itself is never modified by later cells.
y = y_orig.copy()

# Fixed split: the first 125 songs train the model, the rest validate it.
y_train, y_valid = y[:125], y[125:]

# Export the first song unchanged as 'gt.mid'
# (presumably the ground-truth reference for the reconstruction written later).
y_test_song = y[0].copy()
midi.samples_to_midi(y_test_song, 'gt.mid', 16)

In [15]:
def _norm_relu_dropout(tensor, per_timestep=False):
    """Apply BatchNormalization -> ReLU -> Dropout to `tensor`.

    With per_timestep=True the normalisation layer is wrapped in
    TimeDistributed; the activation and dropout are applied directly.
    """
    norm = BatchNormalization(momentum=BN_M)
    if per_timestep:
        norm = TimeDistributed(norm)
    normed = norm(tensor)
    activated = Activation('relu')(normed)
    return Dropout(DO_RATE)(activated)


n_steps, n_rows, n_cols = y_shape[1], y_shape[2], y_shape[3]

# ---- Encoder half: input -> PARAM_SIZE-wide latent vector --------------------
x_in = Input(shape=y_shape[1:])
enc_t = Reshape((n_steps, -1))(x_in)
enc_t = TimeDistributed(Dense(2000, activation='relu'))(enc_t)
enc_t = TimeDistributed(Dense(200, activation='relu'))(enc_t)
enc_t = Flatten()(enc_t)
enc_t = Dense(1600, activation='relu')(enc_t)
enc_t = Dense(PARAM_SIZE)(enc_t)
# The output of 'pre_encoder' is the latent code read by the `enc` sub-model.
latent = BatchNormalization(momentum=BN_M, name='pre_encoder')(enc_t)

# ---- Decoder half: latent vector -> reconstruction ---------------------------
# Despite its name, the 'encoder' layer is the first decoder layer: its input
# is where latent vectors are injected when decoding.
dec_t = Dense(1600, name='encoder')(latent)
dec_t = _norm_relu_dropout(dec_t)
dec_t = Dense(MAX_LENGTH * 200)(dec_t)
dec_t = Reshape((MAX_LENGTH, 200))(dec_t)
dec_t = _norm_relu_dropout(dec_t, per_timestep=True)
dec_t = TimeDistributed(Dense(2000))(dec_t)
dec_t = _norm_relu_dropout(dec_t, per_timestep=True)
dec_t = TimeDistributed(Dense(n_rows * n_cols, activation='sigmoid'))(dec_t)
x = Reshape((n_steps, n_rows, n_cols))(dec_t)

model = Model(x_in, x)
model.compile(optimizer=RMSprop(learning_rate=LR), loss='binary_crossentropy')

In [12]:
# Print the layer-by-layer summary of the compiled autoencoder.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 16, 96, 96)]      0         
_________________________________________________________________
reshape (Reshape)            (None, 16, 9216)          0         
_________________________________________________________________
time_distributed (TimeDistri (None, 16, 2000)          18434000  
_________________________________________________________________
time_distributed_1 (TimeDist (None, 16, 200)           400200    
_________________________________________________________________
dropout (Dropout)            (None, 16, 200)           0         
_________________________________________________________________
flatten (Flatten)            (None, 3200)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1600)              512160

In [16]:
# Decoder-only callable: feeds values straight into the input of the 'encoder'
# layer and returns the output of the model's last layer. The second input is
# the Keras learning-phase flag.
decoder_input = model.get_layer('encoder').input
decoder_output = model.layers[-1].output
func = K.function([decoder_input, K.learning_phase()], [decoder_output])

# Encoder-only model: full input -> output of the 'pre_encoder' layer.
latent_output = model.get_layer('pre_encoder').output
enc = Model(inputs=model.input, outputs=latent_output)

In [17]:
# Draw NUM_RAND_SONGS standard-normal latent vectors and persist them to disk.
rand_shape = (NUM_RAND_SONGS, PARAM_SIZE)
rand_vecs = np.random.normal(loc=0.0, scale=1.0, size=rand_shape)
np.save('rand.npy', rand_vecs)

In [18]:
def make_rand_songs(write_dir, rand_vecs):
    """Decode each row of `rand_vecs` and write it out as a MIDI file.

    Every row is passed (as a batch of one) through the module-level `func`
    with the learning-phase flag set to 0, and the first item of the result is
    handed to `midi.samples_to_midi`. Files are named
    ``<write_dir>rand<i>.mid``; `write_dir` is used as a plain string prefix,
    so it must already end with a path separator.
    """
    num_vecs = rand_vecs.shape[0]
    for song_ix in range(num_vecs):
        latent_batch = rand_vecs[song_ix:song_ix + 1]  # keep the batch axis
        decoded_batch = func([latent_batch, 0])[0]
        out_path = write_dir + f'rand{song_ix}.mid'
        midi.samples_to_midi(decoded_batch[0], out_path, 16, 0.25)

def make_rand_songs_normalized(write_dir, rand_vecs):
    """Rescale `rand_vecs` to the encoded data's statistics, then decode them.

    Encodes the module-level `y_orig` with `enc`, takes the mean and the
    covariance of the resulting codes, and uses the SVD of that covariance to
    scale and rotate `rand_vecs` before shifting them by the mean. The
    transformed vectors are passed on to `make_rand_songs`.
    """
    codes = np.squeeze(enc.predict(y_orig))

    code_mean = np.mean(codes, axis=0)
    centered = codes - code_mean
    code_cov = np.cov(centered.T)
    # Singular values of the covariance; their square roots are the per-axis
    # scale factors, and the rows of `basis` are the matching directions.
    _, sing_vals, basis = np.linalg.svd(code_cov)
    scales = np.sqrt(sing_vals)

    print(f"Means: {code_mean[:6]}")
    print(f"Evals: {scales[:6]} ")

    scaled = rand_vecs * scales
    latent_vecs = code_mean + np.dot(scaled, basis)
    make_rand_songs(write_dir, latent_vecs)

In [19]:
# Each run logs to its own timestamped sub-directory of "logs" so TensorBoard
# can show runs side by side.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs", run_stamp)

tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
callbacks = [tensorboard_cb]

In [20]:
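# NOTE: disabled alternative that builds a similar autoencoder from Sequential
# sub-models and adds a `rounded_accuracy` metric; kept for reference only.
# def create_model():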
#     def rounded_accuracy(y_true, y_pred):
#         return tf.keras.metrics.binary_accuracy(
#             tf.round(y_true),
#             tf.round(y_pred),
#         )
    
#     stacked_encoder = tf.keras.models.Sequential([
#         Input(shape=(16,96,96)),
#         Reshape((16, -1)),
#         TimeDistributed(Dense(2000, activation='relu')),
#         TimeDistributed(Dense(200, activation='relu')),
#         Flatten(),
#         Dense(1600, activation='relu'),
#         Dense(PARAM_SIZE),
#         BatchNormalization(momentum=BN_M, name='pre_encoder')
#     ])
#     stacked_decoder = tf.keras.models.Sequential([
#         Dense(1600, name='encoder'),
#         BatchNormalization(momentum=BN_M),
#         Activation('relu'),
#         Dense(MAX_LENGTH * 200),
#         Reshape((MAX_LENGTH, 200)),
#         TimeDistributed(BatchNormalization(momentum=BN_M)),
#         Activation('relu'),
#         TimeDistributed(Dense(2000)),
#         TimeDistributed(BatchNormalization(momentum=BN_M)),
#         Activation('relu'),
#         TimeDistributed(Dense(96 * 96, activation='sigmoid')),
#         Reshape((16,96,96))
#     ])
#     stacked_ae = tf.keras.models.Sequential([stacked_encoder, stacked_decoder])

#     stacked_ae.compile(
#         loss="binary_crossentropy",
#         optimizer= RMSprop(learning_rate=LR),
#         metrics=[rounded_accuracy],
#     )

#     return stacked_encoder, stacked_decoder, stacked_ae

# stacked_encoder, stacked_decoder, model = create_model()

In [21]:
# Train the autoencoder to reproduce its own input; the held-out songs are
# used only for the validation loss.
history = model.fit(
    y_train,
    y_train,
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(y_valid, y_valid),
    callbacks=callbacks,
    verbose=2,
)

# Report the training loss of the final epoch.
loss = history.history["loss"][-1]
print(f"Train Loss: {loss}")

# Output directory for the saved model and the generated MIDI files. It is
# used as a plain string prefix by later cells, hence the trailing slash.
write_dir = 'History/'
# Create the directory up front so a fresh checkout survives Restart & Run All
# instead of depending on the folder already existing on disk.
os.makedirs(write_dir, exist_ok=True)
model.save(write_dir + 'model.h5')

Train on 125 samples, validate on 113 samples
Epoch 1/100




125/125 - 2s - loss: 0.7153 - rounded_accuracy: 0.4979 - val_loss: 0.6535 - val_rounded_accuracy: 0.9960
Epoch 2/100
125/125 - 0s - loss: 0.2126 - rounded_accuracy: 0.9966 - val_loss: 0.6061 - val_rounded_accuracy: 0.9959
Epoch 3/100
125/125 - 0s - loss: 0.0698 - rounded_accuracy: 0.9966 - val_loss: 0.4963 - val_rounded_accuracy: 0.9959
Epoch 4/100
125/125 - 0s - loss: 0.0624 - rounded_accuracy: 0.9966 - val_loss: 0.5057 - val_rounded_accuracy: 0.9960
Epoch 5/100
125/125 - 0s - loss: 0.0570 - rounded_accuracy: 0.9966 - val_loss: 0.3432 - val_rounded_accuracy: 0.9960
Epoch 6/100
125/125 - 0s - loss: 0.0346 - rounded_accuracy: 0.9966 - val_loss: 0.3498 - val_rounded_accuracy: 0.9960
Epoch 7/100
125/125 - 0s - loss: 0.0287 - rounded_accuracy: 0.9966 - val_loss: 0.2974 - val_rounded_accuracy: 0.9960
Epoch 8/100
125/125 - 0s - loss: 0.0258 - rounded_accuracy: 0.9966 - val_loss: 0.2836 - val_rounded_accuracy: 0.9960
Epoch 9/100
125/125 - 0s - loss: 0.0241 - rounded_accuracy: 0.9966 - val_los

In [22]:
# Reconstruct the reference song. A leading batch axis is added with
# np.newaxis rather than a hard-coded reshape(1, 16, 96, 96), so this cell
# keeps working if MAX_LENGTH or the sample dimensions change.
y_song = model.predict(y_test_song[np.newaxis], batch_size=BATCH_SIZE)

# Write the reconstruction, then decode the stored random latent vectors
# after rescaling them to the statistics of the encoded dataset.
midi.samples_to_midi(y_song[0], write_dir + 'test.mid', 16)
make_rand_songs_normalized(write_dir, rand_vecs)

Means: [ 0.01421444  0.01081225 -0.04106295  0.02704114 -0.04220293  0.02202534]
Evals: [0.14756089 0.07668684 0.07389737 0.07114887 0.0695562  0.06809989] 


In [23]:
# Open TensorBoard inline on the "logs" directory the training callback writes to.
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 10056), started 0:25:05 ago. (Use '!kill 10056' to kill it.)