In [1]:
from importlib import reload
import pymongo
import gridfs
import numpy as np
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
from scipy.sparse import csc_matrix, csr_matrix
import pickle
import pretty_midi
import sys
import copy
from collections import namedtuple
import timeit
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers

In [36]:
# Import the project modules from the `src` package. Each import is followed by a reload() so that
# edits made to the module files are picked up when this cell is re-run, without restarting the kernel.
import src.midi_utils as midi_utils
reload(midi_utils)

import src.data as data
reload(data)

import src.models as models
reload(models)

import src.ml_classes as ml_classes
reload(ml_classes)

import src.exp_utils as exp_utils
reload(exp_utils)

import src.losses as losses
# This reload is the last expression of the cell, so its return value (the module) is echoed as output.
reload(losses)

<module 'src.losses' from '/storage/781-piano-autoencoder/src/losses.py'>

In [45]:
# Turn off eager execution via the TF1 compatibility API.
# NOTE(review): the TensorFlow docs say this call must happen before any graphs, ops or tensors are
# created. This cell's execution count (In [45]) is out of sequence with its neighbours, so confirm
# it still runs first under Restart Kernel -> Run All.
tf.compat.v1.disable_eager_execution()

In [4]:
# Fetch the stored configuration of one experiment run from the MongoDB 'sacred' database.
run = 307  # _id of the run document to look up
client = pymongo.MongoClient()  # assuming a local MongoDB
fs = gridfs.GridFS(client.sacred)  # assuming database name is 'sacred'; not used in the cells shown here
runs = client.sacred.runs
# Now get run from the database
run_entry = runs.find_one({'_id': run})
# find_one returns None when no document matches. Fail here with a clear message instead of an
# opaque "'NoneType' object is not subscriptable" on the config lookup below.
if run_entry is None:
    raise LookupError(f'run {run} was not found in the sacred.runs collection')
config = run_entry['config']

In [92]:
# Build the training and validation example sets from their MIDI folders.
# Keyword arguments common to both calls are collected once so the two splits cannot drift apart.
beats_per_ex = int(config['seq_length'] / config['sub_beats'])
shared_kwargs = dict(sparse=True, use_base_key=config['use_base_key'],
                     beats_per_ex=beats_per_ex, sub_beats=config['sub_beats'])

# The training split additionally passes nth_file=10 and the configured velocity cutoff.
train_folder = 'training_data/midi_train' + config['data_folder_prefix']
model_datas_train, seconds = data.folder2examples(train_folder, nth_file=10,
                                                  vel_cutoff=config['vel_cutoff'], **shared_kwargs)

# NOTE(review): the validation split is loaded without vel_cutoff (and without nth_file), so it
# relies on folder2examples' defaults for both - confirm this asymmetry is intentional.
# `seconds` is rebound by this call, so only the validation value survives the cell.
val_folder = 'training_data/midi_val' + config['data_folder_prefix']
model_datas_val, seconds = data.folder2examples(val_folder, **shared_kwargs)

100%|██████████| 17/17 [00:07<00:00,  2.23it/s]
  8%|▊         | 1/12 [00:00<00:01,  6.81it/s]

01h 00m 07s of data
created model data H :    (32, 88) data shape,     380 training examples
created model data O :    (32, 88) data shape,     380 training examples
created model data V :    (32, 88) data shape,     380 training examples
created model data R :    (32, 88) data shape,     380 training examples
created model data S :    (32, 2) data shape,     380 training examples
created model data key :    (12,) data shape,     380 training examples
created model data tempo :    (1,) data shape,     380 training examples
created model data V_mean :    (1,) data shape,     380 training examples


100%|██████████| 12/12 [00:04<00:00,  2.79it/s]

00h 38m 48s of data
created model data H :    (32, 88) data shape,     250 training examples
created model data O :    (32, 88) data shape,     250 training examples
created model data V :    (32, 88) data shape,     250 training examples
created model data R :    (32, 88) data shape,     250 training examples
created model data S :    (32, 2) data shape,     250 training examples
created model data key :    (12,) data shape,     250 training examples
created model data tempo :    (1,) data shape,     250 training examples
created model data V_mean :    (1,) data shape,     250 training examples





In [93]:
# Rebuild the autoencoder graph described by the stored run config, then create the
# training and validation batch generators that feed it.
model_input_reqs, model_output_reqs = models.get_model_reqs(config['model_inputs'], config['model_outputs'])

# Override the stored beta rate before the loss is constructed.
config['beta_rate'] = 0

# The config dict itself (not a copy) is passed as kwargs to the encoder/decoder builder functions.
model_kwargs = config
build_encoder_graph = models.create_LSTMencoder_graph
# For a variational model, z is a list of [[means], [stds]].
z, model_inputs_tf = build_encoder_graph(model_input_reqs, **model_kwargs)

if not config['variational']:
    # Plain autoencoder: use the configured loss and feed the encoding straight to the decoder.
    loss_for_train = config['loss']
    z_input = z
else:
    beta_fn = exp_utils.beta_fn2(config['beta_rate'], config['max_beta'])
    loss_for_train, beta_cb = losses.vae_custom_loss(z, beta_fn, free_bits=config['free_bits'],
                                                     kl_weight=config['kl_weight'])
    sampling_fn = models.sampling(config['batch_size'], epsilon_std=config['epsilon_std'])
    # z_input is the tensor that is handed to the decoder
    z_input = layers.Lambda(sampling_fn)(z)

# Pick the decoder builder according to the 'hierarchical' flag of the run.
build_decoder_graph = (models.create_hierarchical_decoder_graph if config['hierarchical']
                       else models.create_LSTMdecoder_graph_ar)

pred, ar_inputs_tf = build_decoder_graph(z_input, model_output_reqs, **model_kwargs)
autoencoder = tf.keras.Model(inputs=model_inputs_tf + ar_inputs_tf, outputs=pred, name='autoencoder')
autoencoder.summary()


def _make_generator(model_datas):
    """Return a ModelDataGenerator (t_force=True) over all values of ``model_datas``.

    Input and output names are taken from the model requirements; batch size and
    sequence length come from the run config.
    """
    return ml_classes.ModelDataGenerator(list(model_datas.values()),
                                         [req.name for req in model_input_reqs],
                                         [req.name for req in model_output_reqs],
                                         t_force=True, batch_size=config['batch_size'],
                                         seq_length=config['seq_length'])


dg = _make_generator(model_datas_train)

dg_val = _make_generator(model_datas_val)

Model: "autoencoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
V_mean_in (InputLayer)          [(None, 1)]          0                                            
__________________________________________________________________________________________________
H_in (InputLayer)               [(None, 32, 88)]     0                                            
__________________________________________________________________________________________________
repeat32Times (RepeatVector)    (None, 32, 1)        0           V_mean_in[0][0]                  
__________________________________________________________________________________________________
joinModelInput (Concatenate)    (None, 32, 89)       0           H_in[0][0]                       
                                                                 repeat32Times[0][0]    

In [94]:
# Compile with an Adam optimizer built from its default arguments; the loss was chosen when the
# graph was built, while metrics and loss weights are read from the run config.
opt = tf.keras.optimizers.Adam()
autoencoder.compile(
    optimizer=opt,
    loss=loss_for_train,
    metrics=config['metrics'],
    loss_weights=config['loss_weights'],
)

In [97]:
# Baseline: evaluate the model on batch 0 of the training generator before any checkpoint is loaded.
# The batch is fetched ONCE. Indexing the generator separately for inputs and targets would pair
# x and y from two different __getitem__ calls, which is wrong if the generator shuffles or samples.
initial_batch = dg[0]
autoencoder.evaluate(initial_batch[0], initial_batch[1], batch_size=config['batch_size'])

[15.581132888793945,
 7.763078,
 2.606018,
 0.0146484375,
 7.763078,
 0.011230469,
 2.606018]

In [98]:
# Load the run's '<run>_best_val_weights.hdf5' checkpoint into the rebuilt model, by layer name.
# (Alternative tried previously: autoencoder.load_weights on '<run>_best_train_weights.hdf5'.)
weights_path = f'experiments/run_{run}/{run}_best_val_weights.hdf5'
models.load_weights_safe(autoencoder, weights_path, by_name=True)

weights loaded successfully


In [135]:
# Evaluate on batch 0 of the training generator again, now with the checkpoint weights loaded.
# Observation from the original run: the loss is much worse AFTER loading weights, for some odd
# reason... Something is being scrambled somewhere.
# The batch is fetched ONCE so inputs and targets are guaranteed to come from the same __getitem__
# call; this rules out an x/y mismatch from a shuffling or sampling generator as the cause.
# NOTE(review): the weights were loaded with by_name=True - verify that every layer name in the
# checkpoint matches the rebuilt graph.
loaded_batch = dg[0]
autoencoder.evaluate(loaded_batch[0], loaded_batch[1], batch_size=config['batch_size'])

[49.08544921875,
 24.667273,
 8.139393,
 0.0009765625,
 24.667273,
 0.00048828125,
 8.139393]

In [121]:
# Evaluate the model on a random draw of 64 validation examples.
# `idx` is returned alongside the examples but is not used in the lines shown here.
random_examples, idx = data.n_rand_examples(model_datas_val, n=64)
# Reuse the '*_ar' entries as the '*_out' targets.
# NOTE(review): presumably the '_ar' arrays are the autoregressive decoder inputs - confirm they are
# aligned with what the model is expected to output (no time shift), otherwise the loss is not comparable.
random_examples['H_out'] = random_examples['H_ar']
random_examples['V_out'] = random_examples['V_ar']
# The same mapping is passed as both inputs and targets.
# Presumably Keras selects the entries it needs by input/output name - TODO confirm.
autoencoder.evaluate(random_examples, random_examples, batch_size=config['batch_size'])