<a href="https://colab.research.google.com/github/Zhachory1/MusicNST/blob/master/TestingMusicVAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#@title Install Dependencies
print 'Installing dependencies...'
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
!pip install -q pyfluidsynth
!pip install py-midi pretty_midi

Installing dependencies...


In [3]:
#@title Import Data from GitHub 
!cd MusicNST/ && git pull
!git clone https://github.com/Zhachory1/MusicNST.git

Already up to date.
fatal: destination path 'MusicNST' already exists and is not an empty directory.


In [4]:
#@title Imports
import os
import copy
import fnmatch
import numpy as np
import time
import argparse
import warnings
import midi
import pretty_midi
import pandas as pd
import collections
import random
import tensorflow as tf

from google.colab import files

# Note sequence to piano roll
from magenta.music.pianoroll_lib import PianorollSequence
import magenta.music.sequences_lib as seq_lib
import magenta.music as mm
import magenta.models.music_vae.data as data
from magenta.protobuf import music_pb2

from keras import optimizers, losses
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense, Lambda, LSTM, RepeatVector, Bidirectional
from keras import backend as K                   
from keras.utils import Sequence

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

def play(note_sequence):
  """Plays `note_sequence` inline through magenta's fluidsynth synth."""
  synth = mm.fluidsynth
  mm.play_sequence(note_sequence, synth=synth)

def download(note_sequence, filename):
  """Writes `note_sequence` to the MIDI file `filename` and downloads it.

  Args:
    note_sequence: sequence proto handed to mm.sequence_proto_to_midi_file.
    filename: path of the MIDI file to write and then pass to files.download.
  """
  midi_path = filename
  mm.sequence_proto_to_midi_file(note_sequence, midi_path)
  files.download(midi_path)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3298728482037980355
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 655702652170079239
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 13315137474284640180
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 12721733632
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7952987640063017278
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
]


In [0]:
#@title Helper functions and global variables

################################################################################
#                                 Constants 
################################################################################
# Column names used with the dataset index CSV.
DF_FNAME = 'midi_filename'
DF_FEATURES = 'midi_features'
DF_SPLIT = 'split'
# Directory holding the MIDI files; filenames from the CSV are appended to it.
PATH_PREFIX = './MusicNST/midi_files/maestro-v1.0.0/'
# Location of the index CSV inside that directory.
INDEX_PATH = '{}maestro-v1.0.0.csv'.format(PATH_PREFIX)
################################################################################
#                    Helper Functions to manipulate input data. 
################################################################################

def new_note_sequence(steps_per_quarter, step_per_split):
  """Creates an empty NoteSequence proto pre-filled with default metadata.

  The returned sequence has no notes, a 4/4 time signature and a 120 qpm tempo,
  both placed at time 0.

  Args:
    steps_per_quarter: int, stored in quantization_info.steps_per_quarter.
    step_per_split:    int, stored as total_quantized_steps.
  Returns:
    music_pb2.NoteSequence
  """
  sequence = music_pb2.NoteSequence()
  sequence.quantization_info.steps_per_quarter = steps_per_quarter
  sequence.total_quantized_steps = step_per_split
  # Twice the number of quarter notes in the split (e.g. 16 for 32 steps at 4
  # steps per quarter).
  # NOTE(review): presumably chosen so the pianoroll conversion at 2 frames per
  # unit of time yields one frame per step -- confirm.
  sequence.total_time = 2 * int(step_per_split/steps_per_quarter)

  sequence.time_signatures.add(numerator=4, denominator=4, time=0)
  sequence.tempos.add(qpm=120, time=0)  # quarters per minute
  return sequence
  

def split_note_sequences(note_sequence, bars_per_split=2, steps_per_quarter=4):
  """Splits a note sequence into consecutive sequences of bars_per_split bars.

  The input is quantized to steps_per_quarter steps per quarter note and then
  cut every 4 * steps_per_quarter * bars_per_split steps (bars are assumed to
  be in 4|4 time). With the defaults every output covers 32 steps.

  A note that crosses one or more split boundaries is cut at each boundary:
  every split it touches receives a piece clipped to that split, with steps
  expressed relative to the start of the split. A note ending exactly on a
  boundary is not cut. Splits that contain no notes but lie before a later
  note are still emitted (empty) so that split index and musical position stay
  aligned.

  Only pitch, quantized_start_step and quantized_end_step are written on the
  emitted notes.
  NOTE(review): start_time / end_time / velocity are left at their defaults;
  if the downstream pianoroll conversion reads times rather than quantized
  steps they need to be filled in as well -- confirm.

  Args:
    note_sequence:      NoteSequence, input note sequence to split
    bars_per_split:     int, number of bars each output note sequence will have.
    steps_per_quarter:  int, number of steps in each quarter note.
  Returns:
    list[NoteSequence] - at least one sequence, each spanning
                         bars_per_split*4*steps_per_quarter steps
  Raises:
    ValueError: if the resulting split length is not positive.
  """
  step_per_split = int(steps_per_quarter*4) * bars_per_split
  if step_per_split <= 0:
    raise ValueError(
        'bars_per_split and steps_per_quarter must be positive, got %r and %r'
        % (bars_per_split, steps_per_quarter))

  quantized_seq = mm.quantize_note_sequence(note_sequence, steps_per_quarter)
  steps_per_quarter = quantized_seq.quantization_info.steps_per_quarter

  # Always return at least one (possibly empty) split, as before.
  split_seqs = [new_note_sequence(steps_per_quarter, step_per_split)]

  def sequence_for(split_index):
    """Returns the split at split_index, creating empty splits up to it."""
    while len(split_seqs) <= split_index:
      split_seqs.append(new_note_sequence(steps_per_quarter, step_per_split))
    return split_seqs[split_index]

  # Each note is placed by its own start step, so the result does not depend
  # on the order in which notes are stored in the proto.
  for note in quantized_seq.notes:
    split_index = note.quantized_start_step // step_per_split
    while True:
      split_start = split_index * step_per_split
      piece = sequence_for(split_index).notes.add()
      piece.pitch = note.pitch
      # Continuation pieces start at step 0 of their split.
      piece.quantized_start_step = max(
          note.quantized_start_step - split_start, 0)
      remaining_end = note.quantized_end_step - split_start
      piece.quantized_end_step = min(remaining_end, step_per_split)
      if remaining_end <= step_per_split:
        # The note ends inside (or exactly at the end of) this split.
        break
      split_index += 1

  return split_seqs

def midi_to_pianoroll(midi_filename, play_track=False):
  """Loads a MIDI file and returns one onset pianoroll per split.

  The pianoroll object produced for each split exposes several arrays (active,
  weights, onsets, onset_velocities, active_velocities, offsets,
  control_changes); only `onsets` is kept here.

  Args:
    midi_filename: string, path of the MIDI file to read
    play_track:    bool, when True the full track is played before splitting
  Returns:
    np.array stacking the onset roll of every split. The sample run in this
    notebook shows shape (115, 33, 128) for one file.
  """
  full_sequence = mm.midi_to_sequence_proto(pretty_midi.PrettyMIDI(midi_filename))
  if play_track:
    play(full_sequence)

  onset_rolls = []
  for split_seq in split_note_sequences(full_sequence):
    # Per the original author's note, each roll has 33 rows rather than 32
    # because an end token is added to every sequence.
    roll = seq_lib.sequence_to_pianoroll(
        split_seq, 2, data.MIN_MIDI_PITCH, data.MAX_MIDI_PITCH)
    # Single-element list: only the onsets feature is concatenated.
    features = [roll.onsets]
    onset_rolls.append(np.concatenate(features, axis=-1))
  return np.array(onset_rolls)

def pianoroll_to_notes(pianoroll, opt_midi_file_name=""):
  """Converts a pianoroll into a note sequence, optionally downloading it.

  Args:
    pianoroll:          iterable of per-step vectors; the indices of the
                        non-zero entries of each step become its active events
    opt_midi_file_name: string, when not "" the sequence is also written to
                        this MIDI file and downloaded
  Returns:
    The note sequence produced by PianorollSequence.to_sequence().
  """
  events = PianorollSequence(steps_per_quarter=4)
  for frame in pianoroll:
    active = np.where(frame)[0]
    events.append(frozenset(active))
  sequence = events.to_sequence()
  if opt_midi_file_name != "":
    download(sequence, opt_midi_file_name)
  return sequence

def load_midi_data_from_midi_files(filenames):
  """Loads MIDI files and converts each one to its pianoroll tensor.

  Args:
    filenames: iterable of strings, midi filenames relative to PATH_PREFIX
  Returns:
    collections.OrderedDict mapping each filename (as given) to the array
    returned by midi_to_pianoroll, in the order the filenames were supplied.
    Empty when `filenames` is empty.
  """
  return collections.OrderedDict(
      (mn, midi_to_pianoroll(PATH_PREFIX + mn)) for mn in filenames)


In [6]:
#@title Testing reading in files and converting to piano roll
# Read only the columns needed here from the dataset index.
df = pd.read_csv(INDEX_PATH, usecols=[DF_SPLIT, DF_FNAME])

midi_filename = df.loc[0].midi_filename
print(midi_filename)
# Bound to a name other than `midi` so the imported `midi` module is not
# overwritten by the loaded data.
midi_tensors = load_midi_data_from_midi_files([midi_filename])
midi_data = np.array(midi_tensors[midi_filename])
midi_data.shape

2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-split_07-07-17_Piano-e_3-02_wav--3.midi


(115, 33, 128)

In [0]:
class MusicVaeDataGenerator(Sequence):
    """Keras Sequence that builds autoencoder batches from MIDI files."""

    def __init__(self, list_IDs, batch_size=1, dim=(33, 896),
                 shuffle=True, bars_per_split=2):
        """Stores the configuration and prepares the first epoch's ordering.

        Args:
          list_IDs:       list of MIDI filenames served by this generator.
          batch_size:     number of FILES (not samples) read per batch.
          dim:            stored on the instance; not read by any method here.
          shuffle:        whether file order is reshuffled every epoch.
          bars_per_split: stored on the instance; not read by any method here.
        """
        self.list_IDs = list_IDs
        self.batch_size = batch_size
        self.dim = dim
        self.bars_per_split = bars_per_split
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of whole batches per epoch."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Returns batch number `index` as an (inputs, targets) pair."""
        first = index * self.batch_size
        last = first + self.batch_size
        batch_ids = [self.list_IDs[k] for k in self.indexes[first:last]]
        return self.__data_generation(batch_ids)

    def on_epoch_end(self):
        """Rebuilds the file ordering, shuffling it when requested."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Loads the given files and joins their splits into one batch.

        One MIDI file yields many samples (one per split), so the arrays of
        all files in the batch are concatenated along the first axis. The
        same array is returned as input and target; no labels are produced.
        """
        loaded = load_midi_data_from_midi_files(list_IDs_temp)
        per_file = [loaded[file_id] for file_id in list_IDs_temp]
        batch = np.concatenate(per_file, axis=0)
        return batch, batch

In [0]:
#@title Make data generators
# Read the dataset index through the shared INDEX_PATH constant instead of
# repeating the path literal.
df = pd.read_csv(INDEX_PATH, usecols=[DF_SPLIT, DF_FNAME])

# Build one generator per split listed in the index CSV. Each generator is
# fed the MIDI filenames whose `split` column matches.
train_split = df.loc[df[DF_SPLIT] == "train"]
train_filenames = train_split[DF_FNAME].tolist()
train_generator = MusicVaeDataGenerator(train_filenames)

validation_split = df.loc[df[DF_SPLIT] == "validation"]
validation_filenames = validation_split[DF_FNAME].tolist()
validation_generator = MusicVaeDataGenerator(validation_filenames)

test_split = df.loc[df[DF_SPLIT] == "test"]
test_filenames = test_split[DF_FNAME].tolist()
test_generator = MusicVaeDataGenerator(test_filenames)


In [9]:
#@title Test data generators
# Indexing the generator invokes its __getitem__; inputs and targets are the
# same array, so only the first element is inspected.
batch, _ = train_generator[0]
batch.shape

(125, 33, 128)

In [0]:
#@title Define the VAE
def Encoder(x, input_dim, hidden_dim, latent_dim, epsilon_std):
    """Builds the bidirectional-LSTM encoder and the sampled latent tensor.

    Args:
      x:           Keras input tensor fed to the first recurrent layer.
      input_dim:   not read by this function; kept so the signature is
                   unchanged for callers.
      hidden_dim:  units per direction in each LSTM layer.
      latent_dim:  size of the latent vector.
      epsilon_std: stddev of the Gaussian noise drawn when sampling z.
    Returns:
      (encoder, z_mean, z_log_sigma, z): `encoder` is a Model mapping x to
      z_mean; `z` is the sampled latent tensor.
    """
    # LSTM encoding: the first layer keeps the whole sequence, the second one
    # reduces it to a single vector.
    h = Bidirectional(LSTM(hidden_dim, return_sequences=True))(x)
    h = Bidirectional(LSTM(hidden_dim))(h)

    # VAE Z layer
    z_mean = Dense(latent_dim)(h)
    z_log_sigma = Dense(latent_dim)(h)

    def sampling(args):
        """Reparameterization trick: z = mean + std * epsilon."""
        z_mean, z_log_sigma = args
        # The leading dimension is read from the tensor at run time, so the
        # noise matches whatever batch size is fed in.
        batch = K.shape(z_mean)[0]
        epsilon = K.random_normal(shape=(batch, latent_dim),
                                  mean=0., stddev=epsilon_std)
        # The KL term of the loss (1 + z_log_sigma - mean^2 - exp(z_log_sigma))
        # treats z_log_sigma as a log-variance, so the standard deviation is
        # exp(0.5 * z_log_sigma). Scaling the noise by the raw log value, as
        # before, is inconsistent with that term and can even flip its sign.
        return z_mean + K.exp(0.5 * z_log_sigma) * epsilon

    # note that "output_shape" isn't necessary with the TensorFlow backend
    # so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])

    return Model(x, z_mean), z_mean, z_log_sigma, z

def Decoder(z, timesteps, input_dim, hidden_dim, latent_dim):
    """Builds the LSTM decoder twice over the same layers.

    The same three layer objects are applied to the sampled latent tensor `z`
    (for the end-to-end VAE) and to a fresh latent Input (for a stand-alone
    generator model), so both paths share weights.

    Args:
      z:          latent tensor produced by the encoder's sampling layer.
      timesteps:  number of steps each decoded sequence has.
      input_dim:  size of each decoded step vector.
      hidden_dim: units of the two hidden LSTM layers.
      latent_dim: size of the latent vector accepted by the generator model.
    Returns:
      (decoder, x_decoded_mean): the generator Model (latent -> sequence) and
      the decoded tensor wired to `z`.
    """
    # Reusable layers for both the generator model and the VAE model.
    decoder_h_1 = LSTM(hidden_dim, return_sequences=True)
    decoder_h_2 = LSTM(hidden_dim, return_sequences=True)
    # Sigmoid keeps the outputs in (0, 1). The result is trained with binary
    # cross-entropy and later thresholded like a probability, which the
    # default tanh output range (-1, 1) does not fit.
    decoder_mean = LSTM(input_dim, return_sequences=True, activation='sigmoid')

    def decode(latent):
        """Applies the shared decoder stack to a latent tensor."""
        h_decoded = RepeatVector(timesteps)(latent)
        h_decoded = decoder_h_1(h_decoded)
        h_decoded = decoder_h_2(h_decoded)
        return decoder_mean(h_decoded)

    # VAE path
    x_decoded_mean = decode(z)

    # generator, from latent space to reconstructed inputs
    decoder_input = Input(shape=(latent_dim,))
    decoder = Model(decoder_input, decode(decoder_input))

    # return generator model and outputs for VAE to use
    return decoder, x_decoded_mean
    
def VAE(input_dim,
        timesteps,
        hidden_dim,
        latent_dim,
        epsilon_std=1.):
    """Builds and compiles the VAE together with its encoder and decoder.

    All three returned models are built on the same layers, so training the
    VAE also trains the encoder and the decoder.

    Args:
      input_dim:   size of each step vector.
      timesteps:   number of steps per input sequence.
      hidden_dim:  LSTM units, passed to Encoder and Decoder.
      latent_dim:  size of the latent vector.
      epsilon_std: stddev of the sampling noise, passed to Encoder.
    Returns:
      (vae, encoder, decoder)
    """
    # Single input tensor shared by the encoder and the full model.
    inputs = Input(shape=(timesteps, input_dim,))

    encoder, z_mean, z_log_sigma, z = Encoder(
        inputs, input_dim, hidden_dim, latent_dim, epsilon_std)
    decoder, reconstruction = Decoder(
        z, timesteps, input_dim, hidden_dim, latent_dim)

    vae = Model(inputs, reconstruction)

    def vae_loss(x, x_decoded_mean):
        """Reconstruction cross-entropy plus a 0.1-weighted KL term."""
        # Loss between x and reconstructed x.
        reconstruction_term = losses.binary_crossentropy(x, x_decoded_mean)
        # Divergence between z and a gaussian, averaged over all elements.
        kl_inner = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
        kl_term = - 0.5 * K.mean(kl_inner)
        return reconstruction_term + 0.1 * kl_term

    vae.compile(optimizer='rmsprop', loss=vae_loss)

    return vae, encoder, decoder

In [11]:
# Build the full VAE (`model`), its encoder (`enc`) and its generator (`gen`).
# 33 timesteps x 128 pitches matches the (N, 33, 128) batches shown above.
model, enc, gen = VAE(
    input_dim=128,
    timesteps=33,
    hidden_dim=64,
    latent_dim=64,
    epsilon_std=1.,
)

Instructions for updating:
Colocations handled automatically by placer.


In [0]:
# files.upload()
# model.load_weights("model.h5")

In [0]:
#@title Train model on dataset
# Train on the train split, validating on the validation split each epoch.
model.fit_generator(generator=train_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=10,
                    workers=8)

# print() with a single argument works on both Python 2 and Python 3 and
# matches the other cells; the bare print statement is Python 2 only.
print(model.evaluate_generator(generator=test_generator,
                               use_multiprocessing=True,
                               workers=4))

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

In [0]:
# Run the trained VAE on the first file of the index and post-process the
# predicted pianoroll.
df = pd.read_csv(INDEX_PATH, usecols=[DF_SPLIT, DF_FNAME])
midi_filename = df.loc[0].midi_filename
# Bound to a name other than `midi` so the imported `midi` module is not
# overwritten by the loaded data.
midi_tensors = load_midi_data_from_midi_files([midi_filename])
midi_data = np.array(midi_tensors[midi_filename])
pred = model.predict(midi_data)
# Join the per-split predictions into one long (steps, pitches) roll.
pred = np.concatenate([pr for pr in pred], axis=0)
threshold = 0.2
new_pianoroll = []
for step in pred:
  if np.where(step >= threshold, 1, 0).sum() > 6:
    # More than 6 pitches pass the threshold: keep only the 6 strongest,
    # written as a binary vector.
    top_k_notes = step.argsort()[-6:][::-1]
    new_step = np.zeros_like(step)
    new_step[top_k_notes] = 1
    new_pianoroll.append(new_step)
  else:
    # Otherwise the raw prediction for the step is kept unchanged.
    new_pianoroll.append(step)

In [0]:
def softmax(x, axis=-1):
    """Softmax of `x` along `axis`.

    The maximum along `axis` is subtracted before exponentiating, which leaves
    the result unchanged while keeping the exponentials from overflowing.
    """
    peak = np.max(x, axis, keepdims=True)
    exps = np.exp(x - peak)
    total = np.sum(exps, axis, keepdims=True)
    return exps / total
 
# Compare the first post-processed predicted step with the first step of the
# first split of the input data.
print(new_pianoroll[0])
print(midi_data[0][0])

In [0]:
# Module-level cut-off read by decode_piano: decoder outputs below it are set
# to zero before notes are extracted.
threshold = 0.2

def encode_midi(midi_file, encoder):
    """Encodes every split of a MIDI file with `encoder`.

    Args:
      midi_file: path of the MIDI file, passed to midi_to_pianoroll.
      encoder:   model exposing predict_on_batch.
    Returns:
      Whatever encoder.predict_on_batch returns for the file's pianoroll batch.
    """
    pianoroll_batch = midi_to_pianoroll(midi_file, False)
    latent = encoder.predict_on_batch(pianoroll_batch)
    return latent

def decode_piano(latent_code, decoder, opt_midi_file_name="", max_notes=6):
    """Decodes latent codes into a note sequence with limited polyphony.

    The decoder output is flattened into one long pianoroll, values below the
    module-level `threshold` are zeroed, and any step with more than
    `max_notes` remaining pitches is reduced to its `max_notes` strongest
    pitches.

    Args:
      latent_code:        batch of latent vectors for decoder.predict_on_batch.
      decoder:            model exposing predict_on_batch.
      opt_midi_file_name: string, forwarded to pianoroll_to_notes; when not ""
                          the result is also written to that file and
                          downloaded.
      max_notes:          int, maximum number of simultaneous pitches per step.
    Returns:
      The note sequence built by pianoroll_to_notes.
    """
    pianoroll = decoder.predict_on_batch(latent_code)
    # Join the per-sample rolls along the step axis.
    pianoroll = np.concatenate([pr for pr in pianoroll], axis=0)
    pianoroll[pianoroll < threshold] = 0

    # Either keep all notes of a step or only its max_notes strongest ones.
    new_pianoroll = []
    for step in pianoroll:
        if (step >= threshold).sum() > max_notes:
            top_k_notes = step.argsort()[-max_notes:]
            new_step = np.zeros_like(step)
            new_step[top_k_notes] = 1
            new_pianoroll.append(new_step)
        else:
            new_pianoroll.append(step)

    # Return the polyphony-limited roll. Previously the unrestricted
    # `pianoroll` was passed on and the limiting above had no effect.
    return pianoroll_to_notes(np.array(new_pianoroll), opt_midi_file_name)

In [16]:
# Encode the file selected earlier (`midi_filename`) with the encoder model.
latent_code = encode_midi(PATH_PREFIX+midi_filename, enc)
# Mean over axis 1 of the returned latent codes, i.e. one value per row.
print(latent_code.mean(axis=1))
# Decode with the generator model; the empty filename skips the MIDI download.
ns_reconstructed = decode_piano(latent_code, gen, '')
# ns_reconstructed
play(ns_reconstructed)

[-0.00223478  0.00074569 -0.00099466 -0.00238259 -0.00146582  0.0012129
  0.000951    0.00149064 -0.00010034 -0.00045232  0.00101645  0.00067532
 -0.00209925 -0.00126786 -0.00231596 -0.00019875 -0.00040246 -0.00092321
  0.00026902  0.00100347 -0.00060387 -0.00133428 -0.00039968 -0.00123715
 -0.00145964 -0.00163855 -0.00284389 -0.00101732  0.00124079  0.00149599
  0.00137052  0.00021626 -0.00070003 -0.00088385  0.00070703 -0.00138334
 -0.00109035  0.00117931  0.0009407   0.00011232 -0.00150819 -0.0025395
 -0.002242   -0.00120201 -0.00123377 -0.00196918 -0.00129294 -0.00135233
  0.00054151 -0.00162069 -0.00114228 -0.00098012 -0.00190167 -0.00127972
 -0.0006112  -0.00180615 -0.00109066 -0.00176481  0.00217141  0.00044889
  0.00121451 -0.00052481 -0.00155938 -0.00133876 -0.00249004 -0.00148853
 -0.00075242  0.00213555  0.00188526 -0.00066146 -0.00013513 -0.00193033
 -0.00265688 -0.00175924 -0.00105177 -0.00157387 -0.00177841 -0.00170079
 -0.00142943  0.00119019  0.00162095 -0.00062805 -0.0

In [0]:
# Save the weights of the full VAE, the encoder and the generator.
for net, weights_path in ((model, "model_bilstm_weights.h5"),
                          (enc, "encoder_bilstm_weights.h5"),
                          (gen, "decoder_bilstm_weights.h5")):
    net.save_weights(weights_path)

In [0]:
# Download the three weight files, in the same order as before.
for weights_path in ("model_bilstm_weights.h5",
                     "encoder_bilstm_weights.h5",
                     "decoder_bilstm_weights.h5"):
    files.download(weights_path)

In [20]:
!ls

decoder_bilstm.h5  encoder_bilstm.h5  model_bilstm.h5  MusicNST  sample_data
