In [1]:
import os
from tqdm import tqdm
import mido
import numpy as np
import sys
import time
from music21 import *
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
from keras import Model
from pathlib import Path

from tensorflow.nn import ctc_beam_search_decoder

# Opened once for the whole notebook; play_song() sends its note_on messages to this port.
# No port name is passed, so the choice of output device is left to mido.
port = mido.open_output()

Using TensorFlow backend.


<center>Define Useful Functions</center>

In [69]:
def decode_categorical(music):
    """Translate integer note codes back into notes and durations.

    Each code in ``music`` is looked up in the module-level
    ``categorical_to_notes`` table, whose entries are strings of the form
    ``"<note>|<duration>"``.

    Returns:
        A ``(notes, durations)`` tuple of two parallel lists, the notes
        converted to ``int`` and the durations converted to ``float``.
    """
    pitches, lengths = [], []
    # Lazy generator: entries are still looked up and parsed one code at a time.
    fields = (categorical_to_notes[code].split("|") for code in music)
    for pitch_text, length_text in fields:
        pitches.append(int(pitch_text))
        lengths.append(float(length_text))
    return pitches, lengths

def print_progress(c, msg = 'Note: '):
    """Overwrite the current console line with ``msg`` followed by a counter.

    Args:
        c: Counter value; it is rendered with ``%i``, i.e. as an integer.
        msg: Label written in front of the counter. It is emitted verbatim,
            so it may contain ``%`` characters.
    """
    # Format only the counter. Interpolating into the concatenated label would
    # treat any '%' inside `msg` as a conversion specifier and raise or garble
    # the output.
    sys.stdout.write('\r' + msg + (" %i" % c))
    # No newline is written, so flush to make the update visible right away.
    sys.stdout.flush()

def play_song(music, start):
    """Play an encoded note sequence live on the module-level MIDI ``port``.

    Args:
        music: Iterable of integer note codes, decoded with
            ``decode_categorical``.
        start: Index into the module-level ``tpq`` and ``tempos`` arrays that
            selects the ticks-per-quarter and tempo used for timing.

    For every note the function first sleeps for the real-time length of that
    note's duration and then sends a ``note_on`` with velocity 127; no
    ``note_off`` is sent. A running note count is shown via ``print_progress``.

    Side effect: music21's global ``defaults.ticksPerQuarter`` is left set to
    ``tpq[start]``.
    """
    pitches, lengths = decode_categorical(music)
    ppq = tpq[start]
    tempo = tempos[start]
    # Point music21 at the same resolution before durations are turned into ticks.
    defaults.ticksPerQuarter = ppq
    for played, (pitch, length) in enumerate(zip(pitches, lengths), start=1):
        ticks = midi.translate.durationToMidi(duration.Duration(quarterLength = length))
        # The wait comes before the note is sent, not after it.
        time.sleep(mido.tick2second(ticks, ppq, tempo))
        port.send(mido.Message(type = 'note_on', note = pitch, velocity = 127))
        print_progress(played)

def create_midi_file(music, start, filename):
    """Write an encoded note sequence to a single-track MIDI file.

    Args:
        music: Iterable of integer note codes, decoded with
            ``decode_categorical``.
        start: Index into the module-level ``tempos`` and ``tpq`` arrays that
            selects the tempo and the ticks-per-beat stored in the file.
        filename: Path of the MIDI file to write.

    Returns:
        ``filename``, unchanged.

    The track holds one ``set_tempo`` meta message followed by one ``note_on``
    (velocity 127) per note, whose delta time is the note's duration in ticks.
    No ``note_off`` messages are written.
    """
    notes_music, dur_music = decode_categorical(music)
    ticks_per_beat = tpq[start]

    mid = mido.MidiFile()
    mid.ticks_per_beat = ticks_per_beat
    track = mido.MidiTrack()
    mid.tracks.append(track)
    track.append(mido.MetaMessage('set_tempo', tempo = tempos[start]))

    # durationToMidi scales quarter lengths by music21's global
    # defaults.ticksPerQuarter. Pin it to the resolution declared in the file
    # header, otherwise the delta times depend on whatever value an earlier
    # play_song() call (or music21's own default) left behind.
    previous_tpq = defaults.ticksPerQuarter
    defaults.ticksPerQuarter = ticks_per_beat
    try:
        for n, d in zip(notes_music, dur_music):
            ticks = midi.translate.durationToMidi(duration.Duration(quarterLength = d))
            track.append(mido.Message(type = 'note_on', note = n, velocity = 127, time = ticks))
    finally:
        # Leave the global setting exactly as it was found.
        defaults.ticksPerQuarter = previous_tpq

    mid.save(filename)
    return filename

In [50]:
# Exactly one dataset may be active. Every option binds the same three names
# (notes, tempos, tpq), so keeping one loading cell per dataset made the result
# depend on which of those cells happened to run last. Change DATASET to switch.
# 'Piano' matches the 'Weights/Auto_Piano_...' file loaded further down.
DATASET = 'Piano'  # one of the keys of DATASET_SOURCES

# dataset name -> (file holding the note entries, optional cap on loaded entries)
DATASET_SOURCES = {
    'Piano': ('notesDurations.npy', None),
    'Guitar': ('notes.npy', 1500000),
    'Multiple': ('notes.npy', None),
}

notes_file, max_entries = DATASET_SOURCES[DATASET]
dataset_dir = '../datasets/' + DATASET + '/'

# Slicing with [:None] keeps the whole array; the cap is applied to all three
# arrays alike.
notes = np.load(dataset_dir + notes_file)[:max_entries]
tempos = np.load(dataset_dir + 'tempos.npy')[:max_entries]
tpq = np.load(dataset_dir + 'tpq.npy')[:max_entries]

In [51]:
# Distinct note entries in sorted order; their 1-based position is the integer
# code, which leaves code 0 unused by the data.
unique = np.unique(notes)
# code -> note entry
categorical_to_notes = dict(enumerate(unique, start=1))
# note entry -> code (inverse of the table above)
notes_to_categorical = {entry: code for code, entry in categorical_to_notes.items()}
# The whole dataset as a list of integer codes.
cat_notes = [notes_to_categorical[entry] for entry in notes]

In [52]:
# NOTE(review): a bare expression is only displayed when it is the last
# statement of a cell, so this preview of the first raw entries is computed
# and then discarded.
notes[:10]
# Audible sanity check: play the first 40 encoded notes using the tempo and
# ticks-per-quarter stored at index 0.
play_song(cat_notes[:40], 0)

Note:  40

In [53]:
def get_train_size(inp_size, batch_size):
    """Return the largest multiple of ``batch_size`` that is <= ``inp_size``.

    Args:
        inp_size: Number of available samples.
        batch_size: Batch size the result must be divisible by (non-zero).

    Returns:
        The largest non-negative multiple of ``batch_size`` not exceeding
        ``inp_size``. When not even one full batch fits (including
        ``inp_size <= 0``) the result is ``0`` rather than an implicit
        ``None``, so callers always receive an integer.

    Raises:
        ZeroDivisionError: If ``batch_size`` is 0.
    """
    # Only the magnitude of the batch size matters for divisibility.
    step = abs(batch_size)
    return max(inp_size, 0) // step * step
        
def create_dataset(notes, seq_length_in, seq_length_out, batch_size):
    """Cut a note sequence into sliding encoder/decoder training windows.

    Window ``i`` consists of
      * ``inp[i]``  - ``seq_length_in`` consecutive notes starting at ``i``,
      * ``targ[i]`` - the ``seq_length_out`` notes that follow them,
      * ``outp[i]`` - ``targ[i]`` shifted right by one step behind a leading
        ``0`` start token (the decoder input).

    The number of windows is trimmed with ``get_train_size`` to a multiple of
    ``batch_size``.

    Args:
        notes: Sequence of integer note codes (list or 1-D array).
        seq_length_in: Encoder window length.
        seq_length_out: Decoder window length.
        batch_size: The number of returned windows is a multiple of this.

    Returns:
        ``(inp, outp, targ)`` as numpy arrays; ``targ`` is reshaped to
        ``(-1, seq_length_out, 1)``.

    Raises:
        ValueError: If ``notes`` is too short to fill a single batch.
    """
    # Force a plain list: with an ndarray, `[0] + window` below would be a
    # broadcast addition instead of prepending the start token.
    notes = list(notes)
    size = get_train_size(len(notes) - seq_length_in - seq_length_out, batch_size)
    if not size:
        raise ValueError(
            "not enough notes for one batch: need at least "
            "seq_length_in + seq_length_out + batch_size entries"
        )
    inp, outp, targ = [], [], []
    for i in tqdm(range (size)):
        split = i + seq_length_in
        target = notes[split: split + seq_length_out]
        inp.append(notes[i: split])
        # Decoder input: start token followed by all but the last target note.
        outp.append([0] + target[:-1])
        targ.append(target)
    inp = np.array(inp)
    outp = np.array(outp)
    targ = np.array(targ).reshape((-1, seq_length_out, 1))
    return inp, outp, targ

<center> Define Problem Hyperparameters</center>

In [54]:
# Windows per training batch; create_dataset() trims the data to a multiple of it.
batch_size =  64
# Units of the encoder and decoder LSTMs; create_model() takes this as its default.
lstm_cells = 256*2
# Length of the note window fed to the encoder.
seq_length_in = 30
# Number of following notes the decoder is trained to produce.
seq_length_out = 20
# NOTE(review): never passed to create_model() in this notebook; the models
# use that function's own emb_size default, which is also 50.
emb_size = 50
# Note codes start at 1 and 0 serves as the decoder's start token, so the
# embedding/softmax need (highest code + 1) entries.
vocab_size = max(cat_notes) + 1
print (vocab_size)

2060


In [55]:
# Build encoder inputs, decoder inputs (targets shifted right behind a 0 start
# token) and targets from the encoded notes.
inp, outp, targ = create_dataset(cat_notes, seq_length_in, seq_length_out, batch_size)

100%|████████████████████████████████████████████████████████████████████| 1422592/1422592 [00:09<00:00, 143818.38it/s]


<center>Create Autoencoder</center>

In [56]:
def create_model(vocab_size, batch_size, lstm_cells = lstm_cells, emb_size = 50, dropout = 0.4):
    """Build the encoder-decoder LSTM network and its inference sub-models.

    Args:
        vocab_size: Number of distinct token ids; size of both embedding
            tables and of the softmax output.
        batch_size: Fixed batch dimension of the encoder and decoder token
            inputs.
        lstm_cells: Units of the encoder and of the decoder LSTM. The default
            is the value the module-level ``lstm_cells`` had when this
            function was defined.
        emb_size: Embedding dimension of both embedding layers.
        dropout: Dropout rate applied to the decoder LSTM outputs during
            training; half of this rate is applied to the encoder's final
            states.

    Returns:
        ``(encoder, decoder, autoencoder)``:
          * ``encoder``: encoder tokens -> ``[state_h, state_c]``
          * ``decoder``: ``[decoder tokens, state_h, state_c]`` ->
            ``[token probabilities, state_h, state_c]``
          * ``autoencoder``: ``[encoder tokens, decoder tokens]`` -> token
            probabilities; this is the model to train.
        The three models are built from the same layer objects, so weights
        trained or loaded through ``autoencoder`` are used by the other two.
    """
    #define training encoder
    encoder_inputs = Input(batch_shape = (batch_size, None), name = 'encoder_inputs')
    encoder_emb = Embedding(input_dim = vocab_size, output_dim= emb_size, name = 'encoder_emb')
    enc_emb_inp = encoder_emb(encoder_inputs)
    encoder_lstm = LSTM(lstm_cells, return_state=True, name = 'encoder_lstm')
    encoder_outputs, state_h, state_c = encoder_lstm(enc_emb_inp)
    drp1 = Dropout(dropout/2)
    state_h = drp1(state_h)
    state_c = drp1(state_c)
    encoder_states = [state_h, state_c]
    #define training decoder
    decoder_inputs = Input(batch_shape = (batch_size, None), name = 'decoder_inputs')
    decoder_emb = Embedding(input_dim = vocab_size, output_dim= emb_size, name = 'decoder_emb')
    dec_emb_inp = decoder_emb(decoder_inputs)
    decoder_lstm = LSTM(lstm_cells, return_sequences=True, return_state=True, name = 'decoder_lstm')
    decoder_outputs, _, _ = decoder_lstm(dec_emb_inp, initial_state=encoder_states)
    drp2 = Dropout(dropout)
    # The dropout result has to be rebound to decoder_outputs; bound to any
    # other name, the Dense layer below would bypass the dropout entirely.
    decoder_outputs = drp2(decoder_outputs)
    decoder_dense = Dense(vocab_size, activation='softmax', name = 'decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)

    autoencoder = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    #define models for prediction
    # The inference decoder receives its LSTM state explicitly so that it can
    # be called one step at a time.
    decoder_state_input_h = Input(shape=(lstm_cells,))
    decoder_state_input_c = Input(shape=(lstm_cells,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(dec_emb_inp, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
    encoder = Model(encoder_inputs, encoder_states)

    return encoder, decoder, autoencoder

In [14]:
# Training models: fixed batch dimension of batch_size, default LSTM size,
# embedding size and dropout.
encoder, decoder, autoencoder = create_model(vocab_size, batch_size)

In [15]:
# Print the layer table of the inference decoder.
decoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
decoder_inputs (InputLayer)     (64, None)           0                                            
__________________________________________________________________________________________________
decoder_emb (Embedding)         (64, None, 50)       161800      decoder_inputs[0][0]             
__________________________________________________________________________________________________
input_1 (InputLayer)            (None, 512)          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 512)          0                                            
__________________________________________________________________________________________________
decoder_ls

In [16]:
# The targets built by create_dataset() are integer codes (one per time step),
# hence the sparse variant of categorical cross-entropy.
autoencoder.compile('adam', loss = 'sparse_categorical_crossentropy', metrics = ['acc'])
# Inputs are [encoder windows, decoder inputs]; the batch size passed here is
# the same value the model's inputs were built with.
autoencoder.fit([inp, outp], targ, batch_size= batch_size, epochs = 2)

KeyboardInterrupt: 

<center>Evaluate Model</center>

In [57]:
# generate target given source sequence
def predict_sequence(infenc, infdec, source, n_steps):
    """Greedily extend ``source`` by ``n_steps`` generated tokens.

    Args:
        infenc: Inference encoder; ``predict(source)`` yields the initial
            decoder state as a list ``[h, c]``.
        infdec: Inference decoder; ``predict([token, h, c])`` yields
            ``(probabilities, h, c)``.
        source: Seed token array passed to the encoder.
        n_steps: Number of tokens to generate.

    Returns:
        A numpy array holding the flattened ``source`` followed by the
        ``n_steps`` generated tokens.
    """
    infenc.reset_states()
    infdec.reset_states()
    # The encoder's final state conditions the first decoder step.
    decoder_state = infenc.predict(source)
    # Decoding starts from the 0 start-of-sequence token.
    next_input = np.array([0]).reshape(1, 1)
    generated = list(np.reshape(source, (-1,)))
    for _ in tqdm(range(n_steps)):
        probabilities, h, c = infdec.predict([next_input] + decoder_state)
        # Greedy choice: the most probable token is both stored and fed back in.
        best = np.argmax(probabilities)
        generated.append(best)
        decoder_state = [h, c]
        next_input = best.reshape(1, 1)
    return np.array(generated)

In [58]:
# Rebuild the network with a batch dimension of 1 and dropout disabled, for
# step-by-step generation. This rebinds `autoencoder` to the new model.
infenc, infdec, autoencoder = create_model(vocab_size, 1, lstm_cells= lstm_cells, dropout=0.0)
# infenc and infdec are built from the same layer objects as autoencoder, so
# loading weights into the full model sets theirs as well.
autoencoder.load_weights('Weights/Auto_Piano_512_225_0.5270.hdf5')
# Only used further down as part of the generated sample's file name.
# NOTE(review): presumably the value encoded at the end of the weights file name - confirm.
error = 0.5270

In [83]:
# NOTE(review): this random draw is overwritten by the fixed index on the next
# line, so its value is never used.
start = np.random.randint(0, len(inp))
start = 1261287
# Seed the generator with one training window and let it append 800 notes.
source = inp[start].reshape(1, seq_length_in)
gen_music = predict_sequence(infenc, infdec, source, 800)

100%|███████████████████████████████████████████████████████████████████████████████| 800/800 [00:04<00:00, 190.49it/s]


In [84]:
# Optional live playback of the generated sequence (disabled):
# print (start)
# play_song(gen_music, start)
# The file name records the LSTM size, the seed window index and `error`.
filename = 'Piano Samples/samples_512/seq_' + str(lstm_cells) + "_" + str(start) +"_" + str(error) +'.mid'
# `start` also selects the tempo and ticks-per-beat written to the file.
# As the last expression of the cell, the returned path is displayed.
create_midi_file(gen_music, start, filename)

'Piano Samples/samples_512/seq_512_1261287_0.527.mid'