In [189]:
import numpy as np
import pandas as pd
import pretty_midi
import matplotlib.pyplot as plt

from utils import plot_piano_roll

In [190]:
# TODO:
# group according to bar length

In [191]:
# variable declarations

TEMPO = 120.  # beats per minute, i.e. 1 sec = 2 beats
SIXTEENTH_NOTE_BEATS = 0.25  # 1 16th note = 0.25 beats
SIXTEENTH_NOTE_LEN = SIXTEENTH_NOTE_BEATS / (TEMPO / 60.)  # 1 16th note = 0.125 sec
ONE_BAR_LEN = SIXTEENTH_NOTE_LEN * 16  # 16 16th notes = 1 bar = 2 sec
FOUR_BAR_LEN = ONE_BAR_LEN * 4  # 4 bars = 8 sec

EVENT_SIZE = 64  # number of consecutive note events per window

In [192]:
# helper function definitions

def midi_notes2notes_df(notes):
    """Convert a sequence of MIDI notes into a (pitch, duration, step) table.

    Parameters
    ----------
    notes : sequence
        Note objects exposing ``pitch``, ``start`` and ``end`` attributes.
        They are processed in the order given.

    Returns
    -------
    pandas.DataFrame
        One row per note with the columns ``pitch``, ``duration``
        (``end - start``) and ``step`` (``start`` minus the previous note's
        ``start``; 0 for the first note). An empty input yields an empty
        frame that still carries these three columns.
    """
    columns = ['pitch', 'duration', 'step']

    # Guard against instruments without notes: indexing notes[0] below would
    # otherwise raise IndexError.
    if len(notes) == 0:
        return pd.DataFrame(columns=columns)

    # NOTE(review): steps are measured in input order, so notes that are not
    # sorted by start time produce negative steps — confirm the caller's ordering.
    processed_notes = []
    prev_start = notes[0].start
    for note in notes:
        processed_notes.append({
            'pitch': note.pitch,
            'duration': note.end - note.start,
            'step': note.start - prev_start,
        })
        prev_start = note.start

    return pd.DataFrame(processed_notes, columns=columns)


def notesdf2midi_notes(notes_df):
    """Rebuild ``pretty_midi.Note`` objects from a (pitch, duration, step) frame.

    Start times are recovered by accumulating the ``step`` column from 0.0;
    each note ends ``duration`` after it starts and gets a fixed velocity of
    100. The notes are returned as a list, in row order.
    """
    midi_notes = []
    onset = 0.0
    for _, event in notes_df.iterrows():
        # Each step is relative to the previous note's start.
        onset = onset + event['step']
        midi_notes.append(
            pretty_midi.Note(
                velocity=100,
                pitch=int(event['pitch']),
                start=onset,
                end=onset + event['duration'],
            )
        )

    return midi_notes


# reconstruction of recovered notes
def pred_df2midi_file(notes_df, output_path='../data/processed/mini_guitar.mid', max_events=None):
    """Write the leading events of a notes frame to a single-instrument MIDI file.

    Parameters
    ----------
    notes_df : pandas.DataFrame
        Frame with ``pitch``, ``duration`` and ``step`` columns, as consumed
        by ``notesdf2midi_notes``.
    output_path : str, optional
        Destination of the MIDI file. Defaults to the path this function
        always wrote to before the parameter existed.
    max_events : int, optional
        Number of leading rows to render. Defaults to ``EVENT_SIZE``.

    Returns
    -------
    None
    """
    if max_events is None:
        max_events = EVENT_SIZE
    recovered_notes = notesdf2midi_notes(notes_df.iloc[:max_events])

    # Program 0, non-drum instrument holding every recovered note.
    instrument = pretty_midi.Instrument(program=0,is_drum=False)
    instrument.notes = recovered_notes

    # Use the notebook-wide tempo constant instead of a second literal 120.
    recovered_midi_file = pretty_midi.PrettyMIDI(initial_tempo=TEMPO)
    recovered_midi_file.instruments = [instrument]
    recovered_midi_file.time_signature_changes = [pretty_midi.TimeSignature(4,4,0.0)]
    recovered_midi_file.write(output_path)

In [193]:
# Load one MIDI file from disk; note that `midi_data` is reassigned by the
# dataset-building loop further down.
midi_data = pretty_midi.PrettyMIDI(midi_file='../data/midi_dump/turkish.mid')

In [194]:
def midi_data2tensor(midi_data):
    """Cut the first instrument's notes into fixed-length event windows.

    Parameters
    ----------
    midi_data : object
        Object with an ``instruments`` list whose first entry exposes
        ``notes`` (e.g. a ``pretty_midi.PrettyMIDI``). Only the first
        instrument is used.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, EVENT_SIZE, n_features)`` holding
        consecutive, non-overlapping windows of ``(pitch, duration, step)``
        rows. Trailing notes that do not fill a whole window are dropped.
        Input with fewer than ``EVENT_SIZE`` notes yields an empty
        ``(0, EVENT_SIZE, 3)`` array instead of raising.
    """
    n_features = 3  # pitch, duration, step

    if len(midi_data.instruments) == 0 or len(midi_data.instruments[0].notes) == 0:
        return np.empty((0, EVENT_SIZE, n_features))

    notes_df = midi_notes2notes_df(midi_data.instruments[0].notes)
    events = notes_df.values

    # Count only complete windows. Splitting and then discarding the last
    # chunk would also throw away a *complete* final window whenever the note
    # count is an exact multiple of EVENT_SIZE.
    n_windows = len(events) // EVENT_SIZE
    return events[:n_windows * EVENT_SIZE].reshape(n_windows, EVENT_SIZE, events.shape[1])

In [195]:
import glob

# Sorted so the concatenation order does not depend on the order in which the
# file system happens to list the files.
midi_list = sorted(glob.glob("../data/maestro-v3.0.0/2018/*.midi"))
if not midi_list:
    # Fail with a clear message instead of an AttributeError on `None.shape`.
    raise FileNotFoundError("No .midi files found under ../data/maestro-v3.0.0/2018/")

# Collect the per-file windows and concatenate once at the end; concatenating
# onto the growing array inside the loop re-copies all earlier data each time.
per_file_batches = []
for midi_file in midi_list:
    midi_data = pretty_midi.PrettyMIDI(midi_file)
    single_batch_of_events = midi_data2tensor(midi_data)
    per_file_batches.append(single_batch_of_events)

complete_batch_of_events = np.concatenate(per_file_batches, axis=0)

print(complete_batch_of_events.shape)

KeyboardInterrupt: 

In [None]:
import tensorflow as tf

# Build the input pipeline in one chain: slice the event array along its first
# axis, shuffle with a 1000-element buffer, group into batches of 6, then
# prefetch with an automatically tuned buffer size.
train_ds = (
    tf.data.Dataset.from_tensor_slices(complete_batch_of_events)
    .shuffle(1000)
    .batch(6)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Pull the first batch out of the dataset into `temp_ds`; the experiment cells
# below use it as their sample input.
for temp_ds in train_ds:
    break

In [None]:
# TODO: make the feature dimension of x 128 + 2 (each event currently has 3 features: pitch, duration, step)

In [230]:
from tensorflow.keras.layers import Bidirectional,LSTM,Dense,LSTMCell
from tensorflow.keras import Input

CONDUCTOR_LEN = 4  # number of conductor time steps; Decoder.call also splits x into this many subsequences


class Decoder(tf.keras.Model):
    """Work-in-progress hierarchical decoder driven by a conductor RNN.

    A latent code is projected to the initial state of a two-layer conductor
    LSTM that is unrolled for ``CONDUCTOR_LEN`` steps over all-zero inputs.
    Each conductor output is projected to a 128-wide vector.

    TODO: ``bottom_lstm_1`` is created but not applied yet; ``call`` only
    slices ``x`` into subsequences and prints the shapes involved.
    """

    def __init__(self,latent_dim):
        """Create the conductor and bottom-level layers.

        ``latent_dim`` is only stored on the instance; no layer size is
        derived from it.
        """
        super().__init__()
        self.latent_dim = latent_dim
        self.conductor_dense = Dense(units=512,activation='tanh')
        self.conductor_lstm_1 = LSTM(units=512,return_sequences=True)
        self.conductor_lstm_2 = LSTM(units=256,return_sequences=True)
        self.bottom_lstm_dense = Dense(units=128,activation='tanh')
        self.bottom_lstm_1 = LSTM(units=128,return_sequences=True)

    def call(self,z,x):
        """Run the conductor on ``z`` and return the projected conductor outputs.

        Parameters
        ----------
        z : tensor
            Latent codes, one row per batch element.
        x : tensor
            Event sequences; only ``x.shape[1]`` and slices along axis 1 are
            used, to report the subsequence shapes.

        Returns
        -------
        tensor
            Output of ``bottom_lstm_dense`` with shape
            ``(batch, CONDUCTOR_LEN, 128)``.
        """
        conductor_rnn_h0 = self.conductor_dense(z)

        # Use the dynamic batch size: the static `.shape[0]` is None when the
        # model is traced with an unknown batch dimension, and tf.zeros cannot
        # build a tensor from a None entry.
        batch_size = tf.shape(conductor_rnn_h0)[0]
        conductor_input = tf.zeros(shape=tf.stack([batch_size, CONDUCTOR_LEN, 1]))

        # The same projected vector seeds both the hidden and the cell state.
        conductor_output = self.conductor_lstm_1(inputs=conductor_input, initial_state=[conductor_rnn_h0, conductor_rnn_h0])
        conductor_output = self.conductor_lstm_2(conductor_output)
        bottom_input = self.bottom_lstm_dense(conductor_output)

        total_seq_len = x.shape[1]
        subseq_len = total_seq_len // CONDUCTOR_LEN

        # One conductor output per subsequence of x. For now this only reports
        # the shapes that the bottom-level RNN would receive.
        for subsec_idx in range(CONDUCTOR_LEN):
            bottom_rnn_h0 = bottom_input[:,subsec_idx,:]
            subseq_x = x[:,subsec_idx*subseq_len:(subsec_idx+1)*subseq_len,:]
            print(bottom_rnn_h0.shape,subseq_x.shape)

        return bottom_input



In [259]:
# RNN SAMPLING
# Unroll an LSTM cell over temp_ds.shape[1] steps, feeding each prediction
# back in as the next input (the first input is all zeros).

lstm_cell = LSTMCell(units=36)
dense_last = Dense(units=3)

h0 = tf.random.normal(shape=(temp_ds.shape[0],36))
c0 = tf.random.normal(shape=(temp_ds.shape[0],36))

h_next, c_next = h0, c0
step_input = tf.zeros_like(temp_ds[:, 0, :])
step_preds = []
for j in range(temp_ds.shape[1]):
    _, (h_next, c_next) = lstm_cell(inputs=step_input, states=[h_next, c_next])
    temp_pred = dense_last(h_next)
    step_preds.append(temp_pred)
    # Sampling mode: the model consumes its own previous output.
    step_input = temp_pred

# Stack once at the end instead of growing `out` with tf.concat on every step.
out = tf.stack(step_preds, axis=1)



(6, 64, 3)


In [260]:
# TEACHER FORCING
# Unroll an LSTM cell over temp_ds.shape[1] steps, feeding the ground-truth
# event of the previous step as input (the first input is all zeros).

lstm_cell = LSTMCell(units=36)
dense_last = Dense(units=3)

h0 = tf.random.normal(shape=(temp_ds.shape[0],36))
c0 = tf.random.normal(shape=(temp_ds.shape[0],36))

h_next, c_next = h0, c0
step_preds = []
for j in range(temp_ds.shape[1]):
    if j == 0:
        step_input = tf.zeros_like(temp_ds[:, 0, :])
    else:
        # Inputs are the targets shifted right by one step: the output at
        # step j must only see the ground truth up to step j - 1. Feeding
        # temp_ds[:, j, :] here would hand the model the very event it is
        # supposed to predict and never use event 0 as an input.
        step_input = temp_ds[:, j - 1, :]
    _, (h_next, c_next) = lstm_cell(inputs=step_input, states=[h_next, c_next])
    temp_pred = dense_last(h_next)
    step_preds.append(temp_pred)

# Stack once at the end instead of growing `out` with tf.concat on every step.
out = tf.stack(step_preds, axis=1)

out.shape

TensorShape([6, 64, 3])

In [228]:
# Keep the constructor's latent size and the width of z in agreement, and let
# the batch dimension of z follow the batch that is being decoded.
decoder = Decoder(latent_dim=256)

z = tf.random.normal(shape=(temp_ds.shape[0], decoder.latent_dim))
decoder(z,temp_ds)

(6, 128) (6, 16, 3)
(6, 128) (6, 16, 3)
(6, 128) (6, 16, 3)
(6, 128) (6, 16, 3)


<tf.Tensor: shape=(6, 4, 128), dtype=float32, numpy=
array([[[-0.07063276,  0.04780326, -0.00672024, ...,  0.0054275 ,
          0.01931448, -0.02480353],
        [-0.13177484,  0.08146067, -0.01586253, ...,  0.02253362,
          0.02968648, -0.02916783],
        [-0.17060663,  0.10083238, -0.02863683, ...,  0.04015026,
          0.03211647, -0.02892247],
        [-0.18978095,  0.10878893, -0.04188544, ...,  0.0552422 ,
          0.03172155, -0.02808377]],

       [[ 0.00500768, -0.01895826,  0.05834664, ..., -0.00886359,
         -0.0007349 , -0.02397226],
        [ 0.00654601, -0.02564476,  0.08427896, ..., -0.00467321,
         -0.00112684, -0.03542342],
        [ 0.00737374, -0.02695352,  0.08764245, ...,  0.00322162,
         -0.00491748, -0.03562484],
        [ 0.00878097, -0.02479966,  0.07945227, ...,  0.01155077,
         -0.0112262 , -0.02956738]],

       [[-0.03163254,  0.04645364, -0.06605332, ..., -0.1165111 ,
          0.01556935, -0.00789317],
        [-0.03304306,  0.

In [186]:
from tensorflow.keras.layers import Bidirectional,LSTM,Dense
from tensorflow.keras import Input

def get_encoder(latent_dim):
    """Build the bidirectional-LSTM encoder as a functional Keras model.

    The model takes inputs of shape ``(EVENT_SIZE, 3)`` and returns two
    ``latent_dim``-wide tensors ``[mu, rho]`` produced by separate Dense
    heads without activation.
    """
    event_window = tf.keras.Input(shape=(EVENT_SIZE, 3))

    # Two stacked BiLSTMs: the first keeps its per-step outputs, the second
    # returns only its final output.
    first_bilstm = Bidirectional(LSTM(units=1024, return_sequences=True))
    second_bilstm = Bidirectional(LSTM(units=1024, return_sequences=False))
    summary = second_bilstm(first_bilstm(event_window))

    mu_head = Dense(units=latent_dim)
    rho_head = Dense(units=latent_dim)
    mu = mu_head(summary)
    rho = rho_head(summary)

    return tf.keras.Model(inputs=event_window, outputs=[mu, rho])

def get_decoder(latent_dim):
    """Build a small fully-connected decoder as a functional Keras model.

    Maps a ``latent_dim``-wide vector through Dense(120, relu) and
    Dense(500, relu) to a 784-unit output without activation.
    """
    # NOTE(review): a 784-unit output does not match the (EVENT_SIZE, 3)
    # windows the encoder consumes — looks carried over from an image VAE;
    # confirm before training.
    latent = tf.keras.Input(shape=(latent_dim,))

    hidden = latent
    for width in (120, 500):
        hidden = tf.keras.layers.Dense(units=width, activation='relu')(hidden)
    decoded_img = tf.keras.layers.Dense(units=784)(hidden)

    return tf.keras.Model(inputs=latent, outputs=[decoded_img])

class VAE(tf.keras.Model):
    """Variational autoencoder wiring ``get_encoder`` and ``get_decoder`` together."""

    def __init__(self,latent_dim):
        """Build the encoder and decoder sub-models for a ``latent_dim``-wide code."""
        super().__init__()
        self.latent_dim = latent_dim
        self.encoder_block = get_encoder(latent_dim)
        self.decoder_block = get_decoder(latent_dim)

    def call(self,img):
        """Encode ``img``, sample a latent code and decode it.

        Returns
        -------
        tuple
            ``(z_mu, z_rho, decoded_img)``: the two encoder outputs and the
            decoder output for the sampled code.
        """
        z_mu,z_rho = self.encoder_block(img)

        # Reparameterisation: z = mu + softplus(rho) * eps with eps ~ N(0, 1).
        # tf.shape gives the runtime shape; the static z_mu.shape has a None
        # batch dimension when the model is traced with an unknown batch size,
        # which tf.random.normal cannot accept.
        epsilon = tf.random.normal(shape=tf.shape(z_mu),mean=0.0,stddev=1.0)
        z = z_mu + tf.math.softplus(z_rho) * epsilon

        decoded_img = self.decoder_block(z)

        return z_mu,z_rho,decoded_img