In [59]:
from music21 import converter, stream, note, chord, instrument
from collections import *
from itertools import groupby
import numpy as np

In [2]:
# Load the MIDI file through music21's converter.
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that exact file exists; consider a configurable data directory.
midi = converter.parse('/Users/Haebichan/Desktop/midi/KissTheRain.mid')

In [3]:
# Regroup the parsed score by instrument.
# NOTE(review): presumably this can return None when the file carries no
# instrument information -- confirm, since the next cell calls parts.recurse().
parts = instrument.partitionByInstrument(midi)

In [34]:
# Iterator over the elements nested inside `parts`; consumed by the extraction
# loop that fills `notes`, `durations` and `offsets`.
notes_to_parse = parts.recurse()

In [57]:

# Value ranges used to encode the three cell states of the note matrix.
# Each state is drawn uniformly from its [low, high] range; with the values
# below every range collapses to a single constant (0.0 / 0.5 / 1.0).
min_value = 0.00      # rest: lower bound
lower_first = 0.00    # rest: upper bound

lower_second = 0.5    # continuation: lower bound
upper_first = 0.5     # continuation: upper bound

upper_second = 1.0    # first touch: lower bound
max_value = 1.0       # first touch: upper bound

def notes_to_matrix(notes, durations, offsets, min_value=min_value, lower_first=lower_first,
                    lower_second=lower_second,
                    upper_first=upper_first, upper_second=upper_second,
                    max_value=max_value):
    """Encode a note sequence as a (pitch x time) matrix.

    Rows are pitch indices (128 of them), columns are time steps of 0.25
    offset units (offsets are multiplied by 4, durations divided by 0.25).
    Every cell holds one of three states:

    * rest          -- drawn from [min_value, lower_first]
    * first touch   -- drawn from [upper_second, max_value], written at the
                       column where the note starts
    * continuation  -- drawn from [lower_second, upper_first], written on the
                       following columns while the note keeps sounding

    Parameters
    ----------
    notes : sequence
        One entry per event: either a pitch index (integer-like) or a string
        of pitch indices joined with '.' describing a chord, e.g. '60.64.67'.
    durations : sequence of numbers
        Length of each event, in the same units as ``offsets``.
    offsets : sequence of numbers
        Start position of each event. They do not need to be sorted.
    min_value, lower_first, lower_second, upper_first, upper_second, max_value
        Bounds of the three value ranges described above.

    Returns
    -------
    numpy.ndarray of shape (128, int(max(offsets)) * 4 + 32), or the tuple
    ``(None, None, None)`` (after printing 'Index Error') when ``offsets`` is
    empty. The tuple form is kept as-is for backward compatibility.
    """
    if len(offsets) == 0:
        print ('Index Error')
        return (None, None, None)

    # Size the time axis from the LARGEST offset rather than the last one:
    # offsets gathered from several streams are not necessarily sorted, and
    # sizing from offsets[-1] made earlier-listed late notes index past the
    # end of the matrix. For sorted input the width is unchanged.
    last_offset = int(max(offsets))
    total_offset_axis = last_offset * 4 + (8 * 4)  # 8 extra units of tail room

    # Start with every cell in the "rest" range.
    our_matrix = np.random.uniform(min_value, lower_first, (128, int(total_offset_axis)))

    for (pitch_spec, duration, offset) in zip(notes, durations, offsets):
        how_many = int(float(duration) / 0.25)  # number of columns the event spans

        # Draw scalars (not shape-(1,) arrays): assigning a 1-element array to
        # a single matrix cell is deprecated in recent NumPy releases.
        first_touch = np.random.uniform(upper_second, max_value)
        continuation = np.random.uniform(lower_second, upper_first)

        # A string is a '.'-joined chord (or a single pitch given as text);
        # anything else is treated as one pitch index.
        if isinstance(pitch_spec, str):
            pitches = [int(p) for p in pitch_spec.split('.')]
        else:
            pitches = [int(pitch_spec)]

        start = int(offset * 4)
        for pitch in pitches:
            our_matrix[pitch, start] = first_touch
            # Slice assignment clips silently at the right edge of the matrix.
            our_matrix[pitch, start + 1 : start + how_many] = continuation

    return our_matrix

In [40]:
def note_to_int(note):
    """Convert a pitch name with octave (e.g. 'C4', 'F#5', 'B-3') to its pitch number.

    The name is read as: one letter A-G, then optional accidentals ('#' for
    each semitone up, '-' for each semitone down), then the octave number.
    The result is ``base + accidentals + 12 * (octave + 1)``, so 'C4' -> 60
    and 'A4' -> 69.

    Reference tables:
    https://musescore.org/en/plugin-development/note-pitch-values
    https://github.com/bspaans/python-mingus/blob/master/mingus/core/notes.py

    Parameters
    ----------
    note : str
        Pitch name followed by its octave.

    Returns
    -------
    int
        The pitch number.

    Raises
    ------
    ValueError
        If the letter is not one of A-G or the octave part is not an integer.

    Notes
    -----
    A '-' directly after the letter is always read as a flat, so 'C-1' means
    C-flat in octave 1. A negative octave is only recognised after sharps
    ('C#-1' is C-sharp in octave -1).
    """
    note_base_name = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    base_value = note_base_name.index(note[0])

    remainder = note[1:]
    after_sharps = remainder.lstrip('#')
    sharps = len(remainder) - len(after_sharps)

    if sharps:
        # One semitone up per '#'; whatever follows (possibly signed) is the octave.
        accidental = sharps
        octave_text = after_sharps
    else:
        # One semitone down per leading '-'.
        octave_text = remainder.lstrip('-')
        accidental = -(len(remainder) - len(octave_text))

    # Octave -1 is the lowest octave, hence the +1.
    return base_value + accidental + 12 * (int(octave_text) + 1)

In [55]:
# Parallel lists: entry i of each list describes the same note/chord event.
durations = []
notes = []
offsets = []

for element in notes_to_parse:
    if isinstance(element, note.Note):
        # Single note -> one integer pitch value.
        notes.append(note_to_int(str(element.pitch)))
    elif isinstance(element, chord.Chord):
        # Chord -> pitch values joined with '.', e.g. '60.64.67'.
        notes.append('.'.join(str(note_to_int(str(n))) for n in element.pitches))
    else:
        # Anything else (rests, instruments, metadata, ...) is ignored.
        continue

    # Record timing for notes AND chords. Previously only notes appended here,
    # so the three lists drifted out of step as soon as a chord appeared and
    # later events were paired with the wrong duration/offset.
    # NOTE(review): element.offset is presumably relative to the element's
    # containing stream -- confirm it is the absolute position you want when
    # the parts are split into measures.
    durations.append(element.duration.quarterLength)
    offsets.append(element.offset)


In [63]:
# Build the pitch-by-time matrix from the extracted lists, passing the
# module-level range bounds explicitly.
# Depends on `notes`, `durations` and `offsets` having been filled by the
# extraction loop first.
arr = notes_to_matrix(notes, durations, offsets, min_value=min_value, lower_first=lower_first,
                    lower_second=lower_second,
                    upper_first=upper_first, upper_second=upper_second,
                    max_value=max_value)

In [61]:
# Create a fresh iterator over `parts` (same statement as the earlier cell).
# NOTE(review): duplicate of the earlier assignment; presumably re-run so the
# extraction loop can be executed again -- confirm and keep only one copy.
notes_to_parse = parts.recurse()


In [62]:
from keras import layers
from keras import models
import keras
from keras.models import Model
import tensorflow as tf
from keras.layers.advanced_activations import *

# Network input: windows of `max_len` time steps, each a 128-value pitch column.
max_len = 18
midi_shape = (max_len, 128)

input_midi = keras.Input(midi_shape)  # (batch, 18, 128)

# --- Stage 1: LSTM over the window, keeping every time step ---
x = layers.LSTM(1024, return_sequences=True, unit_forget_bias=True)(input_midi)  # (batch, 18, 1024)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.3)(x)

# compute importance for each step
attention = layers.Dense(1, activation='tanh')(x)     # one score per step: (batch, 18, 1)
attention = layers.Flatten()(attention)               # (batch, 18)
attention = layers.Activation('softmax')(attention)   # weights over the 18 steps
attention = layers.RepeatVector(1024)(attention)      # (batch, 1024, 18)
attention = layers.Permute([2, 1])(attention)         # (batch, 18, 1024), matches x

# Scale every time step's features by that step's attention weight.
multiplied = layers.Multiply()([x, attention])
sent_representation = layers.Dense(512)(multiplied)   # (batch, 18, 512)


# Note: two Dense(512) layers are applied back to back with no activation
# between them.
x = layers.Dense(512)(sent_representation)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)

# --- Stage 2: same pattern at width 512 ---
x = layers.LSTM(512, return_sequences=True, unit_forget_bias=True)(x)  # (batch, 18, 512)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)


# compute importance for each step
attention = layers.Dense(1, activation='tanh')(x)     # (batch, 18, 1)
attention = layers.Flatten()(attention)               # (batch, 18)
attention = layers.Activation('softmax')(attention)
attention = layers.RepeatVector(512)(attention)       # (batch, 512, 18)
attention = layers.Permute([2, 1])(attention)         # (batch, 18, 512), matches x

multiplied = layers.Multiply()([x, attention])
sent_representation = layers.Dense(256)(multiplied)   # (batch, 18, 256)


x = layers.Dense(256)(sent_representation)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)


# --- Stage 3: final LSTM returns only its last step, collapsing the time axis ---
x = layers.LSTM(128, unit_forget_bias=True)(x)        # (batch, 128)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)


# Output head: 128 values, one per pitch row.
# NOTE(review): softmax forces the 128 outputs to sum to 1, while the input
# matrix can hold several active pitches in one column (chords) -- confirm a
# single-class distribution is the intended target.
x = layers.Dense(128, activation='softmax')(x) 

model = Model(input_midi, x)

In [16]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

In [18]:
# SVG(model_to_dot(model).create(format = 'svg'))