In [150]:
import numpy as np
import os,glob
import music21
from music21 import *
from pathlib import Path
# Configure music21's 'midiPath' setting with the MuseScore 3 executable.
# NOTE(review): this is an absolute, machine-specific path; adjust it per installation.
environment.set('midiPath', '/usr/bin/musescore3')

In [151]:
# Is GPU active?

#import tensorflow as tf
#tf.test.gpu_device_name()

In [152]:
# To see information about RAM.
'''
!cat /proc/meminfo 
from tensorflow.python.client import device_lib
device_lib.list_local_devices()
'''

'\n!cat /proc/meminfo \nfrom tensorflow.python.client import device_lib\ndevice_lib.list_local_devices()\n'

In [153]:
def note_to_int(note): # converts the note's letter to pitch value which is integer form.
    """Convert a pitch name such as 'C4', 'C#4' or 'B-4' to its integer pitch value.

    source: https://musescore.org/en/plugin-development/note-pitch-values
    idea: https://github.com/bspaans/python-mingus/blob/master/mingus/core/notes.py

    The name is read as: one letter (A-G), zero or more accidentals, then the
    octave digits at the end of the string. Each '#' raises the pitch by one
    semitone and each '-' lowers it by one semitone (in this notation '-' is
    a flat, not a minus sign). The result is
    ``semitone_of_letter + accidentals + 12 * (octave + 1)``, so 'C4' is 60.

    Parameters
    ----------
    note : str
        Pitch name with octave.

    Returns
    -------
    int
        Integer pitch value.

    Raises
    ------
    ValueError
        If the string has no valid letter, no trailing octave digits, or
        contains characters other than '#' and '-' between them.
    """
    semitone_of_letter = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}
    name = str(note)

    # Walk back over the trailing digits to find where the octave starts.
    octave_start = len(name)
    while octave_start > 0 and name[octave_start - 1].isdigit():
        octave_start -= 1

    letter = name[:1]
    accidentals = name[1:octave_start]
    octave = name[octave_start:]

    if (letter not in semitone_of_letter) or (octave == '') or (set(accidentals) - {'#', '-'}):
        raise ValueError('Cannot convert %r to a pitch value' % (note,))

    # Previously only the letter was looked up, so sharps and flats were silently dropped.
    shift = accidentals.count('#') - accidentals.count('-')
    value = semitone_of_letter[letter] + shift + 12*(int(octave)-(-1))

    return value

# Value bands used to encode the three cell states of the piano-roll matrix.
# rest         -> drawn from [min_value, lower_first]
# continuation -> drawn from [lower_second, upper_first]
# first touch  -> drawn from [upper_second, max_value]
# With the values below every band has zero width, i.e. rest = 0.0,
# continuation = 0.5 and first touch = 1.0. The commented alternatives
# widen the bands so that the drawn values become random.
min_value = 0.00
lower_first = 0.00
#lower_first = 0.1

lower_second = 0.5
#lower_second = 0.4
upper_first = 0.5
#upper_first = 0.6

upper_second = 1.0
#upper_second = 0.8
max_value = 1.0

def notes_to_matrix(notes, durations, offsets, min_value=min_value, lower_first=lower_first,
                    lower_second=lower_second,
                    upper_first=upper_first, upper_second=upper_second,
                    max_value=max_value):
    """Render parallel note/duration/offset lists into a (128, time) matrix.

    Rows are pitch values and columns are time steps of a quarter of one
    offset unit (offsets and durations are multiplied / divided by 0.25).
    A note writes one "first touch" value in its starting column and
    "continuation" values in the following ``duration / 0.25 - 1`` columns;
    every other cell holds a "rest" value.

    Parameters
    ----------
    notes : sequence
        Each entry is an integer pitch, or a string of integer pitches
        joined by '.' for a chord (a single pitch as a string is accepted).
    durations : sequence
        Note lengths, anything accepted by ``float()``.
    offsets : sequence
        Start positions of the notes.
    min_value, lower_first, lower_second, upper_first, upper_second, max_value : float
        Bounds of the uniform distributions the three cell states are drawn from.

    Returns
    -------
    numpy.ndarray or tuple
        The matrix, or ``(None, None, None)`` when ``offsets`` is empty.
    """
    if len(offsets) == 0:
        print ('Index Error')
        return (None, None, None)

    # Size the time axis from the largest offset rather than the last one, so
    # that offsets which are not in ascending order cannot index past the end.
    last_offset = int(max(offsets))

    # 8 * 4 extra columns of head-room after the last note start.
    total_offset_axis = last_offset * 4 + (8 * 4)

    # Background of rest values.
    our_matrix = np.random.uniform(min_value, lower_first, (128, int(total_offset_axis)))

    for (note, duration, offset) in zip(notes, durations, offsets):
        how_many = int(float(duration)/0.25) # indicates time duration for single note.

        # One draw per note for each state (scalars, so they can be stored in single cells).
        # I choose to use uniform distribution. Maybe, you can use another distribution like Gaussian.
        first_touch = np.random.uniform(upper_second, max_value)
        continuation = np.random.uniform(lower_second, upper_first)

        start = int(offset * 4)
        stop = int((offset * 4) + how_many)

        # A single note is an int, a chord is 'p1.p2...'. Going through str()
        # and split() handles both, including a chord holding only one pitch.
        pitches = [int(pitch) for pitch in str(note).split('.')]

        for pitch in pitches:
            our_matrix[pitch, start] = first_touch
            our_matrix[pitch, start + 1 : stop] = continuation

    return our_matrix

def check_float(duration):
    """Return ``duration`` as a decimal string.

    Some durations are written as a fraction, for instance '14/3' or '7/3'.
    Those are evaluated and returned as the string form of the resulting
    float; any string without a '/' is returned unchanged.
    """
    if '/' not in duration:
        return duration

    pieces = duration.split('/')
    top = float(pieces[0])
    bottom = float(pieces[1])
    return str(float(top / bottom))

def midi_to_matrix(filename, length=250): # Convert midi file to matrix for DL architecture.
    """Parse a MIDI file and return the piano part as a (128, length) matrix.

    The score is partitioned by instrument, the part whose name is 'Piano'
    is selected, and its notes and chords are rendered with
    ``notes_to_matrix``. The matrix is cut to its first ``length`` columns
    so that every file yields the same shape.

    Parameters
    ----------
    filename : path of the MIDI file, passed to ``converter.parse``.
    length : int
        Number of time columns in the returned matrix.

    Returns
    -------
    numpy.ndarray or None
        ``None`` (after printing the reason) when the file cannot be
        partitioned, has no 'Piano' part, contains no notes, or is shorter
        than ``length`` columns.
    """
    midi = converter.parse(filename)

    try :
        parts = music21.instrument.partitionByInstrument(midi)
    except TypeError:
        print ('Type error.')
        return None

    instrument_names = []

    try:
        # `part` rather than `instrument`, which would shadow the music21 module name.
        for part in parts: # Learn names of instruments.
            name = (str(part).split(' ')[-1])[:-1]
            instrument_names.append(name)

    except TypeError:
        print ('Type is not iterable.')
        return None

    # Just take piano part. For the future works, we can use different instrument.
    try:
        piano_index = instrument_names.index('Piano')
    except ValueError:
        print ('%s have not any Piano part' %(filename))
        return None

    notes_to_parse = parts.parts[piano_index].recurse()

    durations = []
    notes = []
    offsets = []

    for element in notes_to_parse:
        if isinstance(element, note.Note): # If it is single note
            encoded = note_to_int(str(element.pitch))
        elif isinstance(element, chord.Chord): # If it is chord
            pitch_values = [note_to_int(str(n)) for n in element.pitches]
            if not pitch_values:
                continue
            if len(pitch_values) == 1:
                # Keep a one-pitch chord as a plain int so it is handled like a single note.
                encoded = pitch_values[0]
            else:
                encoded = '.'.join(str(value) for value in pitch_values)
        else:
            continue

        notes.append(encoded)
        # Read the length from the duration object instead of slicing its repr;
        # check_float turns fractional lengths such as '7/3' into decimal strings.
        durations.append(check_float(str(element.duration.quarterLength)))
        offsets.append(element.offset)

    our_matrix = notes_to_matrix(notes, durations, offsets)

    # notes_to_matrix signals "no notes" with a tuple of Nones instead of an array.
    if not isinstance(our_matrix, np.ndarray):
        print ("'tuple' object has no attribute 'shape'")
        return None

    freq, time = our_matrix.shape

    if (time >= length):
        return (our_matrix[:,:length]) # We have to set all individual note matrix to same shape for Generative DL.

    print ('%s have not enough duration' %(filename))
    return None

def int_to_note(integer):
    """Convert an integer pitch value to a note name with octave (60 -> 'C4').

    The pitch class is ``integer % 12`` and the octave is ``integer // 12 - 1``.
    An octave below zero is written as octave 0, so the returned name never
    contains a '-' sign.
    """
    names = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    octave_index, pitch_class = divmod(integer, 12)

    octave_number = int(octave_index) - 1
    if octave_number < 0:
        # A negative octave would put '-' into the name; use octave 0 instead.
        octave_number = 0

    return names[pitch_class] + str(octave_number)

# PAY ATTENTION. From matrix form to midi form, first touch, continuation and rest
# have to be indicated with unique numbers.
# -1.0 is used for rest, 0 for continuation and 1 for first touch.

# Decision thresholds: the midpoints between neighbouring value bands.
lower_bound = (lower_first + lower_second) / 2
upper_bound = (upper_first + upper_second) / 2

def converter_func(arr,first_touch = 1.0, continuation = 0.0, lower_bound = lower_bound, upper_bound = upper_bound):
    """Quantise ``arr`` IN PLACE to the three state codes and return it.

    Thanks to https://stackoverflow.com/questions/16343752/numpy-where-function-multiple-conditions

    * values below ``lower_bound``                      -> -1.0 (rest / end)
    * values in ``[lower_bound, upper_bound)``          -> ``continuation``
    * values at or above ``upper_bound``                -> ``first_touch``

    Parameters
    ----------
    arr : numpy.ndarray
        Array to quantise; it is modified in place (views are modified too).
    first_touch, continuation : float
        Codes written for the two note states.
    lower_bound, upper_bound : float
        Thresholds separating the three states.

    Returns
    -------
    numpy.ndarray
        The same ``arr`` object.

    Notes
    -----
    With the default thresholds the function is not idempotent: a cell that
    was already set to the continuation code 0.0 lies below ``lower_bound``
    and becomes a rest if the function is applied a second time.
    """
    # Classify every cell from its ORIGINAL value before anything is written,
    # so a freshly written code can never be re-classified by a later step.
    is_rest = arr < lower_bound
    is_continuation = (lower_bound <= arr) & (arr < upper_bound)
    is_first_touch = arr >= upper_bound

    np.place(arr, is_rest, -1.0)
    np.place(arr, is_continuation, continuation)
    np.place(arr, is_first_touch, first_touch)
    return arr

def how_many_repetitive_func(array, from_where=0, continuation=0.0):
    """Return 1 plus the number of leading ``continuation`` values in ``array[from_where:]``.

    Counting stops at the first element that differs from ``continuation``
    or at the end of the array.
    """
    run_length = 0
    for value in array[from_where:]:
        if value != continuation:
            break
        run_length += 1
    return run_length + 1

def matrix_to_midi(matrix, random=0):
    """Turn a (pitch, time) matrix into a list of music21 notes.

    The matrix is quantised with ``converter_func`` into rest / continuation /
    first-touch codes. Columns before the first column containing a first
    touch are dropped. Every first touch then becomes one note whose length
    is the first touch plus the continuation cells that directly follow it
    in the same row.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D array, rows are pitch values and columns are time steps.
        It is NOT modified; the quantisation works on a private copy.
    random : int or bool
        If false, a note starting at column ``c`` with ``k`` cells gets
        offset ``0.25 * c`` and duration ``0.25 * k``. If true, the offset is
        ``0.25 * c * 2`` and the duration is multiplied by a random integer
        drawn from 3, 4, 5.

    Returns
    -------
    list
        ``note.Note`` objects with ``offset`` and ``storedInstrument`` set.
    """
    first_touch = 1.0
    continuation = 0.0

    # Quantise exactly once, on a copy. Quantising views of the input again
    # (as happens when columns and then rows are converted in place) turns
    # already-converted continuation cells (0.0) into rests.
    states = converter_func(np.array(matrix, dtype=float))
    y_axis, x_axis = states.shape

    # Columns that contain at least one note start.
    onset_columns = np.flatnonzero((states == first_touch).any(axis=0))
    if onset_columns.size:
        how_many_in_start_zeros = int(onset_columns[0])
        how_many_in_end_zeros = int(x_axis - 1 - onset_columns[-1])
    else:
        how_many_in_start_zeros = x_axis
        how_many_in_end_zeros = x_axis

    print ('How many rows for non-start note at beginning:', how_many_in_start_zeros)
    print ('How many rows for non-start note at end:', how_many_in_end_zeros)

    # Only the leading empty columns are removed; the trailing count is informational.
    states = states[:,how_many_in_start_zeros:]
    y_axis, x_axis = states.shape
    print (y_axis, x_axis)

    output_notes = []

    for y_axis_num in range(y_axis):
        one_freq_interval_norm = states[y_axis_num,:] # Values in a row.

        i = 0
        while (i < x_axis):
            if (one_freq_interval_norm[i] != first_touch):
                i += 1
                continue

            # Length of the note in cells: the first touch plus its continuations (always >= 1).
            how_many_repetitive = how_many_repetitive_func(one_freq_interval_norm, from_where=i+1, continuation=continuation)

            if (random):
                random_num = np.random.randint(3,6)
                quarter_length = 0.25*random_num*how_many_repetitive
                start_offset = 0.25*i*2
            else:
                quarter_length = 0.25*how_many_repetitive
                start_offset = 0.25*i

            new_note = note.Note(int_to_note(y_axis_num),duration=duration.Duration(quarter_length))
            new_note.offset = start_offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

            # Jump past the cells consumed by this note.
            i += how_many_repetitive

    return output_notes

In [154]:
# Dataset file to load. Other files that were used here:
#   './midis_array_wikifonia.npy'
#   './midis_array_wikifonia_200.npy'
midis_array_raw = np.load('./midis_array_schumann.npy')

# Swap the last two axes of the 3-D array, then merge the first two axes so
# that every row of the result has 128 entries.
midis_array = np.reshape(
    np.asarray(np.transpose(midis_array_raw, (0, 2, 1))),
    (-1, 128),
)

print(midis_array.shape)
print(midis_array)

(20500, 128)
[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.03303868 0.03303868 0.         ... 0.         0.         0.        ]
 [0.12511323 0.12511323 0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [155]:
max_len = 18 # number of consecutive rows used as context to predict the next row.
step = 1 # distance between the starts of two consecutive windows.

# Start index of every window that still has a following row to predict.
window_starts = range(0, midis_array.shape[0] - max_len, step)

# Context windows of max_len rows each ...
previous_full = [midis_array[start:start + max_len, ...] for start in window_starts]
# ... and, for each window, the single row that comes right after it.
predicted_full = [midis_array[start + max_len, ...] for start in window_starts]


In [156]:
# Stack the Python lists of windows / targets into float64 NumPy arrays.
previous_full = np.asarray(previous_full).astype('float64')
predicted_full = np.asarray (predicted_full).astype('float64')

In [157]:
# Unpack the 3-D shape of the input windows. Note that this re-assigns the
# global `max_len` from the second axis of `previous_full`.
num_of_sample, max_len, freq = previous_full.shape

print (previous_full.shape)
print (predicted_full.shape)

(20482, 18, 128)
(20482, 128)


In [158]:
# Build our Deep Learning Architecture

from keras import layers
from keras import models
import keras
from keras.models import Model
import tensorflow as tf
from keras.layers.advanced_activations import *
# TensorFlow 1.x-style session configuration limiting GPU memory for this process.
config = tf.compat.v1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5  # fraction of GPU memory to use, e.g. 0.5 means half of the GPU memory
# NOTE(review): `session` is not referenced again in the visible cells - confirm
# that creating it is enough for the memory limit to take effect.
session = tf.compat.v1.Session(config=config)

# One training sample: max_len time steps with 128 values each.
midi_shape = (max_len, 128)

input_midi = keras.Input(midi_shape)

# --- Stage 1: LSTM over the input sequence, one 1024-wide vector per time step ---
x = layers.LSTM(1024, return_sequences=True, unit_forget_bias=True)(input_midi)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.3)(x)

# compute importance for each step
# Dense(1) gives one score per time step, softmax normalises the scores over
# the time axis, and RepeatVector + Permute copy each weight across the
# 1024 features so it can be multiplied element-wise with x.
attention = layers.Dense(1, activation='tanh')(x)
attention = layers.Flatten()(attention)
attention = layers.Activation('softmax')(attention)
attention = layers.RepeatVector(1024)(attention)
attention = layers.Permute([2, 1])(attention)

# Re-weight every time step of x by its attention weight (sequence length is kept).
multiplied = layers.Multiply()([x, attention])
sent_representation = layers.Dense(512)(multiplied)


x = layers.Dense(512, kernel_initializer='he_normal')(sent_representation) #+
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)

# --- Stage 2: second sequence-returning LSTM, 512 features per time step ---
x = layers.LSTM(512, return_sequences=True, unit_forget_bias=True)(x)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)


# compute importance for each step
# Same attention construction as above, repeated across 512 features this time.
attention = layers.Dense(1, activation='tanh')(x)
attention = layers.Flatten()(attention)
attention = layers.Activation('softmax')(attention)
attention = layers.RepeatVector(512)(attention)
attention = layers.Permute([2, 1])(attention)

multiplied = layers.Multiply()([x, attention])
sent_representation = layers.Dense(256)(multiplied)


x = layers.Dense(256)(sent_representation)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)


# --- Stage 3: final LSTM without return_sequences, collapsing the time axis ---
x = layers.LSTM(128, unit_forget_bias=True)(x)
x = layers.LeakyReLU()(x)
x = layers.BatchNormalization() (x)
x = layers.Dropout(0.22)(x)


# Output: softmax over 128 values, one per row of the pitch axis.
x = layers.Dense(128, activation='softmax')(x) 

# NOTE(review): no model.compile(...) call is visible in this cell; it would be
# required before the (currently disabled) model.fit training loop can run.
model = Model(input_midi, x)


2023-08-07 05:55:50.698399: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-07 05:55:50.698675: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-07 05:55:50.698848: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-07 05:55:50.699062: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-07 05:55:50.699237: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S

In [159]:
# Print the layer-by-layer overview (output shapes and parameter counts) of the model.
model.summary()

Model: "model_19"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_10 (InputLayer)          [(None, 18, 128)]    0           []                               
                                                                                                  
 lstm_27 (LSTM)                 (None, 18, 1024)     4722688     ['input_10[0][0]']               
                                                                                                  
 leaky_re_lu_45 (LeakyReLU)     (None, 18, 1024)     0           ['lstm_27[0][0]']                
                                                                                                  
 batch_normalization_45 (BatchN  (None, 18, 1024)    4096        ['leaky_re_lu_45[0][0]']         
 ormalization)                                                                             

In [160]:
def sample(preds, temperature=1.0):
    """Turn a 128-long prediction vector into the next time step of the piano roll.

    The predictions are rescaled by ``temperature`` and renormalised, entries
    outside the index range 48..99 are zeroed, and the 15 largest remaining
    entries are ranked from most to least probable. The best 1 or 2 of them
    (chosen at random) are marked as note starts with 1.0 and the next 3 are
    marked as continuation with 0.5. Everything else is 0.0.

    Because the selection is a deterministic top-k, the temperature rescaling
    (monotonic for ``temperature > 0``) does not change which entries rank
    highest.

    Parameters
    ----------
    preds : array-like of 128 probabilities (not modified).
    temperature : float
        Rescaling factor applied as ``exp(log(p) / temperature)``.

    Returns
    -------
    numpy.ndarray
        Vector of length 128 holding only the values 0.0, 0.5 and 1.0.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf and exp(-inf) is 0 again, so zero probabilities are fine;
    # only the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)

    num_of_top = 15
    num_of_first = np.random.randint(1,3) # 1 or 2 note starts.

    preds [0:48] = 0 # eliminate notes with low octaves
    preds [100:] = 0 # eliminate notes with very high octaves

    # Indices of the num_of_top largest entries, in no particular order.
    ind = np.argpartition(preds, -1*num_of_top)[-1*num_of_top:]
    # argsort is ascending, so reverse it to get the most probable index first.
    # Without the reversal the weakest of the top entries would be picked as note starts.
    top_indices_sorted = ind[np.argsort(preds[ind])[::-1]]

    array = np.zeros(128)
    array[top_indices_sorted[0:num_of_first]] = 1.0
    array[top_indices_sorted[num_of_first:num_of_first+3]] = 0.5

    return array

In [161]:
# Display the shape of the training input windows.
previous_full.shape

(20482, 18, 128)

In [162]:
'''
import random
import sys

epoch_total = 801
batch_size = 2

for epoch in range(1, epoch_total): # Train model with epoch_total 
    print('Epoch:', epoch)
    model.fit(previous_full, predicted_full, batch_size=batch_size, epochs=1,
              shuffle=True) # Fit model for 1 iteration.
    

    start_index = random.randint(0, len(midis_array)- max_len - 1)
    
    generated_midi = midis_array[start_index: start_index + max_len]
        
    if ((epoch%10) == 0):
      model.save_weights('my_model_weights.h5')

      for temperature in [1.2]:#1.2
          print('------ temperature:', temperature)

          for i in range(480):
              samples = generated_midi[i:]
              expanded_samples = np.expand_dims(samples, axis=0)
              preds = model.predict(expanded_samples, verbose=0)[0]
              preds = np.asarray(preds).astype('float64')

              next_array = sample(preds, temperature)
              
              midi_list = []
              midi_list.append(generated_midi)
              midi_list.append(next_array)
              generated_midi = np.vstack(midi_list)
              

          generated_midi_final = np.transpose(generated_midi,(1,0))
          output_notes = matrix_to_midi(generated_midi_final, random=0)
          midi_stream = stream.Stream(output_notes)
          midi_file_name = ('lstm_out_{}_{}.mid'.format(epoch, temperature))
          midi_stream.write('midi', fp=midi_file_name)
          parsed = converter.parse(midi_file_name)
          for part in parsed.parts:
              part.insert(0, instrument.Piano())
          parsed.write('midi', fp=midi_file_name)
'''

"\nimport random\nimport sys\n\nepoch_total = 801\nbatch_size = 2\n\nfor epoch in range(1, epoch_total): # Train model with epoch_total \n    print('Epoch:', epoch)\n    model.fit(previous_full, predicted_full, batch_size=batch_size, epochs=1,\n              shuffle=True) # Fit model for 1 iteration.\n    \n\n    start_index = random.randint(0, len(midis_array)- max_len - 1)\n    \n    generated_midi = midis_array[start_index: start_index + max_len]\n        \n    if ((epoch%10) == 0):\n      model.save_weights('my_model_weights.h5')\n\n      for temperature in [1.2]:#1.2\n          print('------ temperature:', temperature)\n\n          for i in range(480):\n              samples = generated_midi[i:]\n              expanded_samples = np.expand_dims(samples, axis=0)\n              preds = model.predict(expanded_samples, verbose=0)[0]\n              preds = np.asarray(preds).astype('float64')\n\n              next_array = sample(preds, temperature)\n              \n              midi_lis

In [163]:
# Inspect weights before the checkpoint is loaded.
# `lstm_weights` is overwritten on every pass, so after the loop it only holds
# the weights of the LAST entry of model.layers - despite the printed label.
for layer in model.layers:
    lstm_weights = layer.get_weights() # list of numpy arrays

print ('Lstm weights:', lstm_weights)


Lstm weights: [array([[-0.12224296,  0.00849919, -0.02755524, ..., -0.11062723,
        -0.13359016, -0.09257195],
       [ 0.00830756,  0.0106108 ,  0.0541364 , ...,  0.03149183,
        -0.12957801,  0.12617569],
       [ 0.07676673,  0.02992907, -0.11204779, ...,  0.08123395,
         0.03007442,  0.08350554],
       ...,
       [-0.06237789, -0.1416943 , -0.06299905, ...,  0.08587269,
        -0.13450405,  0.06901221],
       [ 0.00682142,  0.06599204, -0.11658302, ..., -0.0206587 ,
         0.07242097,  0.10865031],
       [-0.03401373,  0.02169538,  0.02500044, ..., -0.1351209 ,
         0.01143976, -0.06529003]], dtype=float32), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

In [164]:
import random
from keras.models import load_model
import numpy as np


# Wrap the already-built graph (same input and output tensors as above) in a
# new Model object and fill it with the trained weights from disk.
model = Model(input_midi, x)

model.load_weights('./training_midi/schumann/GCN50_LSTM250/my_model_weights.h5')




# Pick a random position in the dataset; the max_len rows starting there are
# the seed from which generation starts.
start_index = random.randint(0, len(midis_array)- max_len - 1)
    
generated_midi = midis_array[start_index: start_index + max_len]

In [165]:
# Inspect weights again after load_weights, to compare with the earlier printout.
# As before, only the weights of the last entry of model.layers survive the loop.
for layer in model.layers:
    lstm_weights = layer.get_weights() # list of numpy arrays

print ('Lstm weights:', lstm_weights)


Lstm weights: [array([[ 0.09898354, -0.04661011,  0.04526471, ..., -0.14553256,
        -0.14505753, -0.11100412],
       [ 0.10369075, -0.01685387,  0.11084764, ..., -0.1113098 ,
         0.10703409,  0.07558455],
       [-0.12649146,  0.05169357,  0.04687911, ...,  0.09698477,
         0.07183761,  0.13718681],
       ...,
       [ 0.08167681, -0.06389368,  0.05841148, ..., -0.03620089,
         0.06275338, -0.05363113],
       [ 0.14019316,  0.14958239, -0.05251255, ..., -0.00748959,
         0.0961367 ,  0.03582837],
       [-0.05867726,  0.14919503, -0.08831827, ..., -0.03645581,
         0.08930548, -0.09888922]], dtype=float32), array([-0.10350482, -0.04472326, -0.05848197, -0.09472198, -0.08195392,
       -0.07310443, -0.08567924, -0.08869315, -0.09576766, -0.10026473,
       -0.07457457, -0.10404094, -0.07839239, -0.06249927, -0.10060263,
       -0.0959958 , -0.11075784, -0.10807294, -0.11680683, -0.12499435,
       -0.12246582, -0.11038589, -0.11752604, -0.11721894, -0.062424

In [166]:
# Generate one MIDI file per temperature, always starting from the same seed window.
for temperature in [0.7, 2.7]:
        print('------ temperature:', temperature)
        # Reset to the seed: max_len rows taken from the dataset.
        generated_midi = midis_array[start_index: start_index + max_len]
        for i in range(680):
            # generated_midi grows by one row per iteration, so [i:] is always
            # its most recent max_len rows.
            samples = generated_midi[i:]
            expanded_samples = np.expand_dims(samples, axis=0) # add the batch axis
            preds = model.predict(expanded_samples, verbose=0)[0]
            preds = np.asarray(preds).astype('float64')

            # Convert the prediction into the next row of 0.0 / 0.5 / 1.0 values.
            next_array = sample(preds, temperature)
           
            # Append the new row to the sequence generated so far.
            midi_list = []
            midi_list.append(generated_midi)
            midi_list.append(next_array)
            generated_midi = np.vstack(midi_list)
            

        # matrix_to_midi expects the first axis of length 128 (rows) and time along columns.
        generated_midi_final = np.transpose(generated_midi,(1,0))
        output_notes = matrix_to_midi(generated_midi_final, random=1)
        midi_stream = stream.Stream(output_notes)
        midi_file_name = ('lstm_out_{}.mid'.format(temperature))
        midi_stream.write('midi', fp=midi_file_name)
        # Re-open the written file, insert a Piano instrument at the start of
        # every part and write the file again under the same name.
        parsed = converter.parse(midi_file_name)
        for part in parsed.parts:
            part.insert(0, instrument.Piano())
        parsed.write('midi', fp=midi_file_name)

        

------ temperature: 0.7
How many rows for non-start note at beginning: 18
How many rows for non-start note at end: 0
128 680
------ temperature: 2.7
How many rows for non-start note at beginning: 18
How many rows for non-start note at end: 0
128 680
