# LSTM Model Training

In [1]:
import numpy
from music21 import note, chord
import pickle
import os
import numpy as np
import glob
from music21 import corpus, converter

from keras.layers import LSTM, Flatten,Input, RepeatVector, Dropout,Permute, Dense, Activation, TimeDistributed, Embedding, Concatenate, Reshape
from keras.layers import Multiply, Lambda, Softmax
import keras.backend as K 
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import plot_model
from keras.optimizers import RMSprop
from keras.utils import np_utils
from models.LTSM import element_list, dicts, create_pitch_sequence, get_tracks, new_nn

Using TensorFlow backend.


# Parameter Initialization

In [2]:
# --- Run identification and data folders -----------------------------------
parent_folder = 'New_Music'    # Top-level folder grouping the runs
run_num = '0001'               # Run identifier; prefix of the run folder name
track_name = 'Tracks'          # Sub-folder of data/ holding the training MIDI files

# Run folder, e.g. run/New_Music/0001_Tracks (now built from run_num instead of
# a hard-coded '0001', so changing run_num really starts a new run folder).
folder_in = 'run/{}/'.format(parent_folder) + '_'.join([run_num, track_name])

storage = os.path.join(folder_in, 'data_storage')
training_data_folder = os.path.join('data', track_name)

# Create the folders for visualizations, data, weights and generated output.
# makedirs(..., exist_ok=True) also creates missing parent folders and repairs
# a partially created run folder, which a bare os.mkdir() could not do.
for subfolder in ('vizualizations', 'data_storage', 'weights_record', 'output_tracks'):
    os.makedirs(os.path.join(folder_in, subfolder), exist_ok=True)

# --- Data mode --------------------------------------------------------------
# 'load'        -> read and parse the MIDI files from training_data_folder and
#                  store the extracted pitches / time steps in `storage`.
# anything else -> reuse the pitches / time steps previously pickled in `storage`.
mode = 'load'

# --- Model hyper-parameters -------------------------------------------------
sequence_length = 32                # Number of previous elements fed to the network per sample
embedding_size = 100                # Size of the pitch and duration embedding vectors
recurr_neural_network_units = 256   # Units in each LSTM layer
attention_usage = True              # Whether the attention mechanism is added on top of the LSTMs

## Parse the musical tracks

In [3]:
if mode != 'load':
    # Reuse the pitches and time steps extracted by an earlier run.
    # NOTE(review): pickle.load can execute arbitrary code; only open files
    # that this notebook wrote itself.
    with open(os.path.join(storage, 'pitches'), 'rb') as fi:
        pitches = pickle.load(fi)
    with open(os.path.join(storage, 'time_steps'), 'rb') as fi:
        time_steps = pickle.load(fi)

else:
    # Collect the MIDI tracks from the training data folder and parse them.
    tracks = glob.glob(os.path.join(training_data_folder, "*.mid"))
    parser = converter
    print(parser)
    print("Total number of files:", len(tracks))  # Total number of musical tracks.

    pitches = []
    time_steps = []
    score = None  # Stays None when no track was found (avoids a NameError below).

    for i, track in enumerate(tracks):
        print(i+1, "Parsing %s" % track)
        first_score = parser.parse(track).chordify()

        # Only the untransposed version (interval 0) is used; widen the range
        # to augment the data with transpositions.
        for j in range(1):
            score = first_score.transpose(j)

            # Prefix every track with sequence_length 'START' tokens (duration 0).
            pitches.extend(['START'] * sequence_length)
            time_steps.extend([0] * sequence_length)

            # Pitches are stored with their octave so that the same pitch
            # class in different octaves counts as a different token.
            for element in score.flat.notes:
                if isinstance(element, note.Note):
                    name = element.name if element.isRest else element.nameWithOctave
                    pitches.append(str(name))
                    time_steps.append(element.duration.quarterLength)
                elif isinstance(element, chord.Chord):
                    # A chord becomes one token: its pitches joined by '.'.
                    pitches.append('.'.join(n.nameWithOctave for n in element.pitches))
                    time_steps.append(element.duration.quarterLength)

    # Persist the extracted sequences so later runs can skip the parsing.
    os.makedirs(storage, exist_ok=True)
    with open(os.path.join(storage, 'pitches'), 'wb') as f:
        pickle.dump(pitches, f)
    with open(os.path.join(storage, 'time_steps'), 'wb') as f:
        pickle.dump(time_steps, f)

    # Debug view of the LAST parsed score only: `parent` repeats each element
    # once per contained pitch, `child` holds the matching pitch-space numbers.
    parent = []
    child = []
    if score is not None:
        for nt in score.flat.notes:
            if isinstance(nt, note.Note):
                child.append(max(0.0, nt.pitch.ps))
                parent.append(nt)
            elif isinstance(nt, chord.Chord):
                for pitch in nt.pitches:
                    child.append(max(0.0, pitch.ps))
                    parent.append(nt)
    print(parent)
    print(len(pitches))

<module 'music21.converter' from 'C:\\Users\\Arjun\\anaconda3\\envs\\generative\\lib\\site-packages\\music21\\converter\\__init__.py'>
Total number of files: 7
1 Parsing data\Tracks\BEAT1R-Copy1.mid
2 Parsing data\Tracks\cs1-6gig-Copy1.mid
3 Parsing data\Tracks\cs2-3cou-Copy1.mid
4 Parsing data\Tracks\cs4-3cou-Copy1.mid
5 Parsing data\Tracks\cs5-2all-Copy1.mid
6 Parsing data\Tracks\cs6-2all-Copy1.mid
7 Parsing data\Tracks\Sample input-Copy1.mid
[<music21.chord.Chord B3>, <music21.chord.Chord G2 D3 B3>, <music21.chord.Chord G2 D3 B3>, <music21.chord.Chord G2 D3 B3>, <music21.chord.Chord B3>, <music21.chord.Chord A3>, <music21.chord.Chord G3>, <music21.chord.Chord F#3>, <music21.chord.Chord G3>, <music21.chord.Chord D3>, <music21.chord.Chord E3>, <music21.chord.Chord F#3>, <music21.chord.Chord G3>, <music21.chord.Chord A3>, <music21.chord.Chord B3>, <music21.chord.Chord C4>, <music21.chord.Chord D4>, <music21.chord.Chord B3>, <music21.chord.Chord G3>, <music21.chord.Chord F#3>, <music21.

## Create files to store the unique pitches and time steps

In [4]:
# Sorted vocabularies of the distinct pitch tokens and durations, with their sizes.
pitch = sorted(set(pitches))
pitch_num = len(pitch)
time_step = sorted(set(time_steps))
timestep_num = len(time_step)

# Bundle the vocabularies and their sizes and store them in the file "diff_ones".
diff_ones = [pitch, pitch_num, time_step, timestep_num]
diff_ones_path = os.path.join(storage, 'diff_ones')
with open(diff_ones_path, 'wb') as f:
    pickle.dump(diff_ones, f)

# Lookup dictionaries for durations and pitches.
# NOTE(review): dicts() comes from models.LTSM; it presumably returns the
# (element -> integer, integer -> element) pair -- confirm against the helper.
timestep_numerical, numerical_timestep = dicts(time_step)
pitch_numerical, numerical_pitch = dicts(pitch)

# Bundle the four lookup tables and store them in the file "all_combined".
all_combined = [pitch_numerical, numerical_pitch, timestep_numerical, numerical_timestep]
all_combined_path = os.path.join(storage, 'all_combined')
with open(all_combined_path, 'wb') as f:
    pickle.dump(all_combined, f)

In [5]:
# Print a header, then display the number of entries in the pitch lookup dictionary.
print('\nPitches: Num')
len(pitch_numerical)



Pitches: Num


233

In [6]:
# Print a header, then display the whole duration lookup dictionary.
print('\nTimesteps:Num')
timestep_numerical


Timesteps:Num


{0: 0,
 Fraction(1, 12): 1,
 Fraction(1, 6): 2,
 0.25: 3,
 Fraction(1, 3): 4,
 Fraction(5, 12): 5,
 0.5: 6,
 Fraction(7, 12): 7,
 Fraction(2, 3): 8,
 0.75: 9,
 1.0: 10,
 1.25: 11,
 1.5: 12,
 2.0: 13}

## Pitch and timestep sequences 

In [7]:

# Unpack the lookup tables and the vocabularies built earlier.
pitch_numerical, numerical_pitch, timestep_numerical, numerical_timestep = all_combined
pitch, pitch_num, time_step, timestep_num = diff_ones

# Slide a window of `sequence_length` elements over the data: each window is
# one input sample and the element right after it is the target to predict.
window_starts = range(len(pitches) - sequence_length)

pitch_input = [
    [pitch_numerical[token] for token in pitches[start:start + sequence_length]]
    for start in window_starts
]
pitch_output = [
    pitch_numerical[pitches[start + sequence_length]]
    for start in window_starts
]

timestep_input = [
    [timestep_numerical[length] for length in time_steps[start:start + sequence_length]]
    for start in window_starts
]
timestep_output = [
    timestep_numerical[time_steps[start + sequence_length]]
    for start in window_starts
]

arrangement = len(pitch_input)  # Number of training samples (windows).

# Inputs become (samples, sequence_length) integer matrices; targets are one-hot encoded.
pitch_input = np.reshape(pitch_input, (arrangement, sequence_length))
timestep_input = np.reshape(timestep_input, (arrangement, sequence_length))
pitch_output = np_utils.to_categorical(pitch_output, num_classes=pitch_num)
timestep_output = np_utils.to_categorical(timestep_output, num_classes=timestep_num)

# Network inputs and outputs, each in the order [pitch, duration].
ltsm_ip = [pitch_input, timestep_input]
ltsm_op = [pitch_output, timestep_output]


In [8]:
# Inspect the first training sample: ltsm_ip holds [pitch_input, timestep_input]
# and ltsm_op holds [pitch_output, timestep_output].
print('pitch input',ltsm_ip[0][0])
print(len(ltsm_ip[0][0]))  # Length of one input sequence
print('duration input',ltsm_ip[1][0])
print('pitch output',ltsm_op[0][0])  # One-hot encoded target pitch
print('duration output',ltsm_op[1][0])  # One-hot encoded target duration


pitch input [232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232
 232 232 232 232 232 232 232 232 232 232 232 232 232 232]
32
duration input [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
pitch output [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
duration output [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 

## RNN with attention mechanism

In [9]:
# The network is built inline here.
# NOTE(review): new_nn() imported from models.LTSM presumably builds the same
# network -- confirm before swapping it in:
#model, model_with_attention = new_nn(pitch_num, timestep_num, embedding_size, recurr_neural_network_units, attention_usage)

# `attention_usage` is taken from the parameter cell; it is deliberately NOT
# reassigned here, so the configured value is the one that takes effect.

# Inputs: the previous pitch and time-step sequences (integer coded, any length).
pitch_in = Input(shape = (None,))
timestep_in = Input(shape = (None,))

# Embedding layers turn each integer code into a vector of size embedding_size.
pitch_layer = Embedding(pitch_num, embedding_size)(pitch_in)
time_step_layer = Embedding(timestep_num, embedding_size)(timestep_in)

# Concatenate both embedded inputs per time step.
nn_layers = Concatenate()([pitch_layer,time_step_layer])

# First LSTM layer; returns its hidden state for every time step.
nn_layers = LSTM(recurr_neural_network_units, return_sequences=True)(nn_layers)
#nn_layers = Dropout(0.3)(nn_layers)

if not attention_usage:
    # Without attention only the last hidden state of the second LSTM is used.
    final_layer = LSTM(recurr_neural_network_units)(nn_layers)

else:
    # Second LSTM layer, again returning the hidden state of every time step.
    nn_layers = LSTM(recurr_neural_network_units, return_sequences=True)(nn_layers)

    # One tanh-activated score per time step, flattened and normalized with a
    # softmax into the attention weights.
    dense_layer = Dense(1, activation='tanh')(nn_layers)
    dense_layer = Reshape([-1])(dense_layer)
    act_func = Activation('softmax')(dense_layer)

    # Repeat the weights once per LSTM unit and swap the last two axes so that
    # they line up element-wise with the hidden states.
    act_func_2 = Permute([2, 1])(RepeatVector(recurr_neural_network_units)(act_func))

    # Weighted sum of the hidden states over the time axis.
    final_layer = Multiply()([nn_layers, act_func_2])
    final_layer = Lambda(lambda unit: K.sum(unit, axis=1), output_shape=(recurr_neural_network_units,))(final_layer)

# Two softmax heads: distribution over the next pitch and over the next duration.
pitch_out = Dense(pitch_num, activation = 'softmax', name = 'pitch')(final_layer)
timesteps_out = Dense(timestep_num, activation = 'softmax', name = 'duration')(final_layer)

# Takes the previous pitches and time steps, outputs the next pitch and time step.
model = Model([pitch_in, timestep_in], [pitch_out, timesteps_out])

if attention_usage:
    # Companion model exposing the attention weights as its output.
    model_with_attention = Model([pitch_in, timestep_in], act_func)
else:
    model_with_attention = None

# One categorical cross-entropy loss per output head.
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], optimizer=RMSprop(lr = 0.001))

plot_model(model, to_file=os.path.join(folder_in ,'vizualizations/model.png'), show_shapes = True, show_layer_names = True)
model.summary()

W0505 21:22:14.687928  2116 deprecation_wrapper.py:119] From C:\Users\Arjun\anaconda3\envs\generative\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0505 21:22:14.700928  2116 deprecation_wrapper.py:119] From C:\Users\Arjun\anaconda3\envs\generative\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0505 21:22:14.703964  2116 deprecation_wrapper.py:119] From C:\Users\Arjun\anaconda3\envs\generative\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0505 21:22:15.459161  2116 deprecation_wrapper.py:119] From C:\Users\Arjun\anaconda3\envs\generative\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0505 21:22:15.4

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 100)    23300       input_1[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 100)    1400        input_2[0][0]                    
__________________________________________________________________________________________________
concatenat

## Train the neural network

In [10]:
# Folder of this run that holds the weight files.
weights_in = os.path.join(folder_in, 'weights_record')
# Uncomment to continue from the weights saved by an earlier training run:
# model.load_weights(os.path.join(weights_in, "weights_record.h5"))

In [11]:
# Weights are stored in this folder so that they can be used during model prediction.
weights_in = os.path.join(folder_in, 'weights_record')

# Both checkpoints watch the training loss and only write when it improves.
checkpoint_options = dict(monitor='loss', verbose=0, save_best_only=True, mode='min')

# One file per improvement; the epoch number and loss are part of the file name.
checkpoint1 = ModelCheckpoint(
    os.path.join(weights_in, "weights_record-improvement-{epoch:02d}-{loss:.4f}-bigger.h5"),
    **checkpoint_options
)

# A single file that is overwritten on every improvement.
checkpoint2 = ModelCheckpoint(
    os.path.join(weights_in, "weights_record.h5"),
    **checkpoint_options
)

# Stop after 10 epochs without improvement of the training loss and restore the best weights.
early_stopping = EarlyStopping(monitor='loss', restore_best_weights=True, patience=10)

callbacks_list = [checkpoint1, checkpoint2, early_stopping]

# Write the current weights once before training starts.
model.save_weights(os.path.join(weights_in, "weights_record.h5"))

# The original note next to `epochs` reads "EPOCKS=20000".
model.fit(
    ltsm_ip,
    ltsm_op,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    callbacks=callbacks_list,
    shuffle=True,
)


W0505 21:22:15.925303  2116 deprecation_wrapper.py:119] From C:\Users\Arjun\anaconda3\envs\generative\lib\site-packages\keras\backend\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0505 21:22:16.273303  2116 deprecation.py:323] From C:\Users\Arjun\anaconda3\envs\generative\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 3917 samples, validate on 980 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23159d0b748>