# MAIN TRAIN PIPELINE

## Importing libraries: Keras, music21, and the local `model` module

In [2]:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint

from model import LSTM_model
import warnings
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.simplefilter("ignore")

### Parsing part

#### Main idea: convert all MIDI files into one long sequence of notes and chords
We have 92 files in the `midi_songs` directory.
The music21 library is used for parsing.

In [None]:
def parsing_notes():
    """Parse every MIDI file in ``midi_songs/`` into one flat list of tokens.

    Each ``note.Note`` is stored as the string form of its pitch, and each
    ``chord.Chord`` as its normal-order integers joined with dots.  Any other
    element is skipped.  The resulting list is pickled to ``data/notes`` (the
    ``data/`` directory must already exist) and is also returned.

    Returns:
        list[str]: tokens of all files, concatenated in sorted file-path order.
    """
    notes = []

    # Sort the paths so the token sequence does not depend on the order in
    # which the operating system happens to list the directory.
    midi_files = sorted(glob.glob("midi_songs/*.mid"))

    for idx, path in enumerate(midi_files):
        midi_file = converter.parse(path)
        if idx % 10 == 0:
            print('{} midi files parsed'.format(idx))

        # Prefer the first instrument part of the file; if partitioning fails
        # or yields no usable part, fall back to the flattened notes instead.
        try:
            partitioned = instrument.partitionByInstrument(midi_file)
        except Exception:
            partitioned = None
        try:
            elements = partitioned.parts[0].recurse()
        except (AttributeError, IndexError, TypeError):
            # Keep this handler narrow: a broad one would silently hide
            # programming errors (such as a misspelled variable name).
            elements = midi_file.flat.notes

        for element in elements:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    with open('data/notes', 'wb') as notes_file:
        pickle.dump(notes, notes_file)
    return notes

In [3]:
# Parse the MIDI corpus; the call also pickles the token list to data/notes.
parsed = parsing_notes()

0 midi files parsed
10 midi files parsed
20 midi files parsed
30 midi files parsed
40 midi files parsed
50 midi files parsed
60 midi files parsed
70 midi files parsed
80 midi files parsed
90 midi files parsed


All unique chords and notes, which we use to build the labels

In [4]:
len(set(parsed))  # vocabulary size: number of distinct note/chord tokens

359

The input features for the LSTM have the following shape:

[number of sequences, sequence of 100 notes] 

Output: 

[number of sequences, the note that follows each sequence of 100 notes] 

So our aim is to predict the next note or chord.

In [5]:
def prepare_sequences(notes, n_vocab, sequence_length=100):
    """Turn a token list into sliding-window inputs and one-hot targets.

    Every window of ``sequence_length`` consecutive tokens becomes one input
    sample, and the token that follows the window becomes its target.

    Args:
        notes: sequence of note/chord tokens (strings).
        n_vocab: vocabulary size; used both to scale the inputs and as the
            width of the one-hot targets.
        sequence_length: number of tokens per input window (default 100).

    Returns:
        tuple: ``(input_features, output_features)`` where ``input_features``
        has shape ``(n_patterns, sequence_length, 1)`` with values divided by
        ``n_vocab``, and ``output_features`` has shape ``(n_patterns, n_vocab)``.
    """
    # Map every distinct token to an integer id, in sorted token order.
    pitchnames = sorted(set(notes))
    note_to_int = {name: index for index, name in enumerate(pitchnames)}

    # Encode the corpus once instead of re-encoding each overlapping window.
    encoded = [note_to_int[name] for name in notes]

    input_features = []
    output_features = []
    for start in range(len(encoded) - sequence_length):
        end = start + sequence_length
        input_features.append(encoded[start:end])
        output_features.append(encoded[end])

    n_patterns = len(input_features)

    # Reshape the input into the (samples, timesteps, features) layout.
    input_features = numpy.reshape(input_features, (n_patterns, sequence_length, 1))
    # Normalize the integer ids by the vocabulary size.
    input_features = input_features / float(n_vocab)

    # Pin the one-hot width to the vocabulary size.  Inferring it from the
    # largest label seen would produce too few columns whenever the
    # highest-index token never occurs as a target.
    output_features = np_utils.to_categorical(output_features, num_classes=n_vocab)
    return (input_features, output_features)

In [6]:
# Reuse the parsed token list and derive the vocabulary size from it.
notes = parsed
size_vocab = len(set(notes))  # number of distinct tokens
# Build the network inputs and the one-hot encoded targets.
in_f, out_f = prepare_sequences(notes, size_vocab)

In [7]:
# Report the shapes of the arrays that are passed to the network.
print('Size of input : LSTM in {}'.format(in_f.shape))
print('Size of output: LSTM out {}'.format(out_f.shape))

Size of input : LSTM in (60398, 100, 1)
Size of output: LSTM out (60398, 359)


The model is described in a separate file, `model.py`.
As we can see, when we add new LSTM or Dense layers,

the total number of parameters increases.

In [8]:
# Build the network from the input array and the vocabulary size
# (LSTM_model is imported from the local `model` module).
LSTM = LSTM_model(in_f, size_vocab)

In [38]:
LSTM.summary() # layer-by-layer summary of our default model

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 100, 512)          1052672   
_________________________________________________________________
dropout_4 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 100, 512)          2099200   
_________________________________________________________________
dropout_5 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 512)               2099200   
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
__________

In [9]:
LSTM.summary() # summary of the advanced model: wider and deeper LSTM stack than the default

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100, 1024)         4202496   
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 1024)         0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 1024)         8392704   
_________________________________________________________________
dropout_2 (Dropout)          (None, 100, 1024)         0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 100, 512)          3147776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 512)               2099200   
__________

In [10]:
# File-name template for saved weights; {epoch} and {loss} are filled in at save time.
filepath = "weights-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',  # track the training loss
    verbose=0,
    mode='min'  # lower loss is better
)
# NOTE(review): save_best_only is not set here, so presumably a weights file is
# written after every epoch — confirm against the installed Keras version.
callbacks_list = [checkpoint]

In [None]:
# Training step: 100 epochs, batches of 32 samples, with the checkpoint callback attached.
LSTM.fit(in_f, out_f, epochs=100, batch_size=32, callbacks=callbacks_list)

Epoch 1/100
   64/60398 [..............................] - ETA: 6:55:22 - loss: 5.9420

The training process takes a lot of time.
We trained on an NVIDIA Quadro P6000.