# Importing important libraries

 *  **glob** lets you match specified patterns.



 *  **music21** is a toolkit that let you work with music files. Understand them, process them and write them.


In [3]:
import glob
from music21 import converter, instrument, note, chord


1. **converter**:  It is used to load music from various file formats music21.stream slass
2. **instrument**:  It is used to gather the information about the instruments used in the music file.
3. **note**: It is a class in music21 toolkit to gather information about notes and to create Notes in a music file.
4. **chord**: It is a class in music21 toolkit to gather information about Chords and to create Chords in a music file.



# Creating the dataset

Here we will iterate over each **midi file** in my dataset and extract the information about the notes and chords of the music file. We will read the file as a **sequence of Notes and Chords**. These sequences are going to be my dataset.

In [4]:
!unzip "source path" -d "/content/"

unzip:  cannot find or open source path, source path.zip or source path.ZIP.


In [2]:
notes=[]
for file in glob.glob("/content/drive/My Drive/midi_songs/*.mid"):
  midi = converter.parse(file) #this will convert the file intp music.21 stream.score objects.
  parts = instrument.partitionByInstrument(midi)# this will create partition into a Part for each unique Instrument.
  notes_to_parse = parts.parts[0]
  #this will have information about the notes,chords and rests in the song.
  #now we need to extract the information of notes and chords.
  for element in notes_to_parse:
    if isinstance(element, note.Note):
      #print(str(element.pitch))
      notes.append(str(element.pitch))
    elif isinstance(element, chord.Chord):
      #print(str(element.normalOrder))
      notes.append('.'.join(str(n) for n in element.normalOrder))

Now we have the notes and chords information of each song in the list named **'notes'**. Let's have a look at the list.

In [5]:
notes

['F6',
 'D3',
 'C#4',
 'A3',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'G4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F6',
 'D3',
 'C#4',
 'A3',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'G4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F6',
 'D3',
 'B3',
 'A3',
 'F4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'G4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'F6',
 'D3',
 'B-3',
 'A3',
 'G4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B-3',
 'C6',
 'E4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'G4',
 'A3',
 'B3',
 'F6',
 'D3',
 'C#4',
 'A3',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'G4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F6',
 'D3',
 'C#4',
 'A3',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'G4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F4',
 'A3',
 'C#4',
 'F6',
 'D3',
 'B3',
 'A3',
 'F4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'G4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'F4',
 'A3',
 'B3',
 'F6',
 'D3',
 'B-3',
 'A3',
 'G4',


In [None]:
len(notes)

57177

This means we have 57177 notes and chords in all the songs combinely. Now we need to Form our dataset with this

The idea to generate the dataset is, one input will have 100 notes+chords and the 101th note or chord is our output for that input. Then the second input output will be generated by shifting the same task for next 100 notes and chords.


**For Example:**
 * from 0th index till 99th index the sequence of notes and chords is the input, the note or chord present at the 100th index is the output for this particular input.

 * from 100th index till 199th index the sequence of notes and chords is the input, the note or chord present at the 200th index is the output for this particular input.

 And so on.

Additionally, we will also have to **encode them**. As the model doesn't understand anything present in **object or str** field.

In [6]:
import numpy 
from keras.utils import np_utils

n_vocab = len(set(notes))#to get the number of unique data present in notes
sequence_length = 100  #You can keep it higher or lower
# get all pitch names
pitchnames = sorted(set(item for item in notes)) #getting various unique data present in notes
# create a dictionary to map pitches to integers
note_to_int = dict((note, number) for number, note in enumerate(pitchnames))
network_input = []
network_output = []
# create input sequences and the corresponding outputs
for i in range(0, len(notes) - sequence_length):
    sequence_in = notes[i:i + sequence_length]
    sequence_out = notes[i + sequence_length]
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])


# Data Preprocessing

Now we need to reshape and normalize the input data. Also we will one hot encode the output data.

In [7]:
n_patterns = len(network_input)

# reshape the input into a format compatible with LSTM layers
network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
#normalize input
network_input = network_input / float(n_vocab)
network_output = np_utils.to_categorical(network_output)

# Buiding the Model

In [8]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation


In [9]:
model = Sequential()
model.add(LSTM(
        256,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        return_sequences=True
    ))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(256))
model.add(Dense(256))
model.add(Dropout(0.3))
model.add(Dense(n_vocab))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 256)          264192    
_________________________________________________________________
dropout (Dropout)            (None, 100, 256)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 512)          1574912   
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               787456    
_________________________________________________________________
dense (Dense)                (None, 256)               65792     
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0

# Training the Model

In [11]:
model.fit(network_input, network_output, epochs=80, batch_size=32)

Epoch 1/80


KeyboardInterrupt: ignored

# Saving the Model

In [None]:
  model.save_weights('music125.h5')


In [None]:
with open('model_architecture125.json','w') as f:
  f.write(model.to_json())

# Restoring the Model

In [12]:
from keras.models import load_model
from keras.models import model_from_json

In [None]:
model = Sequential()
model.add(LSTM(
        256,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        return_sequences=True
    ))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(256))
model.add(Dense(256))
model.add(Dropout(0.3))
model.add(Dense(n_vocab))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

In [14]:
# Model reconstruction from JSON file
with open('/content/music.json', 'r') as f:
    model = model_from_json(f.read())

In [18]:

# Load weights into the new model
model.load_weights('/content/music125.h5')


In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 256)          264192    
_________________________________________________________________
dropout (Dropout)            (None, 100, 256)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 512)          1574912   
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               787456    
_________________________________________________________________
dense (Dense)                (None, 256)               65792     
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0

# Testing the model

In [None]:
type(network_input[0])

numpy.ndarray

In [25]:
start = numpy.random.randint(0, len(network_input)-1)
b=network_input[start]

In [26]:
b

array([[0.68715084],
       [0.90223464],
       [0.21787709],
       [0.90223464],
       [0.77653631],
       [0.39385475],
       [0.91899441],
       [0.56145251],
       [0.91899441],
       [0.77653631],
       [0.68715084],
       [0.95251397],
       [0.95251397],
       [0.80726257],
       [0.80726257],
       [0.85195531],
       [0.79050279],
       [0.91061453],
       [0.9273743 ],
       [0.91061453],
       [0.85195531],
       [0.79050279],
       [0.83240223],
       [0.79608939],
       [0.79050279],
       [0.97486034],
       [0.83240223],
       [0.79608939],
       [0.79050279],
       [0.94134078],
       [0.97486034],
       [0.79050279],
       [0.92458101],
       [0.94134078],
       [0.79050279],
       [0.79050279],
       [0.85195531],
       [0.9273743 ],
       [0.91061453],
       [0.79050279],
       [0.83240223],
       [0.85195531],
       [0.93575419],
       [0.83240223],
       [0.79608939],
       [0.93575419],
       [0.97486034],
       [0.941

array([0.68715084, 0.90223464, 0.21787709, 0.90223464, 0.77653631,
       0.39385475, 0.91899441, 0.56145251, 0.91899441, 0.77653631,
       0.68715084, 0.95251397, 0.95251397, 0.80726257, 0.80726257,
       0.85195531, 0.79050279, 0.91061453, 0.9273743 , 0.91061453,
       0.85195531, 0.79050279, 0.83240223, 0.79608939, 0.79050279,
       0.97486034, 0.83240223, 0.79608939, 0.79050279, 0.94134078,
       0.97486034, 0.79050279, 0.92458101, 0.94134078, 0.79050279,
       0.79050279, 0.85195531, 0.9273743 , 0.91061453, 0.79050279,
       0.83240223, 0.85195531, 0.93575419, 0.83240223, 0.79608939,
       0.93575419, 0.97486034, 0.94134078, 0.82681564, 0.83240223,
       0.85195531, 0.84636872, 0.92458101, 0.94134078, 0.93575419,
       0.93575419, 0.93575419, 0.93575419, 0.93575419, 0.93575419,
       0.93575419, 0.93575419, 0.05586592, 0.98603352, 0.44413408,
       0.98603352, 0.44413408, 0.44413408, 0.44692737, 0.98603352,
       0.43854749, 0.98603352, 0.61452514, 0.98603352, 0.79608

In [32]:
start = numpy.random.randint(0, len(network_input)-1)
int_to_note = dict((number, note) for number, note in enumerate(pitchnames))
pattern = (network_input[start])
prediction_output = []
# generate 500 notes
for note_index in range(500):
    prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
    prediction_input = (prediction_input) / float(n_vocab)
    prediction = model.predict(prediction_input, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_note[index]
    prediction_output.append(result)
    pattern=numpy.append(pattern,index)
    pattern = pattern[1:len(pattern)]


In [33]:
prediction_output

['0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '0.5',
 '5.10',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '0.5',
 '5.10',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5',
 '0.5',
 '2.5',
 '0.5',
 '0.5',
 '2.5',
 '2.5',
 '0.5',
 '0.5',
 '0.5',
 '2.5'

# Converting output into midi file

In [34]:
offset = 0
output_notes = []
# create note and chord objects based on the values generated by the model
for pattern in prediction_output:
    # pattern is a chord
    if ('.' in pattern) or pattern.isdigit():
        notes_in_chord = pattern.split('.')
        notes = []
        for current_note in notes_in_chord:
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            notes.append(new_note)
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
        output_notes.append(new_chord)
    # pattern is a note
    else:
        new_note = note.Note(pattern)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        output_notes.append(new_note)
    # increase offset each iteration so that notes do not stack
    offset += 0.5

In [35]:
import music21
midi_stream = music21.stream.Stream(output_notes)
midi_stream.write('midi', fp='test_output.mid')

'test_output.mid'