In [2]:
# pip install music21
# Commented out.. as if run again, doesn't try to install

In [3]:
from music21 import converter, instrument, note, chord, stream
import glob
import pickle
import numpy as np

## Read a Midi File

In [4]:
song1 = converter.parse("mozart/mz_311_1.mid")
print(type(song1))

<class 'music21.stream.base.Score'>


In [5]:
song1

<music21.stream.Score 0x213578836d0>

In [6]:
# song1 --> object of stream.Score type
#       --> will contain music in form of notes and chords
song1.show('midi')
# This will show the song in playable format

In [7]:
song1.show('text')
# This will show the song in text-format (notes & chords)

{0.0} <music21.metadata.Metadata object at 0x21357883e50>
{0.0} <music21.stream.Part 0x21357883ee0>
    {0.0} <music21.stream.Measure 1 offset=0.0>
        {0.0} <music21.instrument.Piano 'Piano right: Piano right'>
        {0.0} <music21.instrument.Piano 'Piano'>
        {0.0} <music21.clef.TrebleClef>
        {0.0} <music21.tempo.MetronomeMark allegrissimo Quarter=141.83>
        {0.0} <music21.key.Key of D major>
        {0.0} <music21.meter.TimeSignature 4/4>
        {0.0} <music21.chord.Chord A4 D5 F#4>
        {1.0} <music21.note.Rest quarter>
        {2.0} <music21.tempo.MetronomeMark Quarter=147.83>
        {2.0} <music21.note.Note G>
        {2.25} <music21.note.Note F#>
        {2.5} <music21.note.Note E>
        {2.75} <music21.note.Note F#>
        {3.0} <music21.note.Note A>
        {3.25} <music21.note.Note G>
        {3.5} <music21.tempo.MetronomeMark Quarter=135.83>
        {3.5} <music21.note.Note F#>
        {3.75} <music21.note.Note G>
    {4.0} <music21.stream.Measu

In [8]:
# So, the chords and notes are stored in nested forms of containers
# .. to simplify this, store all of them in a single list
# ==> Flatten the elements.
elements_of_song = song1.flat.notes

In [9]:
print(len(elements_of_song))
print(elements_of_song)
print(type(elements_of_song))

2601
<music21.stream.iterator.StreamIterator for Score:0x2135ee7fc10 @:0>
<class 'music21.stream.iterator.StreamIterator'>


In [10]:
for e in elements_of_song:
    print(e, e.offset, type(e))
    # e.offset --> will tell the time-duration of element

<music21.chord.Chord A4 D5 F#4> 0.0 <class 'music21.chord.Chord'>
<music21.chord.Chord A2 D2 D3> 0.0 <class 'music21.chord.Chord'>
<music21.note.Note G> 2.0 <class 'music21.note.Note'>
<music21.chord.Chord F#4 D4> 2.0 <class 'music21.chord.Chord'>
<music21.note.Note F#> 2.25 <class 'music21.note.Note'>
<music21.note.Note E> 2.5 <class 'music21.note.Note'>
<music21.note.Note F#> 2.75 <class 'music21.note.Note'>
<music21.note.Note A> 3.0 <class 'music21.note.Note'>
<music21.chord.Chord E4 G4> 3.0 <class 'music21.chord.Chord'>
<music21.note.Note G> 3.25 <class 'music21.note.Note'>
<music21.note.Note F#> 3.5 <class 'music21.note.Note'>
<music21.note.Note G> 3.75 <class 'music21.note.Note'>
<music21.note.Note A> 4.0 <class 'music21.note.Note'>
<music21.chord.Chord A4 F#4> 4.0 <class 'music21.chord.Chord'>
<music21.note.Note A> 6.0 <class 'music21.note.Note'>
<music21.note.Note B> 6.5 <class 'music21.note.Note'>
<music21.note.Note C#> 7.0 <class 'music21.note.Note'>
<music21.note.Note D> 7.5

## Get the Notes & Chords from the Song

In [11]:
elex = elements_of_song[0]
ele2 = elements_of_song[4]
# isinstance(element, classType)
# If the element and its class match with classType --> this returns True (else False)
flag1a = isinstance(elex, note.Note)
flag1b = isinstance(elex, chord.Chord)
flag2a = isinstance(ele2, note.Note)
flag2b = isinstance(ele2, chord.Chord)
print(flag1a, flag1b, flag2a, flag2b)

False True True False


#### Processing a Note :-

In [12]:
note1 = elements_of_song[4]
print(note1.pitch)
print(type(note1))
# This gives the note in form of a class
print(type(note1.pitch))
# Get the string from the class
currNote = str(note1.pitch)
print(currNote)
# This will recover the note-name from class

F#5
<class 'music21.note.Note'>
<class 'music21.pitch.Pitch'>
F#5


#### Processing a Chord :-

In [13]:
chord1 = elements_of_song[0]
print(chord1)
print(type(chord1))
# This is a chord, let's figure this out.. how to process this
print(chord1.normalOrder)
# chord.normalOrder --> Gives the list of nodes in it.
# 2 --> A4
# 6 --> D5
# 9 --> F#4
# (Following some pattern of indexing.. have to figure it out)
print(type(chord1.normalOrder))
# Convert the chord-list into a string, concatenated with "+"
currChord = "+".join(str(x) for x in chord1.normalOrder)
print(currChord)

<music21.chord.Chord A4 D5 F#4>
<class 'music21.chord.Chord'>
[2, 6, 9]
<class 'list'>
2+6+9


#### Making a list, only of Notes (from Notes) OR (from Chords)

In [14]:
notes_of_song = []
# Empty array container for notes & chords

for ele in elements_of_song:
    # If element is a note, store it's pitch
    if(isinstance(ele, note.Note) == True):
        tempNote = str(ele.pitch)
        notes_of_song.append(tempNote)
    elif(isinstance(ele, chord.Chord) == True):
    # Else, element is a chord, split notes, and make string of them
        tempChord = "+".join(str(x) for x in ele.normalOrder)
        notes_of_song.append(tempChord)

In [15]:
print("No. of notes/chords =", len(notes_of_song))
for note1 in notes_of_song:
    print(note1)

No. of notes/chords = 2601
2+6+9
9+2
G5
2+6
F#5
E5
F#5
A5
4+7
G5
F#5
G5
A5
6+9
A5
B5
C#6
D6
A5
F#3
D4
F#5
D3
D4
A5
E3
G5
F#5
D4
G5
A5
A3
G5
C#4
F5
2
F#5
G5
2+6
F#5
E5
F#5
A5
4+7
G5
F#5
G5
A5
6+9
A5
B5
C#6
D6
A5
F#3
D4
F#5
D3
D4
A5
E3
G5
F#5
D4
G5
A5
A3
G5
F#5
C#4
E5
D5
D4
F#4
A4
D4
F#4
6+7
D4
6+7
F#4
6+7
6+7
D4
F#5
E5
F#4
D5
C#5
E4
G4
A4
E4
G4
E5
E4
F#4
G4
F#4
E4
D4
C#4
B3
E5
A3
C#4
A4
A3
C#4
7+9
A3
7+9
C#4
7+9
7+9
G3
G5
F#5
C#4
E5
D5
F#3
D4
A4
F#3
D4
F#5
F#3
D3
E3
F#3
G3
A3
B3
C#4
F#5
D4
D5
E5
F#5
G5
A5
B5
C#6
D6
C#6
B5
6+9
A5
G5
6+9
F#5
E5
6+9
D5
E5
7+11
F#5
E5
E-5
E5
F#5
G5
A5
B5
A5
G5
7+11
F#5
E5
7+11
D5
C#5
8+11
B4
C#5
A3
D5
C#5
G3
A4
D5
F#3
D3
G5
C#3
A5
G5
A2
E5
G5
D3
F#5
E5
F#3
D5
C#5
A3
D5
C#5
G3
A4
D5
F#3
D3
G5
C#3
A5
G5
A2
E5
G5
D3
F#5
E5
F#3
D5
C#5
4+9
E5
C#5
A4
F#5
2+6+9
A5
F#5
D5
C#5
4+9
E5
C#5
A4
F#5
2+6+9
A5
F#5
D5
1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
1+2
B4
C#5
D5
E-5
A3
E5
C#4
B3
E5
D4
A5
C#4
G#5
E4
F#5
A3
E5
C#4
C#5
B3
D5
F#4
D4
D5
B3
E5
G#3
D5
B3
C#5
E

## Get All the Notes, from all the Midi Files

In [16]:
# import glob
# from pathlib import Path

# input_dir = Path.cwd()
# files = list(input_dir.rglob("*.mid"))

# notes = []
# for file in files:
#     song = converter.parse(file)
#     # Convert file into stream.Score object
#     # ..which just contains notes/chords
#     print("parsing", file)
#     file = file.resolve()
#     print(type(file))
#     elements_of_song = song.flat.notes
#     for ele in elements_of_song:
#         # If element is a note, store it's pitch
#         if(isinstance(ele, note.Note) == True):
#             tempNote = str(ele.pitch)
#             notes_of_song.append(tempNote)
#         elif(isinstance(ele, chord.Chord) == True):
#         # Else, element is a chord, split notes, and make string of them
#             tempChord = "+".join(str(x) for x in ele.normalOrder)
#             notes_of_song.append(tempChord)

In [18]:
notes = []

for file in glob.glob("mozart/*.mid"):
    midi = converter.parse(file) # Convert file into stream.Score Object
    print("parsing %s"%file)
    elements_to_parse = midi.flat.notes
    
    for elex in elements_to_parse:
        # If the element is a Note,  then store it's pitch
        if(isinstance(elex, chord.Chord) == True):
            notes.append("+".join(str(n) for n in elex.normalOrder))
        elif(isinstance(elex, note.Note) == True):
            noteString = str(elex.pitch)
            notes.append(noteString)
            # If the element is a Chord, split each note of chord and join them with +

parsing mozart\mz_311_1.mid
parsing mozart\mz_311_2.mid
parsing mozart\mz_311_3.mid
parsing mozart\mz_330_1.mid
parsing mozart\mz_330_2.mid
parsing mozart\mz_330_3.mid
parsing mozart\mz_331_1.mid
parsing mozart\mz_331_2.mid
parsing mozart\mz_331_3.mid
parsing mozart\mz_332_1.mid
parsing mozart\mz_332_2.mid
parsing mozart\mz_332_3.mid
parsing mozart\mz_333_1.mid
parsing mozart\mz_333_2.mid
parsing mozart\mz_333_3.mid
parsing mozart\mz_545_1.mid
parsing mozart\mz_545_2.mid
parsing mozart\mz_545_3.mid
parsing mozart\mz_570_1.mid
parsing mozart\mz_570_2.mid
parsing mozart\mz_570_3.mid


In [19]:
print(len(notes))
for n in notes:
    print(n)

55802
2+6+9
9+2
G5
2+6
F#5
E5
F#5
A5
4+7
G5
F#5
G5
A5
6+9
A5
B5
C#6
D6
A5
F#3
D4
F#5
D3
D4
A5
E3
G5
F#5
D4
G5
A5
A3
G5
C#4
F5
2
F#5
G5
2+6
F#5
E5
F#5
A5
4+7
G5
F#5
G5
A5
6+9
A5
B5
C#6
D6
A5
F#3
D4
F#5
D3
D4
A5
E3
G5
F#5
D4
G5
A5
A3
G5
F#5
C#4
E5
D5
D4
F#4
A4
D4
F#4
6+7
D4
6+7
F#4
6+7
6+7
D4
F#5
E5
F#4
D5
C#5
E4
G4
A4
E4
G4
E5
E4
F#4
G4
F#4
E4
D4
C#4
B3
E5
A3
C#4
A4
A3
C#4
7+9
A3
7+9
C#4
7+9
7+9
G3
G5
F#5
C#4
E5
D5
F#3
D4
A4
F#3
D4
F#5
F#3
D3
E3
F#3
G3
A3
B3
C#4
F#5
D4
D5
E5
F#5
G5
A5
B5
C#6
D6
C#6
B5
6+9
A5
G5
6+9
F#5
E5
6+9
D5
E5
7+11
F#5
E5
E-5
E5
F#5
G5
A5
B5
A5
G5
7+11
F#5
E5
7+11
D5
C#5
8+11
B4
C#5
A3
D5
C#5
G3
A4
D5
F#3
D3
G5
C#3
A5
G5
A2
E5
G5
D3
F#5
E5
F#3
D5
C#5
A3
D5
C#5
G3
A4
D5
F#3
D3
G5
C#3
A5
G5
A2
E5
G5
D3
F#5
E5
F#3
D5
C#5
4+9
E5
C#5
A4
F#5
2+6+9
A5
F#5
D5
C#5
4+9
E5
C#5
A4
F#5
2+6+9
A5
F#5
D5
1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
9+1+4
1+2
B4
C#5
D5
E-5
A3
E5
C#4
B3
E5
D4
A5
C#4
G#5
E4
F#5
A3
E5
C#4
C#5
B3
D5
F#4
D4
D5
B3
E5
G#3
D5
B3
C#5
E3
B4
D4
G#4
C#4
A4
E4

## Saving the file, containing all Notes

In [20]:
import pickle

with open("notes2", 'wb') as filepath:
    pickle.dump(notes, filepath)

In [21]:
# 'wb' --> Write-binary mode (to write data in a file)
# 'rb' --> Read-binary mode (to read data from a file)

with open("notes2", 'rb') as f:
    notes = pickle.load(f)
    # This will load whole file-data to variable notes

#### Count of Unique Elements in Music :-

In [22]:
# In 'wb' and 'rb', same file needs to be referenced.
# Else, Will give error --> "Ran out of data".
print(len(set(notes)))
# This will print unique no. of elements.
# i.e. --> Unique notes/chords in all files.
numElements = len(set(notes))

229


In [23]:
print(notes[:100])

['2+6+9', '9+2', 'G5', '2+6', 'F#5', 'E5', 'F#5', 'A5', '4+7', 'G5', 'F#5', 'G5', 'A5', '6+9', 'A5', 'B5', 'C#6', 'D6', 'A5', 'F#3', 'D4', 'F#5', 'D3', 'D4', 'A5', 'E3', 'G5', 'F#5', 'D4', 'G5', 'A5', 'A3', 'G5', 'C#4', 'F5', '2', 'F#5', 'G5', '2+6', 'F#5', 'E5', 'F#5', 'A5', '4+7', 'G5', 'F#5', 'G5', 'A5', '6+9', 'A5', 'B5', 'C#6', 'D6', 'A5', 'F#3', 'D4', 'F#5', 'D3', 'D4', 'A5', 'E3', 'G5', 'F#5', 'D4', 'G5', 'A5', 'A3', 'G5', 'F#5', 'C#4', 'E5', 'D5', 'D4', 'F#4', 'A4', 'D4', 'F#4', '6+7', 'D4', '6+7', 'F#4', '6+7', '6+7', 'D4', 'F#5', 'E5', 'F#4', 'D5', 'C#5', 'E4', 'G4', 'A4', 'E4', 'G4', 'E5', 'E4', 'F#4', 'G4', 'F#4', 'E4']


## Preparing Sequenctial Data for LSTM :-

In Markov chain, we have a window size. So choosing a sequence length. This length also states, how many elements are considered in a LSTM layer.

In [33]:
sequenceLength = 100
# Will give 100 elements to a layer, and will predict output for next layer using them.

uniqueNotes = sorted(set(notes))
print(uniqueNotes)

['0', '0+1', '0+1+3', '0+2', '0+2+6', '0+2+7', '0+3', '0+3+5', '0+3+6', '0+3+6+9', '0+3+7', '0+4', '0+4+6', '0+4+7', '0+5', '0+6', '1', '1+2', '1+2+4', '1+3', '1+3+7', '1+4', '1+4+7', '1+4+7+10', '1+4+7+9', '1+5', '1+5+8', '1+6', '1+7', '10', '10+0', '10+0+2', '10+0+3', '10+0+4', '10+1', '10+1+4', '10+11', '10+11+1', '10+2', '10+2+3', '10+2+5', '10+3', '11', '11+0', '11+0+2', '11+1', '11+1+2', '11+2', '11+2+4', '11+2+5', '11+2+5+7', '11+2+6', '11+3', '11+3+6', '11+4', '2', '2+3', '2+3+5', '2+4', '2+4+5', '2+4+6', '2+4+8', '2+4+9', '2+5', '2+5+7', '2+5+8', '2+5+8+11', '2+5+9', '2+6', '2+6+9', '2+7', '2+8', '3', '3+4', '3+4+5', '3+4+6', '3+5', '3+5+7', '3+5+9', '3+6', '3+6+9', '3+7', '3+7+10', '3+8', '3+9', '4', '4+10', '4+5', '4+5+7', '4+6', '4+6+7', '4+6+8', '4+7', '4+7+10', '4+7+10+0', '4+7+11', '4+7+9', '4+8', '4+8+11', '4+9', '5', '5+10', '5+11', '5+6', '5+7', '5+7+10', '5+7+11', '5+7+9', '5+7+9+10+0', '5+8', '5+8+0', '5+8+10', '5+8+11', '5+9', '5+9+0', '6', '6+10', '6+10+1', '6+11'

#### Mapping Strings (unique-elements) to Integer values :-

In [41]:
# As ML models work with numerial data only, will map each string with a number.
noteMap = dict((ele, num) for num, ele in enumerate(uniqueNotes))

for ele in noteMap:
    print(ele, " : ", noteMap[ele])

0  :  0
0+1  :  1
0+1+3  :  2
0+2  :  3
0+2+6  :  4
0+2+7  :  5
0+3  :  6
0+3+5  :  7
0+3+6  :  8
0+3+6+9  :  9
0+3+7  :  10
0+4  :  11
0+4+6  :  12
0+4+7  :  13
0+5  :  14
0+6  :  15
1  :  16
1+2  :  17
1+2+4  :  18
1+3  :  19
1+3+7  :  20
1+4  :  21
1+4+7  :  22
1+4+7+10  :  23
1+4+7+9  :  24
1+5  :  25
1+5+8  :  26
1+6  :  27
1+7  :  28
10  :  29
10+0  :  30
10+0+2  :  31
10+0+3  :  32
10+0+4  :  33
10+1  :  34
10+1+4  :  35
10+11  :  36
10+11+1  :  37
10+2  :  38
10+2+3  :  39
10+2+5  :  40
10+3  :  41
11  :  42
11+0  :  43
11+0+2  :  44
11+1  :  45
11+1+2  :  46
11+2  :  47
11+2+4  :  48
11+2+5  :  49
11+2+5+7  :  50
11+2+6  :  51
11+3  :  52
11+3+6  :  53
11+4  :  54
2  :  55
2+3  :  56
2+3+5  :  57
2+4  :  58
2+4+5  :  59
2+4+6  :  60
2+4+8  :  61
2+4+9  :  62
2+5  :  63
2+5+7  :  64
2+5+8  :  65
2+5+8+11  :  66
2+5+9  :  67
2+6  :  68
2+6+9  :  69
2+7  :  70
2+8  :  71
3  :  72
3+4  :  73
3+4+5  :  74
3+4+6  :  75
3+5  :  76
3+5+7  :  77
3+5+9  :  78
3+6  :  79
3+6+9  :  80
3+7

--> As sequenceLength is 100, will take first 100 data to input, and 101st data as output.
--> For next iteration, take (2-101) data points as input, and 102nd data as output.
--> So on...  Sliding window (of size 100) as input, & next 1 data as output.

--> So, total we will get (len(notes) - sequenceLength) datapoints.

In [42]:
networkInput = [] # input-data
networkOutput = [] # will try to get output, using input

for i in range(len(notes) - sequenceLength):
    inputSeq = notes[i : i+sequenceLength] # 100 string-values
    outputSeq = notes[i + sequenceLength] # 1 string-value
    # Currently, inputSeq & outputSeq has strings.
    # Use map, to convert it to integer-values.
    # ..as ML-algorithm works only on numerical data.
    for tempStr in inputSeq:
        tempNum = noteMap[tempStr]
        networkInput.append(tempNum)
    for temp in outputSeq:
        tempNum = noteMap[tempStr]
        networkOutput.append(tempNum)

In [46]:
print(len(networkInput))
print(len(networkOutput))

5570200
135923


#### Create ready-data for Neural Network :-