# Encoding a MIDI file into note-wise and chord-wise text files
***

In [2]:
import argparse
import random
import os
import numpy as np
from math import floor
import music21
from pathlib import Path

music21: Certain music21 functions might need the optional package matplotlib;
                  if you run into errors, install it by following the instructions at
                  http://mit.edu/music21/doc/installing/installAdditional.html


## 1a: Store midi file as a music21 midifile object
***

In [4]:
# Path of the MIDI file to encode (an alternative input is kept commented out).
# fname = "../midi-files/mozart/sonat-3.mid"
fname = "midi-files/Requiem/Requiem_-_Mozart_-_8._Lacrimosa_Piano_Truc.mid"

# Parse the file into a music21 MidiFile object.
mf = music21.midi.MidiFile()
mf.open(fname)
try:
    mf.read()
finally:
    # Always release the file handle, even if parsing fails part-way through.
    mf.close()
mf

<MidiFile 2 tracks
  <MidiTrack 0 -- 454 events
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent TIME_SIGNATURE, t=None, track=0, channel=None, data=b'\x0c\x03\x18\x08'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent KEY_SIGNATURE, t=None, track=0, channel=None, data=b'\xff\x00'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent SET_TEMPO, t=None, track=0, channel=None, data=b'\x0e\x15\xc5'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent CONTROLLER_CHANGE, t=None, track=0, channel=1, parameter1=121, parameter2=0>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PROGRAM_CHANGE, t=None, track=0, channel=1, data=0>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent CONTROLLER_CHANGE, t=None, track=0, channel=1, parameter1=7, parameter2=100>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent CONTROLLER_CHANGE, t=None, track=0, channel=1, parameter1=10, 

## 1b: Convert midifile object into stream object
***

In [5]:
# Translate the parsed MidiFile into a music21 stream and display its repr.
midi_stream=music21.midi.translate.midiFileToStream(mf)
midi_stream

<music21.stream.Score 0x7efc4110cba8>

Here we explore 3 ways in which the stream object can be represented:
1. Graphical score
2. Audio player
3. Text

In [6]:
# Render the stream with show()'s default format (the graphical score).
# NOTE(review): the recorded output is a SubConverterException asking for MuseScore
# to be configured in .music21rc — confirm the setup before running this cell.
midi_stream.show()

SubConverterException: To create PNG files directly from MusicXML you need to download MuseScore and put a link to it in your .music21rc via Environment.

In [6]:
# Show the stream in 'midi' format (the audio-player representation listed above;
# no output was recorded for this cell).
midi_stream.show('midi')

In [5]:
# Dump the stream as indented text: one line per element, each prefixed by a
# number in braces (the element's offset).
midi_stream.show('text')

{0.0} <music21.stream.Part 0x10a4a0940>
    {0.0} <music21.instrument.Piano Piano>
    {0.0} <music21.tempo.MetronomeMark adagietto Quarter=65.0>
    {0.0} <music21.key.Key of F major>
    {0.0} <music21.meter.TimeSignature 12/8>
    {0.0} <music21.note.Rest rest>
    {0.5} <music21.note.Note C#>
    {1.0} <music21.note.Note D>
    {1.5} <music21.note.Rest rest>
    {2.0} <music21.note.Note A>
    {2.5} <music21.note.Note B->
    {3.0} <music21.note.Rest rest>
    {3.5} <music21.note.Note D>
    {4.0} <music21.note.Note C#>
    {4.5} <music21.note.Rest rest>
    {5.0} <music21.note.Note C>
    {5.5} <music21.note.Note B->
    {6.0} <music21.note.Rest rest>
    {6.5} <music21.note.Note A>
    {7.0} <music21.note.Note D>
    {7.5} <music21.note.Rest rest>
    {8.0} <music21.note.Note B->
    {8.5} <music21.note.Note G>
    {9.0} <music21.note.Rest rest>
    {9.5} <music21.note.Note E>
    {10.0} <music21.note.Note F>
    {10.5} <music21.note.Rest rest>
    {11.0} <music21.note.Note A>
  

## 1c: Define the parameters of encoding
***

In [6]:
# Encoding parameters read by the later cells.
# sample_freq:    multiplier applied to offsets and durations (in quarter lengths)
#                 to turn them into integer timesteps
# note_range:     size of the pitch axis of the score array
# note_offset:    subtracted from each MIDI pitch number to obtain a pitch index
# chamber:        when falsy, the per-note instrument lookup is skipped and every
#                 note is stored under instrument 0
# numInstruments: size of the instrument axis of the score array
sample_freq=12
note_range=62
note_offset=33
chamber=0
numInstruments=1

## 1d: Set your midi stream object as s

And you can explore the midi stream object
***

In [7]:
# Alias the stream as `s` (the name used by the following cells) and print its
# total duration in quarter lengths.
s = midi_stream
print(s.duration.quarterLength)

48.0


In [8]:
# List every attribute and method name available on the stream object.
dir(s)

['_DOC_ATTR',
 '_DOC_ORDER',
 '__add__',
 '__bool__',
 '__class__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_accumulatedSeconds',
 '_activeSite',
 '_activeSiteStoredOffset',
 '_atSoundingPitch',
 '_cache',
 '_classListFullyQualifiedCacheDict',
 '_classSetCacheDict',
 '_classTupleCacheDict',
 '_consolidateLayering',
 '_deepcopySubclassable',
 '_derivation',
 '_durSpanOverlap',
 '_duration',
 '_editorial',
 '_elements',
 '_endElements',
 '_findLayering',
 '_fixMeasureNumbers',
 '_getActiveSite',
 '_getAtSoundingPitch',
 '_getClef',
 '_getDerivation',
 '_

In [9]:
# Inspect the elements held by the first item of the score
# (the Part shown at the top of the text dump).
s[0].elements

(<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark adagietto Quarter=65.0>,
 <music21.key.Key of F major>,
 <music21.meter.TimeSignature 12/8>,
 <music21.note.Rest rest>,
 <music21.note.Note C#>,
 <music21.note.Note D>,
 <music21.note.Rest rest>,
 <music21.note.Note A>,
 <music21.note.Note B->,
 <music21.note.Rest rest>,
 <music21.note.Note D>,
 <music21.note.Note C#>,
 <music21.note.Rest rest>,
 <music21.note.Note C>,
 <music21.note.Note B->,
 <music21.note.Rest rest>,
 <music21.note.Note A>,
 <music21.note.Note D>,
 <music21.note.Rest rest>,
 <music21.note.Note B->,
 <music21.note.Note G>,
 <music21.note.Rest rest>,
 <music21.note.Note E>,
 <music21.note.Note F>,
 <music21.note.Rest rest>,
 <music21.note.Note A>,
 <music21.note.Note C#>,
 <music21.note.Note F>,
 <music21.note.Note C#>,
 <music21.note.Note D>,
 <music21.note.Note A>,
 <music21.note.Note A>,
 <music21.note.Note B->,
 <music21.note.Note G>,
 <music21.note.Note D>,
 <music21.note.Note C#>,
 <music21.chord.C

## Step 2: Encode stream into chordwise text

Using the following function:  

**stream_to_chordwise(s, chamber, note_range, note_offset, sample_freq)**
***

## 2a. Create empty array (3D-tensor) to store encoding

In [10]:
# Number of timesteps: the piece length in quarter lengths scaled by the
# sampling frequency, plus one extra step.
total_quarters = s.duration.quarterLength
maxTimeStep = floor(total_quarters * sample_freq) + 1

# Empty score tensor with axes (timestep, instrument, pitch index).
score_shape = (maxTimeStep, numInstruments, note_range)
score_arr = np.zeros(score_shape)

print(maxTimeStep, "\n", score_arr.shape)

577 
 (577, 1, 62)


## 2b. Define 2 types of filters (Note and Chord) to extract data from stream object
***

In [11]:
# Define two class filters: one matching Note objects and one matching Chord objects.
# They are handled separately because a note carries a single pitch, whereas a chord
# holds several pitches and needs an extra loop to unpack them.

noteFilter=music21.stream.filters.ClassFilter('Note')
chordFilter=music21.stream.filters.ClassFilter('Chord')

## 2c. Recurse through the stream object using Note filter to get a list of notes
***

In [12]:
# Walk the whole stream recursively, keeping only the elements that pass the
# Note filter, and print each one.
for n in s.recurse().addFilter(noteFilter):
    print(n)

<music21.note.Note C#>
<music21.note.Note D>
<music21.note.Note A>
<music21.note.Note B->
<music21.note.Note D>
<music21.note.Note C#>
<music21.note.Note C>
<music21.note.Note B->
<music21.note.Note A>
<music21.note.Note D>
<music21.note.Note B->
<music21.note.Note G>
<music21.note.Note E>
<music21.note.Note F>
<music21.note.Note A>
<music21.note.Note C#>
<music21.note.Note F>
<music21.note.Note C#>
<music21.note.Note D>
<music21.note.Note A>
<music21.note.Note A>
<music21.note.Note B->
<music21.note.Note G>
<music21.note.Note D>
<music21.note.Note C#>
<music21.note.Note B->
<music21.note.Note A>
<music21.note.Note C#>
<music21.note.Note D>
<music21.note.Note A>
<music21.note.Note A>
<music21.note.Note B->
<music21.note.Note G>
<music21.note.Note D>
<music21.note.Note C#>
<music21.note.Note B->
<music21.note.Note A>
<music21.note.Note F>
<music21.note.Note D>
<music21.note.Note A>
<music21.note.Note E>
<music21.note.Note A>
<music21.note.Note F>
<music21.note.Note D>
<music21.note.Note

In [13]:
# Each note is stored as a 4-tuple:
#   (pitch index, start timestep, duration in timesteps, instrument ID)
#
# pitch index : the note's MIDI pitch number minus note_offset
# start       : n.offset (position within its containing stream, in quarter
#               lengths) scaled by sample_freq and floored to an integer timestep
# duration    : n.duration.quarterLength (a quarter note is 1.0, a half note 2.0)
#               scaled by sample_freq and floored to a whole number of timesteps
notes = []
instrumentID = 0

for n in s.recurse().addFilter(noteFilter):
    if chamber:
        # assign_instrument: 0 means piano-like, 1 means violin-like, -1 means neither
        instrumentID = assign_instrument(n.activeSite.getInstrument())
        if instrumentID == -1:
            print("error. unknown instrument")

    relative_pitch = n.pitch.midi - note_offset
    start_step = floor(n.offset * sample_freq)
    length_steps = floor(n.duration.quarterLength * sample_freq)
    notes.append((relative_pitch, start_step, length_steps, instrumentID))

In [14]:
# Number of single notes collected so far, then the last five
# (pitch index, start timestep, duration, instrument) tuples.
print(len(notes))
notes[-5:]

70


[(35, 534, 6, 0),
 (48, 546, 6, 0),
 (36, 552, 6, 0),
 (36, 564, 6, 0),
 (24, 570, 6, 0)]

## 2d. Recurse through the stream object using Chord filter and add to the list of notes
***

In [15]:
# Repeat the extraction with the chord filter, appending one tuple per pitch to `notes`.

for c in s.recurse().addFilter(chordFilter):
    # Unlike a note, a chord carries several pitches, e.g.
    # (<music21.pitch.Pitch D5>, <music21.pitch.Pitch F5>), so they are unpacked below.
    pitchesInChord=c.pitches

    if chamber:
        # Look the instrument up from the chord `c` itself, not from `n`, which is
        # only the leftover loop variable of the note cell.
        # NOTE(review): assign_instrument is not defined in the visible cells — it
        # must be provided before running with chamber enabled.
        instrumentID=assign_instrument(c.activeSite.getInstrument())
        if instrumentID==-1:
            break # return []

    # Same tuple layout as for single notes; every pitch shares the chord's
    # start timestep and duration.
    for p in pitchesInChord:
        notes.append((p.midi-note_offset, floor(c.offset*sample_freq),
                      floor(c.duration.quarterLength*sample_freq), instrumentID))

In [16]:
# Total number of entries after the chord pitches were appended, then the last five tuples.
print(len(notes))
notes[-5:]

172


[(25, 522, 9, 0),
 (12, 540, 9, 0),
 (24, 540, 9, 0),
 (0, 558, 9, 0),
 (12, 558, 9, 0)]

## 2e. Using the list of notes, populate the array

`score_arr` is a 3D array indexed by timestep, instrument, and note (pitch index)
***

In [17]:
# `notes` holds every note and chord pitch of the piece, in no particular order,
# as (pitch index, start timestep, duration in timesteps, instrument) tuples.

for rel_pitch, start_step, n_steps, instrument in notes:

    # Fold pitches that fall out of range back in, 12 steps (one octave) at a time:
    # anything below 0 is raised, anything at or above note_range is lowered.
    pitch = rel_pitch
    while pitch < 0:
        pitch += 12
    while pitch >= note_range:
        pitch -= 12

    # Instrument 1 (violin) gets a higher floor: its lowest note is index 22.
    if instrument == 1:
        while pitch < 22:
            pitch += 12

    # score_arr axes are [timestep, instrument, pitch index].
    # 1 marks the timestep where the note is struck, 2 marks every following
    # timestep during which it is held.
    score_arr[start_step, instrument, pitch] = 1                           # Strike note
    score_arr[start_step + 1:start_step + n_steps, instrument, pitch] = 2  # Continue holding note

In [18]:
# Show the array shape, then the rows of instrument 0 for the first 5 timesteps.
print(score_arr.shape)
# print first 5 timesteps
score_arr[:5,0,]

(577, 1, 62)


array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0.,
        2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0.,
        2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0.,
        2., 0

In [19]:
# Preview, for the first timestep only, the reversed list of instrument indices
# that the next cell iterates over (highest index first).
for timestep in score_arr:
    print(list(reversed(range(len(timestep)))))
    break

[0]


## 2f. Convert each timestep from a list into a string
***

In [20]:
# Letter used to tag each instrument's line: index 0 is piano, index 1 is violin.
instr = {0: "p", 1: "v"}

score_string_arr = []

# One string per (timestep, instrument): the instrument letter followed by one
# digit (0, 1 or 2) per pitch index.
for timestep in score_arr:
    # Walk the instruments from the highest index down, so a violin line
    # comes before the piano line of the same timestep.
    for i in range(len(timestep) - 1, -1, -1):
        digits = ''.join(str(int(note)) for note in timestep[i])
        score_string_arr.append(instr[i] + digits)

In [21]:
# Container type and number of encoded timesteps, then the first five strings.
print(type(score_string_arr), len(score_string_arr))
score_string_arr[:5]

<class 'list'> 577


['p00000000000000000000000000000100100000000000000000000000000000',
 'p00000000000000000000000000000200200000000000000000000000000000',
 'p00000000000000000000000000000200200000000000000000000000000000',
 'p00000000000000000000000000000200200000000000000000000000000000',
 'p00000000000000000000000000000200200000000000000000000000000000']

### Output of stream_to_chordwise() 
is a list of strings where each element is the notes that will be played or held on at each timestep
<br/><br/><br/>

## Step 3: Add modulations (12 pitch-shifted copies of the score, presumably for data augmentation)

Using the function:  
**add_modulations(score_string_arr)**
***

In [24]:
modulated = []
# Width of the pitch window: the string length minus the instrument letter.
note_range = len(score_string_arr[0]) - 1
zero_pad = "0" * 6

# i is the start of the window inside the zero-padded string; sliding it from
# 0 to 11 yields 12 shifted copies of every timestep.
for i in range(12):
    for chord in score_string_arr:
        # Split off the instrument letter (e.g. 'p') from the digits.
        letter, digits = chord[0], chord[1:]

        # Pad 6 zeros on each side, then cut a window of note_range digits.
        # e.g. with digits "12345" padded to "00012345000" and a window of 5:
        # "00012", "00123", "01234", "12345", "23450", "34500", "45000"
        padded = zero_pad + digits + zero_pad
        modulated.append(letter + padded[i:i + note_range])

**Example for i = 0**  
Step1: p00000000000000000000000000000100100000  
Step2: 00000000000000000000000000000000000100100000000000  
Step3: ['p00000000000000000000000000000000000100']

In [25]:
# Expected length: (number of timesteps) * 12, i.e. one copy of the score per shift.
print(len(modulated))
modulated[:5]

6924


['p00000000000000000000000000000000000100100000000000000000000000',
 'p00000000000000000000000000000000000200200000000000000000000000',
 'p00000000000000000000000000000000000200200000000000000000000000',
 'p00000000000000000000000000000000000200200000000000000000000000',
 'p00000000000000000000000000000000000200200000000000000000000000']

## Step 4: Create notewise encoding from chordwise encoding

Using the function:  
**chord_to_notewise(long_string, sample_freq)**
***

## 4a. Create a list of notewise events
***

In [22]:
# Input of this step is the list of modulated chord strings.
# NOTE(review): the recorded outputs of the following cells (907 tokens, starting
# 'p29', 'p32') match score_string_arr rather than modulated — confirm which
# input is intended.
long_string = modulated

translated_list = []
total_steps = len(long_string)

# Each timestep is turned into events:
#   "<instr><index>"    a note is struck
#   "end<instr><index>" a sounding note stops
#   "wait"              the timestep is over (emitted for piano lines only)
for j, current in enumerate(long_string):
    # Split the instrument letter from the digits, e.g. 'p' + '000...100'.
    prefix = current[0]
    chord = current[1:]

    # The next timestep of the same instrument tells us when notes end;
    # it stays empty when no later timestep for that instrument exists.
    next_chord = next(
        (long_string[k] for k in range(j + 1, total_steps) if long_string[k][0] == prefix),
        "",
    )[1:]

    # i is the pitch index, state is its digit at this timestep.
    for i, state in enumerate(chord):
        if state == "0":
            continue

        # Event name: instrument letter plus pitch index, e.g. p22, p16, p4.
        note = prefix + str(i)

        # "1" means the note is struck now; "2" means it is merely held, so
        # nothing is emitted for it here.
        if state == "1":
            translated_list.append(note)

        # The note ends if it drops back to "0" in the next timestep, or if
        # there is no next timestep at all.
        if next_chord == "" or next_chord[i] == "0":
            translated_list.append("end" + note)

    # wait indicates end of every timestep
    if prefix == "p":
        translated_list.append("wait")

In [23]:
# Number of notewise events, then the first ten of them.
print(len(translated_list))
translated_list[:10]

907


['p29', 'p32', 'wait', 'wait', 'wait', 'wait', 'wait', 'wait', 'p40', 'wait']

## 4b. Convert list into string and stack "waits"
***

In [24]:
# Turn the list of events into a single space-separated string, collapsing runs
# of consecutive "wait" events into one "wait<N>" token (eg. "wait wait" => "wait2").
#
# translated_list is only read, never modified, so this cell can be re-run and
# always produces the same string.

tokens = []
i = 0

while i < len(translated_list):
    token = translated_list[i]

    # wait_count is how many list entries this token consumes (1 for a note event).
    wait_count = 1
    if token == 'wait':
        # Absorb the following waits; the run stops growing once wait_count
        # exceeds sample_freq*2, and a new "wait<N>" token starts after it.
        while (wait_count <= sample_freq*2
               and i+wait_count < len(translated_list)
               and translated_list[i+wait_count] == 'wait'):
            wait_count += 1
        token = 'wait' + str(wait_count)

    tokens.append(token)
    i += wait_count

# Every token, including the last one, is followed by a single space.
translated_string = "".join(token + " " for token in tokens)

In [25]:
# Preview the first 100 characters of the notewise string.
translated_string[:100]

'p29 p32 wait6 p40 wait2 endp29 endp32 wait3 endp40 wait1 p41 wait5 endp41 wait1 p32 p36 wait6 p48 wa'

In [26]:
# Length of the notewise string in characters.
len(translated_string)

2993

## Step 5. Export Notewise and Chordwise encodings into .txt files
***

## 5a. Check variables containing Notewise and Chordwise encodings
***

In [28]:
# Sanity check before exporting: the chordwise encoding is still a list of
# strings, while the notewise encoding is already one string.
print("chordwise encoding type and length:", type(modulated), len(modulated))
print("notewise encoding type and length:", type(translated_string), len(translated_string))


notewise encoding type and length: <class 'str'> 2993


## 5b. Turn encodings into strings and export into respective folders
***

In [30]:
# default settings: sample_freq=12, note_range=62

chordwise_folder = "../txt-files/chordwise/note_range62/sample_freq12/"
notewise_folder = "../txt-files/notewise/note_range62/sample_freq12/"

# Name the output after the MIDI file itself (file name without directory or
# extension) instead of slicing fname at fixed positions, which only works for
# one particular path length.
out_name = Path(fname).stem + ".txt"

# The chordwise encoding is a list of strings and is joined with spaces;
# the notewise encoding is already a single string.
exports = (
    (chordwise_folder, " ".join(modulated)),
    (notewise_folder, translated_string),
)

for folder, payload in exports:
    out_dir = Path(folder)
    # Create the target folder if it is missing so the write cannot fail on it.
    out_dir.mkdir(parents=True, exist_ok=True)
    # The context manager closes the file even if the write raises.
    with open(out_dir / out_name, "w") as f:
        f.write(payload)

# Done!

# Done!