In [615]:
import mido
import sys
import binascii
import pprint
from collections import OrderedDict
from mido import Message, MidiFile, MidiTrack
from enum import Enum

In [616]:
'''
C C# D D# E F F# G G# A A# B
'''
NOTES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
MAP_DEC_TO_NOTE = {}
MAP_NOTE_TO_DEC = {}
octave = -1
i = 0
j = 0
while i<128:
    MAP_DEC_TO_NOTE[ i ] = NOTES[j] + '_' + str(octave)
    MAP_NOTE_TO_DEC[ NOTES[j] + '_' + str(octave) ] = i
    if j == len(NOTES)-1:
        j = 0
        octave += 1
    else:
        j += 1
    i += 1

### State machine to parse raw midi track

In [617]:
### Need to deal with variable length -- for both "len" of meta-event, and "delta-time"
### Need to deal with running status -- http://midi.teragonaudio.com/tech/midispec/run.htm

MSB_1 = ['8', '9', 'a', 'b', 'c', 'd', 'e', 'f']

## TODO: ff00
## TODO: ff20, 21
## TODO: ff7f
## TODO: running status for all events

END_OF_TRACK = 'ff2f00'
TEXT_META_EVENTS = [f'ff0{i+1}' for i in range(9)]
TEMPO = 'ff5103'
SMPTE = 'ff5405'
TIME_SIG = 'ff5804'
KEY_SIG = 'ff5902'

class Track:
    def __init__(self, track):
        self.track = track
        self._parse()
    
    def _get_varlen_value (self, string):
        value = ''
        j = 0
        while True:
            if string[j] not in MSB_1: # reached last byte of variable-length field
                value = string[0:j+2]
                break
            else:
                j += 2
        return value
        
    def _varFieldToInt(self, hexstring):
        ret = 0
        current_byte = ''

        i=0
        while len(hexstring):
            current_byte = "0x" + hexstring[0:2]
            current_integer = int(current_byte, 16)
            ret = (ret << 7) | (current_integer & 0x7f)
            if not current_integer & 0x80:
                return ret
            hexstring = hexstring[2:]

            i += 1
            if i>1000:
                print('error')
                break
    
    def _parse(self):
        D = []
        
        assert self.track[0:8] == '4d54726b'
        self.chunklen = self.track[8:8+8]
        D.append({'type':'header', 'value':'4d54726b'})
        D.append({'type':'chunklen', 'value':self.track[8:8+8]})
        
        track = self.track[16:]
        i = 0
        state = 'delta-time' # state in ['delta-time', 'event']
        while True:
            if state == 'delta-time':
                value = self._get_varlen_value(track[i:])
                D.append( {'type':'delta time', 'value':value} )
                i += len(value)
                state = 'event'
                continue
                
            else: # state == 'event'
                running = False
                ## first decode event
                if track[i] not in MSB_1: # running status in use
                    event = last_event # for note on / note off / control change / program change - include channel n
                    running = True
                else:
                    track_first6 = track[i:i+6]
                    track_first4 = track[i:i+4]
                    track_first = track[i]
                    if track_first6 == END_OF_TRACK: # end of track; ff 2f 00
                        event = END_OF_TRACK
                    elif track_first6 == TIME_SIG: # ff 58 04 nn dd cc bb
                        event = TIME_SIG
                    elif track_first6 == KEY_SIG: # ff 59 02 sf mi
                        event = KEY_SIG
                    elif track_first6 == TEMPO: # ff 51 03 tt tt tt
                        event = TEMPO
                    elif track_first6 == SMPTE: # ff 54 05 hr mn se fr ff
                        event = SMPTE
                    elif track_first4 in TEXT_META_EVENTS:
                        event = track_first4
                    elif track_first in ['b','c','8','9']:
                        event = track[i:i+2]
                    
                ## then parse event and append to event list
                if event == END_OF_TRACK: # end of track; ff 2f 00
                    D.append( {'type':'end of track', 'type hex':END_OF_TRACK, 'value':''} )
                    break
                
                elif event == TIME_SIG: # ff 58 04 nn dd cc bb
                    D.append( {'type':'time signature', 'type hex':track_first6, 'value':track[i+6:i+6+8]} )
                    i += 14
                    
                elif event == KEY_SIG: # ff 59 02 sf mi
                    D.append( {'type':'key signature', 'type hex':track_first6, 'value':track[i+6:i+6+4]} )
                    i += 10
                
                elif event == TEMPO: # ff 51 03 tt tt tt
                    D.append( {'type':'tempo', 'type hex':track_first6, 'value':track[i+6:i+6+6]} )
                    i += 12
                
                elif event == SMPTE: # ff 54 05 hr mn se fr ff
                    D.append( {'type':'smpte', 'type hex':track_first6, 'value':track[i+6:i+6+10]} )
                    i += 16
                    
                # sequence/track name (ff 03 length text)
                # or marker (ff 06 length text)
                # or cue (ff 07 length text)
                elif event in TEXT_META_EVENTS:
                    if running:
                        name_length_varlen = self._get_varlen_value(track)
                        name_length = self._varFieldToInt(name_length_varlen)
                        name = track[i + len(name_length_varlen) : i + len(name_length_varlen) + name_length*2]
                        D.append( {'type':'text meta event', 'type hex':event, 'omitted':True,
                                   'length_varlen':name_length_varlen, 'name':name} )
                        i += len(name_length_varlen) + name_length*2 # name_length * byte, every byte == 2 hex
                    else:
                        name_length_varlen = self._get_varlen_value(track[i+4:])
                        name_length = self._varFieldToInt(name_length_varlen)
                        name = track[i + 4 + len(name_length_varlen) : i + 4 + len(name_length_varlen) + name_length*2]
                        D.append( {'type':'text meta event', 'type hex':track_first4, 'omitted':False,
                                   'length_varlen':name_length_varlen, 'name':name} )
                        i += 4 + len(name_length_varlen) + name_length*2 # name_length * byte, every byte == 2 hex
                
                # Assuming only bn/cn/8n/9n can use running-status
                elif event[0] == 'b': # control change; bn controller value
                    if running:
                        D.append( {'type':'control change', 'type hex':'b', 'omitted':True,
                                   'channel':event[1], 'controller':track[i:i+2], 'value':track[i+2:i+4]} )
                        i += 4
                    else:
                        D.append( {'type':'control change', 'type hex':'b', 'omitted':False,
                                   'channel':track[i+1], 'controller':track[i+2:i+4], 'value':track[i+4:i+6]} )
                        i += 6
                
                elif event[0] == 'c': # program change; cn program
                    if running:
                        D.append( {'type':'program change', 'type hex':'c', 'omitted':True,
                                   'channel':event[1], 'program':track[i:i+2]} )
                        i += 2
                    else:
                        D.append( {'type':'program change', 'type hex':'c', 'omitted':False,
                                   'channel':track[i+1], 'program':track[i+2:i+4]} )
                        i += 4
                
                elif event[0] == '9': # note on; 9n note velocity
                    if running:
                        D.append( {'type':'note on', 'type hex':'9', 'omitted':True,
                                   'channel':event[1], 'note':track[i:i+2], 'velocity':track[i+2:i+4]} )
                        i += 4
                    else:
                        D.append( {'type':'note on', 'type hex':'9', 'omitted':False,
                                   'channel':track[i+1], 'note':track[i+2:i+4], 'velocity':track[i+4:i+6]} )
                        i += 6
                    
                elif track_first == '8': # note off; 8n note velocity
                    if running:
                        D.append( {'type':'note off', 'type hex':'8', 'omitted':True,
                                   'channel':event[1], 'note':track[i:i+2], 'velocity':track[i+2:i+4]} )
                        i += 4
                    else:
                        D.append( {'type':'note off', 'type hex':'8', 'omitted':False,
                                   'channel':track[i+1], 'note':track[i+2:i+4], 'velocity':track[i+4:i+6]} )
                        i += 6
                
                last_event = event
                state = 'delta-time'
                
        self.segs = D    
    
    def segs_to_hexstring(self): # segs is list of events, each event is a dictionary
        hexstring = ''

        for event in self.segs:
            hexstring += Track.event_to_hexstring(event)
        return hexstring

    @staticmethod
    def event_to_hexstring(event):
        if event['type'] == 'header' or event['type'] == 'chunklen' or event['type'] == 'delta time':
            hexstring = event['value']

        elif event['type'] == 'end of track':
            hexstring = event['type hex']

        elif event['type'] == 'text meta event':
            hexstring = event['type hex']*(not event['omitted']) + event['length_varlen'] + event['name']

        elif event['type'] in ['time signature', 'key signature', 'tempo', 'smpte']:
            hexstring = event['type hex'] + event['value']

        elif event['type'] == 'control change':
            hexstring = (event['type hex']+event['channel'])*(not event['omitted']) + event['controller'] + event['value']

        elif event['type'] == 'program change':
            hexstring = (event['type hex']+event['channel'])*(not event['omitted']) + event['program']

        elif event['type'] == 'note on' or event['type'] == 'note off':
            hexstring = (event['type hex']+event['channel'])*(not event['omitted']) + event['note'] + event['velocity']

        return hexstring
    

In [618]:
class Music:
    def __init__(self, name):
        self._parse_midi(name)
        self.name = name
    
    def _parse_midi(self, name):
        midi_file = f'./{name}.mid'

        with open(midi_file, 'rb') as f:
            hexdata = binascii.hexlify(f.read())
        hexdata_str = hexdata.decode("utf-8")

        MTrk = '4d54726b'
        tracks_str = []
        for i,chunk in enumerate(hexdata_str.split(MTrk)):
            if i == 0:
                header_str = chunk
            else:
                tracks_str.append (MTrk + chunk)

        track_objs = [Track(t) for t in tracks_str]
        
        self.header_str = header_str
        self.track_objs = track_objs

    def keyshift(self, shift_amount):
        track_objs = self.track_objs.copy()
        for obj in track_objs:
            for event in obj.segs:
                if event['type'] == 'note on' or event['type'] == 'note off':
                    note_hex = '0x'+event['note']
                    note_integer = int(note_hex, 16)
                    note_integer_shifted = note_integer + shift_amount
                    note_hex_shifted = hex(note_integer_shifted)[2:]
                    note_hex_shifted = note_hex_shifted.rjust(2, '0')
                    event['note'] = note_hex_shifted
        self.track_objs = track_objs
    
    def export_midi(self, name):
        recovered_tracks = [T.segs_to_hexstring() for T in self.track_objs]
        with open(f'{name}.mid', 'wb') as fout:
            fout.write( binascii.unhexlify(self.header_str + ''.join(recovered_tracks)) )

In [619]:
chopin = Music('chpn_op25_e1')
chopin.keyshift(2)
chopin.export_midi('chpn_op25_up2')

In [620]:
for i,track in enumerate(chopin.track_objs):
    for event in track.segs:
        if event['type'] == 'tempo':
            print(i, 'tempo')
            break
        if event['type'] == 'note on':
            print(i, 'note')
            break

0 tempo
1 note
2 note
3 note


In [628]:
chpn = Music('chpn-p1')
for i,track in enumerate(chpn.track_objs):
    for event in track.segs:
        if event['type'] == 'tempo':
            print(i, 'tempo')
            break
        if event['type'] == 'note on':
            print(i, 'note')
            break
len(chpn.track_objs)

0 tempo
1 note
2 note


7

In [629]:
n1 = 0
n2 = 0
for i in [0,1,2]:
    for event in chpn.track_objs[i].segs:
        if event['type'] == 'note on':
            n1 += 1
        elif event['type'] == 'note off':
            n2 += 1
    print(n1, n2)

0 0
408 0
654 0


---

### Converting to Cairo contract

In [601]:
class CairoMidi:
    def __init__(self, music):
        ## music is a Music object
        self.music = music
        self.name = self.music.name
        self._init_template()
    
    def generate_single(self):
        MEAT = []
        
        MEAT.append( '@view')
        MEAT.append(f'func {self.name} {{')
        MEAT.append( '        range_check_ptr')
        MEAT.append( '    } () -> (')
        MEAT.append( '        z_len : felt,')
        MEAT.append( '        z : felt*')
        MEAT.append( '    ):')
        MEAT.append( '    alloc_locals\n')
        MEAT.append( '    let (local z) = alloc()\n')
        
        hexstring = ''.join( [T.segs_to_hexstring() for T in self.music.track_objs] )
        hexstring = self.music.header_str + hexstring
        arr = self._convert_hex_to_arr(hexstring)
        for i,num in enumerate(arr):
            MEAT.append(f'    assert [z+{i}] = {num}')
        MEAT.append(f'    let z_len = {i+1}')
        
        FOOTER = []
        FOOTER.append('\n    return (z_len, z)')
        FOOTER.append('end')
        
        self.contract = self.header + MEAT + FOOTER
    
    def generate_note_manipulable(self): # tracks 1-3
        pass
    
    def generate_tempo_manipulable(self):
        ## note on / note off event -- add shift_amount
        ## tempo event -- add multiplier & divider
        
        MEAT = []
        tempo_track = self.music.track_objs[0]
        music_tracks = self.music.track_objs[1:]
        
        MEAT.append( '@view')
        MEAT.append(f'func {self.name}_tempo {{')
        MEAT.append( '        range_check_ptr')
        MEAT.append( '    } (')
        MEAT.append( '        tempo_multiplier : felt,')
        MEAT.append( '        tempo_divider : felt')
        MEAT.append( '    ) -> (')
        MEAT.append( '        z_len : felt,')
        MEAT.append( '        z : felt*')
        MEAT.append( '    ):')
        MEAT.append( '    alloc_locals\n')
        MEAT.append( '    let (local z) = alloc()\n')
        i = 0
        for event in tempo_track.segs:
            if event['type'] != 'tempo':
                event_hex = Track.event_to_hexstring(event)
                s = event_hex
                while( len(s)>62 ):
                    felt_hex = s[0:62]
                    MEAT.append( f'    assert [z+{i}] = {int(felt_hex, 16)}' )
                    MEAT.append( f'    assert [z+{i+1}] = 62' )
                    s = s[62:]
                    i += 2
                felt_hex = s
                last_length = len(felt_hex)
                MEAT.append( f'    assert [z+{i}] = {int(felt_hex, 16)}' )
                MEAT.append( f'    assert [z+{i+1}] = {last_length}' )
                i += 2
            else:
                event_integer = int(event['type hex'], 16)
                event_length  = len(event['type hex'])
                value_integer = int(event['value'], 16)
                value_length  = len(event['value'])
                assert value_length == 6
                MEAT.append( f'    assert [z+{i}] = {event_integer}\n' )
                MEAT.append( f'    assert [z+{i+1}] = {event_length}\n' )
                MEAT.append( f'    tempvar value_ = {value_integer} * tempo_multiplier\n' )
                MEAT.append( f'    let (adjusted_value, _) = unsigned_div_rem(value_, tempo_divider)\n' )
                MEAT.append( f'    assert [z+{i+2}] = adjusted_value\n' )
                MEAT.append( f'    assert [z+{i+3}] = 6' ) # the hex-length of tempo value is always 6
                i += 4
        MEAT.append(f'    let z_len = {i+1}')
        MEAT.append('\n    return (z_len, z)')
        MEAT.append('end')
        
        MEAT.append( '\n@view')
        MEAT.append(f'func {self.name}_music {{')
        MEAT.append( '        range_check_ptr')
        MEAT.append( '    } () -> (')
        MEAT.append( '        z_len : felt,')
        MEAT.append( '        z : felt*')
        MEAT.append( '    ):')
        MEAT.append( '    alloc_locals\n')
        MEAT.append( '    let (local z) = alloc()\n')
        
        hexstring = ''.join( [T.segs_to_hexstring() for T in music_tracks] )
        hexstring = self.music.header_str + hexstring
        arr = self._convert_hex_to_arr(hexstring)
        for i,num in enumerate(arr):
            MEAT.append(f'    assert [z+{i}] = {num}')
        MEAT.append(f'    let z_len = {i+1}')
        MEAT.append('\n    return (z_len, z)')
        MEAT.append('end')
                            
        self.contract = self.header + MEAT
                            
    def print_contract(self):
        for line in self.contract:
            print(line)
    
    def export_contract(self, appendix=''):
        contract_str = '\n'.join(self.contract)
        if appendix:
            apdx = f'_{appendix}'
        else:
            apdx = ''
        with open(f'{self.name+apdx}.cairo', 'w') as f:
            f.write(contract_str)
    
    def _init_template(self):
        # generate a single contract
        HEADER = []
        HEADER.append( '%lang starknet')
        HEADER.append( '%builtins pedersen range_check')
        HEADER.append( 'from starkware.cairo.common.cairo_builtins import HashBuiltin')
        HEADER.append( 'from starkware.cairo.common.alloc import alloc')
        HEADER.append( 'from starkware.cairo.common.math import (unsigned_div_rem)\n')
        self.header = HEADER
    
    def _convert_hex_to_arr(self, hexstring):
        '''
        last element of the felt array is an integer indicating the hex-length of the last felt value
        '''
        arr = []
        s = hexstring
        while( len(s)>62 ):
            felt_hex = s[0:62]
            arr.append( int(felt_hex, 16) )
            s = s[62:]
        felt_hex = s
        last_length = len(felt_hex)
        arr.append( int(felt_hex, 16) )
        arr.append(last_length)
        
        return arr

In [603]:
chopin = Music('chpn_op25_e1')
cm = CairoMidi(chopin)
#cm.generate_tempo_manipulable()
#cm.export_contract('tempo')
# cm.generate_single()
# cm.export_contract()

In [607]:
n1 = 0
n2 = 0
for i in [1,2,3]:
    for event in chopin.track_objs[i].segs:
        if event['type'] == 'note on':
            n1 += 1
        elif event['type'] == 'note off':
            n2 += 1
    print(n1, n2)

360 0
2272 0
4444 0


In [613]:
arr = []
for event in chopin.track_objs[1].segs:
    if 'note' not in event['type']:
        event_hex = Track.event_to_hexstring(event)
        s = event_hex
        while( len(s)>62 ):
            felt_hex = s[0:62]
            arr.append( f'    assert [z+{i}] = {int(felt_hex, 16)}' )
            arr.append( f'    assert [z+{i+1}] = 62' )
            s = s[62:]
            i += 2
        felt_hex = s
        last_length = len(felt_hex)
        arr.append( f'    assert [z+{i}] = {int(felt_hex, 16)}' )
        arr.append( f'    assert [z+{i+1}] = {last_length}' )
        i += 2
    else:
        event_integer = int(event['type hex']+event['channel'], 16)
        arr.append( f'    assert [z+{i}] = {event_integer}' )
        arr.append( f'    assert [z+{i+1}] = 2' )
        
        note_integer = int(event['note'], 16)
        arr.append( f'    assert [z+{i+2}] = {note_integer} + note_shift')
        arr.append( f'    assert [z+{i+3}] = 2' )
        
        velocity_integer = int(event['velocity'], 16)
        arr.append( f'    assert [z+{i+4}] = {velocity_integer}')
        arr.append( f'    assert [z+{i+5}] = 2' )
        
        i += 6

for line in arr:
    print(line)

    assert [z+3] = 1297379947
    assert [z+4] = 8
    assert [z+5] = 1332
    assert [z+6] = 8
    assert [z+7] = 0
    assert [z+8] = 2
    assert [z+9] = 5172255634965538127576624586516596
    assert [z+10] = 28
    assert [z+11] = 0
    assert [z+12] = 2
    assert [z+13] = 49152
    assert [z+14] = 4
    assert [z+15] = 0
    assert [z+16] = 2
    assert [z+17] = 11536228
    assert [z+18] = 6
    assert [z+19] = 0
    assert [z+20] = 2
    assert [z+21] = 2624
    assert [z+22] = 4
    assert [z+23] = 0
    assert [z+24] = 2
    assert [z+25] = 450553776345030037904731835580182146853641395011731700227710265710693791032
    assert [z+26] = 62
    assert [z+27] = 926233139
    assert [z+28] = 8
    assert [z+29] = 0
    assert [z+30] = 2
    assert [z+31] = 11557759
    assert [z+32] = 6
    assert [z+33] = 35616
    assert [z+34] = 4
    assert [z+35] = 144
    assert [z+36] = 2
    assert [z+37] = 75 + note_shift
    assert [z+38] = 2
    assert [z+39] = 49
    assert [z+40] = 2


    assert [z+2336] = 2
    assert [z+2337] = 0
    assert [z+2338] = 2
    assert [z+2339] = 144
    assert [z+2340] = 2
    assert [z+2341] = 75 + note_shift
    assert [z+2342] = 2
    assert [z+2343] = 69
    assert [z+2344] = 2
    assert [z+2345] = 33632
    assert [z+2346] = 4
    assert [z+2347] = 144
    assert [z+2348] = 2
    assert [z+2349] = 75 + note_shift
    assert [z+2350] = 2
    assert [z+2351] = 0
    assert [z+2352] = 2
    assert [z+2353] = 0
    assert [z+2354] = 2
    assert [z+2355] = 144
    assert [z+2356] = 2
    assert [z+2357] = 77 + note_shift
    assert [z+2358] = 2
    assert [z+2359] = 77
    assert [z+2360] = 2
    assert [z+2361] = 33632
    assert [z+2362] = 4
    assert [z+2363] = 144
    assert [z+2364] = 2
    assert [z+2365] = 77 + note_shift
    assert [z+2366] = 2
    assert [z+2367] = 0
    assert [z+2368] = 2
    assert [z+2369] = 0
    assert [z+2370] = 2
    assert [z+2371] = 144
    assert [z+2372] = 2
    assert [z+2373] = 75 + note_shif

### For demo purposes, manually generate all contracts

In [609]:
### tempo contract track_objs[0] non-manipulable

In [611]:
### track_objs[1]

chopin.track_objs[1]
#for event in chopin.track_objs[1].segs:
    

736

#### Investigating midi file reconstructed from testnet

In [578]:
midi_testnet = Music('test/chpn_from_testnet')

In [582]:
midi_testnet.header_str

''

---

Reference for converting variable-length bytes to integer
```
uint32_t varfieldGet()
{
    uint32_t ret = 0;
    uint8_t byte_in;

    for (;;)
    {
        if (m_source.eof())
            // return error
        byte_in = m_source.byteGet();
        ret = (ret << 7) | (byte_in & 0x7f);
        if (!(byte_in & 0x80))
            return ret;
    }
}
```

In [68]:
def varFieldToInt(hexstring):
    
    ret = 0
    current_byte = ''
    
    i=0
    while len(hexstring):
        current_byte = "0x" + hexstring[0:2]
        current_integer = int(current_byte, 16)
        ret = (ret << 7) | (current_integer & 0x7f)
        if not current_integer & 0x80:
            return ret
        hexstring = hexstring[2:]
        
        i += 1
        if i>1000:
            print('error')
            break

assert varFieldToInt('00') == 0
assert varFieldToInt('7f') == 127
assert varFieldToInt('8100') == 128
assert varFieldToInt('ff7f') == 16383
assert varFieldToInt('818000') == 16384