### 피아노 Midi 사이트
- https://www.freepianotutorials.net/2023/12/ludwig-goransson-can-you-hear-music.html#more
- http://www.piano-midi.de/bach.htm

### Ⅰ. 라이브러리

In [60]:
import os
import mido
import numpy as np
import pandas as pd

from mido import MidiFile, MidiTrack, MetaMessage, Message

In [61]:
data_folder = "midi_data"

In [62]:
def load_midi_data(input_name, target_name):
    input_path  = os.path.join(data_folder, input_name)
    target_path = os.path.join(data_folder, target_name)
    
    input_mid  = mido.MidiFile(input_path)
    target_mid = mido.MidiFile(target_path)
    
    input_mid.tracks  = input_mid.tracks[0:2]
    target_mid.tracks = target_mid.tracks[0:2]
    
    #######################################################
    ######## merge : tick 때문에 merge 하지 않음
    #######################################################
    
    # input_mid = mido.merge_tracks(input_mid.tracks)
    # target_mid = mido.merge_tracks(target_mid.tracks)
    
    return input_mid, target_mid

In [87]:
input_name = "2222.mid"
target_name = "butterfly_test.mid"

input_mid, target_mid = load_midi_data(input_name, target_name)

#### sample data

In [344]:
###########################################################
######### sample
###########################################################
input_name = "piano_pirate.mid"
target_name = "bach_847.mid"

input_mid, target_mid = load_midi_data(input_name, target_name)

### Ⅱ. 데이터 추출하기
- 데이터 전제
    + track 길이 (2가지) : meta data, msg

In [77]:
def extract_data(input_mid, target_mid):
    input_info = {}
    target_info = {}
    
    input_info['MetaMessage']
    input_info['Message']
    
    target_info['MetaMessage']
    target_info['Message']
    
    

In [78]:
df_info_col = ['sec', 'tick', 'msg_type', 'channel', 'note', 'velocity', 'count', 'main_vol','depth', 'pedal', 'pan', 'tempo']
df_info = pd.DataFrame(columns = df_info_col)

input_info = {MetaMessage:{}, Message:df_info }

df_tempo_col = ['tempo', 'tick']
df_tempo = pd.DataFrame(columns = df_tempo_col)

###########################
for i,track in enumerate(target_mid.tracks):
    cur_tick = 0
    
    cur_sec = 0
    
    cur_info = {'sec':0, 'tick':0, 'msg_type': "", 'channel':0, 'note':0, 
                'velocity':0, 'count':0, 'main_vol':-1, 'depth':-1, 'pedal':-1, 'pan':-1, 'tempo':0}
    
    for msg in track:
        
        if isinstance(msg, MetaMessage):
            
            if msg.type not in input_info[MetaMessage]:
                input_info[MetaMessage][msg.type] = msg
            
            else:
                if type(input_info[MetaMessage][msg.type]) != list:
                    input_info[MetaMessage][msg.type] = [input_info[MetaMessage][msg.type]]
                    
                input_info[MetaMessage][msg.type].append(msg)
        
        elif isinstance(msg, Message):
            print(msg)
            if msg.time > 0:
                cur_tick = cur_tick + msg.time
                
                # insert
                cur_tempo = 300000
                cur_sec = mido.tick2second(cur_tick, input_mid.ticks_per_beat, cur_tempo )
                cur_count = 1
                
                #if type(cur_note) == list:
                #    cur_count = len(cur_note)
                
                temp = [cur_info['sec'], cur_info['tick'], cur_info['msg_type'], cur_info['channel'], cur_info['note'], cur_info['velocity'], 
                        cur_info['count'],cur_info['main_vol'], cur_info['depth'], cur_info['pedal'], cur_info['pan'], cur_info['tempo']]
                
                last_tick = input_info[Message].shape[0]
                
                for idx in range(last_tick, cur_tick):
                    temp[0] = mido.tick2second(idx, input_mid.ticks_per_beat, cur_tempo )
                    temp[1] = idx
                    df_info.loc[idx] = temp
                    
                cur_info = {'sec':0, 'tick':0, 'msg_type': "", 'channel':0, 'note':0, 
                'velocity':0, 'count':0, 'main_vol':-1, 'depth':-1, 'pedal':-1, 'pan':-1, 'tempo':0}
            
            
            if msg.type == 'note_on' and msg.velocity == 0:
                msg_type = 'note_off'
                
            else:
                msg_type = msg.type
    
            if msg.type == 'note_on' or msg.type == 'note_off':
                if cur_info['count'] == 1:
                    cur_info['msg_type'] = [cur_info['msg_type']]
                    cur_info['channel']  = [cur_info['channel']]
                    cur_info['note']     = [cur_info['note']]
                    cur_info['velocity'] = [cur_info['velocity']]
                    
                    cur_info['msg_type'].append(msg_type)
                    cur_info['channel'].append(msg.channel)
                    cur_info['note'].append(msg.note)
                    cur_info['velocity'].append(msg.velocity)
                    
                else:
                    cur_info['msg_type'] = msg_type
                    cur_info['channel'] = msg.channel
                    cur_info['note'] = msg.note
                    cur_info['velocity'] = msg.velocity
                    
                cur_info['count']+=1
                    
            elif msg.type == 'program_change':
                print("program_change")
                
            elif msg_type == 'control_change':
                
                if msg.control == 1:
                    ctl_type = 'modulation'
                elif msg.control == 7:
                    ctl_type = 'main_vol'
                elif msg.control == 10:
                    ctl_type = 'pan'
                elif msg.control == 64:
                    ctl_type = 'pedal'
                elif msg.control >= 91 and msg.control <= 93:
                    ctl_type = 'depth'
                    
                if type(cur_info[ctl_type]) == list:
                    cur_info[ctl_type].append(msg.value)
                else:
                    if cur_info[ctl_type] == -1:
                        cur_info[ctl_type] = msg.value
                    else:
                        cur_info[ctl_type] = [cur_info[ctl_type]]
                        cur_info[ctl_type].append(msg.value)

            tempo_idx = 0
            for tempo_idx in range(df_tempo.shape[0]):
                if df_tempo.loc[tempo_idx, 'tick'] > cur_tick:
                    cur_tempo = df_tempo.loc[tempo_idx, 'tempo']
                    break
                
            if tempo_idx == df_tempo.shape[0]:
                cur_tempo = df_tempo.loc[df_tempo.shape[0]-1, 'tempo']
                
            cur_info['sec'] = mido.tick2second(cur_tick, input_mid.ticks_per_beat, cur_tempo )
            cur_info['tick'] = cur_tick
            cur_info['tempo'] = cur_tempo
                
            
        if msg.time == 118:
            break
        
    if i==0:
        tempo_tick = 0
        
        if type(input_info[MetaMessage]['set_tempo']) !=list:
            df_tempo.loc[0, 'tempo'] = input_info[MetaMessage]['set_tempo'].tempo
            df_tempo.loc[0, 'tick'] = -1
            
        else:
            for i, msg in enumerate(input_info[MetaMessage]['set_tempo']):
                tempo_tick += msg.time
            
                if i==0:
                    df_tempo.loc[i,'tempo'] = msg.tempo
                else:
                    df_tempo.loc[i,'tempo'] = msg.tempo
                    df_tempo.loc[i-1,'tick'] = tempo_tick

            df_tempo.loc[df_tempo.shape[0] -1, 'tick'] = -1
        
    if i==1:
        break

input_info[Message].replace(-1, 0, inplace=True)
        
input_info[Message].to_csv("hello.csv")
print(display(input_info[Message]))


program_change channel=0 program=0 time=5760
program_change


KeyError: -1

In [66]:
input_info[MetaMessage]['set_tempo'][-1]

TypeError: 'MetaMessage' object is not subscriptable

In [79]:
def load_mid_Info_from_csv(input_name, target_name):
    input_csv  = input_name.split(".")[0]+".csv"
    target_csv = target_name.split(".")[0]+".csv"
    
    input_info = pd.read_csv(input_csv, index_col = 0)
    target_info = pd.read_csv(target_csv, index_col = 0)
    
    return input_info, target_info

aa, bb = load_mid_Info_from_csv("hello.mid", "hello.mid")

FileNotFoundError: [Errno 2] No such file or directory: 'hello.csv'

In [80]:
def create_df_info():
    df_col = ['sec', 'tick', 'msg_type', 'channel', 'note', 'velocity', 'count', 'main_vol','depth', 'pedal', 'pan', 'tempo']
    df = pd.DataFrame(columns = df_col)
    
    df_tempo_col = ['tempo', 'tick']
    df_tempo = pd.DataFrame(columns = df_tempo_col)

    df_info = { MetaMessage:{}, Message:df }
    
    return df_info, df_tempo

def info_to_list(cur_info):
    
    cur_temp = [cur_info['sec'], cur_info['tick'], 
                cur_info['msg_type'], cur_info['channel'], cur_info['note'], cur_info['velocity'], cur_info['count'],
                cur_info['main_vol'], cur_info['depth'], cur_info['pedal'], cur_info['pan'], cur_info['tempo']]
    
    return cur_temp
                

def initialize_cur_info():
    cur_info = {'sec':0, 'tick':0, 
                'msg_type': "", 'channel':0, 'note':0,  'velocity':0, 'count':0, 
                'main_vol':-1, 'depth':-1, 'pedal':-1, 'pan':-1, 'tempo':0 }
    return cur_info

def check_not_list(msg_list):
    if type(msg_list) == list:
        return msg_list
    else:
        msg_list = [msg_list]
        return msg_list

In [85]:
def tempo_info_list(tempo_list, tempo_info):
    tempo_tick = 0
    
    if type(tempo_list) != list:
        tempo_info.loc[0, 'tempo'] = tempo_list.tempo
        tempo_info.loc[0, 'tick'] = -1 # -1 : 마지막 숫자
        
    else:
        for idx, msg in enumerate(tempo_list):
            tempo_tick += msg.time
            
            if i==0:
                tempo_info.loc[i,'tempo'] = msg.tempo
            else:
                tempo_info.loc[i,'tempo'] = msg.tempo
                tempo_info.loc[i-1,'tick'] = tempo_tick
            
        tempo_info.loc[tempo_info.shape[0] - 1, 'tick'] = -1
        
    return tempo_info

def find_tempo(tick, tempo_info):
    tempo_idx = 0
    last_tempo_idx = tempo_info.shape[0]
    
    cur_tempo = -1
    
    for tempo_idx in range(last_tempo_idx):
        if tempo_info.loc[tempo_idx, 'tick'] > tick:
            cur_tempo = tempo_info.loc[tempo_idx, 'tempo']
            break
            
    if cur_tempo == - 1:
        cur_tempo = tempo_info.loc[last_tempo_idx - 1, 'tempo']
        
    return cur_tempo

In [82]:
def control_type(msg):
    
    if msg.control == 1:
        ctl_type = 'modulation'
        
    elif msg.control == 7:
        ctl_type = 'main_vol'
        
    elif msg.control == 10:
        ctl_type = 'pan'
        
    elif msg.control == 64:
        ctl_type = 'pedal'
        
    elif msg.control >= 91 and msg.control <= 93:
        ctl_type = 'depth'
        
    return ctl_type

In [83]:
def process_msg(msg, info):
    
    if msg.type == 'note_on' or msg.type == 'note_off':
        
        if msg.type == 'note_on' and msg.velocity == 0:
            msg_type = 'note_off'
        else:
            msg_type = msg.type
            
        if info['count'] == 1:
            info['msg_type'] = [info['msg_type']]
            info['channel']  = [info['channel']]
            info['note']     = [info['note']]
            info['velocity'] = [info['velocity']]
                    
            info['msg_type'].append(msg_type)
            info['channel'].append(msg.channel)
            info['note'].append(msg.note)
            info['velocity'].append(msg.velocity)
                    
        else:
            info['msg_type'] = msg_type
            info['channel'] = msg.channel
            info['note'] = msg.note
            info['velocity'] = msg.velocity
                    
        info['count']+=1
        
    elif msg.type == 'program_change':
        print("program_change")
        
    elif msg.type == 'control_change':
        
        ctl_type = control_type(msg)
        
        if cur_info[ctl_type] == -1:
            cur_info[ctl_type] = msg.value
        else:
            cur_info[ctl_type] = check_not_list(cur_info[ctl_type])
            cur_info[ctl_type].append(msg.value)
    
    return info

In [88]:
def extract_mid_data(mid):
    
    mid_info, tempo_info = create_df_info()
    
    for idx, track in enumerate(mid.tracks):
        
        cur_tick = 0
        cur_sec  = 0
        cur_info = initialize_cur_info()
        
        for msg in track:
            
            ###############################################
            ########   MetaMessage
            ###############################################
            if isinstance(msg, MetaMessage):
                
                if msg.type not in mid_info[MetaMessage]:
                    mid_info[MetaMessage][msg.type] = msg
                    
                else:
                    mid_info[MetaMessage][msg.type] = check_not_list(mid_info[MetaMessage][msg.type])
                    mid_info[MetaMessage][msg.type].append(msg)
                    
            ###############################################
            ########   Message
            ###############################################
            elif isinstance(msg, Message):
                
                if msg.time > 0:
                    cur_info['tempo'] = find_tempo(cur_tick, tempo_info)
                    cur_info['tick'] = cur_tick
                    cur_info['sec'] = mido.tick2second( cur_info['tick'], mid.ticks_per_beat, cur_info['tempo'] )

                    cur_temp = info_to_list(cur_info)
                    
                    # iinsert row info
                    cur_tick = cur_tick + msg.time
                    last_tick = mid_info[Message].shape[0]
                    for idx in range(last_tick, cur_tick):
                        cur_temp[0] = mido.tick2second(idx, mid.ticks_per_beat, cur_info['tempo'] ) # second
                        cur_temp[1] = idx # tick
                        
                        mid_info[Message].loc[idx] = cur_temp
                        
                    cur_info = initialize_cur_info()
                
                cur_info = process_msg(msg, cur_info)
                
                    
        # tempo setting
        if idx == 0:
            # 예외 사항
            # set_tempo 가 없을 경우, default : 500000 로 설정
            if 'set_tempo' not in mid_info[MetaMessage]:
                mid_info[MetaMessage][msg.type] = mido.MetaMessage('set_tempo', tempo = 500000)
            
            tempo_info_list(mid_info[MetaMessage]['set_tempo'], tempo_info)
            break
            
    mid_info[Message].replace(-1, 0, inplace = True)
    
    return mid_info
     

input_info = extract_mid_data(input_mid)
print(input_info[MetaMessage])
print("====================")
input_info[Message].to_csv("22hello.csv")
print(display(input_info[Message]))


{'track_name': MetaMessage('track_name', name='butterfly', time=0), 'set_tempo': MetaMessage('set_tempo', tempo=500000, time=0), 'time_signature': MetaMessage('time_signature', numerator=4, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0), 'end_of_track': MetaMessage('end_of_track', time=0)}


Unnamed: 0,sec,tick,msg_type,channel,note,velocity,count,main_vol,depth,pedal,pan,tempo


None


In [90]:
print(input_mid)

MidiFile(type=1, ticks_per_beat=480, tracks=[
  MidiTrack([
    MetaMessage('track_name', name='butterfly', time=0),
    MetaMessage('set_tempo', tempo=500000, time=0),
    MetaMessage('time_signature', numerator=4, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0),
    MetaMessage('end_of_track', time=0)]),
  MidiTrack([
    MetaMessage('track_name', name='Yamaha MOX', time=0),
    Message('program_change', channel=0, program=0, time=9600),
    Message('note_on', channel=0, note=69, velocity=100, time=2880),
    Message('note_off', channel=0, note=69, velocity=64, time=240),
    Message('note_on', channel=0, note=70, velocity=100, time=0),
    Message('note_off', channel=0, note=70, velocity=64, time=240),
    Message('note_on', channel=0, note=72, velocity=87, time=0),
    Message('note_off', channel=0, note=72, velocity=64, time=960),
    Message('note_on', channel=0, note=65, velocity=94, time=0),
    Message('note_off', channel=0, note=65, velocity=64, tim

In [72]:
input_info  = extract_mid_data(input_mid)
target_info = extract_mid_data(target_mid)

{<class 'mido.midifiles.meta.MetaMessage'>: {}, <class 'mido.messages.messages.Message'>: Empty DataFrame
Columns: [sec, tick, msg_type, channel, note, velocity, count, main_vol, depth, pedal, pan, tempo]
Index: []} Empty DataFrame
Columns: [tempo, tick]
Index: []
{<class 'mido.midifiles.meta.MetaMessage'>: {}, <class 'mido.messages.messages.Message'>: Empty DataFrame
Columns: [sec, tick, msg_type, channel, note, velocity, count, main_vol, depth, pedal, pan, tempo]
Index: []} Empty DataFrame
Columns: [tempo, tick]
Index: []


KeyError: -1

#### [0]. precheck

In [73]:
def pre_check(input_mid, target_mid):
    input_track_len  = len(input_mid.tracks)
    target_track_len = len(target_mid.tracks)
    
    # track 길이 비교
    if input_track_len != target_track_len:
        return False, None, None
    
    input_info  = extract_mid_data(input_mid)
    target_info = extract_mid_data(target_mid)
    
    # 시간 비교
    #############################################################
    ############ 순서, 시간 차이를 어떻게 할 것인지 고민
    #############################################################
    if input_info[Message].shape[0] != target_info[Message].shape[0]:
        return False
    
    return True, input_info, target_info

bRet, input_info, target_info = pre_check(input_mid, target_mid)

#### [1]. MetaMessage
- (1). midi_port ( 보류 )
- (2). set_tempo
- (3). smpte_offset
- (4). time_signature
- (5). key_signature

In [74]:
def compare_MetaMessage(input_meta, target_meta):
    
    score = 0
    total_score = 0 # 비교 해야 할 대상의 갯수
    
    for key in input_meta:
        
        if key == 'midi_port':
            count = 0
            
            if type(input_meta[key]) == list:
                count += len(input_meta[key])
            else:
                count +=1
                
            total_score +=count
                
            if key not in target_meta: continue
                
            if count > 1: # list
                for idx in range(count):
                    if input_meta[key][idx].port == target_meta[key][idx].port : score+=1
            else:
                if input_meta[key].port == target_meta[key].port : score+=1
            
        elif key == 'set_tempo':
            # Message 에서 비교
            continue
            
        elif key == 'smpte_offset':
            # frame_rate, hours, minutes, seconds, frames, sub_frames
            total_score += 6
            
            if key not in target_meta: continue
            
            if input_meta[key].frame_rate == target_meta[key].frame_rate: score+=1
            if input_meta[key].hours == target_meta[key].hours: score+=1
            if input_meta[key].minutes == target_meta[key].minutes: score+=1   
            if input_meta[key].seconds == target_meta[key].seconds: score+=1   
            if input_meta[key].frames == target_meta[key].frames: score+=1     
            if input_meta[key].sub_frames == target_meta[key].sub_frames: score+=1
            
        elif key == 'time_signature':
            # numerator=4, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8
            total_score += 4
            
            if key not in target_meta: continue
            
            if input_meta[key].numerator == target_meta[key].numerator: score+=1
            if input_meta[key].denominator == target_meta[key].denominator: score+=1
            if input_meta[key].clocks_per_click == target_meta[key].clocks_per_click: score+=1 
            if input_meta[key].notated_32nd_notes_per_beat == target_meta[key].notated_32nd_notes_per_beat: score+=1
            
        elif key == 'key_signature':
            # numerator=4, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8
            total_score += 1
            
            if key not in target_meta: continue
                
            if input_meta[key].key == target_meta[key].key: score+=1  
        
        elif key == 'text':
            count = 0
            
            if type(input_meta[key]) == list:
                count += len(input_meta[key])
            else:
                count +=1
                
            total_score +=count
                
            if key not in target_meta: continue
                
            if count > 1: # list
                for idx in range(count):
                    if input_meta[key][idx].text == target_meta[key][idx].text : score+=1
            else:
                if input_meta[key].text == target_meta[key].text : score+=1
            
            
        else:
            continue
            
    print(f"MetaMessage : score({score}), total_score({total_score})")
    print(f"MetaMessage : percentage({score/total_score*100})")
            
    return score, total_score

In [75]:
compare_MetaMessage(input_info[MetaMessage], target_info[MetaMessage])

TypeError: 'NoneType' object is not subscriptable

#### [2]. Message