In [36]:
import os
import math
from sklearn.cluster import KMeans, MiniBatchKMeans
# MIDI file that the cells below pass to import_midi_notes().
path = '/content/상영산_v2.mid'

In [31]:
!pip install mido
import mido
def import_midi_notes(midi_file_path, track_name):
    """Read the notes of every track named ``track_name`` from a MIDI file.

    All times are expressed in beats, i.e. absolute ticks divided by the
    file's ``ticks_per_beat``.

    Args:
        midi_file_path: Path of the MIDI file, handed to ``mido.MidiFile``.
        track_name: Only tracks whose ``name`` equals this value are read.

    Returns:
        A list of dicts with the keys ``pitch``, ``start_time``,
        ``end_time``, ``duration_time`` and ``velocity``, sorted by
        ``start_time``. A note that never receives a matching note-off is
        not included in the result.
    """
    mid = mido.MidiFile(midi_file_path)
    ticks_per_beat = float(mid.ticks_per_beat)

    note_import = []
    for track in mid.tracks:
        if track.name != track_name:
            continue

        # Message times are deltas relative to the start of their own track,
        # so the running position and the set of sounding notes must restart
        # for every matching track instead of carrying over from the last one.
        tick = 0
        open_notes = []
        for msg in track:
            tick += msg.time
            if msg.type not in ('note_on', 'note_off'):
                continue

            if msg.type == 'note_on' and msg.velocity > 0:
                open_notes.append({
                    'pitch': msg.note,
                    'start_time': tick / ticks_per_beat,
                    'end_time': None,
                    'duration_time': None,
                    'velocity': msg.velocity,
                })
                continue

            # A note_off, or a note_on with velocity 0, closes the oldest
            # sounding note of the same pitch.
            for j, note in enumerate(open_notes):
                if note['pitch'] == msg.note:
                    note['end_time'] = tick / ticks_per_beat
                    note['duration_time'] = note['end_time'] - note['start_time']
                    note_import.append(open_notes.pop(j))
                    break

    # list.sort is stable, so notes sharing a start keep their closing order.
    note_import.sort(key=lambda note: note['start_time'])
    return note_import

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [24]:
import numpy as np
def quantize(note_seq, total_length=4.0, min_length=0.125, error_range=0.125):
    """Snap note starts and ends onto a rhythmic grid and clean up overlaps.

    Each note is matched, inside a repeating window of ``total_length``
    beats, against a set of allowed (start, end) positions: every pair on a
    1/8-beat grid, plus the three slots of a triplet on each beat and on each
    half beat. The pair with the smallest squared start error plus squared
    end error wins.

    Args:
        note_seq: Iterable of note dicts with at least ``pitch``,
            ``start_time`` and ``duration_time`` (times in beats). The input
            dicts are not modified; the result holds shallow copies.
        total_length: Length in beats of the window the grid repeats over.
        min_length: Notes shorter than this are dropped as ornaments.
        error_range: Accepted for interface compatibility; the
            implementation does not use it.

    Returns:
        A list of quantized note dicts (``start_time``, ``end_time`` and
        ``duration_time`` updated). Notes whose quantized duration is not
        positive are dropped, only the first of several notes with the same
        start and pitch is kept, and a note that runs into a later note of
        the same pitch is cut at that note's start.
    """
    candidates = []

    # Straight grid: every (start, end) combination in steps of 1/8 beat.
    grid = np.arange(0., total_length, 1 / 8.).tolist()
    for grid_start in grid:
        for grid_end in grid:
            candidates.append((grid_start % total_length,
                               grid_end % total_length))

    # Triplet slots: three equal parts of every beat (span 1.) and of every
    # half beat (span 0.5). Each slot's end is the next slot's start.
    for span in (1., 0.5):
        step = span / 3.
        for span_start in np.arange(0., total_length, span).tolist():
            slot_start = span_start
            for _ in range(3):
                candidates.append((slot_start, slot_start + step))
                slot_start += step

    # Duplicates are removed here, so no per-append membership test is needed.
    candidates = list(set(candidates))

    note_seq_q = []
    for n in note_seq:
        # Notes shorter than the threshold are treated as ornaments: drop.
        if n['duration_time'] < min_length:
            continue

        raw_start = n['start_time']
        raw_end = n['start_time'] + n['duration_time']
        # Position inside the window, and the start of the window it lies in.
        n_start_time = raw_start % total_length
        n_end_time = raw_end % total_length
        n_start_offset_beat = int(raw_start / total_length) * total_length
        n_end_offset_beat = int(raw_end / total_length) * total_length

        # min() returns the first best candidate, like index(min(...)).
        start_time_q, end_time_q = min(
            candidates,
            key=lambda c: abs(c[0] - n_start_time) ** 2
            + abs(c[1] - n_end_time) ** 2,
        )
        start_time_q += n_start_offset_beat
        end_time_q += n_end_offset_beat

        # Apply the quantized times to a copy of the note.
        new_note = n.copy()
        new_note['start_time'] = start_time_q
        new_note['duration_time'] = end_time_q - start_time_q
        new_note['end_time'] = end_time_q

        if new_note['duration_time'] > 0:
            note_seq_q.append(new_note)

    # Notes that start at the same position with the same pitch cannot be
    # told apart; keep only the first one.
    seen = set()
    unique_notes = []
    for n in note_seq_q:
        key = (n['start_time'], n['pitch'])
        if key not in seen:
            seen.add(key)
            unique_notes.append(n)
    note_seq_q = unique_notes

    # Fix overlaps: cut a note at the earliest later onset of the same pitch
    # that falls inside it. No numeric sentinel is used, so the cut applies
    # at any time position.
    for n1 in note_seq_q:
        n1_start = n1['start_time']
        n1_end = n1_start + n1['duration_time']
        later_onsets = [
            n2['start_time'] for n2 in note_seq_q
            if n2['pitch'] == n1['pitch']
            and n1_start < n2['start_time'] < n1_end
        ]
        if later_onsets:
            end_time = min(later_onsets)
            n1['duration_time'] = end_time - n1_start
            n1['end_time'] = end_time

        assert n1['duration_time'] > 0

    return note_seq_q

In [25]:
# Load and quantize the track named '8', which serves as the bass reference.
bass_midi_seq = quantize(import_midi_notes(path, '8'))

# Latest note end; stays 0 when there are no notes.
melody_length = max([0] + [n['end_time'] for n in bass_midi_seq])

# Cut the piece into consecutive 30-beat sections and collect the notes that
# start inside each one.
beat_terms = [30] * int(math.ceil(melody_length / 30.))
note_group = []
beat_offset = 0.
for b in beat_terms:
    section_end = beat_offset + b
    note_group.append(
        [n for n in bass_midi_seq if beat_offset <= n['start_time'] < section_end]
    )
    beat_offset = section_end

# Shortest note duration (capped at 999).
min_len = min([999] + [n['duration_time'] for n in bass_midi_seq])

# One onset pattern per section: 30 beats * 8 slots per beat, each slot
# holding the pitch of the note that starts there (0 = no onset).
bass_pattern_list = []
for i, notes in enumerate(note_group):
    offset = i * 30.
    bar_length = beat_terms[i]
    pattern = [0] * 30 * 8
    for n in notes:
        pattern[int((n['start_time'] - offset) * 8)] = n['pitch']
    bass_pattern_list.append(pattern)

# Forward-fill: after the first onset of a section, every empty slot takes
# the most recent pitch.
bass_pattern_list_filled = []
for pattern in bass_pattern_list:
    pattern_filled = []
    pitch = None
    for p in pattern:
        if p > 0:
            pitch = p
            pattern_filled.append(p)
        elif pitch is not None and p == 0:
            pattern_filled.append(pitch)
        else:
            pattern_filled.append(p)
    bass_pattern_list_filled.append(pattern_filled)

1=대금,
2=피리,
3=해금,
4=양금,
5=가야금

In [35]:
# Name of the melody track to analyse.
t = '1'
midi_seq = quantize(import_midi_notes(path, t))

# Latest note end; stays 0 when there are no notes.
melody_length = max([0] + [n['end_time'] for n in midi_seq])

# Cut the piece into consecutive 30-beat sections and collect the notes that
# start inside each one.
# beat_terms = [9, 6, 6, 9] * int(math.ceil(melody_length / 30.))
beat_terms = [30] * int(math.ceil(melody_length / 30.))
note_group = []
beat_offset = 0.
for b in beat_terms:
    section_end = beat_offset + b
    note_group.append(
        [n for n in midi_seq if beat_offset <= n['start_time'] < section_end]
    )
    beat_offset = section_end

# Shortest note duration (capped at 999).
min_len = min([999] + [n['duration_time'] for n in midi_seq])

# One onset pattern per section: 30 beats * 8 slots per beat, each slot
# holding the pitch of the note that starts there (0 = no onset).
pattern_list = []
for i, notes in enumerate(note_group):
    offset = i * 30.
    bar_length = beat_terms[i]
    pattern = [0] * 30 * 8
    for n in notes:
        pattern[int((n['start_time'] - offset) * 8)] = n['pitch']
    pattern_list.append(pattern)

# Forward-fill: after the first onset of a section, every empty slot takes
# the most recent pitch.
pattern_list_filled = []
for pattern in pattern_list:
    pattern_filled = []
    pitch = None
    for p in pattern:
        if p > 0:
            pitch = p
            pattern_filled.append(p)
        elif pitch is not None and p == 0:
            pattern_filled.append(pitch)
        else:
            pattern_filled.append(p)
    pattern_list_filled.append(pattern_filled)

# Express every sounding melody slot relative to the bass pattern at the same
# slot; slots that are still 0 are left untouched.
pattern_list_filled_on_bass = []
for melody_pattern, bass_pattern in zip(pattern_list_filled, bass_pattern_list_filled):
    pattern_on_bass = [
        value - bass_pattern[i] if value > 0 else value
        for i, value in enumerate(melody_pattern)
    ]
    pattern_list_filled_on_bass.append(pattern_on_bass)
print(pattern_list_filled_on_bass)


[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 38, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 38, 36, 36, 36, 36, 36, 36, 34, 36, 38, 38, 36, 34, 34, 34, 34, 34, 34, 34, 39, 38, 38, 38, 38, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 41, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 38, 38, 38, 41, 41, 41, 41, 39, 41, 39, 39], [43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 38, 36, 36, 36, 36, 36, 36, 34, 36, 38, 38, 36, 34, 34, 34

In [53]:
# Split every 30-beat pattern into its 6-4-4-6 sections and cluster the
# patterns of each section separately.
offset = 0
jeongan = ['첫 6정간', '다음 4정간', '다음 4정간','마지막 6정간']
# Section lengths in beats: 9 beats = 6 jeonggan, 6 beats = 4 jeonggan.
section_beats = [9, 6, 6, 9]
n_clusters = 6
for section_name, beats in zip(jeongan, section_beats):
    # Slice of each filled pattern covering this section (8 slots per beat).
    jeong_gan_group = [
        pattern[offset * 8:(offset + beats) * 8]
        for pattern in pattern_list_filled
    ]

    model = MiniBatchKMeans(
        n_clusters=n_clusters,
        n_init=100,
        max_iter=1000
    )
    # distances[i][k] is the distance of sample i to cluster centre k.
    distances = model.fit_transform(jeong_gan_group)
    # assert len(list(set(model.labels_))) == n_clusters

    # Distance of every sample to the centre of the cluster it was assigned to.
    distance_point_center = [
        distances[i][label] for i, label in enumerate(model.labels_)
    ]

    # Mean distance to the centre per cluster (0 for an empty cluster).
    distance_centers = [0.] * n_clusters
    node_nums = [0.] * n_clusters
    for label, dist in zip(model.labels_, distance_point_center):
        node_nums[label] += 1.
        distance_centers[label] += dist
    for i, d in enumerate(distance_centers):
        distance_centers[i] = d / (node_nums[i] if node_nums[i] > 0 else 1)

    print(section_name)
    # One CSV line per sample, grouped by cluster label:
    # label, 1-based pattern index, distance to centre, then the slot values.
    for cur_label in range(n_clusters):
        for i, (dist, group, label) in enumerate(
                zip(distance_point_center, jeong_gan_group, model.labels_)):
            if label == cur_label:
                print(cur_label, i+1, dist, *group, sep=',')
    offset += beats
    print()

첫 6정간
0,5,11.711886441705856,87,87,87,87,87,87,87,92,89,89,89,89,87,87,87,87,87,87,92,89,87,89,87,87,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,96,94,92,92,92,92
0,6,11.709915891154603,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,89,92,89,89,89,89,89,89,89,89,92,89,89,87,87,87,87,87,87,87,92,89,89,92,92,92,92,92,92,92,92,92,92,94,94,94,94,96,94,94,94,94,94,94,96,94,94,94,92
0,10,11.138249041730969,87,87,87,87,87,87,87,92,89,89,89,89,87,87,87,87,87,87,92,89,87,89,87,87,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,92,94,92,89,87,87,87,92,94,92,96,94,94,94,94,94,94,94,94,94,94,94,94,94,94,94,94,94,96,96,96
1,1,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,3,5.962078895699789,92,92,92,92,92,92,92,92,92,96,94,92,87,87,87,87,87,87,87,87,94,96,96,96,92,92,92,92,92,92,92,92,92,92,