In [25]:
!pip install miditoolkit -q # MidiFile() 로 midi file 읽어오기 위한 라이브러리

[0m

In [26]:
import os
from pathlib import Path
from copy import deepcopy
from math import ceil

In [27]:
# Absolute path of the project's ml/ directory (edit to match your machine).
base_path = '/home/level2-3-nlp-finalproject-nlp-07/ml/'
# Directory that holds the source MIDI datasets.
full_path = f'{base_path}data/full/'
(base_path, full_path)

('/home/level2-3-nlp-finalproject-nlp-07/ml/',
 '/home/level2-3-nlp-finalproject-nlp-07/ml/data/full/')

In [28]:
# Names of the dataset directories (relative to full_path) to process.
full_midi_paths = [
    'jazz-midi-366-songs',
]
full_midi_paths

['jazz-midi-366-songs']

In [29]:
# Map each dataset directory name to every '*.mid' file found beneath it
# (the search is recursive).
midi_paths = {}
for dataset_dir in full_midi_paths:
    found = Path(full_path + dataset_dir).rglob('*.mid')
    midi_paths.setdefault(dataset_dir, []).extend(found)

# Report how many files were collected for each dataset.
for dataset_dir, dataset_files in midi_paths.items():
    print(dataset_dir, len(dataset_files))

jazz-midi-366-songs 366


In [31]:
from miditoolkit import MidiFile
from tqdm import tqdm

MAX_NB_BAR = 8     # number of bars per chunk
OVERLAP = 2        # bars of the following chunk appended to the end of each chunk
BEATS_PER_BAR = 4  # NOTE: every file is treated as 4/4; time signatures are not read

output_path = base_path + f'data/chunks{MAX_NB_BAR}overlap{OVERLAP}/'


def _chunk_notes(notes, ticks_per_cut, ticks_over, nb_cut):
    """Distribute the notes of one track over ``nb_cut`` chunks.

    Each note goes to chunk ``note.start // ticks_per_cut`` with its
    ``start``/``end`` shifted so the chunk begins at tick 0. A note that
    starts within the first ``ticks_over`` ticks of chunk ``i`` (``i > 0``)
    is additionally appended to chunk ``i - 1``, placed after that chunk's
    own ``ticks_per_cut`` ticks.

    Args:
        notes: iterable of note objects exposing integer ``start`` and
            ``end`` attributes. The input notes are not modified.
        ticks_per_cut: length of one chunk in ticks.
        ticks_over: length of the overlap region in ticks.
        nb_cut: number of chunks to produce.

    Returns:
        A list of ``nb_cut`` lists holding independent copies of the notes.
    """
    chunks = [[] for _ in range(nb_cut)]
    for note in sorted(notes, key=lambda n: n.start):
        cut_idx = note.start // ticks_per_cut
        offset = cut_idx * ticks_per_cut

        local_note = deepcopy(note)
        local_note.start -= offset
        local_note.end -= offset
        chunks[cut_idx].append(local_note)

        if cut_idx != 0 and local_note.start < ticks_over:
            # Use a separate copy: shifting the object already stored in the
            # current chunk would also move the note inside that chunk.
            overlap_note = deepcopy(local_note)
            overlap_note.start += ticks_per_cut
            overlap_note.end += ticks_per_cut
            chunks[cut_idx - 1].append(overlap_note)
    return chunks


for dataset_name, dataset_files in midi_paths.items():
    print(dataset_name + " : " + str(len(dataset_files)) + "개의 midi 파일을 처리합니다.")
    for midi_path in tqdm(dataset_files):
        # Best effort: files that miditoolkit cannot parse are reported and skipped.
        try:
            midi = MidiFile(midi_path)
        except Exception as e:
            print("Skipping", midi_path, "because of the following error:", e)
            continue

        chunk_dir = output_path + f'{dataset_name}/{midi_path.stem}'
        os.makedirs(chunk_dir, exist_ok=True)

        # ticks per beat * beats per bar * bars per chunk
        ticks_per_bar = midi.ticks_per_beat * BEATS_PER_BAR
        ticks_per_cut = ticks_per_bar * MAX_NB_BAR
        ticks_over = ticks_per_bar * OVERLAP
        nb_cut = ceil(midi.max_tick / ticks_per_cut)
        if nb_cut < 2:
            # The file fits in a single chunk: save the original unchanged.
            midi.dump(f'{chunk_dir}/0.mid')
            continue

        # Split the notes of every track into per-chunk note lists.
        notes_per_track = [
            _chunk_notes(track.notes, ticks_per_cut, ticks_over, nb_cut)
            for track in midi.instruments
        ]

        # Build one note-free template and copy it per chunk, instead of deep
        # copying every note nb_cut times only to discard them afterwards.
        # Only notes are re-timed; everything else deepcopy carries over from
        # the original file is left as it is.
        template = deepcopy(midi)
        for track in template.instruments:
            track.notes = []
        midi_cuts = [deepcopy(template) for _ in range(nb_cut)]

        for j, track_chunks in enumerate(notes_per_track):
            for midi_short, chunk_notes in zip(midi_cuts, track_chunks):
                midi_short.instruments[j].notes = chunk_notes

        # Saving
        for j, midi_short in enumerate(midi_cuts):
            midi_short.dump(f'{chunk_dir}/{j}.mid')

jazz-midi-366-songs : 366개의 midi 파일을 처리합니다.


 19%|█▉        | 70/366 [02:46<03:44,  1.32it/s]  

Skipping /home/level2-3-nlp-finalproject-nlp-07/ml/data/full/jazz-midi-366-songs/246_OpFunk.mid because of the following error: Could not decode key with 0 flats and mode 255


 97%|█████████▋| 355/366 [12:38<00:41,  3.78s/it]

Skipping /home/level2-3-nlp-finalproject-nlp-07/ml/data/full/jazz-midi-366-songs/263_poinciana.mid because of the following error: Could not decode key with 0 flats and mode 2


 98%|█████████▊| 360/366 [12:41<00:08,  1.34s/it]

Skipping /home/level2-3-nlp-finalproject-nlp-07/ml/data/full/jazz-midi-366-songs/076_cantalope.mid because of the following error: data byte must be in range 0..127


100%|██████████| 366/366 [12:52<00:00,  2.11s/it]
