# Melody Extraction
In this notebook, we extract the melody track for each MIDI file. The result is the index of the melody track (instrument) for each MIDI file.

In [1]:
import os
import json
import legacy_tools
import warnings
# Suppress every warning for the rest of the kernel session (no category or
# module filter is given, so this is global, not limited to legacy_tools).
warnings.filterwarnings('ignore')



## Single Extraction
Extract the melody from a single file.

In [6]:
# MIDI file used for the single-file demo (a relative path).
test_file_path = 'test2.mid'

In [7]:
# Run the extractor on the one demo file.
# NOTE(review): the batch code in this notebook treats a None return as
# "no melody found" -- presumably the same applies here; confirm in legacy_tools.
result = legacy_tools.extract_main_melody_for_file_path(test_file_path)

In [8]:
# Show the value returned by the extractor for the demo file.
print(result)

0


## Batch Extraction
Extract the melody track for every MIDI file in the designated directory, and save the results.

In [2]:
# Root directory of the MIDI collection to process (it is searched recursively)
data_dir = '../../data/kunstderfuge-com_complete_collection'
# Directory where the results should be saved
save_dir = '../../processed_data/kunstderfuge/info_note'

In [3]:
# Create the output directory (and any missing parents); exist_ok=True keeps
# this cell safe to re-run.
os.makedirs(save_dir, exist_ok=True)

In [5]:
def extract_melody_for_all_midi(dir):
    """Run melody-track extraction on every ``.mid`` file found under ``dir``.

    The directory tree is walked recursively. Each file whose name ends with
    ``.mid`` (case-sensitive; ``.midi`` is not matched) is passed to
    ``legacy_tools.extract_main_melody_for_file_path``. A file counts as
    "null" when that call returns ``None`` or raises an ordinary exception.

    Args:
        dir: Root directory to scan. (The name shadows the ``dir`` builtin; it
            is kept unchanged so existing keyword callers keep working.)

    Returns:
        A 4-tuple ``(all_midi_paths, null_melody_midi_paths,
        melody_midi_paths, melody_midi_dict)`` where the three lists hold
        ``/``-separated paths relative to ``dir`` and ``melody_midi_dict``
        maps each relative path in ``melody_midi_paths`` to ``str(result)``.
    """
    all_midi_paths = []
    null_melody_midi_paths = []
    melody_midi_paths = []
    melody_midi_dict = {}  # {rel_path: melody_idx (as str), ...}
    cnt = 0

    for root_dir, _sub_dirs, files in os.walk(dir):
        for file_name in files:
            if not file_name.endswith('.mid'):
                continue

            joined_path = os.path.join(root_dir, file_name)
            file_path = joined_path.replace('\\', '/')

            # relpath is robust to trailing or doubled separators in `dir`,
            # unlike counting '/'-separated components of the root.
            rel_path = os.path.relpath(joined_path, dir).replace('\\', '/')

            all_midi_paths.append(rel_path)

            try:
                result = legacy_tools.extract_main_melody_for_file_path(file_path)
            except Exception:
                # Best effort: an unreadable or unsupported file is recorded as
                # "null" rather than aborting the batch. KeyboardInterrupt and
                # SystemExit are deliberately not caught, so a long run can
                # still be stopped.
                result = None

            if result is None:
                null_melody_midi_paths.append(rel_path)
            else:
                melody_midi_paths.append(rel_path)
                # Key by relative path, not bare file name: files with the same
                # name in different subdirectories must not overwrite each other,
                # and the keys then line up with melody_midi_paths.
                melody_midi_dict[rel_path] = str(result)

            cnt += 1

            if cnt % 100 == 0:
                print(f'Processed MIDI files: {cnt}, Melody MIDI files: {len(melody_midi_paths)}, Null: {len(null_melody_midi_paths)}')

    print(f'Total number of MIDI files: {cnt}, Melody MIDI files: {len(melody_midi_paths)}, Null: {len(null_melody_midi_paths)}')

    return all_midi_paths, null_melody_midi_paths, melody_midi_paths, melody_midi_dict

In [6]:
# Walks data_dir recursively and runs extraction on every .mid file it finds;
# a progress line is printed every 100 files and a total at the end.
all_midi_paths, null_melody_midi_paths, melody_midi_paths, melody_midi_dict = extract_melody_for_all_midi(data_dir)

In [None]:
def write_list(obj, file_path):
    """Write the strings in ``obj`` to ``file_path``, one item per line.

    Any existing file at ``file_path`` is overwritten. Every item is followed
    by a newline, so an empty list produces an empty file.

    Args:
        obj: List of strings to write.
        file_path: Destination path of the text file.

    Raises:
        AssertionError: If ``obj`` is not a list.
        TypeError: If an item cannot be concatenated with ``'\\n'``.
    """
    assert isinstance(obj, list)
    # Pin the encoding: the platform default (e.g. cp1252 on Windows) can fail
    # on, or silently mis-encode, non-ASCII characters in file paths.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(item + '\n' for item in obj)

In [None]:
def write_json(obj, file_path):
    """Serialize ``obj`` as JSON into ``file_path``, replacing any existing file.

    Only ``list``, ``tuple`` and ``dict`` containers are accepted; any other
    type trips the assertion before the file is opened.
    """
    accepted_types = (list, tuple, dict)
    assert isinstance(obj, accepted_types)
    with open(file_path, mode='w') as json_file:
        json.dump(obj, fp=json_file)

In [None]:
# Every .mid file that was found, one relative path per line, regardless of
# whether extraction succeeded.
write_list(all_midi_paths, os.path.join(save_dir, 'all_midi_paths.txt'))

In [None]:
# Only the files for which a melody track was found.
# NOTE(review): null_melody_midi_paths is computed but not written by any cell
# visible here -- confirm whether it should be saved as well.
write_list(melody_midi_paths, os.path.join(save_dir, 'melody_midi_paths.txt'))

In [None]:
# JSON mapping from each MIDI file with a melody to its melody track index
# (the index is stored as a string).
write_json(melody_midi_dict, os.path.join(save_dir, 'melody_midi_dict.json'))