In [4]:
# set up autoreload 
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Seed handed to `artist_conditional_split` as `random_state` when the
# train/test split is drawn later in this notebook.
RANDOM_SEED = 20
# Value handed to the same split as `test_size`.
TEST_SIZE = 0.15

In [6]:
# Instrument labels that are dropped when the stem metadata is collected.
# One entry per line so additions/removals show up cleanly in diffs.
unwanted_classes = [
    'Main System',
    'claps',
    'fx/processed sound',
    'tuba',
    'piccolo',
    'cymbal',
    'glockenspiel',
    'tambourine',
    'timpani',
    'snare drum',
    'clarinet section',
    'flute section',
    'tenor saxophone',
    'trumpet section',
]


In [7]:
# the first thing to do is to partition the MDB track IDs and stem IDs into only the ones we will use. 
import medleydb as mdb

mtrack_generator = mdb.load_all_multitracks(['V1', 'V2'])
splits = mdb.utils.artist_conditional_split(
    test_size=TEST_SIZE, num_splits=1, random_state=RANDOM_SEED
)[0]

# partition name ('train' / 'test') -> list of per-stem metadata dicts
partition_map = {}

for mtrack in mtrack_generator:
    track_id = mtrack.track_id

    # work out which partition this multitrack landed in;
    # tracks that appear in neither split are skipped entirely
    if track_id in splits['test']:
        partition_key = 'test'
    elif track_id in splits['train']:
        partition_key = 'train'
    else:
        continue

    # grab this partition's list, creating it the first time we see the key
    partition_list = partition_map.setdefault(partition_key, [])

    # one record per stem, labelled by the stem's first listed instrument
    for stem_id, stem in mtrack.stems.items():
        label = stem.instrument[0]

        # only keep stems whose class we actually want
        if label not in unwanted_classes:
            stem_info = {
                'track_id': track_id,
                'stem_idx': stem.stem_idx,
                'label': label,
                # the artist id is the part of the track id before the first underscore
                'artist_id': track_id.split('_')[0],
                'path_to_audio': stem.audio_path,
                'base_chunk_name': f'{track_id}-{stem_id}-{label}',
            }
            partition_list.append(stem_info)

In [8]:
import instrument_recognition.utils as utils
# collect the class list of each partition
classlists = {}
for split_name, split_metadata in partition_map.items():
    classlists[split_name] = utils.data.get_classlist(split_metadata)

# keep only the classes that show up in BOTH train and test
train_classes = set(classlists['train'])
test_classes = set(classlists['test'])
filtered_classes = list(train_classes & test_classes)

# drop every entry whose label is not shared by the two partitions
for partition_key, metadata in partition_map.items():
    kept_entries = []
    for e in metadata:
        if e['label'] in filtered_classes:
            kept_entries.append(e)
    partition_map[partition_key] = kept_entries

# sanity check: both partitions should now report the same number of classes
for split_name in ('train', 'test'):
    print(len(utils.data.get_classlist(partition_map[split_name])))

22
22


In [9]:
# Windowing / output settings forwarded to the dataset-generation helpers
# in the final cell.
CHUNK_SIZE = 1    # passed as `chunk_size` (presumably seconds -- TODO confirm)
HOP_SIZE = 0.25   # passed as `hop_size` (presumably seconds -- TODO confirm)
SR = 48000        # passed as `sr` when loading, trimming and windowing audio

# Switch for augmenting the training partition.
AUGMENT_TRAIN_SET = True

# Root output directory; each partition gets its own sub-directory under it.
PATH_TO_OUTPUT = '/home/hugo/data/mono_music_sed/mdb/AUDIO/'

In [12]:
from instrument_recognition.scripts.generate_dataset import save_windowed_audio_events, load_audio_file, trim_silence
import os
import tqdm

# Window every stem into chunks and write them to disk, one output
# sub-directory per partition.

# `import tqdm` on its own does not load the `tqdm.contrib.concurrent`
# submodule, so import the helper explicitly instead of relying on some
# other module having imported it as a side effect.
from tqdm.contrib.concurrent import process_map

for partition_key, metadata in partition_map.items():
    # Only the training partition is augmented, and only when the config
    # switch allows it. (Previously this flag was computed but never used:
    # `augment=True` was hard-coded, so the test partition was augmented too.)
    augment = AUGMENT_TRAIN_SET and partition_key == 'train'

    def split_and_augment(entry):
        """Load one stem, trim its silence and save it as windowed chunks.

        Parameters
        ----------
        entry : dict
            Per-stem metadata record with at least the keys
            'path_to_audio', 'base_chunk_name' and 'label'. The whole
            record is also forwarded as `metadata_extras`.

        Reads `partition_key` and `augment` from the enclosing loop
        iteration. Failures are reported on stdout and otherwise ignored
        so that one unreadable file does not abort the whole partition.
        """
        # Looked up before the try block so the error message below can
        # always name the file, even if the failure happens very early.
        path_to_audio = entry.get('path_to_audio')
        try:
            base_chunk_name = entry['base_chunk_name']
            label = entry['label']
            output_path = os.path.join(PATH_TO_OUTPUT, partition_key)

            audio = load_audio_file(path_to_audio, SR)
            # trim silence
            audio = trim_silence(audio, SR, min_silence_duration=0.3)

            save_windowed_audio_events(audio=audio, sr=SR, chunk_size=CHUNK_SIZE,
                                       hop_size=HOP_SIZE, base_chunk_name=base_chunk_name,
                                       label=label, path_to_output=output_path,
                                       metadata_extras=entry, augment=augment)
        except Exception as e:
            # deliberate best-effort: log and move on to the next stem
            print(f'exception occured: {e}')
            print(f'FAILED TO LOAD: {path_to_audio}')

    # DO IT IN PARALLEL (one task per stem, with a progress bar)
    process_map(split_and_augment, metadata)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=85.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=504.0), HTML(value='')))

exception occured: Error reading audio file: unknown length
FAILED TO LOAD: /home/hugo/data/medleydb/Audio/EthanHein_HarmonicaFigure/EthanHein_HarmonicaFigure_STEMS/EthanHein_HarmonicaFigure_STEM_04.wav
exception occured: Error reading audio file: unknown length
FAILED TO LOAD: /home/hugo/data/medleydb/Audio/FacesOnFilm_WaitingForGa/FacesOnFilm_WaitingForGa_STEMS/FacesOnFilm_WaitingForGa_STEM_11.wav

