In [9]:
import os
import shutil
import time

import yaml
from pydub import AudioSegment

program_nums_to_remove = {46, 47, 52, 53, 54, 55, 69, 70, 76, 77, 78, 79}
plugin_names_to_remove = {"harpsichord.nkm", "scarbee_a_200.nkm", "scarbee_clavinet_full.nkm", "scarbee_mark_I.nkm", "scarbee_pianet.nkm", "wurly_ep.nkm"}

# Define the sets to process
data_sets = [
    "slakh2100_flac_redux/reduced_train",
    "slakh2100_flac_redux/reduced_validation",
    "slakh2100_flac_redux/reduced_test",
]

for DIR_PATH in data_sets:
    print(f"\nProcessing data set: {DIR_PATH}")  # Indicate current dataset

    # Iterate through each track folder
    for track in os.listdir(DIR_PATH):
        track_path = os.path.join(DIR_PATH, track)
        metadata_path = os.path.join(track_path, "metadata.yaml")
        stems_path = os.path.join(track_path, "stems")
        mix_path = os.path.join(track_path, "mix.flac")

        if not os.path.isdir(track_path) or not os.path.exists(metadata_path):
            continue

        print(f"Processing track: {track}")

        with open(metadata_path, "r") as f:
            metadata = yaml.safe_load(f)

        if "stems" not in metadata:
            continue

        stems_to_remove = []
        for stem_id, stem_data in metadata["stems"].items():
            inst_class = stem_data.get("inst_class", "")
            program_num = stem_data.get("program_num", None)
            plugin_name = stem_data.get("plugin_name", None)

            if (inst_class in {"Chromatic Percussion", "Organ", "Synth Lead", "Synth Pad", "Sound Effects", "Ethnic", "Percussive", "Sound effects", "Brass", "Reed", "Pipe"} or
                program_num in program_nums_to_remove or
                (plugin_name and plugin_name in plugin_names_to_remove)):
                stems_to_remove.append(stem_id)

        print(f"Stems to remove: {stems_to_remove}")

        if not stems_to_remove:
            print(f"No stems removed for {track}. Skipping mix update.")
            continue

        for stem_id in stems_to_remove:
            stem_file = os.path.join(stems_path, f"{stem_id}.flac")
            if os.path.exists(stem_file):
                print(f"Removing {stem_file}")
                os.remove(stem_file)
            del metadata["stems"][stem_id]

        with open(metadata_path, "w") as f:
            yaml.dump(metadata, f, default_flow_style=False)

        remaining_stems = [os.path.join(stems_path, f"{stem_id}.flac") for stem_id in metadata["stems"] if os.path.exists(os.path.join(stems_path, f"{stem_id}.flac"))]
        print(f"Remaining stems: {remaining_stems}")

        if remaining_stems:
            mixed_audio = None
            for stem_file in remaining_stems:
                print(f"Mixing {stem_file}")
                audio = AudioSegment.from_file(stem_file)
                mixed_audio = audio if mixed_audio is None else mixed_audio.overlay(audio)
            print("Exporting mixed file")
            mixed_audio.export(mix_path, format="flac")
        else:
            print(f"Deleting track folder: {track_path}")
            for root, dirs, files in os.walk(track_path, topdown=False):
                for file in files:
                    file_path = os.path.join(root, file)
                    print(f"Deleting file: {file_path}")
                    os.remove(file_path)
                for dir in dirs:
                    dir_path = os.path.join(root, dir)
                    print(f"Deleting dir: {dir_path}")
                    os.rmdir(dir_path)
            print(f"Removing empty track folder: {track_path}")
            os.rmdir(track_path)

        print(f"Finished processing track: {track}")
        time.sleep(0.1)


Processing data set: slakh2100_flac_redux/reduced_train
Processing track: Track00001
Stems to remove: []
No stems removed for Track00001. Skipping mix update.
Processing track: Track00002
Stems to remove: ['S06']
Removing slakh2100_flac_redux/reduced_train\Track00002\stems\S06.flac
Remaining stems: ['slakh2100_flac_redux/reduced_train\\Track00002\\stems\\S00.flac', 'slakh2100_flac_redux/reduced_train\\Track00002\\stems\\S01.flac', 'slakh2100_flac_redux/reduced_train\\Track00002\\stems\\S08.flac', 'slakh2100_flac_redux/reduced_train\\Track00002\\stems\\S09.flac']
Mixing slakh2100_flac_redux/reduced_train\Track00002\stems\S00.flac
Mixing slakh2100_flac_redux/reduced_train\Track00002\stems\S01.flac
Mixing slakh2100_flac_redux/reduced_train\Track00002\stems\S08.flac
Mixing slakh2100_flac_redux/reduced_train\Track00002\stems\S09.flac
Exporting mixed file
Finished processing track: Track00002
Processing track: Track00003
Stems to remove: []
No stems removed for Track00003. Skipping mix upda