In [17]:
import os
import random
import yaml
import subprocess
import platform

# Dataset split to search (relative to the notebook's working directory).
# The code below expects each track to be a "Track*" entry in this directory
# with a metadata.yaml file inside it.
DIR_PATH = "slakh2100_flac_redux/reduced_test"

# Load metadata.yaml from a track folder
def load_metadata(track_path):
    """Load and parse the ``metadata.yaml`` file of a single track folder.

    Args:
        track_path: Path to a track directory.

    Returns:
        The parsed YAML document as produced by ``yaml.safe_load`` (``None``
        for an empty file), or ``None`` when ``track_path`` does not contain
        a regular file named ``metadata.yaml``.

    Raises:
        yaml.YAMLError: If the file exists but cannot be parsed.
    """
    yaml_path = os.path.join(track_path, "metadata.yaml")
    # isfile rather than exists: a directory that happens to be called
    # metadata.yaml means "no metadata", not a crash inside open().
    if not os.path.isfile(yaml_path):
        return None
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the notebook reads the files the same way on every OS.
    with open(yaml_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Search stems based on either program_num or plugin_name
def find_matching_stems(metadata, program_num=None, plugin_name=None):
    """Return the stems of one track that match the given search criteria.

    A stem matches when its ``program_num`` equals ``program_num`` OR its
    ``plugin_name`` equals ``plugin_name``. A criterion left as ``None`` is
    ignored, so calling with both set to ``None`` matches nothing.

    Args:
        metadata: Parsed track metadata (a dict with a ``"stems"`` mapping),
            or ``None``.
        program_num: Program number to look for, or ``None`` to skip.
        plugin_name: Plugin name to look for, or ``None`` to skip.

    Returns:
        A list of ``(stem_id, stem_data)`` tuples in the order the stems
        appear in the metadata. Empty when there is no usable metadata.
    """
    if not isinstance(metadata, dict):
        return []

    # A bare `stems:` key parses to None; treat that (and any other
    # non-mapping value) as "this track has no stems" instead of crashing.
    stems = metadata.get("stems")
    if not isinstance(stems, dict):
        return []

    def is_match(stem_data):
        # Malformed stem entries (e.g. null) cannot match anything.
        if not isinstance(stem_data, dict):
            return False
        by_program = (
            program_num is not None
            and stem_data.get("program_num") == program_num
        )
        by_plugin = (
            plugin_name is not None
            and stem_data.get("plugin_name") == plugin_name
        )
        return by_program or by_plugin

    return [
        (stem_id, stem_data)
        for stem_id, stem_data in stems.items()
        if is_match(stem_data)
    ]

# Open a file or folder in the system's default file explorer
def open_file_explorer(path):
    """Reveal ``path`` using the desktop's default handler.

    Windows goes through ``os.startfile``; macOS ("Darwin") and Linux run
    the ``open`` / ``xdg-open`` command respectively. On any other platform
    a message is printed and nothing is opened.
    """
    system_name = platform.system()

    if system_name == "Windows":
        os.startfile(path)
        return

    # Command-line launchers for the platforms that need a subprocess.
    launchers = {"Darwin": "open", "Linux": "xdg-open"}
    launcher = launchers.get(system_name)
    if launcher is None:
        print(f"Unsupported operating system: {system_name}")
        return

    subprocess.run([launcher, path])

# Find a random track and search for matching stems
def find_random_track(program_num=None, plugin_name=None, dir_path=None):
    """Print the matching stems of one randomly chosen track and open its folder.

    Tracks are visited in random order; the first track that has at least one
    stem matching ``program_num`` or ``plugin_name`` is reported and its
    folder is opened in the system file explorer.

    Args:
        program_num: Program number to search for, or ``None``.
        plugin_name: Plugin name to search for, or ``None``.
        dir_path: Dataset directory to search. Defaults to the notebook-level
            ``DIR_PATH``. Pass it explicitly to make the call independent of
            which cell assigned ``DIR_PATH`` last.

    Returns:
        None. All results are printed.

    Raises:
        OSError: If the dataset directory cannot be listed (for example, it
            does not exist).
    """
    if program_num is None and plugin_name is None:
        print("You must specify either program_num or plugin_name.")
        return

    if dir_path is None:
        # Looked up at call time so the existing notebook constant keeps
        # working as the default.
        dir_path = DIR_PATH

    track_folders = [f for f in os.listdir(dir_path) if f.startswith("Track")]
    if not track_folders:
        print("No tracks found.")
        return

    # Shuffle so that the first hit is a random one among all matching tracks.
    random.shuffle(track_folders)
    for track in track_folders:
        track_path = os.path.join(dir_path, track)
        metadata = load_metadata(track_path)
        matches = find_matching_stems(metadata, program_num, plugin_name)
        if not matches:
            continue

        print(f"Track: {track}")
        for stem_id, stem_data in matches:
            print(f"  Stem: {stem_id}")
            print(f"    program_num: {stem_data.get('program_num', 'N/A')}")
            print(f"    plugin_name: {stem_data.get('plugin_name', 'N/A')}")

        # Open the track folder in the file explorer and stop at the first hit.
        open_file_explorer(track_path)
        return

    print("No matching stems found.")

# Example usage: search by plugin_name. On a hit this prints the matching stems
# and opens the track folder in the file explorer.
# Alternative search by program_num (left disabled):
# find_random_track(program_num=6)
find_random_track(plugin_name="wurly_ep.nkm")

Track: Track02067
  Stem: S10
    program_num: 2
    plugin_name: wurly_ep.nkm


In [1]:
import os
import yaml

# Dataset split to scan (relative to the notebook's working directory).
# NOTE: DIR_PATH is also assigned in other cells of this notebook with a
# different split; functions that read the global use whichever assignment
# ran last in the kernel.
DIR_PATH = "slakh2100_flac_redux/reduced_train"

# Load metadata.yaml from a track folder
def load_metadata(track_path):
    """Load and parse the ``metadata.yaml`` file of a single track folder.

    Args:
        track_path: Path to a track directory.

    Returns:
        The parsed YAML document as produced by ``yaml.safe_load`` (``None``
        for an empty file), or ``None`` when ``track_path`` does not contain
        a regular file named ``metadata.yaml``.

    Raises:
        yaml.YAMLError: If the file exists but cannot be parsed.
    """
    yaml_path = os.path.join(track_path, "metadata.yaml")
    # isfile rather than exists: a directory that happens to be called
    # metadata.yaml means "no metadata", not a crash inside open().
    if not os.path.isfile(yaml_path):
        return None
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the notebook reads the files the same way on every OS.
    with open(yaml_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Iterate through the entire dataset and collect unique plugin names for a given class
def get_unique_plugins_by_class(inst_class, dir_path=None):
    """Print every distinct plugin name used by stems of one instrument class.

    Scans all ``Track*`` entries of the dataset directory, collects the
    ``plugin_name`` of each stem whose ``inst_class`` equals ``inst_class``,
    and prints a header line followed by the names in sorted order.

    Args:
        inst_class: Instrument class to filter on (compared with ``==``
            against each stem's ``inst_class`` value), e.g. ``"Piano"``.
        dir_path: Dataset directory to scan. Defaults to the notebook-level
            ``DIR_PATH``. Pass it explicitly to make the call independent of
            which cell assigned ``DIR_PATH`` last.

    Returns:
        None. All results are printed.

    Raises:
        OSError: If the dataset directory cannot be listed.
    """
    if dir_path is None:
        # Looked up at call time so the existing notebook constant keeps
        # working as the default.
        dir_path = DIR_PATH

    track_folders = [f for f in os.listdir(dir_path) if f.startswith("Track")]
    if not track_folders:
        print("No tracks found.")
        return

    unique_plugins = set()
    for track in track_folders:
        metadata = load_metadata(os.path.join(dir_path, track))
        if not metadata or "stems" not in metadata:
            continue

        # A bare `stems:` key parses to None; treat it as "no stems".
        for stem_data in (metadata["stems"] or {}).values():
            if not isinstance(stem_data, dict):
                continue
            if stem_data.get("inst_class") != inst_class:
                continue
            plugin = stem_data.get("plugin_name")
            # An explicit null plugin_name must not end up in the set:
            # sorting a mix of None and str raises TypeError.
            unique_plugins.add("Unknown" if plugin is None else plugin)

    # The filter is on inst_class, so label the header accordingly.
    print(f"[inst_class: {inst_class}]")
    for plugin in sorted(unique_plugins):
        print(plugin)

# Example usage: list every plugin name used by stems whose inst_class is "Piano".
get_unique_plugins_by_class("Piano")

[program_num: Piano]
alicias_keys.nkm
august_foerster_grand.nkm
concert_grand.nkm
grand_piano.nkm
harpsichord.nkm
ragtime_piano.nkm
scarbee_a_200.nkm
scarbee_clavinet_full.nkm
scarbee_mark_I.nkm
scarbee_pianet.nkm
the_gentleman.nkm
the_giant_hard_and_tough.nkm
the_giant_modern_studio.nkm
the_giant_vibrant.nkm
the_grandeur.nkm
upright_piano.nkm
wurly_ep.nkm


In [11]:
import os
import yaml
from collections import defaultdict

# Dataset split to scan (relative to the notebook's working directory).
DIR_PATH = "slakh2100_flac_redux/reduced_train"

# Plugin names to tally. Only Piano stems whose plugin_name is in this set get
# a per-plugin count; every Piano stem still counts toward the total.
# NOTE(review): this list matches the printed output of the "Piano" plugin scan
# earlier in the notebook, so it was presumably copied from there - re-check it
# if the dataset or split changes.
piano_plugins = {
    "alicias_keys.nkm",
    "august_foerster_grand.nkm",
    "concert_grand.nkm",
    "grand_piano.nkm",
    "harpsichord.nkm",
    "ragtime_piano.nkm",
    "scarbee_a_200.nkm",
    "scarbee_clavinet_full.nkm",
    "scarbee_mark_I.nkm",
    "scarbee_pianet.nkm",
    "the_gentleman.nkm",
    "the_giant_hard_and_tough.nkm",
    "the_giant_modern_studio.nkm",
    "the_giant_vibrant.nkm",
    "the_grandeur.nkm",
    "upright_piano.nkm",
    "wurly_ep.nkm",
}

# Per-plugin stem counts (only plugins listed in piano_plugins) and the total
# number of Piano stems seen, regardless of plugin.
plugin_counts = defaultdict(int)
total_piano_stems = 0

# Iterate through track directories
for track in os.listdir(DIR_PATH):
    track_path = os.path.join(DIR_PATH, track)
    metadata_path = os.path.join(track_path, "metadata.yaml")

    if not (os.path.isdir(track_path) and os.path.isfile(metadata_path)):
        continue

    try:
        # Decode explicitly as UTF-8 instead of relying on the locale encoding.
        with open(metadata_path, "r", encoding="utf-8") as file:
            metadata = yaml.safe_load(file)
    except yaml.YAMLError as e:
        # One unreadable file should not abort the whole scan.
        print(f"Error parsing {metadata_path}: {e}")
        continue

    # safe_load returns None for an empty file, and a bare `stems:` key parses
    # to None as well; both mean "nothing to count" rather than a crash.
    if not isinstance(metadata, dict):
        continue
    stems = metadata.get("stems") or {}
    if not isinstance(stems, dict):
        continue

    for stem_data in stems.values():
        if not isinstance(stem_data, dict):
            continue
        if stem_data.get("inst_class") == "Piano":
            total_piano_stems += 1
            plugin_name = stem_data.get("plugin_name")
            if plugin_name in piano_plugins:
                plugin_counts[plugin_name] += 1

# Report the total, then each plugin's count and share of all Piano stems,
# most frequent first.
print(f"Total Piano Stems: {total_piano_stems}\n")

# Descending by count; the sort is stable, so plugins with equal counts keep
# their insertion order.
ranked_plugins = sorted(
    plugin_counts.items(), key=lambda item: item[1], reverse=True
)
for plugin, count in ranked_plugins:
    if total_piano_stems > 0:
        percentage = (count / total_piano_stems) * 100
    else:
        percentage = 0
    print(f"{plugin}: {count} ({percentage:.2f}%)")


Total Piano Stems: 2130

scarbee_a_200.nkm: 237 (11.13%)
scarbee_mark_I.nkm: 199 (9.34%)
scarbee_pianet.nkm: 199 (9.34%)
wurly_ep.nkm: 196 (9.20%)
scarbee_clavinet_full.nkm: 180 (8.45%)
concert_grand.nkm: 109 (5.12%)
harpsichord.nkm: 103 (4.84%)
upright_piano.nkm: 100 (4.69%)
the_giant_vibrant.nkm: 97 (4.55%)
august_foerster_grand.nkm: 96 (4.51%)
grand_piano.nkm: 94 (4.41%)
the_grandeur.nkm: 93 (4.37%)
the_gentleman.nkm: 90 (4.23%)
alicias_keys.nkm: 90 (4.23%)
the_giant_modern_studio.nkm: 85 (3.99%)
the_giant_hard_and_tough.nkm: 83 (3.90%)
ragtime_piano.nkm: 79 (3.71%)
