In [2]:
import os
import yaml
import shutil
from pydub import AudioSegment

# Build a small "isolated instrument" subset of the dataset.
#
# For each target instrument class, up to N tracks are selected from DIR_PATH.
# Each track is assigned to at most one class. For every selected track, a
# folder is written to OUTPUT_DIR containing:
#   - mix.flac           : a copy of the selected stem (the "mix" is that stem alone)
#   - stems/<stem>.flac  : the same stem under its original name
#   - metadata.yaml      : the original metadata with "stems" reduced to that stem

# Define source and target directories
DIR_PATH = "slakh2100_flac_redux/reduced_train"
OUTPUT_DIR = "slakh2100_flac_redux/reduced_isolated"
N = 10  # Number of tracks per class

# Target instrument classes and their row indices
# (only the keys are used in this cell; the index values are not read here)
TARGET_CLASSES = {"Piano": 0, "Guitar": 1, "Bass": 2, "Strings": 3, "Drums": 4}

# Dictionary to store selected tracks per class
selected_tracks = {cls: [] for cls in TARGET_CLASSES}
used_tracks = set()  # Set to track already assigned tracks

# Iterate through each track folder.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# selection reproducible across runs and machines.
for track in sorted(os.listdir(DIR_PATH)):
    # Skip if track is already assigned
    if track in used_tracks:
        continue

    track_path = os.path.join(DIR_PATH, track)
    metadata_path = os.path.join(track_path, "metadata.yaml")
    stems_path = os.path.join(track_path, "stems")

    if not os.path.isdir(track_path) or not os.path.exists(metadata_path):
        continue

    # YAML is read as UTF-8 explicitly rather than relying on the platform's
    # default locale encoding.
    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata = yaml.safe_load(f)

    # An empty YAML document loads as None, and "stems" may be missing or
    # null; none of those cases can contribute a stem, so skip the track.
    stems = metadata.get("stems") if isinstance(metadata, dict) else None
    if not isinstance(stems, dict):
        continue

    # Identify instrument classes present in this track.
    # If several stems share a class, the last one listed wins.
    track_classes = {}
    for stem_id, stem_data in stems.items():
        inst_class = stem_data.get("inst_class", "") if isinstance(stem_data, dict) else ""
        if inst_class in TARGET_CLASSES:
            track_classes[inst_class] = stem_id

    # Add track to the first class that still has room AND whose stem file
    # actually exists on disk, then mark the track as used.
    for inst_class, stem_id in track_classes.items():
        if len(selected_tracks[inst_class]) >= N:
            continue  # class already full; try the next class of this track

        stem_file = os.path.join(stems_path, f"{stem_id}.flac")
        if not os.path.exists(stem_file):
            # The metadata lists this stem but its audio file is missing.
            # Keep looking instead of discarding the whole track.
            continue

        # Create track directory directly in reduced_isolated/
        target_track_dir = os.path.join(OUTPUT_DIR, track)
        os.makedirs(target_track_dir, exist_ok=True)

        # Create stems directory
        target_stem_dir = os.path.join(target_track_dir, "stems")
        os.makedirs(target_stem_dir, exist_ok=True)

        # Copy and rename the stem file as mix.flac
        target_mix_path = os.path.join(target_track_dir, "mix.flac")
        shutil.copy(stem_file, target_mix_path)
        print(f"Copied {stem_file} to {target_mix_path}")

        # Copy the original stem file into the stems/ directory
        target_stem_path = os.path.join(target_stem_dir, f"{stem_id}.flac")
        shutil.copy(stem_file, target_stem_path)
        print(f"Copied {stem_file} to {target_stem_path}")

        # Write a metadata file that keeps only the relevant stem. A shallow
        # copy is enough because only the top-level "stems" key is replaced,
        # and it avoids re-reading the file or mutating the loaded metadata.
        target_metadata_path = os.path.join(target_track_dir, "metadata.yaml")
        isolated_metadata = dict(metadata)
        isolated_metadata["stems"] = {stem_id: stems[stem_id]}
        with open(target_metadata_path, "w", encoding="utf-8") as f:
            yaml.dump(isolated_metadata, f, default_flow_style=False)
        print(f"Updated metadata file at {target_metadata_path}")

        # Record the assignment only after the export succeeded.
        selected_tracks[inst_class].append(track)
        used_tracks.add(track)
        break  # one class per track

    # Stop early if all classes have enough tracks
    if all(len(v) >= N for v in selected_tracks.values()):
        break

# Print selected tracks
for inst_class, tracks in selected_tracks.items():
    print(f"{inst_class} ({len(tracks)} tracks):")
    for track in tracks:
        print(f"  - {track}")
    print()


Copied slakh2100_flac_redux/reduced_train\Track00001\stems\S08.flac to slakh2100_flac_redux/reduced_isolated\Track00001\mix.flac
Copied slakh2100_flac_redux/reduced_train\Track00001\stems\S08.flac to slakh2100_flac_redux/reduced_isolated\Track00001\stems\S08.flac
Updated metadata file at slakh2100_flac_redux/reduced_isolated\Track00001\metadata.yaml
Copied slakh2100_flac_redux/reduced_train\Track00002\stems\S01.flac to slakh2100_flac_redux/reduced_isolated\Track00002\mix.flac
Copied slakh2100_flac_redux/reduced_train\Track00002\stems\S01.flac to slakh2100_flac_redux/reduced_isolated\Track00002\stems\S01.flac
Updated metadata file at slakh2100_flac_redux/reduced_isolated\Track00002\metadata.yaml
Copied slakh2100_flac_redux/reduced_train\Track00003\stems\S00.flac to slakh2100_flac_redux/reduced_isolated\Track00003\mix.flac
Copied slakh2100_flac_redux/reduced_train\Track00003\stems\S00.flac to slakh2100_flac_redux/reduced_isolated\Track00003\stems\S00.flac
Updated metadata file at slakh21