<a href="https://colab.research.google.com/github/guitorte/audio/blob/main/split_wav.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# @title 1. Install Demucs (Corrected)
# This cell installs the latest stable version of the Demucs library.
# This method is more reliable and resolves the dependency conflicts.

!python3 -m pip install -U demucs



In [None]:

# @title 2. Mount Your Google Drive
# Attaches your Google Drive to this Colab runtime so the following cells can
# read audio from it and write results back to it.
# Colab will ask you to authorize the connection.

from google.colab import drive

# Directory inside the runtime where Drive becomes visible.
DRIVE_MOUNT_POINT = '/content/drive'

drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:

# @title 3. Define Input and Output Folders
# This cell sets the paths for your audio files and where the separated stems will be saved.
# `input_folder` and `output_folder` are used by the cells that follow.

import os

# Path to the folder in your Google Drive containing the audio files.
input_folder = "/content/drive/MyDrive/demucs"

# Path where the separated stems will be saved.
output_folder = "/content/drive/MyDrive/demucs_separated"

# Warn right away if the input folder is missing: otherwise the problem only
# shows up later as a "no files found" message in the separation step.
if not os.path.isdir(input_folder):
    print(f"WARNING: Input folder not found: '{input_folder}'.")
    print("Make sure Google Drive is mounted (step 2) and that this folder exists.")

# Create the output folder if it doesn't exist.
os.makedirs(output_folder, exist_ok=True)

In [None]:

# @title 4. Run Demucs with Automatic GPU/CPU Detection
# This final cell is now "smart." It checks if a GPU is available.
# - If YES, it uses the GPU for fast separation.
# - If NO, it falls back to the CPU and warns you it will be much slower.

import torch
import os
import glob

# --- Step 1: Hardware Detection ---
if torch.cuda.is_available():
  device = "cuda"
  print("✅ GPU found! Using 'cuda' for fast processing.")
  print("If you want to switch to CPU, change the 'device' variable below to 'cpu'.")
else:
  device = "cpu"
  print("⚠️ No GPU found. The process will run on the CPU.")
  print("This will be MUCH SLOWER (potentially 10x or more).")
  print("RECOMMENDATION: Go to the menu -> 'Runtime' -> 'Change runtime type' -> Select 'T4 GPU' and restart the notebook.")

# --- Step 2: Find Audio Files ---
search_pattern = os.path.join(input_folder, '*.mp3')
track_paths = glob.glob(search_pattern)

if not track_paths:
  print(f"\nERROR: No .wav files were found in '{input_folder}'.")
  print("Please make sure your audio files are in that folder and have the .wav extension.")
else:
  print(f"\nFound {len(track_paths)} .wav file(s) to process:")
  for path in track_paths:
    print(f"- {os.path.basename(path)}")

  # Convert the list of file paths into a single string for the command line
  # We add quotes around each path to handle filenames with spaces
  files_to_process = " ".join([f'"{path}"' for path in track_paths])

  # --- Step 3: Run the Demucs Command ---
  print("\nStarting the separation process... This may take a while.")
  !python3 -m demucs -d "{device}" -n htdemucs_6s --mp3 --mp3-bitrate 320 -o "{output_folder}" {files_to_process}
  print("\n✅ Separation complete!")

✅ GPU found! Using 'cuda' for fast processing.
If you want to switch to CPU, change the 'device' variable below to 'cpu'.

Found 1 .wav file(s) to process:
- p do pecado.mp3

Starting the separation process... This may take a while.
Downloading: "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/5c90dfd2-34c22ccb.th" to /root/.cache/torch/hub/checkpoints/5c90dfd2-34c22ccb.th
100% 52.4M/52.4M [00:00<00:00, 179MB/s]
Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in /content/drive/MyDrive/demucs_separated/htdemucs_6s
Separating track /content/drive/MyDrive/demucs/p do pecado.mp3
100%|██████████████████████████████████████████████| 193.04999999999998/193.04999999999998 [00:08<00:00, 22.78seconds/s]

✅ Separation complete!


In [None]:
# @title 5. Install Basic Pitch (Spotify's Audio-to-MIDI Converter)
# Basic Pitch is a lightweight neural network by Spotify for automatic music transcription.
# It converts audio files into MIDI with pitch bend detection.
# Works best on isolated stems (single instruments), which is exactly what Demucs produces.
#
# NOTE: We install with --no-deps to avoid numpy version conflicts with the
# PyTorch/Demucs environment (numpy 2.x), then install missing deps separately.

# Step 1: Install basic-pitch without letting pip resolve deps (avoids numpy conflict)
!pip install "basic-pitch>=0.3.0" --no-deps --quiet

# Step 2: Install the ONNX runtime backend (lightweight, no TF needed)
!pip install onnxruntime --quiet

# Step 3: Install the remaining dependencies that aren't already in the Colab env
# (numpy, scipy, librosa, scikit-learn are already installed by Demucs)
!pip install pretty-midi mir-eval "resampy>=0.2.2,<0.4.3" --quiet

# Verify installation
try:
    import basic_pitch
    print(f"Basic Pitch {basic_pitch.__version__} installed successfully!")
except ImportError as e:
    print(f"Installation issue: {e}")
    print("Try restarting the runtime (Runtime -> Restart runtime) and run this cell again.")

In [None]:
# @title 6. Convert Stems to MIDI
# This cell converts each separated stem into a MIDI file using Basic Pitch.
# Each stem type has its own set of transcription parameters (see STEM_CONFIGS).
# MIDI files are written to "<output_folder>/midi/<track name>/<stem>.mid".
#
# NOTE on drums: Basic Pitch is a pitch detector, so it works best on tonal
# instruments (vocals, bass, piano, guitar). For the drums stem, it will capture
# pitched percussive elements (toms, kicks with tonal content) but won't produce
# a full GM drum map. The drums MIDI is still useful as a rhythmic reference
# that you can remap in your DAW.

import os
import glob
from basic_pitch.inference import predict, Model
from basic_pitch import ICASSP_2022_MODEL_PATH

# --- Per-stem optimized parameters ---
# These settings are tuned for each instrument type to get the best MIDI output.
# Keys match the stem file names (without extension); stems with any other
# name fall back to the "other" entry.
STEM_CONFIGS = {
    "vocals": {
        "onset_threshold": 0.4,
        "frame_threshold": 0.3,
        "minimum_note_length": 80,
        "minimum_frequency": 80.0,    # ~E2, lowest typical singing note
        "maximum_frequency": 1100.0,  # ~C#6, covers most vocal ranges
        "multiple_pitch_bends": False,
        "melodia_trick": True,
    },
    "bass": {
        "onset_threshold": 0.45,
        "frame_threshold": 0.35,
        "minimum_note_length": 100,
        "minimum_frequency": 28.0,    # ~A0, lowest bass notes
        "maximum_frequency": 400.0,   # ~G4, upper bass range
        "multiple_pitch_bends": False,
        "melodia_trick": True,
    },
    "drums": {
        "onset_threshold": 0.55,
        "frame_threshold": 0.4,
        "minimum_note_length": 30,    # Short for percussive hits
        "minimum_frequency": None,
        "maximum_frequency": None,
        "multiple_pitch_bends": False,
        "melodia_trick": False,
    },
    "piano": {
        "onset_threshold": 0.5,
        "frame_threshold": 0.3,
        "minimum_note_length": 60,
        "minimum_frequency": 27.5,    # A0, lowest piano key
        "maximum_frequency": 4186.0,  # C8, highest piano key
        "multiple_pitch_bends": False,
        "melodia_trick": True,
    },
    "guitar": {
        "onset_threshold": 0.45,
        "frame_threshold": 0.3,
        "minimum_note_length": 50,
        "minimum_frequency": 75.0,    # ~D2, drop D low string
        "maximum_frequency": 1400.0,  # ~F6, high fret harmonics
        "multiple_pitch_bends": True, # Guitar often uses bends/vibrato
        "melodia_trick": True,
    },
    "other": {
        "onset_threshold": 0.5,
        "frame_threshold": 0.3,
        "minimum_note_length": 80,
        "minimum_frequency": None,
        "maximum_frequency": None,
        "multiple_pitch_bends": False,
        "melodia_trick": True,
    },
}

# --- Find all stem directories from Demucs output ---
demucs_model_name = "htdemucs_6s"
stems_base_dir = os.path.join(output_folder, demucs_model_name)

if not os.path.isdir(stems_base_dir):
    print(f"ERROR: Demucs output directory not found at '{stems_base_dir}'.")
    print("Make sure you ran step 4 (Demucs separation) first.")
else:
    # Get all track directories (each separated track has its own subfolder)
    track_dirs = [
        d for d in sorted(os.listdir(stems_base_dir))
        if os.path.isdir(os.path.join(stems_base_dir, d))
    ]

    if not track_dirs:
        print(f"No separated tracks found in '{stems_base_dir}'.")
    else:
        # Load the Basic Pitch model ONCE for efficiency
        print("Loading Basic Pitch model...")
        bp_model = Model(ICASSP_2022_MODEL_PATH)
        print("Model loaded.\n")

        midi_output_base = os.path.join(output_folder, "midi")
        os.makedirs(midi_output_base, exist_ok=True)

        # Bookkeeping so the final report reflects what actually happened.
        converted_count = 0
        failed_stems = []  # (track name, stem name) pairs that raised an error

        for track_name in track_dirs:
            track_stem_dir = os.path.join(stems_base_dir, track_name)
            midi_track_dir = os.path.join(midi_output_base, track_name)
            os.makedirs(midi_track_dir, exist_ok=True)

            print(f"{'='*60}")
            print(f"Processing track: {track_name}")
            print(f"{'='*60}")

            # Find all audio stems (mp3 or wav).
            # The directory is escaped because track names often contain
            # characters such as '[' and ']' that glob would otherwise treat
            # as wildcard syntax, silently matching nothing.
            escaped_stem_dir = glob.escape(track_stem_dir)
            stem_files = sorted(
                glob.glob(os.path.join(escaped_stem_dir, "*.mp3"))
                + glob.glob(os.path.join(escaped_stem_dir, "*.wav"))
            )

            if not stem_files:
                print("\n  No .mp3 or .wav stems found in this folder; skipping.")
                continue

            for stem_path in stem_files:
                stem_name = os.path.splitext(os.path.basename(stem_path))[0]
                config = STEM_CONFIGS.get(stem_name, STEM_CONFIGS["other"])

                print(f"\n  Converting '{stem_name}' to MIDI...")

                # Best effort: a failure on one stem is reported and recorded,
                # but does not stop the remaining stems from being processed.
                try:
                    model_output, midi_data, note_events = predict(
                        audio_path=stem_path,
                        model_or_model_path=bp_model,
                        onset_threshold=config["onset_threshold"],
                        frame_threshold=config["frame_threshold"],
                        minimum_note_length=config["minimum_note_length"],
                        minimum_frequency=config["minimum_frequency"],
                        maximum_frequency=config["maximum_frequency"],
                        multiple_pitch_bends=config["multiple_pitch_bends"],
                        melodia_trick=config["melodia_trick"],
                    )

                    # Save the MIDI file
                    midi_filename = f"{stem_name}.mid"
                    midi_path = os.path.join(midi_track_dir, midi_filename)
                    midi_data.write(midi_path)
                    converted_count += 1

                    num_notes = len(note_events)
                    print(f"    -> Saved: {midi_path}")
                    print(f"    -> Notes detected: {num_notes}")

                    if stem_name == "drums" and num_notes > 0:
                        print(f"    -> TIP: Drums MIDI captures pitched elements only.")
                        print(f"       Remap notes to your drum kit in the DAW.")

                except Exception as e:
                    failed_stems.append((track_name, stem_name))
                    print(f"    -> ERROR converting '{stem_name}': {e}")

        print(f"\n{'='*60}")
        if failed_stems:
            print("MIDI conversion finished with errors.")
        else:
            print("MIDI conversion complete!")
        print(f"Stems converted: {converted_count} | failed: {len(failed_stems)}")
        for failed_track, failed_stem in failed_stems:
            print(f"  - FAILED: {failed_track}/{failed_stem}")
        print(f"All MIDI files saved to: {midi_output_base}")
        print(f"{'='*60}")

In [None]:
# @title 7. Summary - List All Generated Files
# Prints, for every separated track, the files in its stem folder and in its
# MIDI folder (with sizes), followed by tips for importing the MIDI into a DAW.

import os

RULE = "=" * 60

print(RULE)
print("  OUTPUT SUMMARY")
print(RULE)

# Where the Demucs stems and the generated MIDI files live
stems_dir = os.path.join(output_folder, demucs_model_name)
midi_dir = os.path.join(output_folder, "midi")

# Nothing is listed when the stems folder does not exist.
track_entries = sorted(os.listdir(stems_dir)) if os.path.exists(stems_dir) else []

for track_name in track_entries:
    track_path = os.path.join(stems_dir, track_name)

    # Only sub-folders represent separated tracks.
    if os.path.isdir(track_path):
        print(f"\nTrack: {track_name}")
        print("-" * 40)

        # Audio stems, sizes in megabytes
        print("  Audio Stems:")
        for stem_file in sorted(os.listdir(track_path)):
            megabytes = os.path.getsize(os.path.join(track_path, stem_file)) / (1024 * 1024)
            print(f"    {stem_file} ({megabytes:.1f} MB)")

        # MIDI files, sizes in kilobytes (only if this track has a MIDI folder)
        midi_track_path = os.path.join(midi_dir, track_name)
        if os.path.exists(midi_track_path):
            print("  MIDI Files:")
            for midi_file in sorted(os.listdir(midi_track_path)):
                kilobytes = os.path.getsize(os.path.join(midi_track_path, midi_file)) / 1024
                print(f"    {midi_file} ({kilobytes:.1f} KB)")

print(f"\n{RULE}")
print("  DAW IMPORT TIPS")
print(RULE)
print("""
1. Import each .mid file into a separate track in your DAW.
2. Assign appropriate virtual instruments:
   - vocals.mid  -> Synth lead / vocal synth
   - bass.mid    -> Bass synth / electric bass
   - piano.mid   -> Piano VST (already well-mapped)
   - guitar.mid  -> Guitar VST
   - drums.mid   -> Drum rack (remap notes to your kit)
   - other.mid   -> Pad / synth of choice
3. The MIDI files preserve timing from the original audio,
   so all tracks should align when imported together.
4. You may need to quantize and clean up notes in your DAW
   for a polished result.
""")