In [12]:
#@title Mount Drive & define paths
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

from pathlib import Path

PROJECT_DIR = Path('/content/drive/MyDrive/ddsp-demucs')
DATASETS_DIR = Path('/content/drive/MyDrive/datasets')  # keep big corpora here
MUSDB_HQ_SRC = DATASETS_DIR / 'musdb18hq'              # expected dataset location

# Create project skeleton
for p in [
    PROJECT_DIR / 'data' / 'stems',
    PROJECT_DIR / 'data' / 'features',
    PROJECT_DIR / 'data' / 'tfrecords',
    PROJECT_DIR / 'exp',
    PROJECT_DIR / 'notebooks',
    PROJECT_DIR / 'env',
    PROJECT_DIR / 'src' / 'prepare',
    PROJECT_DIR / 'src' / 'train'
]:
    p.mkdir(parents=True, exist_ok=True)

# Symlink dataset into project
MUSDB_LINK = PROJECT_DIR / 'data' / 'musdb18hq'
if MUSDB_HQ_SRC.exists():
    if not MUSDB_LINK.exists():
        MUSDB_LINK.symlink_to(MUSDB_HQ_SRC, target_is_directory=True)
else:
    print("⚠️ MUSDB18-HQ not found at", MUSDB_HQ_SRC)
    print("  Please place WAV dataset at /MyDrive/datasets/musdb18hq")
print("Project dir:", PROJECT_DIR)
print("MUSDB18-HQ link present:", MUSDB_LINK.exists(), "->", MUSDB_LINK)

Mounted at /content/drive
Project dir: /content/drive/MyDrive/ddsp-demucs
MUSDB18-HQ link present: True -> /content/drive/MyDrive/ddsp-demucs/data/musdb18hq


In [13]:
#@title Save config: dataset roots, paths, thresholds
import yaml

cfg = {
    "dataset": {
        "kind": "hq",
        "root": str(MUSDB_LINK),
        "sample_rate": 44100
    },
    "paths": {
        "project": str(PROJECT_DIR),
        "stems_dir": str(PROJECT_DIR / "data" / "stems" / "demucs_htdemucs44k"),
        "features_dir": str(PROJECT_DIR / "data" / "features"),
        "tfrecords_dir": str(PROJECT_DIR / "data" / "tfrecords"),
        "exp_dir": str(PROJECT_DIR / "exp")
    },
    "mono_downmix": "avg_lr",   # (L+R)/2 for DDSP
    "gate_thresholds": {
        "f0_conf_min": 0.65,
        "harmonicity_ratio_min": 0.65,
        "residual_energy_ratio_max": 0.38,
        "mono_frame_fraction_min": 0.80,
        "mono_track_fraction_min": 0.88
    }
}
(PROJECT_DIR / "env").mkdir(parents=True, exist_ok=True)
with open(PROJECT_DIR / "env" / "config.yaml", "w") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)
print("✅ Wrote config at", PROJECT_DIR / "env" / "config.yaml")


✅ Wrote config at /content/drive/MyDrive/ddsp-demucs/env/config.yaml


In [14]:
#@title Install libraries (separation, audio I/O, evaluation)
!pip -q install musdb museval stempeg demucs torchmetrics librosa soundfile torchaudio -U

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/1.2 MB[0m [31m15.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.2/1.2 MB[0m [31m24.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.1/87.1 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.6/59.6 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata 

In [18]:
#@title Record versions & GPU info
import importlib, platform, torch, json, sys
pkgs = ['musdb','museval','stempeg','demucs','librosa','soundfile','torch','torchmetrics']
vers = {}
for p in pkgs:
    try:
        m = importlib.import_module(p)
        v = getattr(m, '__version__', 'n/a')
    except Exception as e:
        v = f'load-failed: {e}'
    vers[p] = v

env_info = {
    "python": sys.version,
    "platform": platform.platform(),
    "gpu_available": torch.cuda.is_available(),
    "gpu_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
    "packages": vers
}
with open(PROJECT_DIR / "env" / "env_manifest.json", "w") as f:
    json.dump(env_info, f, indent=2)
print(json.dumps(env_info, indent=2))


{
  "python": "3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]",
  "platform": "Linux-6.1.123+-x86_64-with-glibc2.35",
  "gpu_available": false,
  "gpu_name": null,
  "packages": {
    "musdb": "n/a",
    "museval": "n/a",
    "stempeg": "0.2.2",
    "demucs": "4.0.1",
    "librosa": "0.11.0",
    "soundfile": "0.13.1",
    "torch": "2.8.0+cu128",
    "torchmetrics": "1.8.1"
  }
}


In [19]:
#@title Verify dataset presence and load one WAV
import soundfile as sf
from pathlib import Path
import yaml

with open(PROJECT_DIR / "env" / "config.yaml") as f:
    cfg = yaml.safe_load(f)

root = Path(cfg['dataset']['root'])
assert root.exists(), f"MUSDB root not found at {root}. Please ensure musdb18_hq is symlinked."

# Try to find a mixture.wav
cand = sorted(root.glob("train/*/mixture.wav"))[:1]
assert cand, "Couldn't find train/*/mixture.wav under MUSDB18-HQ. Check dataset structure."
sig, sr = sf.read(str(cand[0]), always_2d=True)
print("✅ Loaded:", cand[0].relative_to(root), "| sr:", sr, "| shape:", sig.shape)

import torch
print("GPU:", torch.cuda.is_available(), "|", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU only")


✅ Loaded: train/A Classic Education - NightOwl/mixture.wav | sr: 44100 | shape: (7560512, 2)
GPU: False | CPU only


In [20]:
#@title Run tiny Demucs inference and save stereo+mono stems
import torchaudio, torch
from demucs.pretrained import get_model
from demucs.apply import apply_model
from pathlib import Path
import yaml

with open(PROJECT_DIR / "env" / "config.yaml") as f:
    cfg = yaml.safe_load(f)

mixture_path = cand[0]
wav, sr = torchaudio.load(str(mixture_path))  # shape: (2, T)
excerpt = wav[:, :sr*8]  # 8-second slice for quick test

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = get_model('htdemucs').to(device).eval()

with torch.inference_mode():
    # apply_model expects: (batch, channels, time)
    sources = apply_model(model, excerpt.unsqueeze(0).to(device), split=True, overlap=0.25)[0].cpu()
    # Demucs order: [drums, bass, other, vocals]
    vocals = sources[3]  # (2, T)

stems_dir = Path(cfg["paths"]["stems_dir"]) / "SMOKE_TEST"
stems_dir.mkdir(parents=True, exist_ok=True)
torchaudio.save(str(stems_dir / "vocals.stereo.wav"), vocals, sr)
torchaudio.save(str(stems_dir / "vocals.mono.wav"), vocals.mean(0, keepdim=True), sr)

print("✅ Saved Demucs stems to:", stems_dir)


  s = torchaudio.io.StreamReader(src, format, None, buffer_size)


Downloading: "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th" to /root/.cache/torch/hub/checkpoints/955717e8-8726e21a.th


100%|██████████| 80.2M/80.2M [00:01<00:00, 42.2MB/s]


✅ Saved Demucs stems to: /content/drive/MyDrive/ddsp-demucs/data/stems/demucs_htdemucs44k/SMOKE_TEST


  s = torchaudio.io.StreamWriter(uri, format=muxer, buffer_size=buffer_size)


In [21]:
#@title Extract F0 with librosa.pyin and RMS loudness
import numpy as np, librosa, soundfile as sf, math, yaml
from pathlib import Path

with open(PROJECT_DIR / "env" / "config.yaml") as f:
    cfg = yaml.safe_load(f)
stems_dir = Path(cfg["paths"]["stems_dir"]) / "SMOKE_TEST"

y, sr = sf.read(str(stems_dir / "vocals.mono.wav"))
if y.ndim > 1: y = y.mean(axis=1)

# Optional: resample to speed up F0 (keep consistent later in your pipeline)
target_sr = min(sr, 22050)
if sr != target_sr:
    y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
    sr = target_sr

# pyin params for singing (tune later)
fmin, fmax = librosa.note_to_hz('C2'), librosa.note_to_hz('C7')
f0, voiced_flag, voiced_prob = librosa.pyin(y, fmin=fmin, fmax=fmax, sr=sr, frame_length=2048, hop_length=int(0.01*sr))
rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=int(0.01*sr)).squeeze()

print(f"F0 frames: {np.sum(~np.isnan(f0))}/{len(f0)} voiced | Median F0 (Hz): {np.nanmedian(f0):.1f}")
print(f"RMS frames: {len(rms)} | Voiced prob (mean): {np.nanmean(voiced_prob):.3f}")


F0 frames: 604/802 voiced | Median F0 (Hz): 226.4
RMS frames: 802 | Voiced prob (mean): 0.362


In [25]:
import numpy as np, librosa, torchaudio

# Load GT & estimate
gt_wav, sr = torchaudio.load(str(mixture_path.with_name('vocals.wav')))
est_wav, _  = torchaudio.load(str(stems_dir / "vocals.mono.wav"))
gt = gt_wav.mean(0).numpy()
est = est_wav.mean(0).numpy()
T = min(len(gt), len(est)); gt = gt[:T]; est = est[:T]

# Pick a voiced region from GT (avoid silence)
rms = librosa.feature.rms(y=gt, frame_length=2048, hop_length=512).squeeze()
th = rms.max()*0.2  # 20% of max as a quick gate
mask = (rms > th).astype(np.float32)
if mask.sum() == 0:
    print("No voiced region found in excerpt—choose a different segment.")
else:
    # choose the largest contiguous voiced chunk (~5–10s)
    hop = 512; idx = np.where(mask)[0]
    start_f, end_f = idx[0], idx[-1]
    start, end = start_f*hop, min(len(gt), (end_f+1)*hop)
    gt_v, est_v = gt[start:end], est[start:end]

    def si_sdr(x, s, eps=1e-8):
        alpha = (x @ s) / (s @ s + eps)
        e_t = alpha * s
        e_n = x - e_t
        return 10*np.log10((np.sum(e_t**2)+eps)/(np.sum(e_n**2)+eps))
    print("SI-SDR on voiced segment (dB):", si_sdr(est_v, gt_v))


SI-SDR on voiced segment (dB): 8.483761


In [26]:
#@title Set seeds and freeze requirements
import os, random, numpy as np, torch

SEED = 1337
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)

with open(PROJECT_DIR / "env" / "seed.txt", "w") as f:
    f.write(str(SEED))
print("✅ Seed set:", SEED)

!pip freeze > "{PROJECT_DIR}/env/requirements_freeze.txt"
with open(PROJECT_DIR / "env" / "README_env.md", "w") as f:
    f.write("""# Environment Notes
- Run this notebook first in every new Colab runtime.
- Datasets live under /MyDrive/datasets, symlinked into data/.
- Heavy/conflicting installs (e.g., TensorFlow + DDSP, or CREPE) are done in their own notebooks (e.g., 04_train_ddsp.ipynb).
- All outputs go under data/ and exp/.
""")
print("✅ Wrote env/requirements_freeze.txt and env/README_env.md")


✅ Seed set: 1337
✅ Wrote env/requirements_freeze.txt and env/README_env.md
