In [None]:
# DSU fast bootstrap: cache wheels + site-packages (Kaggle/Colab)
import os, sys, subprocess, pathlib, json, shutil, re

# Backend selection: the presence of KAGGLE_URL_BASE is treated as Kaggle,
# a non-empty COLAB_GPU as Colab, anything else as a local run.
ENV = "kaggle" if "KAGGLE_URL_BASE" in os.environ else ("colab" if os.environ.get("COLAB_GPU") else "local")
print("DSU Worker backend:", ENV)

# Persistent locations
if ENV == "kaggle":
    DATASET = pathlib.Path("/kaggle/input/dsu-cache")
    BASE = pathlib.Path("/kaggle/working/DSU_cache")  # persists with Files on
else:
    try:
        from google.colab import drive
        drive.mount("/content/drive")
    except Exception as mount_err:
        # Best effort: outside Colab the import itself fails. Keep going, but
        # report the reason instead of hiding it, because BASE below lives
        # under the Drive mount point.
        print("Google Drive not mounted:", mount_err)
    # Path that never exists, so the dataset branch is skipped off Kaggle.
    DATASET = pathlib.Path("/not/attached")
    BASE = pathlib.Path("/content/drive/MyDrive/DSU_cache")

# Cache layout under BASE: downloaded wheels, the --target install dir,
# and model/checkpoint files.
WHEELS = BASE / "wheels"
SITEPKG = BASE / "site-packages"
MODELS = BASE / "models"
for cache_dir in (WHEELS, SITEPKG, MODELS):
    cache_dir.mkdir(parents=True, exist_ok=True)

# Model/checkpoint cache (Demucs): point the cache-related environment
# variables at MODELS.
os.environ["XDG_CACHE_HOME"] = str(MODELS)
os.environ["DEMUCS_CACHE"] = str(MODELS)
os.environ["HF_HOME"] = str(MODELS)

# Version pins
TORCH_VER = "2.8.0"
AUDIO_VER = "2.8.0"
DEMUCS_VER = "4.0.1"

# Detect CUDA flavor
def pick_cuda_flavors(smi_output):
    """Return PyTorch CUDA wheel flavors to try, most preferred first.

    Args:
        smi_output: Text printed by ``nvidia-smi`` (or any error text when
            the tool is unavailable).

    Returns:
        A new list of flavor tags such as ``'cu126'``. When no
        ``CUDA Version`` is present, or the reported version matches none of
        the known prefixes, the default order is returned so callers always
        have at least one flavor to try.
    """
    found = re.search(r'CUDA Version:\s*([0-9.]+)', smi_output)
    if found:
        ver = found.group(1)
        if ver.startswith('12.6'):
            return ['cu126', 'cu124', 'cu121', 'cu118']
        if ver.startswith('12.4'):
            return ['cu124', 'cu126', 'cu121', 'cu118']
        if ver.startswith('12.'):
            return ['cu126', 'cu121', 'cu118']
        if ver.startswith('11.8'):
            return ['cu118', 'cu121']
    # No version reported, or an unrecognized one: fall back to the default
    # order instead of an empty list (which would skip every download).
    return ['cu126', 'cu121', 'cu118']


smi = subprocess.getoutput('nvidia-smi')
flavor_order = pick_cuda_flavors(smi)
print('CUDA flavors preference:', flavor_order)

# Helpers
def install_from_wheels(wdir: pathlib.Path):
    """Install torch, torchaudio, Demucs and helpers from a local wheel folder.

    pip is run with ``--no-index`` so only wheels found in ``wdir`` are used,
    and with ``--target`` so packages land in ``SITEPKG``.

    Args:
        wdir: Directory containing the wheel files.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    print('Installing from cached wheels:', wdir)
    packages = ['torch', 'torchaudio', f'demucs=={DEMUCS_VER}', 'ffmpeg-python', 'numpy']
    pip_cmd = [sys.executable, '-m', 'pip', 'install']
    pip_cmd += ['--no-index', '--find-links', str(wdir)]
    pip_cmd += ['--upgrade', '--target', str(SITEPKG)]
    subprocess.check_call(pip_cmd + packages)

# Prefer wheels shipped in the attached Kaggle Dataset for instant cold-start.
# `used_dataset` records whether any dataset wheel folder installed cleanly.
used_dataset = False
if DATASET.exists():
    # Flavor-specific folders (wheels_<flavor>) first, in preference order.
    for flavor in flavor_order:
        flavor_dir = DATASET / f'wheels_{flavor}'
        if not flavor_dir.exists():
            continue
        try:
            install_from_wheels(flavor_dir)
        except Exception as err:
            print('Dataset wheels failed for', flavor, err)
        else:
            used_dataset = True
            break

    # Then the flavor-agnostic "wheels" folder, if nothing installed yet.
    generic_dir = DATASET / 'wheels'
    if not used_dataset and generic_dir.exists():
        try:
            install_from_wheels(generic_dir)
        except Exception as err:
            print('Dataset generic wheels failed', err)
        else:
            used_dataset = True

    # Optional: copy model files from the dataset, never overwriting
    # entries that already exist in MODELS.
    models_src = DATASET / 'models'
    if models_src.exists():
        for entry in models_src.iterdir():
            target = MODELS / entry.name
            if target.exists():
                continue
            if entry.is_dir():
                shutil.copytree(entry, target)
            else:
                shutil.copy2(entry, target)

# Drive-only cache: download once into WHEELS and reuse.
# The marker file records a completed install for this torch/demucs pin, so
# later runs skip the download entirely.
marker = SITEPKG / f".ok_torch{TORCH_VER}_demucs{DEMUCS_VER}"
if not used_dataset and not marker.exists():
    def download_wheels(cuda_flavor: str):
        """Download the pinned wheels for one CUDA flavor into WHEELS.

        Args:
            cuda_flavor: Flavor tag such as ``'cu126'``; it selects both the
                PyTorch index URL and the local version suffix.

        Returns:
            The pip exit status (0 on success).
        """
        idx = f"https://download.pytorch.org/whl/{cuda_flavor}"
        print('Downloading wheels', cuda_flavor, 'to', WHEELS)
        # Use the running interpreter's pip (as install_from_wheels does) so
        # wheels are resolved for this Python, not whichever `pip` is on PATH.
        return subprocess.call([
            sys.executable, '-m', 'pip', 'download', '-d', str(WHEELS),
            '--prefer-binary', '--only-binary=:all:',
            '--extra-index-url', idx,
            f'torch=={TORCH_VER}+{cuda_flavor}', f'torchaudio=={AUDIO_VER}+{cuda_flavor}',
            f'demucs=={DEMUCS_VER}', 'ffmpeg-python', 'numpy'
        ])

    ok = False
    for fl in flavor_order:
        if download_wheels(fl) != 0:
            print('Wheel download failed for', fl)
            continue
        try:
            install_from_wheels(WHEELS)
            ok = True
            break
        except Exception as e:
            # Fall through to the next flavor.
            print('Install from wheels failed for', fl, e)
    if not ok:
        raise RuntimeError('Failed to fetch/install Torch/Demucs wheels for available CUDA flavors')
    marker.write_text('ok')

# Put the cached site-packages at the front of sys.path (once).
site_entry = str(SITEPKG)
if site_entry not in sys.path:
    sys.path.insert(0, site_entry)

# Import check: fails here if the install above did not produce working
# packages.
import torch
import demucs
print('Boot OK | torch', torch.__version__, '| cuda', torch.cuda.is_available(), '| demucs', demucs.__version__)

# Persist the runtime mode for the watcher runner as PY_MODE.json.
if ENV == 'kaggle':
    BASE_RUN = pathlib.Path('/kaggle/working/M4L-Demucs')
else:
    BASE_RUN = pathlib.Path('/content/drive/MyDrive/M4L-Demucs')
BASE_RUN.mkdir(parents=True, exist_ok=True)
PMARK = BASE_RUN / 'PY_MODE.json'
py_mode = {
    'mode': 'target',
    'py': sys.executable,
    'site': site_entry,
    'base': str(BASE_RUN),
    'fast': True,
}
PMARK.write_text(json.dumps(py_mode))
print('PY_MODE written:', PMARK)



# DSU Worker (Kaggle)

Run all: installs the CUDA-matched PyTorch/Demucs wheels and then launches the watcher. The DSU root is /kaggle/working/M4L-Demucs (inside Kaggle's /kaggle/working).


In [None]:
# One-click venv (preferred) or virtualenv, else target-site + CUDA-correct installs (Kaggle)
import os, sys, subprocess, pathlib, re, json
# Working layout: everything lives under BASE.
BASE = pathlib.Path('/kaggle/working/M4L-Demucs')
VENV = BASE / '.venv'
PY = VENV / 'bin' / 'python'
SITE_TARGET = BASE / '.py-site' / 'site-packages'
PMARK = BASE / 'PY_MODE.json'
BASE.mkdir(parents=True, exist_ok=True)

print('Kaggle bootstrap. BASE=', BASE)
# Query the driver once; the output is shown to the user and parsed below.
smi = subprocess.getoutput('nvidia-smi')
print(smi)

# Outcome of environment setup:
#   mode     - 'venv' (isolated interpreter) or 'target' (pip --target dir)
#   py_exec  - interpreter used for installs and, later, for the watcher
#   site_dir - --target directory, only set in 'target' mode
mode = 'venv'
py_exec = None
site_dir = ''

# Try Python venv first (with --copies to avoid symlink issues)
try:
    # Key the reuse check on the interpreter, not the directory: a previous
    # failed attempt can leave a .venv folder without a usable bin/python.
    if not PY.exists():
        subprocess.check_call([sys.executable, '-m', 'venv', '--copies', str(VENV)])
        subprocess.check_call([str(PY), '-m', 'pip', 'install', '--upgrade', 'pip', 'setuptools', 'wheel'])
    py_exec = str(PY)
except Exception as e1:
    print('python -m venv failed:', e1)
    # Try virtualenv (often more robust on hosted envs)
    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', 'virtualenv'])
        subprocess.check_call([sys.executable, '-m', 'virtualenv', '--download', str(VENV)])
        py_exec = str(PY)
        print('virtualenv succeeded')
    except Exception as e2:
        # Last resort: install into a plain directory with the current
        # interpreter and expose it through PYTHONPATH.
        print('virtualenv failed, using --target site-packages fallback:', e2)
        mode = 'target'
        site_dir = str(SITE_TARGET)
        os.makedirs(site_dir, exist_ok=True)
        py_exec = sys.executable

# Detect CUDA and choose torch index/spec
m = re.search(r'CUDA Version:\s*([0-9.]+)', smi)
if not m:
    raise RuntimeError('GPU not detected. In Kaggle: Settings → Accelerator → GPU, then Run all.')
ver = m.group(1)
if ver.startswith('12.4'):
    fl, torch_ver, ta_ver = 'cu124', '2.5.1', '2.5.1'
elif ver.startswith('11.8'):
    fl, torch_ver, ta_ver = 'cu118', '2.1.2', '2.1.2'
else:
    # 12.6, any other 12.x, and unrecognized versions all use cu126.
    fl, torch_ver, ta_ver = 'cu126', '2.8.0', '2.8.0'
idx = f'https://download.pytorch.org/whl/{fl}'

print('CUDA flavor:', fl)

# NOTE(review): numpy is pinned to 2.3.4 for every torch version chosen above;
# confirm that pin is compatible with the older torch 2.1.2 (cu118) branch.
if mode == 'venv':
    # Torch/torchaudio with the CUDA local-version suffix, inside the venv
    subprocess.check_call([py_exec, '-m', 'pip', 'install', '--extra-index-url', idx,
                           f'torch=={torch_ver}+{fl}', f'torchaudio=={ta_ver}+{fl}'])
    # Demucs and deps inside venv (no global bleed)
    subprocess.check_call([py_exec, '-m', 'pip', 'install', 'demucs==4.0.1', 'torchcodec', 'ffmpeg-python', 'numpy==2.3.4'])
    # Verify
    subprocess.run([py_exec, '-c', "import torch, demucs; print('Boot OK.', 'torch', torch.__version__, 'cuda', torch.cuda.is_available(), 'demucs', demucs.__version__)"] , check=True)
else:
    # Target install into site_dir and use PYTHONPATH
    subprocess.check_call([py_exec, '-m', 'pip', 'install', '--extra-index-url', idx,
                           '--target', site_dir, f'torch=={torch_ver}+{fl}', f'torchaudio=={ta_ver}+{fl}'])
    subprocess.check_call([py_exec, '-m', 'pip', 'install', '--target', site_dir,
                           'demucs==4.0.1', 'torchcodec', 'ffmpeg-python', 'numpy==2.3.4'])
    env = os.environ.copy()
    # Join only non-empty parts so an unset PYTHONPATH does not leave a
    # trailing separator (an empty path entry) behind.
    env['PYTHONPATH'] = os.pathsep.join(p for p in (site_dir, env.get('PYTHONPATH', '')) if p)
    subprocess.run([py_exec, '-c', "import torch, demucs; import os; print('Boot OK.', 'torch', torch.__version__, 'cuda', torch.cuda.is_available(), 'demucs', demucs.__version__, 'mode', 'target')"], check=True, env=env)

# Persist run mode for next cell
PMARK.write_text(json.dumps({'mode': mode, 'py': py_exec, 'site': site_dir, 'base': str(BASE)}))
print('PY_MODE:', PMARK.read_text())


In [None]:
# Launch watcher (Kaggle)
import urllib.request, pathlib, os, json, subprocess
BASE = pathlib.Path('/kaggle/working/M4L-Demucs')
# Make sure the download target exists even when this cell is run on its own.
BASE.mkdir(parents=True, exist_ok=True)
w = BASE/'colab_watcher.py'
# NOTE(review): the script is fetched from the repository's `main` branch and
# then executed; consider pinning the URL to a specific commit.
urllib.request.urlretrieve('https://raw.githubusercontent.com/VSTOPIA/Doctor-Sample-Unit-DSU/main/colab_watcher.py', w)
print('Watcher saved to', w)

# Load runtime mode from previous cell; fall back to the defaults below when
# PY_MODE.json is missing or unreadable.
pmark = BASE/'PY_MODE.json'
mode = 'venv'
py_exec = 'python'
site_dir = ''
try:
    data = json.loads(pmark.read_text())
    mode = data.get('mode', 'venv')
    py_exec = data.get('py', 'python')
    site_dir = data.get('site', '')
except Exception as err:
    # Still best effort, but say why the defaults are being used.
    print('Could not read', pmark, '- using defaults:', err)

# Configuration handed to the watcher through the environment.
os.environ['DSU_ROOT'] = str(BASE)
os.environ['DSU_REMOTE_JOBS_URL'] = 'https://raw.githubusercontent.com/VSTOPIA/Doctor-Sample-Unit-DSU/main/remote_jobs.jsonl'
print('DSU_ROOT=', os.environ['DSU_ROOT'])
print('DSU_REMOTE_JOBS_URL=', os.environ['DSU_REMOTE_JOBS_URL'])

env = os.environ.copy()
if mode == 'target' and site_dir:
    # Target mode: expose the --target install through PYTHONPATH. Join only
    # non-empty parts so an unset PYTHONPATH leaves no trailing separator.
    env['PYTHONPATH'] = os.pathsep.join(p for p in (site_dir, env.get('PYTHONPATH', '')) if p)
rc = subprocess.call([py_exec, str(w)], env=env)
if rc != 0:
    print('Watcher exited with code', rc)
