In [1]:
# This is how I needed to install madmom on my system
!pip install git+https://github.com/CPJKU/madmom
!pip install demucs

In [2]:
import os
import sys
import subprocess

import numpy as np
import pandas as pd
from tqdm import tqdm
import madmom
from pathlib import Path

from madmom.audio.signal import FramedSignalProcessor, Signal
from madmom.audio.stft import ShortTimeFourierTransformProcessor
from madmom.processors import SequentialProcessor
from madmom.audio.spectrogram import FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor

In [3]:
# --- Input locations ---------------------------------------------------------
# Folder holding the source MP3s and the CSV listing the tracks to process.
source_mp3_path = '../../datasets/HarmonixMP3_original/'
metadata_file = '../dataset/metadata.csv'

# --- Output locations --------------------------------------------------------
# Demucs stems are written below `demucs_path`; spectrogram .npy files below `specs_path`.
demucs_path = './tmp_demucs'
specs_path = './out_peter'

# --- Compute device passed to Demucs (`--device`): 'cuda', 'mps' or 'cpu' ----
device = 'cpu'

# --- Spectrogram settings ----------------------------------------------------
SAMPLE_RATE = 44100  # sample rate requested when loading audio
FPS = 100            # frames per second used for framing
FFT_SIZE = 2048      # frame size (in samples) used for framing
NUM_BANDS = 12       # `num_bands` given to the filtered spectrogram

# Make sure both output folders exist before anything is written to them.
for _out_dir in (demucs_path, specs_path):
    os.makedirs(_out_dir, exist_ok=True)

In [4]:
def get_processor():
    """Build the madmom pipeline that converts a signal into a spectrogram.

    The stages run in this order: framing (``FFT_SIZE`` samples per frame at
    ``FPS`` frames per second), short-time Fourier transform, filtered
    spectrogram (``num_bands=NUM_BANDS``) and logarithmic scaling. Every
    setting not listed here is left at madmom's default.

    Returns:
        SequentialProcessor: a callable applying the four stages in sequence.
    """
    return SequentialProcessor([
        FramedSignalProcessor(frame_size=FFT_SIZE, fps=FPS),
        ShortTimeFourierTransformProcessor(),
        FilteredSpectrogramProcessor(num_bands=NUM_BANDS),
        LogarithmicSpectrogramProcessor(),
    ])

In [5]:

def get_demucs_audio(id, audio_path):
    """Return the paths of the four Demucs stems of a track, separating it if needed.

    The stems are looked up under ``{demucs_path}/htdemucs/{id}/``. If all four
    already exist they are reused; otherwise Demucs is run on ``audio_path``
    in a subprocess with the ``htdemucs`` model on the configured ``device``.

    Args:
        id: Track identifier; used as the name of the stem folder.
            NOTE: Demucs is expected to name that folder after the input file
            name without extension, so ``id`` has to match the stem of
            ``audio_path`` - verify against the Demucs documentation.
        audio_path: Path of the audio file to separate.

    Returns:
        tuple[str, str, str, str]: paths of ``bass.wav``, ``drums.wav``,
        ``other.wav`` and ``vocals.wav`` (in that order).

    Raises:
        subprocess.CalledProcessError: if the Demucs process exits non-zero.
        FileNotFoundError: if Demucs finished but any expected stem is missing.
    """
    stem_dir = f'{demucs_path}/htdemucs/{id}'
    bass_path = f'{stem_dir}/bass.wav'
    drums_path = f'{stem_dir}/drums.wav'
    other_path = f'{stem_dir}/other.wav'
    vocals_path = f'{stem_dir}/vocals.wav'
    stem_paths = (bass_path, drums_path, other_path, vocals_path)

    # Reuse a previous separation only when it is complete.
    if all(os.path.exists(path) for path in stem_paths):
        return stem_paths

    subprocess.run(
        [
            sys.executable, '-m', 'demucs.separate',
            '--out', demucs_path,
            '--name', 'htdemucs',
            '--device', str(device),
            Path(audio_path).as_posix(),
        ],
        check=True,
    )

    # A zero exit status alone does not prove the stems were written where we
    # expect them (e.g. the input was skipped, or `id` does not match the file
    # name Demucs derived the folder from). Fail here with a clear message
    # instead of returning paths that break later during audio loading.
    missing = [path for path in stem_paths if not os.path.exists(path)]
    if missing:
        raise FileNotFoundError(
            f'Demucs did not produce the expected stems for {audio_path!r}: '
            f'missing {missing}'
        )

    return stem_paths

def _save_npy_atomic(target_path, array):
    """Save ``array`` to ``target_path`` in .npy format without ever exposing a partial file."""
    tmp_path = f'{target_path}.tmp'
    try:
        # An open file object is passed so that np.save writes exactly to
        # `tmp_path` (given a file name it would append '.npy').
        with open(tmp_path, 'wb') as tmp_file:
            np.save(tmp_file, array)
        # The final name only appears once the data is completely written.
        os.replace(tmp_path, target_path)
    finally:
        # Only still present when saving or renaming failed / was interrupted.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def generate_spectograms(id, audio_path):
    """Compute and store the spectrograms of a track and of its Demucs stems.

    Two files are written below ``specs_path``:

    * ``{id}_stems.npy`` - the spectrograms of the bass, drums, other and
      vocals stems (in that order) stacked along a new first axis.
    * ``{id}_full.npy``  - the spectrogram of the unseparated track.

    If both files already exist the track is skipped. Files are written through
    a temporary file and renamed into place, so an interrupted run cannot leave
    a truncated ``.npy`` behind that a later run would mistake for a finished
    result.

    Args:
        id: Track identifier, used for the output file names and for locating
            the Demucs stems.
        audio_path: Path of the track's audio file.

    Returns:
        bool: always ``True``.
    """
    spectogram_path_stems = f'{specs_path}/{id}_stems.npy'
    spectogram_path_full = f'{specs_path}/{id}_full.npy'
    if os.path.exists(spectogram_path_stems) and os.path.exists(spectogram_path_full):
        return True

    stem_audio_paths = get_demucs_audio(id, audio_path)

    pre_processor = get_processor()

    def _spectrogram(path):
        # Load as mono at SAMPLE_RATE and run it through the pipeline right
        # away, so only one decoded signal has to be kept in memory at a time.
        return pre_processor(Signal(path, num_channels=1, sample_rate=SAMPLE_RATE))

    spec = _spectrogram(audio_path)
    # Order follows get_demucs_audio: bass, drums, other, vocals.
    spec_stems = np.stack([_spectrogram(path) for path in stem_audio_paths])

    _save_npy_atomic(spectogram_path_stems, spec_stems)
    _save_npy_atomic(spectogram_path_full, spec)

    return True

In [None]:
# Generate the spectrogram files for every track listed in the metadata CSV.
metadata = pd.read_csv(metadata_file)
total_items = len(metadata)
# Iterate over the 'File' column directly (no need to build a Series per row)
# and use `track_id` rather than `id`, which would overwrite the builtin `id`
# for every cell executed afterwards in this kernel.
for track_id in tqdm(metadata['File'], total=total_items, desc="Processing"):
    # os.path.join avoids a doubled separator when source_mp3_path ends in '/'.
    audio_path = os.path.join(source_mp3_path, f'{track_id}.mp3')
    generate_spectograms(track_id, audio_path)