A one-song example of Music Source Separation using Nonnegative Matrix Factorization (NMF).

Find more details about the principle of the method in [1].

References:

[1] Vincent, E., Virtanen, T., & Gannot, S. (Eds.). (2018). Audio source separation and speech enhancement. John Wiley & Sons.

In [None]:
import nmf_audio_benchmark.dataloaders.music.mss_dataloader as mss_dl
import nmf_audio_benchmark.tasks.music.mss as mss
import nmf_audio_benchmark.algorithms.nn_fac_nmf as nn_fac_nmf

import base_audio.audio_helper as audio_helper

In [None]:
# Location of the MUSDB18 dataset on disk (machine-specific: adapt to your setup)
datapath = '/home/a23marmo/datasets/musdb18'

# Instantiate the dataloader, requesting the "stft_complex" feature.
dataset = mss_dl.MusDBDataloader(
    datapath=datapath,
    feature="stft_complex",
)

In [None]:
# Fetch the first item of the dataset: its identifier, the (magnitude, phase)
# pair of its spectrogram, the reference stems and their labels.
track_id, (mag, phase), stems, stems_labels = dataset[0]

# Play the mixture, rebuilt from the magnitude and its original phase
print("Original sound")
audio_helper.listen_to_this_spectrogram(
    mag,
    dataset.feature_object,
    phase_retrieval="original_phase",
    original_phase=phase,
)

# Play every reference stem, skipping those that are entirely zero
print("Original sources")
for i, stem in enumerate(stems):
    print(f"Source: {stems_labels[i]} - ({i+1}/{len(stems)})")
    if (stem == 0).all():
        # An all-zero stem has nothing to play
        print("Silent source")
        continue
    audio_helper.listen_to_this_signal(stem)

In [None]:
# Instantiate the NMF object.
# NOTE(review): the positional 10 is presumably the factorization rank
# (number of components) - confirm against nn_fac_nmf.unconstrained_NMF.
nmf = nn_fac_nmf.unconstrained_NMF(
    10,
    beta=2,
    init="nndsvd",
    nmf_type="unconstrained",
    normalize=[True, False],
)

# Factorize the magnitude spectrogram into W and H.
# The feature object is only useful when the init is set to "harmonic".
print("Computing NMF...")
W, H = nmf.run(data=mag, feature_object=dataset.feature_object)
print("NMF done")

In [None]:
# Build the estimated sources from the NMF factors, reusing the original phase
estimated_sources = mss.estimate_sources(
    W,
    H,
    dataset.feature_object,
    phase_retrieval="original_phase",
    phase=phase,
)

# Score the estimates against the reference stems, once with scaling and once
# without; the unscaled variant is reported as "SNR" below.
si_sdr, idx_argmax = mss.evaluate_si_sdr(stems, estimated_sources, scaling=True)
snr, idx_argmax_snr = mss.evaluate_si_sdr(stems, estimated_sources, scaling=False)

# Report the scores of each estimated source, then play it.
# NOTE(review): the value printed as "SDR" is the scaled score (si_sdr).
for idx, estimate in enumerate(estimated_sources):
    print(f"Estimated source {idx+1}/{len(estimated_sources)}")
    print(f"SDR: {si_sdr[idx]}, SNR: {snr[idx]}, Closest original stem: {stems_labels[idx_argmax[idx]]}")
    audio_helper.listen_to_this_signal(estimate)
