#### Date: Jun 2019 (*Review: March 2024*)

<br>Programmer: Christian Dittmar, Yiğitcan Özer
<br>This is the demo script that illustrates the main functionalities of the 'NMF toolbox'. For a detailed description, we refer to [1,2] (see References below).

#### The script proceeds in the following steps:
<br>1. It loads an example audio file containing drums and melodic instruments.
<br>2. It computes the STFT of the audio data.
<br>3. It applies KAM and NMF as described in [2], with random initialization of the NMF components. The final classification into harmonic and percussive components is done according to the percussiveness threshold p_thresh = 0.25 as given in [2].
<br>4. It visualizes the decomposition results.
<br>5. It resynthesizes the separated audio streams and saves them as wav files to the hard drive.

### Initialization

In [None]:
import os
import numpy as np
import soundfile as sf
import IPython.display as ipd

from libnmfd.core.nmfconv import conv_model, init_activations, init_templates, nmfd
from libnmfd.dsp.algorithms import hpss_kam_fitzgerald
from libnmfd.dsp.filters import alpha_wiener_filter
from libnmfd.dsp.transforms import forward_stft, inverse_stft, log_freq_log_mag
from libnmfd.utils import make_monaural
from libnmfd.utils.core_utils import drum_specific_soft_constraints_nmf, \
    percussiveness_estimation, visualize_components_kam, visualize_components_nmf

# directory holding the example audio and directory receiving all results
INPUT_DIR = 'data/'
OUT_DIR = 'output/'

# create the output directory if it doesn't exist; exist_ok=True avoids the
# race between a separate existence check and the actual creation
os.makedirs(OUT_DIR, exist_ok=True)

# name of the example mixture, looked up inside INPUT_DIR
filename = 'runningExample_IGotYouMixture.wav'

### 1. Load the audio signal

In [None]:
# load the example mixture; sf.read returns the samples and the sampling rate
x, fs = sf.read(file=os.path.join(INPUT_DIR, filename))

# downmix to a single channel if necessary
x = make_monaural(x)

### 2. Compute STFT

In [None]:
# STFT analysis parameters (in samples)
BLOCK_SIZE = 2048
HOP_SIZE = 512

# forward STFT of the mixture: X provides the spectrogram dimensions, while
# A and P are used further below as magnitude and phase, respectively
X, A, P = forward_stft(
    x,
    block_size=BLOCK_SIZE,
    hop_size=HOP_SIZE,
    reconst_mirror=True,
    append_frames=True,
)

# spacing of the time and frequency axes, derived from the STFT parameters
time_res = HOP_SIZE / fs
freq_res = fs / BLOCK_SIZE

# number of frequency bins and time frames of the spectrogram
num_bins, num_frames = X.shape

# logarithmically-spaced frequency axis version, for visualization purposes
log_freq_log_mag_A, log_freq_axis = log_freq_log_mag(A=A, freq_res=freq_res)
num_log_bins = len(log_freq_axis)

### 3. Apply KAM-based Harmonic Percussive Separation

In [None]:
# number of KAM iterations
num_iter_kam = 30

# KAM-based separation of the magnitude spectrogram; the returned kernels
# are passed on to the NMF stage further below
kam_A, kern, kern_ord = hpss_kam_fitzgerald(X=A, num_iter=num_iter_kam, kern_dim=13)

# plot the KAM components
fh1 = visualize_components_kam(
    kam_A,
    time_res=time_res,
    freq_res=freq_res,
    font_size=14,
)

# store the figure in the output directory
fh1.savefig(os.path.join(OUT_DIR, 'demoDrumExtractionKAM_NMF_percThreshold_KAM.png'))

In [None]:
# resynthesized signals, kept for the audio players below
audios = []

# resynthesize the two KAM components, re-using the phase P of the mixture
for k in range(2):
    Y = kam_A[k] * np.exp(1j * P)
    y, _ = inverse_stft(
        X=Y,
        block_size=BLOCK_SIZE,
        hop_size=HOP_SIZE,
        reconst_mirror=True,
        append_frames=True,
        num_samp=len(x),
    )
    audios.append(y)

    # write the component to the output directory
    out_filepath = os.path.join(
        OUT_DIR,
        f'demoDrumExtractionKAM_NMF_percThreshold_KAM_component_{k}_extracted_from_{filename}',
    )
    sf.write(file=out_filepath, samplerate=fs, data=y)

#### Input audio mixture

In [None]:
# audio player for the (monaural) input mixture x at sampling rate fs
ipd.Audio(x, rate=fs)

#### KAM-based percussive component

In [None]:
# audio player for the first resynthesized KAM component (index 0)
# NOTE(review): the transpose presumably adapts the array layout expected by
# ipd.Audio; it is a no-op for 1-D signals - confirm against inverse_stft
ipd.Audio(audios[0].T, rate=fs)

#### KAM-based harmonic component

In [None]:
# audio player for the second resynthesized KAM component (index 1)
# NOTE(review): the transpose presumably adapts the array layout expected by
# ipd.Audio; it is a no-op for 1-D signals - confirm against inverse_stft
ipd.Audio(audios[1].T, rate=fs)

In [None]:
# stack the two KAM estimates on top of each other to form the new NMF target
V = np.concatenate([kam_A[0], kam_A[1]], axis=0)
num_double_bins = V.shape[0]

# matrix [I I] that reverts the stacking by adding up both halves again
accu_mat = np.hstack([np.eye(num_bins), np.eye(num_bins)])

### 4. Apply NMF with drum-specific soft constraints to KAM-based target

In [None]:
# NMF settings
num_iter_nmf = 60
num_comp = 30
num_template_frames = 1

# random templates covering 2 times the original frequency range
init_W = init_templates(
    num_comp=num_comp,
    num_bins=num_double_bins,
    num_template_frames=num_template_frames,
    strategy='random',
)

# uniform activations
init_H = init_activations(
    num_comp=num_comp,
    num_frames=num_frames,
    strategy='uniform',
)

# NMFD core method, applied to the stacked KAM target
nmfd_W, nmfd_H, _, _, tensor_W = nmfd(
    V=V,
    num_comp=num_comp,
    num_frames=num_frames,
    num_iter=num_iter_nmf,
    num_template_frames=num_template_frames,
    init_W=init_W,
    init_H=init_H,
    num_bins=num_double_bins,
    # soft constraint parameters: preprocessing hook, KAM kernels and decay
    func_preprocess=drum_specific_soft_constraints_nmf,
    kern=kern,
    decay=0.75,
)

In [None]:
# percussiveness threshold for the final harmonic/percussive decision,
# p_thresh = 0.25 as given in [2]
p_thresh = 0.25

# get final percussiveness estimate of the learned templates
perc_weight = percussiveness_estimation(tensor_W)

# re-order components by descending percussiveness, only for visualization
idx_sorted_rev = np.argsort(-perc_weight)

tensor_W = tensor_W[:, idx_sorted_rev, :]
nmfd_H = nmfd_H[idx_sorted_rev, :]
nmfd_W = [nmfd_W[idx] for idx in idx_sorted_rev]
perc_weight = perc_weight[idx_sorted_rev]

# perform final thresholding: weight 1 marks a percussive component,
# weight 0 a harmonic one
perc_weight = np.where(perc_weight > p_thresh, 1.0, 0.0)

# compute separate models for percussive and harmonic part
# in the case of num_template_frames=1, this step equals eq. (1) in [2]
Vp = conv_model(W=tensor_W, H=np.diag(perc_weight) @ nmfd_H)
Vh = conv_model(W=tensor_W, H=np.diag(1 - perc_weight) @ nmfd_H)

In [None]:
# revert the stacking by accumulating both models back to the original
# spectrum; this step is described in the last paragraph of sec. 2.4 in [2]
Ap, Ah = accu_mat @ Vp, accu_mat @ Vh

# alpha-Wiener filtering of the mixture magnitude with the two model estimates
nmfd_A, _ = alpha_wiener_filter(
    mixture_X=A,
    source_A=[Ap, Ah],
    alpha=1.0,
)

In [None]:
# reduced version of the templates (stacking reverted) for visualization
nmfdW_vis = [accu_mat @ nmfdW_curr for nmfdW_curr in nmfd_W]

# plot templates, activations and the separated component spectrograms
fh2, _ = visualize_components_nmf(
    V=A,
    W=nmfdW_vis,
    H=nmfd_H,
    comp_V=nmfd_A,
    freq_res=freq_res,
    time_res=time_res,
    font_size=14,
)

# store the figure in the output directory
fh2.savefig(os.path.join(OUT_DIR, 'demoDrumExtractionKAM_NMF_percThreshold_NMF.png'))

In [None]:
# resynthesized signals, kept for the audio players below
audios = []

# resynthesize the results of NMF with soft constraints, re-using the
# phase P of the mixture
for k in range(2):
    Y = nmfd_A[k] * np.exp(1j * P)
    y, _ = inverse_stft(
        X=Y,
        block_size=BLOCK_SIZE,
        hop_size=HOP_SIZE,
        reconst_mirror=True,
        append_frames=True,
        num_samp=len(x),
    )
    audios.append(y)

    # write the component to the output directory
    out_filepath = os.path.join(
        OUT_DIR,
        f'demoDrumExtractionKAM_NMF_percThreshold_NMF_component_{k}_extracted_from_{filename}',
    )
    sf.write(file=out_filepath, samplerate=fs, data=y)

#### Input audio mixture

In [None]:
# audio player for the (monaural) input mixture x at sampling rate fs
ipd.Audio(x, rate=fs)

#### Percussive component based on KAM + NMF

In [None]:
# audio player for the first resynthesized KAM + NMF component (index 0,
# built from Ap in the Wiener filtering step)
# NOTE(review): the transpose presumably adapts the array layout expected by
# ipd.Audio; it is a no-op for 1-D signals - confirm against inverse_stft
ipd.Audio(audios[0].T, rate=fs)

#### Harmonic component based on KAM + NMF

In [None]:
# audio player for the second resynthesized KAM + NMF component (index 1,
# built from Ah in the Wiener filtering step)
# NOTE(review): the transpose presumably adapts the array layout expected by
# ipd.Audio; it is a no-op for 1-D signals - confirm against inverse_stft
ipd.Audio(audios[1].T, rate=fs)

#### References:
[1] Christian Dittmar, Meinard Müller
<br>**Reverse Engineering the Amen Break — Score-Informed Separation and Restoration Applied to Drum Recordings**
<br>IEEE/ACM Transactions on Audio, Speech, and Language Processing, 24(9): 1531-1543, 2016.
<br>
[2] Christian Dittmar, Patricio López-Serrano, Meinard Müller
<br>**Unifying Local and Global Methods for Harmonic-Percussive Source Separation**
<br>In Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2018.

#### If you use libnmfd (NMF toolbox), please refer to:
[3] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller<br>
**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**<br>
In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019.