Install dependencies.

In [None]:
!pip install -q musdb museval
!pip install -q git+https://github.com/shi0rik0/open-unmix-pytorch.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
import musdb
import museval
import openunmix
import numpy as np
import torch

Get the train set, validation set and test set of MUSDB.

In [None]:
# Training and validation tracks are both drawn from the "train" subset and are
# told apart by the `split` argument; the "test" subset is loaded as a whole.
mus_train, mus_valid = (
  musdb.DB(subsets="train", split=split_name, download=True)
  for split_name in ('train', 'valid')
)
mus_test = musdb.DB(subsets="test", download=True)

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
# Sample rate as reported by the first training track.
sampling_rate = mus_train[0].rate # 44100

This code block shows how to retrieve the audio from the dataset.

In [None]:
# Three ways to read audio from the first training track. Only the value of the
# last expression is rendered as the cell output; the first two are evaluated
# and discarded. The rendered array has two columns (presumably the stereo
# channels — TODO confirm).
mus_train[0].audio # mixture
mus_train[0].targets['vocals'].audio # vocals
mus_train[0].targets['accompaniment'].audio # accompaniment

array([[-0.04733276, -0.03793335],
       [-0.05758667, -0.05212402],
       [-0.04681396, -0.05422974],
       ...,
       [ 0.00241089,  0.00057983],
       [ 0.00244141,  0.00057983],
       [ 0.00231934,  0.00036621]])

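A helper function that computes the SDR (signal-to-distortion ratio) of the estimated sources for one track, reported per target as the median over evaluation frames.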

In [None]:
def eval_track(track, vocals, accompaniment):
  """Score one track's estimated sources with museval and summarise SDR per target.

  Args:
    track: Reference track, passed unchanged to `museval.eval_mus_track`.
    vocals: Estimated vocals for `track`.
    accompaniment: Estimated accompaniment for `track`.

  Returns:
    A dict mapping each target name reported by museval to the median of its
    frame-wise SDR values, with NaN frames ignored.
  """
  estimates = {
    'vocals': vocals,
    'accompaniment': accompaniment
  }
  scores = museval.eval_mus_track(track, estimates).scores
  # A frame's SDR can be NaN (presumably when the reference is silent in that
  # frame — TODO confirm). np.nanmedian skips such frames so that one undefined
  # frame does not turn the whole track's score into NaN.
  return {
    target['name']: np.nanmedian([frame['metrics']['SDR'] for frame in target['frames']])
    for target in scores['targets']
  }

UMX is the default model provided by open-unmix. It's our baseline model.

In [None]:
def load_umx():
  """Build the open-unmix UMX separator on the globally selected `device`."""
  return openunmix.umx(device=device)

def predict_umx(separator, track):
  """Separate a track with `separator` and derive vocals and accompaniment.

  Args:
    separator: Callable model. It receives a float32 tensor built from
      `track.audio.T` with a leading batch axis, placed on the global `device`.
    track: Object whose `.audio` is a 2-D NumPy array holding the mixture.

  Returns:
    A dict with 'vocals' (the first target of the separator's output for the
    single batch item, transposed back to the layout of `track.audio`) and
    'accompaniment' (the mixture minus the vocals estimate).
  """
  mixture = torch.as_tensor(track.audio.T[None, ...], device=device).float()
  # Pure inference: disabling autograd avoids building a graph that would only
  # cost memory on long tracks.
  with torch.no_grad():
    result = separator(mixture)
  # [0, 0] selects the first batch item and the first target, which is assumed
  # to be vocals — TODO confirm against the separator's target order.
  vocals = result[0,0,:,:].detach().cpu().numpy().T
  accom = track.audio - vocals
  return {'vocals': vocals, 'accompaniment': accom}

In [None]:
# Evaluate the UMX baseline on every test track, then report the median SDR
# across tracks for each target.
umx = load_umx()
sdr_vocals_per_track = []
sdr_accompaniment_per_track = []
for track in mus_test:
  prediction = predict_umx(umx, track)
  sdr = eval_track(track, prediction['vocals'], prediction['accompaniment'])
  sdr_vocals_per_track.append(sdr['vocals'])
  sdr_accompaniment_per_track.append(sdr['accompaniment'])
# Per-track lists and aggregated scalars use separate names so the cell's
# variables always hold one kind of value. np.nanmedian keeps a single track
# with an undefined (NaN) score from turning the aggregate into NaN.
sdr_vocals = np.nanmedian(sdr_vocals_per_track)
sdr_accompaniment = np.nanmedian(sdr_accompaniment_per_track)
print('The scores of UMX:')
print('vocals:', sdr_vocals)
print('accompaniment:', sdr_accompaniment)

The scores of UMX:
vocals: 7.1481625
acccompaniment: 11.6912075
