In [None]:
!pip install scaper
!pip install nussl
!pip install git+https://github.com/source-separation/tutorial


In [None]:
from IPython.display import Audio, display
import matplotlib.pyplot as plt
import numpy as np

In [None]:
from common import data, viz
import nussl
# Prepare MUSDB under ~/.nussl/tutorial/, the same root folder the
# data-loading cells below point at. The helper comes from the tutorial's
# `common.data` module; what exactly it writes there is not visible here.
data.prepare_musdb('~/.nussl/tutorial/')


In [None]:
# STFT configuration handed to the on-the-fly dataset.
stft_params = nussl.STFTParams(window_length=512, hop_length=128, window_type='sqrt_hann')

# Point at the "train" split rather than the dataset root, matching the
# `dataset_path+"/train"` foreground folder used by the other on_the_fly
# calls in this notebook.
fg_path = "~/.nussl/tutorial/train"

# Dataset of 1000 mixtures with no transform applied to the items.
train_data = data.on_the_fly(
    stft_params,
    transform=None,
    fg_path=fg_path,
    num_mixtures=1000,
    coherent_prob=1.0,
)


In [None]:
# Take the first item of the on-the-fly dataset and hand its 'sources' entry
# to the tutorial's `viz.show_sources` helper.
item = train_data[0]
viz.show_sources(item['sources'])


# 1. Load Dataset

In [None]:
# The 'train' subset is split into a training part and a validation part;
# the 'test' subset is used as-is.
musdb_train = nussl.datasets.MUSDB18(
    subsets='train',
    split="train",
)
musdb_valid = nussl.datasets.MUSDB18(
    subsets='train',
    split="valid",
)
musdb_test = nussl.datasets.MUSDB18(subsets='test')

# Displayed as the cell output: (train size, valid size, test size).
tuple(len(subset) for subset in (musdb_train, musdb_valid, musdb_test))

In [None]:
# Use the item at index 1 of the training split as the running example for
# the next few cells.
train_item = musdb_train[1]

In [None]:
# Display the sample array stored in the mixture signal.
train_item['mix'].audio_data

In [None]:
# Display the sample array stored in the isolated 'vocals' source.
train_item['sources']['vocals'].audio_data

In [None]:
# Listen to the first 200000 samples of the mixture. The playback rate is read
# from the signal itself instead of being hard-coded, so the preview plays at
# the right speed whatever the sample rate of the loaded audio is.
display(Audio(train_item['mix'].audio_data[:, :200000], rate=train_item['mix'].sample_rate))

In [None]:
# Value forwarded to the generator as `duration` (presumably seconds — confirm
# against the tutorial's `data.on_the_fly`).
duration = 5

# Root folder of the prepared tutorial data.
dataset_path = "~/.nussl/tutorial/"

# On-the-fly training set built from the "train" sub-folder; items are left
# untransformed (transform=None).
trainset = data.on_the_fly(
    stft_params,
    transform=None,
    fg_path=dataset_path+"/train",
    num_mixtures=10000,
    duration=duration,
)

# Keep one example around for the cells that follow.
item = trainset[200]


In [None]:
# Listen to (at most) the first 200000 samples of the generated mixture. The
# playback rate comes from the signal itself rather than a hard-coded 44100,
# so it stays correct if the generator is configured with another sample rate.
display(Audio(item['mix'].audio_data[:, :200000], rate=item['mix'].sample_rate))

### Transform Data
- We have to transform each `nussl.core.AudioSignal` into the format the model expects:
    1. We want to build a one-vs-all separation system, so we combine every source except the target into a single source.
        - To build a vocal separator, mix drums, bass, and other into a single source.
        - To build a drum separator, mix vocals, bass, and other into a single source.
    2. We want to use spectrograms instead of raw waveform samples.


In [None]:
from nussl.datasets import transforms as nussl_tfm

# Fetch an untransformed example from the training set.
item = trainset[0]

# Group the three non-vocal stems so they are combined into one source.
sum_sources = nussl_tfm.SumSources(
    [['bass', 'drums', 'other']],
)
transformed_item = sum_sources(item)

# Inspect which sources remain after the grouping.
print(transformed_item['sources'])


In [None]:
# Build magnitude spectrograms, i.e. np.abs(AudioSignal.stft()), for the
# sources of one example and plot them on a log scale.
msa = nussl_tfm.MagnitudeSpectrumApproximation()

item = trainset[0]

transformed_item = msa(item)
print(transformed_item.keys())
print(transformed_item['source_magnitudes'].shape)

# One panel per entry along the last axis of 'source_magnitudes' (the axis the
# plots index; presumably one entry per source — confirm against the nussl docs).
source_magnitudes = transformed_item['source_magnitudes']
num_sources = source_magnitudes.shape[-1]

# Small offset so bins that are exactly zero (e.g. a silent source) map to a
# finite value instead of log10(0) = -inf, which breaks the colour scaling.
eps = 1e-8

plt.figure(figsize=(10,20))
for source_idx in range(num_sources):
    plt.subplot(num_sources, 1, source_idx + 1)
    plt.imshow(
        np.log10(source_magnitudes[..., source_idx] + eps),
        origin='lower',
        aspect='auto',
    )

In [None]:
# Chain the individual transforms: group the non-vocal stems, compute the
# magnitude targets, keep index 1 of 'source_magnitudes', then convert the
# item for the separation model.
tfm = nussl_tfm.Compose(
    [
        nussl_tfm.SumSources([['bass', 'drums', 'other']]),
        nussl_tfm.MagnitudeSpectrumApproximation(),
        nussl_tfm.IndexSources('source_magnitudes', 1),
        nussl_tfm.ToSeparationModel(),
    ]
)

# List each key with the type of its value, before and after the pipeline.
item = trainset[0]
print("Before transforms")
for key, value in item.items():
    print(key, type(value))

print("\nAfter transforms")
item = tfm(item)
for key, value in item.items():
    print(key, type(value))

In [None]:
# Same STFT settings and transform chain as in the earlier cells, repeated
# here next to the dataset construction that uses them.
stft_params = nussl.STFTParams(
    window_length=512,
    hop_length=128,
    window_type='sqrt_hann',
)
tfm = nussl_tfm.Compose(
    [
        nussl_tfm.SumSources([['bass', 'drums', 'other']]),
        nussl_tfm.MagnitudeSpectrumApproximation(),
        nussl_tfm.IndexSources('source_magnitudes', 1),
        nussl_tfm.ToSeparationModel(),
    ]
)
duration = 5

# On-the-fly training set over the "train" folder, now with the transform
# chain applied to every item and time stretching disabled.
trainset = data.on_the_fly(
    stft_params,
    transform=tfm,
    fg_path=dataset_path+"/train",
    num_mixtures=10000000,
    time_stretch=None,
    duration=duration,
)

# Sanity check: show which keys a transformed item exposes.
item = trainset[0]
print(item.keys())