In [None]:
# Fetch the MUSDB18-HQ archive from Zenodo; -c lets wget resume a partially downloaded file.
!wget -c https://zenodo.org/records/3338373/files/musdb18hq.zip

In [None]:
!unzip musdb18hq.zip -d musdb18hq

In [1]:
import os
import torch
import torchaudio
from glob import glob
from datasets import Dataset, Features, Audio, Value
from einops import rearrange

In [2]:
# Cut every MUSDB18-HQ track into fixed-length (mixture, vocals) segment pairs and write
# the pairs that contain enough vocal energy to musdb_vss/{split}/{track}/{mixture,vocals}/.
N = 2**21              # samples per segment (about 47.6 s at 44.1 kHz)
VOCAL_THRESHOLD = 200  # minimum mean vocal level (raw int16 units) for a segment to be kept


def fit_to_segments(audio, n):
    """Pad or trim ``audio`` so its length is a whole number of ``n``-sample segments.

    Args:
        audio: tensor of shape (channels, samples).
        n: segment length in samples.

    Returns:
        A tuple ``(audio, n_segments)`` where the returned tensor has exactly
        ``n_segments * n`` samples. A trailing partial segment is zero-padded to
        full length when it is more than half full, and dropped otherwise.
    """
    length = audio.shape[1]
    n_segments, remainder = divmod(length, n)
    if 2 * remainder > n:
        # more than half a segment is left over: keep it and zero-pad the tail
        n_segments += 1
        audio = torch.nn.functional.pad(audio, (0, n_segments * n - length))
    else:
        # drop the short tail
        audio = audio[:, : n_segments * n]
    return audio, n_segments


def vocal_mask(vocals, threshold):
    """Return a boolean mask selecting the segments with enough vocal content.

    Args:
        vocals: tensor of shape (segments, channels, samples).
        threshold: a segment is kept when its level is strictly above this value.

    Returns:
        Boolean tensor of shape (segments,). The level of a segment is the norm
        across channels of each sample, averaged over the segment.
    """
    level = vocals.to(torch.float).norm(dim=1).mean(dim=1)
    return level > threshold


for split in ["train", "test"]:
    # Look for mixture.wav only: it lists every track exactly once, no matter how many
    # stem files sit next to it. Sorting makes the processing order reproducible.
    tracks = sorted(
        os.path.basename(os.path.dirname(path))
        for path in glob(f"musdb18hq/{split}/*/mixture.wav")
    )
    for track in tracks:
        # load the mixture and vocal tracks as raw (un-normalized) samples
        mix_path = f"musdb18hq/{split}/{track}/mixture.wav"
        vocal_path = f"musdb18hq/{split}/{track}/vocals.wav"
        x, fs_x = torchaudio.load(mix_path, normalize=False)
        v, fs = torchaudio.load(vocal_path, normalize=False)
        C, L = v.shape
        assert x.shape == v.shape
        assert fs_x == fs == 44100
        assert C == 2
        assert x.dtype == v.dtype == torch.int16

        # tracks no longer than half a segment are skipped entirely
        if L <= N // 2:
            continue

        # x and v have the same shape (asserted above), so both yield the same B
        x, B = fit_to_segments(x, N)
        v, _ = fit_to_segments(v, N)

        # split the file into non-overlapping N-sample chunks
        x = rearrange(x, 'C (B N) -> B C N', B=B, N=N)
        v = rearrange(v, 'C (B N) -> B C N', B=B, N=N)

        # remove segments that don't have enough vocals
        keep = vocal_mask(v, VOCAL_THRESHOLD)
        x = x[keep]
        v = v[keep]

        # save each remaining segment pair; segments are renumbered 0..B-1 after filtering
        for i_seg in range(x.shape[0]):
            mix_file = f"musdb_vss/{split}/{track}/mixture/{i_seg}.wav"
            vocal_file = f"musdb_vss/{split}/{track}/vocals/{i_seg}.wav"
            os.makedirs(os.path.dirname(mix_file), exist_ok=True)
            os.makedirs(os.path.dirname(vocal_file), exist_ok=True)
            torchaudio.save(
                uri=mix_file,
                src=x[i_seg],
                sample_rate=fs,
            )
            torchaudio.save(
                uri=vocal_file,
                src=v[i_seg],
                sample_rate=fs,
            )



In [2]:
# Build one Dataset per split (ds[0] = train, ds[1] = test) listing every saved
# (mixture, vocals) segment pair under musdb_vss/.
ds = []
for split in ["train", "test"]:
    # Sorted so the row order is the same on every run; iterating a set of strings
    # directly would depend on per-process string hashing.
    tracks = sorted({
        os.path.basename(os.path.dirname(os.path.dirname(p)))
        for p in glob(f"musdb_vss/{split}/*/vocals/*.wav")
    })
    path_mix = []
    path_vocal = []
    for track in tracks:
        # Read the segment ids from the file names that are actually on disk instead
        # of assuming they are numbered 0..n-1.
        vocal_dir = f"musdb_vss/{split}/{track}/vocals"
        segment_ids = sorted(
            int(os.path.splitext(name)[0])
            for name in os.listdir(vocal_dir)
            if name.endswith(".wav")
        )
        for i_seg in segment_ids:
            mix_file = f"musdb_vss/{split}/{track}/mixture/{i_seg}.wav"
            vocal_file = f"musdb_vss/{split}/{track}/vocals/{i_seg}.wav"
            # fail here rather than much later, while the audio is being uploaded
            if not os.path.isfile(mix_file):
                raise FileNotFoundError(f"missing mixture segment for {vocal_file}: {mix_file}")
            path_mix.append(mix_file)
            path_vocal.append(vocal_file)

    ds.append(Dataset.from_dict({
        'audio_mix': [{'path': path} for path in path_mix],
        'audio_vocal': [{'path': path} for path in path_vocal],
        'path_mix': path_mix,
        'path_vocal': path_vocal,
    }))

In [3]:
# Schema for both splits: two stereo 44.1 kHz audio columns that are stored
# undecoded (decode=False), plus the source path of each file as a plain string.
features = Features(
    {
        **{
            column: Audio(sampling_rate=44100, mono=False, decode=False)
            for column in ('audio_mix', 'audio_vocal')
        },
        **{column: Value('string') for column in ('path_mix', 'path_vocal')},
    }
)

In [4]:
# ds[0] was built from the "train" split; cast its columns to the typed schema.
train_ds = ds[0].cast(features=features)

Casting the dataset:   0%|          | 0/436 [00:00<?, ? examples/s]

In [5]:
# Display the dataset summary (column names and row count).
train_ds

Dataset({
    features: ['audio_mix', 'audio_vocal', 'path_mix', 'path_vocal'],
    num_rows: 436
})

In [6]:
# ds[1] was built from the "test" split; it is used as the validation set.
valid_ds = ds[1].cast(features=features)

Casting the dataset:   0%|          | 0/235 [00:00<?, ? examples/s]

In [7]:
# Display the dataset summary (column names and row count).
valid_ds

Dataset({
    features: ['audio_mix', 'audio_vocal', 'path_mix', 'path_vocal'],
    num_rows: 235
})

In [8]:
# Upload both splits to the same Hub dataset repository. The second call stays the
# last expression of the cell so its CommitInfo is what the notebook displays.
train_ds.push_to_hub(repo_id="danjacobellis/musdb18hq_vss", split="train")
valid_ds.push_to_hub(repo_id="danjacobellis/musdb18hq_vss", split="validation")

Uploading the dataset shards:   0%|          | 0/15 [00:00<?, ?it/s]

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/8 [00:00<?, ?it/s]

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/561 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/danjacobellis/musdb18hq_vss/commit/208f50c04ef9b4503028655deae988bfc58e35a5', commit_message='Upload dataset', commit_description='', oid='208f50c04ef9b4503028655deae988bfc58e35a5', pr_url=None, pr_revision=None, pr_num=None)