In [None]:
# Download the MUSDB18-HQ archive from Zenodo; `-c` lets wget resume a partial download on re-run.
!wget -c https://zenodo.org/records/3338373/files/musdb18hq.zip

In [None]:
# Extract the archive into ./musdb18hq; the cells below read musdb18hq/<split>/<track>/<stem>.wav.
!unzip musdb18hq.zip -d musdb18hq

In [1]:
import os
import torch
import torchaudio
from glob import glob
from datasets import Dataset, Features, Audio, Value
from einops import rearrange

In [2]:
# Cut every MUSDB18-HQ track into fixed-length, non-overlapping stereo segments
# and write them to musdb_segments/<split>/<track>/<stem>/<i_seg>.wav.

# Segment length in samples: 2**21 samples at 44.1 kHz is about 47.55 s.
N=2**21
# File stems present in every track folder; "mixture" is the full mix.
STEMS = ("mixture", "vocals", "bass", "drums", "other")
# Segments whose mean mixture magnitude (in int16 units) is not above this
# value are treated as near-silent and dropped.
SILENCE_THRESHOLD = 200

for split in ["train", "test"]:
    # One entry per track folder, in a deterministic order. Keying on
    # mixture.wav avoids assuming that each folder holds exactly five .wav
    # files that glob happens to list contiguously.
    tracks = sorted(
        os.path.basename(os.path.dirname(path))
        for path in glob(f"musdb18hq/{split}/*/mixture.wav")
    )
    for track in tracks:
        # Load all stems as raw int16 PCM (normalize=False keeps integer samples).
        stems = {}
        for stem in STEMS:
            audio, fs = torchaudio.load(
                f"musdb18hq/{split}/{track}/{stem}.wav", normalize=False
            )
            # Check every stem, not only the last one loaded.
            assert fs == 44100, f"{track}/{stem}: unexpected sample rate {fs}"
            assert audio.dtype == torch.int16, f"{track}/{stem}: unexpected dtype {audio.dtype}"
            stems[stem] = audio

        C, L = stems["mixture"].shape
        assert C == 2, f"{track}: expected stereo audio, got {C} channel(s)"
        assert all(audio.shape == (C, L) for audio in stems.values()), (
            f"{track}: stems differ in shape"
        )

        # Tracks no longer than half a segment are skipped entirely.
        if L <= N // 2:
            continue

        if (L % N) / N > 0.5:
            # The trailing remainder is more than half a segment: zero-pad it
            # up to a full segment.
            B = L // N + 1
            pad_length = B * N - L
            stems = {
                name: torch.nn.functional.pad(audio, (0, pad_length))
                for name, audio in stems.items()
            }
        else:
            # The trailing remainder is at most half a segment: drop it.
            B = L // N
            stems = {name: audio[:, :(B * N)] for name, audio in stems.items()}

        # Split into B non-overlapping segments: (C, B*N) -> (B, C, N).
        stems = {
            name: rearrange(audio, 'C (B N) -> B C N', B=B, N=N)
            for name, audio in stems.items()
        }

        # Drop near-silent segments. The level is measured on the MIXTURE:
        # L2 norm across the channel axis for each sample, averaged over time.
        # NOTE(review): the original comment said "not enough vocals", but the
        # statistic has always been computed on the mix; behavior is unchanged.
        p = stems["mixture"].to(torch.float).norm(dim=1).mean(dim=1)
        keep = p > SILENCE_THRESHOLD
        stems = {name: audio[keep] for name, audio in stems.items()}
        B = stems["mixture"].shape[0]
        if B == 0:
            # Nothing survived the filter; do not create empty directories.
            continue

        # Save each kept segment of each stem. Segments are re-indexed 0..B-1
        # after filtering, so the indices on disk are contiguous.
        for name, audio in stems.items():
            out_dir = f"musdb_segments/{split}/{track}/{name}"
            os.makedirs(out_dir, exist_ok=True)
            for i_seg in range(B):
                torchaudio.save(
                    uri=f"{out_dir}/{i_seg}.wav",
                    src=audio[i_seg],
                    sample_rate=fs,
                )

In [3]:
# Build one `datasets.Dataset` per split from the segment files on disk.
# `ds[0]` is the "train" split and `ds[1]` is the "test" split.

# Dataset column suffix -> stem sub-directory under musdb_segments/<split>/<track>/.
stem_dirs = {
    "mix": "mixture",
    "vocal": "vocals",
    "bass": "bass",
    "drums": "drums",
    "other": "other",
}

ds = []
for split in ["train", "test"]:
    # Count mixture segments per track in a single directory walk. Deriving
    # the counts from the matched paths avoids re-globbing with the raw track
    # name, which would break on names containing glob metacharacters.
    segment_counts = {}
    for seg_path in glob(f"musdb_segments/{split}/*/mixture/*.wav"):
        track = os.path.basename(os.path.dirname(os.path.dirname(seg_path)))
        segment_counts[track] = segment_counts.get(track, 0) + 1

    # Column order: the five audio_* columns first, then the five path_* columns.
    columns = {f"audio_{key}": [] for key in stem_dirs}
    columns.update({f"path_{key}": [] for key in stem_dirs})

    # Sorted iteration makes the row order reproducible between runs
    # (iterating a set of strings is not).
    for track in sorted(segment_counts):
        # Segment files are assumed to be numbered 0..count-1 for every stem.
        for i_seg in range(segment_counts[track]):
            for key, stem_dir in stem_dirs.items():
                seg_file = f"musdb_segments/{split}/{track}/{stem_dir}/{i_seg}.wav"
                # Audio columns hold {'path': ...} dicts; path_* columns hold
                # the same location as a plain string.
                columns[f"audio_{key}"].append({'path': seg_file})
                columns[f"path_{key}"].append(seg_file)

    ds.append(Dataset.from_dict(columns))

In [4]:
# Schema for the segment tables: one stereo 44.1 kHz Audio column and one
# string path column per stem. Audio columns are declared with decode=False.
features = Features({
    **{
        f"audio_{stem}": Audio(sampling_rate=44100, mono=False, decode=False)
        for stem in ("mix", "vocal", "bass", "drums", "other")
    },
    **{
        f"path_{stem}": Value('string')
        for stem in ("mix", "vocal", "bass", "drums", "other")
    },
})

In [5]:
# ds[0] is the table built from the "train" split; apply the Audio/string schema to it.
train_ds = ds[0].cast(features)

Casting the dataset:   0%|          | 0/477 [00:00<?, ? examples/s]

In [6]:
# Show the column names and row count of the training table.
train_ds

Dataset({
    features: ['audio_mix', 'audio_vocal', 'audio_bass', 'audio_drums', 'audio_other', 'path_mix', 'path_vocal', 'path_bass', 'path_drums', 'path_other'],
    num_rows: 477
})

In [7]:
# ds[1] is the table built from the MUSDB "test" split; it is used as the validation set.
valid_ds = ds[1].cast(features)

Casting the dataset:   0%|          | 0/262 [00:00<?, ? examples/s]

In [8]:
# Show the column names and row count of the validation table.
valid_ds

Dataset({
    features: ['audio_mix', 'audio_vocal', 'audio_bass', 'audio_drums', 'audio_other', 'path_mix', 'path_vocal', 'path_bass', 'path_drums', 'path_other'],
    num_rows: 262
})

In [9]:
# Publish both tables to the same Hub dataset repository under different split names.
# valid_ds comes from the MUSDB "test" folder but is uploaded as the 'validation' split.
# NOTE(review): presumably requires a Hugging Face login with write access to this repo — confirm.
train_ds.push_to_hub("danjacobellis/musdb_segments", split='train')
valid_ds.push_to_hub("danjacobellis/musdb_segments", split='validation')

Uploading the dataset shards:   0%|          | 0/41 [00:00<?, ?it/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/22 [00:00<?, ?it/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/danjacobellis/musdb_segments/commit/f964dceea4a08776edc16fe567a63f070a4e62da', commit_message='Upload dataset', commit_description='', oid='f964dceea4a08776edc16fe567a63f070a4e62da', pr_url=None, pr_revision=None, pr_num=None)