In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import musdb
import torch
import h5py
import os
from concurrent.futures import ProcessPoolExecutor

(0 - mixture, 1 - drumm, 2 - bass, 3 - other, 4 - vocal)

In [4]:

def musdb_to_h5(mus):
    os.makedirs("hdf5_data", exist_ok=True)
    track_idxs = []
    for i in range(len(mus.tracks)):
        track_idxs.append(i)

    with ProcessPoolExecutor(max_workers=None) as pool:
        pool.map(audio_to_h5, track_idxs, mus)

def audio_to_h5(track_idx, mus):
    source_types = ["vocals", "drums", "bass", "other"]
    sample_rate=44100
    track = mus.track[track_idx]
    h5_path = os.path.join("hdf5_data/", "{}.h5".format(track.name))

    with h5py.File(h5_path, "w") as hf:

        hf.attrs.create("audio_name", data=track.name.encode(), dtype="S100")
        hf.attrs.create("sample_rate", data=sample_rate, dtype=np.int32)

        for source_type in source_types:
            audio = track.targets[source_type].audio[:,0].T
            hf.create_dataset(name=source_type, data=audio, dtype=np.float32)

        audio = track.audio[:,0].T
        hf.create_dataset(name="mixture", data=audio, dtype=np.float32)

In [8]:
class MusDBDataset(torch.utils.data.Dataset):
    def __init__(self, data, stem='vocal', shuffle=True, debug=False):
        super(MusDBDataset, self).__init__()
        self.stems = {'mix': 0, 'drums': 1, 'bass': 2, 'other': 3, 'vocal': 4}
        self.inputs = np.array([])
        self.targets = np.array([])

        for i, music in enumerate(data):
            m = music.stems[0][:,1]
            o = music.stems[self.stems[stem]][:,1]
            shape = m.shape[0]
            self.inputs = np.append(self.inputs, np.array_split(m, range(22050, shape, 22050)[:-1]))
            self.targets = np.append(self.targets, np.array_split(o, range(22050, shape, 22050)[:-1]))
            print(i)
            # if debug and i == 50:    
                # break
        
        self.inputs = self.inputs.flatten()
        self.targets = self.targets.flatten()
        if shuffle:
            p = np.random.permutation(len(self.inputs))
            self.inputs = self.inputs[p]
            self.targets = self.targets[p]
    
    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]
            

In [5]:
root = 'musdb-dataset'
train_data = musdb.DB(root, subsets='train', split='train')
valid_data = musdb.DB(root, subsets='train', split='valid')
test_data = musdb.DB(root, subsets='test')

In [None]:
musdb_to_h5(train_data)

In [9]:
from torch.utils.data import DataLoader

train_set = MusDBDataset(data=train_data)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)

  return asanyarray(a).ravel(order=order)


0
1
2
3
4
5
6
7
8
9
10
11
12
13


MemoryError: Unable to allocate 187. MiB for an array with shape (12280832, 2) and data type float64

In [None]:
torch.save(train_loader, 'train_loader.pth')