In [1]:
import yaml
import numpy as np
import os
import soundfile as sf
from scipy.signal import stft
from tqdm import tqdm
import torch
from multiprocessing import Pool
# If you have a GPU, put the data on the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

directory = "../Data/slakh2100_flac_redux/train"
listdir = os.listdir(directory)
listdir.sort()
format = '.flac'
savedir = 'data/train'
sample_freq = 44100
freq_amount = 129
mixing = True
mix_amount = 2
np.random.seed(0)


def load_sound(file_path):
    data, _ = sf.read(file_path)
    return data


def parallel_load(file_path):
    num_processes = 8
    with Pool(num_processes) as pool:
        data = pool.map(load_sound, file_path)

    return np.sum(data ,axis=0)



In [2]:
%%timeit
while True:
    tr = np.random.choice(listdir)
    tr_dicts_2 = {}
    tr_dict = {}
    tr_path = os.path.join(directory, tr)
    with open(os.path.join(tr_path, "metadata.yaml")) as meta:
        metadata = yaml.safe_load(meta)

    file_inst = np.array([[stem + format, metadata['stems'][stem]['inst_class']] for stem in metadata['stems'].keys()])

    stems_path = os.path.join(tr_path, 'stems')
    file_inst = file_inst[[file_inst[:,0][i] in os.listdir(stems_path) for i in range(len(file_inst))]]
    # combine all stems from the same instrument and track using soundfile
    for inst in np.unique(file_inst[:,1]):
        inst_files = file_inst[file_inst[:,1] == inst][:,0]
        inst_files = [os.path.join(stems_path, inst_file) for inst_file in inst_files]

        inst_data = parallel_load(inst_files)
        #inst_data = np.array([sf.read(inst_file)[0] for inst_file in inst_files])
        #inst_data = np.sum(inst_data, axis=0)

        tr_dict.update({inst: inst_data})

    # Choose a random amount of instruments to combine (between 2 and lenght-1)
    if mixing:
        # Find all instruments that are in the dictionary
        inst_in_dict = list(tr_dict.keys())
        for _ in range(mix_amount):
            # Choose the instruments to combine
            inst_amount = np.random.randint(2, len(inst_in_dict))
            inst_to_mix = np.random.choice(inst_in_dict, inst_amount, replace=False)
            # Add the combined data to the dictionary
            tr_dict.update({''.join(inst_to_mix):  np.sum([tr_dict[inst] for inst in inst_to_mix], axis=0)})

     # Add mix to the dictionary
    inst_data = np.array([sf.read(os.path.join(tr_path, 'mix' + format))[0]])
    tr_dict.update({'mix': inst_data})

    for inst in tr_dict.keys():
        # compute the STFT of the combined data
        f, t, Zxx = stft(tr_dict[inst], fs=sample_freq, nperseg=freq_amount*2-2)
        tr_dicts_2.update({inst: torch.view_as_real_copy(torch.from_numpy(Zxx).to(torch.complex128).to(device))})
    torch.cuda.empty_cache()
    break

OutOfMemoryError: CUDA out of memory. Tried to allocate 252.00 MiB (GPU 0; 9.77 GiB total capacity; 2.45 GiB already allocated; 132.06 MiB free; 2.46 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF