In [None]:
# Install the musdb package imported by the next cell. No version is pinned:
# the cells below call musdb.DB(root_dir=...) and mus.load_mus_tracks(), so the
# installed release must still provide that API.
!pip install musdb

In [6]:
import librosa
import math
import numpy as np
import os
import pickle
import sys

import musdb
# Database handle used by the cells below to enumerate tracks; it is rooted at
# a hard-coded local dataset directory.
mus = musdb.DB(
    root_dir='/mnt/z/data/musdb18',
)

In [7]:
# Source rate assumed for every track (not verified against the loaded audio)
# and the rate everything is resampled to.
SAMPLE_RATE = 44100
TARGET_SAMPLE_RATE = 16000


def _to_mono_resampled(stereo):
    """Average the first two channels of `stereo` and resample the result.

    Args:
        stereo: 2-D array indexed as [sample, channel]; only channels 0 and 1
            are read.

    Returns:
        1-D array resampled from SAMPLE_RATE to TARGET_SAMPLE_RATE.
    """
    mono = (stereo[:, 0] + stereo[:, 1]) / 2
    # Pass the rates by keyword: newer librosa releases no longer accept them
    # positionally, while older releases accept the same keyword names.
    return librosa.core.resample(
        mono, orig_sr=SAMPLE_RATE, target_sr=TARGET_SAMPLE_RATE)


tracks = mus.load_mus_tracks()
all_data = []
for track in tracks:
    vocal = _to_mono_resampled(track.targets['vocals'].audio)
    off_vocal = _to_mono_resampled(track.targets['accompaniment'].audio)
    # Each entry is (track name, vocals + accompaniment, accompaniment only).
    all_data.append((track.name, vocal + off_vocal, off_vocal))

In [9]:
# Shuffle the track indices with a fixed seed so the split is reproducible.
idx = np.arange(len(all_data))
np.random.seed(12345678)
np.random.shuffle(idx)
# The first 80% (rounded up) of the shuffled tracks form the training set;
# the remainder forms the test set.
num_train = int(math.ceil(0.8 * len(all_data)))

train_data = {}
test_data = {}
for position, track_index in enumerate(idx):
    filename, data_on, data_off = all_data[track_index]
    assert len(data_on) == len(data_off)
    split = train_data if position < num_train else test_data
    # Keyed by track name; value is the (data_on, data_off) pair.
    split[filename] = data_on, data_off

output_dir = '/mnt/z/data/musdb18'
# NOTE(review): the files carry a '.gz' suffix but are written through plain
# open(), i.e. as uncompressed pickles. Confirm what the readers expect before
# changing either the name or the format.
for label, basename, split in (
        ('Train:', 'train.pkl.gz', train_data),
        ('Test:', 'test.pkl.gz', test_data)):
    print(label, split.keys())
    with open(os.path.join(output_dir, basename), 'wb') as f:
        pickle.dump(split, f, protocol=2)

('Train:', ['James May - If You Say', 'ANiMAL - Rockshow', 'M.E.R.C. Music - Knockout', 'Music Delta - Punk', 'BKS - Bulldozer', 'The Scarlet Brand - Les Fleurs Du Mal', 'James May - All Souls Moon', 'James May - On The Line', 'Triviul - Dorothy', 'Traffic Experiment - Once More (With Feeling)', 'North To Alaska - All The Same', 'James May - Dont Let Go', 'Side Effects Project - Sing With Me', 'Skelpolu - Together Alone', 'Moosmusic - Big Dummy Shake', 'Titanium - Haunted Age', 'PR - Happy Daze', 'BigTroubles - Phantom', 'Sambasevam Shanmugam - Kaathaadi', 'The Mountaineering Club - Mallory', 'Steven Clark - Bounty', 'Lyndsey Ollard - Catching Up', 'Alexander Ross - Velvet Curtain', 'Drumtracks - Ghost Bitch', 'Tim Taler - Stalker', 'Georgia Wonder - Siren', 'Hollow Ground - Ill Fate', 'Secret Mountains - High Horse', 'Tom McKenzie - Directions', 'Helado Negro - Mitad Del Mundo', 'Music Delta - Gospel', 'PR - Oh No', 'Young Griffo - Blood To Bone', 'Punkdisco - Oral Hygiene', 'Hop Alon