In [1]:
import os

from joblib import Parallel, delayed
from tqdm import tqdm_notebook as tqdm
import pickle
import pandas as pd

import pumpp
import jams
import numpy as np

In [2]:
def root(x):
    """Return the file name of path `x` without its directory or final extension."""
    filename = os.path.basename(x)
    stem, _ext = os.path.splitext(filename)
    return stem

# Collect the audio files and their reference annotation files
AUDIO = jams.util.find_with_extension('/home/bmcfee/data/eric_chords/audio/', 'mp3')
ANNOS = jams.util.find_with_extension('/home/bmcfee/data/eric_chords/references_v2/', 'jams')

# The two listings must have the same length ...
assert len(AUDIO) == len(ANNOS)

# ... and pair up position by position on their extension-less base names
assert all(root(audio_path) == root(anno_path)
           for audio_path, anno_path in zip(AUDIO, ANNOS))

In [6]:
# Build a pump: one feature extractor and two chord task transformers,
# all configured with the same sampling rate and hop length
sr = 44100
hop_length = 4096

p_feature = pumpp.feature.CQTMag(
    name='cqt',
    sr=sr,
    hop_length=hop_length,
    log=True,
    conv='tf',
)
p_chord_tag = pumpp.task.ChordTagTransformer(
    name='chord_tag',
    sr=sr,
    hop_length=hop_length,
)
p_chord_struct = pumpp.task.ChordTransformer(
    name='chord_struct',
    sr=sr,
    hop_length=hop_length,
)

pump = pumpp.Pump(p_feature, p_chord_tag, p_chord_struct)

# Save the pump: serialize the configured object to disk
with open('/home/bmcfee/working/chords/pump.pkl', 'wb') as pump_file:
    pickle.dump(pump, pump_file)

In [7]:
def convert(aud, jam, pump, outdir):
    """Run `pump` on one audio/annotation pair and save the result as an NPZ file.

    The output file is named after the audio file: its base name with the
    extension replaced by ``npz``, written inside `outdir`.

    Parameters
    ----------
    aud : str
        Path to the audio file; passed to ``pump.transform`` and used to
        derive the output file name.
    jam : object
        Annotation argument forwarded unchanged to ``pump.transform``
        (the caller in this notebook passes a JAMS file path).
    pump : object
        Any object whose ``transform(aud, jam)`` returns a mapping from
        array names to arrays.
    outdir : str
        Directory that receives the ``.npz`` file.  It is created if it
        does not exist yet.

    Returns
    -------
    None
    """
    data = pump.transform(aud, jam)

    # Create the target directory on demand so a fresh run does not fail in
    # np.savez.  exist_ok=True keeps this safe when several parallel workers
    # reach this point at once.  An empty outdir means "current directory",
    # which os.makedirs would reject, so skip it in that case.
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    fname = os.path.extsep.join([root(aud), 'npz'])

    # Each key of `data` becomes one named array inside the archive
    np.savez(os.path.join(outdir, fname), **data)

In [8]:
# Directory handed to convert() as `outdir`: one .npz file per track is written here
OUTDIR = '/home/bmcfee/working/chords/pump/'

In [9]:
# Convert every (audio, annotation) pair across 20 joblib workers;
# the trailing semicolon suppresses the returned list of results in the cell output
Parallel(n_jobs=20, verbose=10)(
    delayed(convert)(audio_path, anno_path, pump, OUTDIR)
    for audio_path, anno_path in zip(AUDIO, ANNOS)
);

[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:    3.9s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    5.2s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:    8.1s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   11.5s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:   15.5s
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   18.6s
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:   22.3s
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:   26.9s
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed:   31.3s
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:   36.5s
[Parallel(n_jobs=20)]: Done 141 tasks      | elapsed:   41.9s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:   46.9s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:   53.0s
[Parallel(n_jobs=20)]: Done 202 tasks      | elapsed:   58.7s
[Parallel(n_jobs=20)]: Done 225 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 248 tasks      | elapsed:  1.2min
[Paralle

In [27]:
# Make the artist index

In [41]:
# Map each track id (annotation file name without extension) to its artist.
# Tracks with no artist in their file metadata each get their own numbered
# placeholder ('artist_00000', 'artist_00001', ...) so that they stay
# distinct from one another.
artists = {}

null_artist = 0

for ann in tqdm(ANNOS):
    J = jams.load(ann, validate=False)
    artist = J.file_metadata.artist
    if not artist:
        artist = 'artist_{:05d}'.format(null_artist)
        null_artist += 1

    artists[root(ann)] = artist

# Build the Series once from the collected mapping.  Starting from an empty
# pd.Series() and assigning label by label triggers pandas' empty-Series
# dtype warning and reallocates on every insertion.
index = pd.Series(artists, dtype=object)

index.to_json('/home/bmcfee/working/chords/artist_index.json')




In [None]:
# For every reference file, print its track id and how many annotations
# are selected by the 'chord' key
for ann in tqdm(ANNOS):
    J = jams.load(ann, validate=False)
    n_chord = len(J.annotations['chord'])
    print('{}: {}'.format(root(ann), n_chord))

TR6R91L11C8A40D710: 1
TRACGVT149E3B9BE3F: 1
TRACPPB149E33C10B9: 1
TRADINA127F847B84E: 1
TRAEQJQ149E3BA694B: 1
TRAGPGW149E3A9DFC7: 1
TRAGTML149E3B2F15F: 1
TRAHKKV149E3BDA124: 1
TRAHMSN127F92CD4AD: 1
TRAHXQW149E3BCE2C1: 1
TRAIIEF149E3861F6C: 1
TRAITGI149E3C71235: 1
TRAJQHL149E3EF231A: 2
TRAKIXJ149E332D53F: 1
TRALJVL127F98F7094: 1
TRAMEPQ149E2C6E391: 1
TRAOIOP149E3AD2F51: 1
TRAOVGN149E362699A: 1
TRAPEUF149E3BF4C4C: 1
TRAPSWJ149E36CDE23: 1
TRAPYYI149E33239E1: 1
TRAQZSN149E30ECF47: 1
TRARMNB149E3DECF43: 1
TRATELE149E33C7DFD: 1
TRATLJQ149E33D7D2A: 1
TRATRSK149E2CC934D: 1
TRAUSXP149E3BBA300: 1
TRAVBNX149E3F1BDEB: 1
TRAVDXB127FA694101: 1
TRAVJKH149E37C05B8: 1
TRAWUJT149E3DFAE74: 1
TRAWVNL127FA2C9CD6: 1
TRAXCRW149E2CC259A: 1
TRAXWBI149E3D6BC67: 1
TRAYBJG149E34B0C9E: 1
TRAYVUZ149E3840C6F: 1
TRAYYLG149E379CC85: 1
TRAZIEZ149E316AD26: 1
TRAZQMJ149E35018A5: 1
TRBACIS149E3490E0D: 1
TRBAERZ149E316341A: 1
TRBAFVO149E33A9195: 1
TRBASYY149E3B90EC1: 1
TRBBJEM149E3D337D2: 1
TRBBMDO127FA027922: 1
TRBBPZJ149