In [14]:
from tqdm import tqdm
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path

In [157]:
def featurize(track, n_fft=2048):
    """Compute a fixed-length feature vector for a single audio file.

    The vector is the concatenation of:

    * 20 MFCCs, each averaged over time,
    * 12 CQT chroma bins, each averaged over time (6 octaves starting at C1),
    * one crude dominant-frequency estimate in Hz.

    Parameters
    ----------
    track : str or path-like
        Audio file passed to ``librosa.load``.
    n_fft : int, optional
        FFT size for the dominant-frequency estimate. The same value is
        handed to ``librosa.stft`` and used to build the frequency axis, so
        the spectrum rows and the frequency bins always line up. The default
        (2048) reproduces the previous hard-coded behaviour.

    Returns
    -------
    numpy.ndarray
        1-D array of length 33: 20 MFCC means, 12 chroma means, then f0.
    """
    y, sr = librosa.load(track, res_type='polyphase')

    mfcc_mean = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20).mean(axis=1)
    chroma_mean = librosa.feature.chroma_cqt(
        y=y, sr=sr, n_octaves=6, fmin=librosa.note_to_hz('C1')
    ).mean(axis=1)

    # Extremely quick and dirty dominant frequency estimator: take the
    # time-averaged power spectrum and pick the strongest bin between C1 and C6.
    power = np.mean(np.abs(librosa.stft(y, n_fft=n_fft)) ** 2, axis=1)
    freqs = np.fft.rfftfreq(n_fft, d=1 / sr)

    # Power is never negative, so -1 guarantees that out-of-band bins can
    # never win the argmax below.
    out_of_band = (freqs < librosa.note_to_hz('C1')) | (freqs > librosa.note_to_hz('C6'))
    power[out_of_band] = -1

    f0 = freqs[np.argmax(power)]

    return np.concatenate((mfcc_mean, chroma_mean, [f0]))

In [158]:
def extract(index_file, base='nsynth_GCT634/'):
    """Featurize every audio file listed in an index file.

    Parameters
    ----------
    index_file : str or path-like
        Header-less text/CSV file whose first column holds one audio path per
        row, relative to ``base``.
    base : str or path-like, optional
        Directory the listed paths are resolved against.

    Returns
    -------
    pandas.DataFrame
        One row per listed file, indexed by the path as written in
        ``index_file``. Columns are ``mfcc_00``..``mfcc_19``,
        ``chroma_00``..``chroma_11``, ``f0`` and ``instrument``; the latter is
        the part of the file name that precedes the first underscore.
    """
    # The `squeeze=True` keyword of read_csv was removed in pandas 2.0;
    # selecting the first column explicitly works on old and new versions.
    files = pd.read_csv(index_file, header=None).iloc[:, 0].values
    base = Path(base)

    X = []
    Y = []
    for file in tqdm(files):
        fp = Path(file)
        X.append(featurize(base / fp))
        Y.append(fp.name.split('_', 1)[0])

    # Must stay in step with the vector layout returned by featurize().
    columns = (
        ['mfcc_{:02d}'.format(i) for i in range(20)]
        + ['chroma_{:02d}'.format(i) for i in range(12)]
        + ['f0']
    )
    df = pd.DataFrame(data=X, index=files, columns=columns)
    df['instrument'] = Y

    return df

In [159]:
# Build the feature table for the files listed in test_list.txt.
df = extract('nsynth_GCT634/test_list.txt')

100%|██████████| 200/200 [00:49<00:00,  4.06it/s]


In [160]:
# Save whatever table `df` currently holds (the test split when cells run in order).
df.to_csv('nsynth_test.csv')

In [161]:
# Build the feature table for the files listed in valid_list.txt (rebinds `df`).
df = extract('nsynth_GCT634/valid_list.txt')

100%|██████████| 200/200 [00:52<00:00,  3.77it/s]


In [162]:
# Save whatever table `df` currently holds (the validation split when cells run in order).
df.to_csv('nsynth_valid.csv')

In [163]:
# Build the feature table for the files listed in train_list.txt (rebinds `df`).
df = extract('nsynth_GCT634/train_list.txt')

100%|██████████| 1000/1000 [04:18<00:00,  3.87it/s]


In [164]:
# Save whatever table `df` currently holds (the training split when cells run in order).
df.to_csv('nsynth_train.csv')