In [1]:
import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
from sklearn.neighbors import NearestNeighbors
from scipy import stats
from utils import *

import librosa
import librosa.display

import os
import multiprocessing

from tqdm import tqdm

# Base data directory; the two FMA locations below are derived from it.
data_path = 'data'
fma_small_path = f'{data_path}/fma_small'
fma_meta_path = f'{data_path}/fma_metadata'

In [2]:
# Load the track table from the CSV under data_path.
# NOTE(review): fma_load is not defined in this notebook; presumably it is
# provided by the `from utils import *` star import — confirm.
tracks = fma_load(f'{data_path}/tracks_small.csv')

In [3]:
def columns():
    """Build the column index used for every feature vector.

    Each entry is a ``(feature, statistics, number)`` triple: one per
    feature dimension (numbered from ``'01'``) and per summary statistic.

    Returns
    -------
    pandas.MultiIndex
        Sorted index with level names ``feature``, ``statistics``, ``number``.
    """
    # Number of dimensions produced by each feature extractor.
    dims_per_feature = {
        'chroma_stft': 12,
        'chroma_cqt': 12,
        'chroma_cens': 12,
        'tonnetz': 6,
        'mfcc': 20,
        'rmse': 1,
        'zcr': 1,
        'spectral_centroid': 1,
        'spectral_bandwidth': 1,
        'spectral_contrast': 7,
        'spectral_rolloff': 1,
    }
    stat_names = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    triples = [
        (feature, stat, '{:02d}'.format(dim))
        for feature, n_dims in dims_per_feature.items()
        for stat in stat_names
        for dim in range(1, n_dims + 1)
    ]

    index = pd.MultiIndex.from_tuples(
        triples, names=('feature', 'statistics', 'number'))

    # More efficient to slice if indexes are sorted.
    return index.sort_values()

def compute_features(params):
    """Compute summary statistics of librosa features for one track excerpt.

    Takes a single argument so it can be mapped over a list of jobs.

    Parameters
    ----------
    params : sequence of two items ``[tid, duration]``
        ``tid`` is the track id string used to build the mp3 path (its first
        three characters name the sub-folder). ``duration`` is forwarded to
        ``librosa.load`` as its ``duration`` argument.

    Returns
    -------
    pandas.Series
        float32 series indexed by ``columns()`` and named
        ``'<tid>-<duration>'``.
    """
    tid, duration = params

    print(f'computing features for: {duration}s of {tid}.\n')

    features = pd.Series(index=columns(), dtype=np.float32, name=f'{tid}-{duration}')

    # (statistic label, reducer) pairs; every reducer is applied along axis 1.
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('skew', stats.skew),
        ('kurtosis', stats.kurtosis),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
    )

    def feature_stats(name, values):
        """Store each statistic of `values` under features[name, <statistic>]."""
        for label, reduce_along in reducers:
            features[name, label] = reduce_along(values, axis=1)

    filepath = f'{fma_small_path}/{tid[0:3]}/{tid}.mp3'
    signal, sr = librosa.load(filepath, sr=None, mono=True, duration=duration)  # kaiser_fast

    # Frame count implied by the hop length of 512; used by the shape checks.
    expected_frames = np.ceil(len(signal) / 512)

    zcr = librosa.feature.zero_crossing_rate(signal, frame_length=2048, hop_length=512)
    feature_stats('zcr', zcr)

    # Features derived from the constant-Q transform magnitude.
    cqt = np.abs(librosa.cqt(signal, sr=sr, hop_length=512, bins_per_octave=12,
                             n_bins=7*12, tuning=None))
    assert cqt.shape[0] == 7 * 12
    assert expected_frames <= cqt.shape[1] <= expected_frames + 1

    chroma_cqt = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cqt', chroma_cqt)
    chroma_cens = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cens', chroma_cens)
    # Tonnetz is computed from the CENS chroma obtained just above.
    tonnetz = librosa.feature.tonnetz(chroma=chroma_cens)
    feature_stats('tonnetz', tonnetz)

    # Drop the CQT before building the STFT.
    del cqt

    # Features derived from the STFT magnitude.
    stft = np.abs(librosa.stft(signal, n_fft=2048, hop_length=512))
    assert stft.shape[0] == 1 + 2048 // 2
    assert expected_frames <= stft.shape[1] <= expected_frames + 1
    del signal

    chroma_stft = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
    feature_stats('chroma_stft', chroma_stft)

    rms = librosa.feature.rms(S=stft)
    feature_stats('rmse', rms)

    # NOTE(review): the spectral_* calls below (and chroma_stft above) receive
    # only S, not sr, while the audio is loaded with sr=None. Confirm that
    # librosa's default sr is the intended basis for the frequency mapping.
    centroid = librosa.feature.spectral_centroid(S=stft)
    feature_stats('spectral_centroid', centroid)
    bandwidth = librosa.feature.spectral_bandwidth(S=stft)
    feature_stats('spectral_bandwidth', bandwidth)
    contrast = librosa.feature.spectral_contrast(S=stft, n_bands=6)
    feature_stats('spectral_contrast', contrast)
    rolloff = librosa.feature.spectral_rolloff(S=stft)
    feature_stats('spectral_rolloff', rolloff)

    mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
    del stft
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
    feature_stats('mfcc', mfcc)

    return features

In [4]:
# Smoke test: run compute_features once in the current process for the
# first 2 seconds of track '000002'.
param = ['000002', 2]
f = compute_features(param)

computing features for: 2s of 000002.



In [5]:
# Keep only the first two tracks.
# NOTE: this rebinds `tracks`, so the full table is no longer available
# until the loading cell is run again.
tracks = tracks.head(2)
tracks

Unnamed: 0_level_0,album,album,album,album,album,album,album,album,album,album,...,track,track,track,track,track,track,track,track,track,track
Unnamed: 0_level_1,comments,date_created,date_released,engineer,favorites,id,information,listens,producer,tags,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],...,,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
5,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],...,,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World


In [6]:
# Alternative kept for reference: pre-allocate one row per track id.
# features = pd.DataFrame(index=tracks.index,
#                         columns=columns(), dtype=np.float32)
# Start from an empty frame; result rows are added as they are produced.
features = pd.DataFrame(columns=columns(), dtype=np.float32)

# More than usable CPUs to be CPU bound, not I/O bound. Beware memory.
# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 1 instead of failing on `1.5 * None`.
nb_workers = int(1.5 * (os.cpu_count() or 1))

# Longest is ~11,000 seconds. Limit processes to avoid memory errors.
# all are lower than 600
# (Disabled: per-duration worker limits.)
# table = ((5000, 1), (3000, 3), (2000, 5), (1000, 10), (0, nb_workers))
# for duration, nb_workers in table:
print('Working with {} processes.'.format(nb_workers))

# tids = tracks[tracks['track', 'duration'] >= duration].index
# tracks.drop(tids, axis=0, inplace=True)

# Track ids as zero-padded six-digit strings, the form compute_features
# uses to build file paths.
tids = ['{:06d}'.format(i) for i in tracks.index]
frame_size = 2
whole_duration = 30
# Excerpt lengths from frame_size up to whole_duration, in frame_size steps.
durations = list(range(frame_size, whole_duration+1, frame_size))

# One job per (track id, duration) pair.
params = [[i, d] for i in tids for d in durations]
print(params)
print(len(params))

Working with 18 processes.
[['000002', 2], ['000002', 4], ['000002', 6], ['000002', 8], ['000002', 10], ['000002', 12], ['000002', 14], ['000002', 16], ['000002', 18], ['000002', 20], ['000002', 22], ['000002', 24], ['000002', 26], ['000002', 28], ['000002', 30], ['000005', 2], ['000005', 4], ['000005', 6], ['000005', 8], ['000005', 10], ['000005', 12], ['000005', 14], ['000005', 16], ['000005', 18], ['000005', 20], ['000005', 22], ['000005', 24], ['000005', 26], ['000005', 28], ['000005', 30]]
30


In [7]:
# Start nb_workers worker processes.
# NOTE(review): the pool is not closed in this cell; make sure it is
# closed and joined once `it` has been fully consumed.
pool = multiprocessing.Pool(nb_workers)

# it = pool.imap_unordered(compute_features, tids)
# Lazily map compute_features over every [tid, duration] job; results are
# yielded as they complete, not in the order of `params`.
it = pool.imap_unordered(compute_features, params)

In [8]:
# Collect results as workers finish. Each row is stored under its own
# series name, so the completion order does not matter.
try:
    for i, row in enumerate(tqdm(it, total=len(params))):
        features.loc[row.name] = row

        # TODO: needs work — why do NA values exist?
        # Periodic checkpoint of what has been collected so far.
        if i % 1000 == 0:
            features.to_csv('data/duration_features.csv')
except BaseException:
    # A worker failed or the run was interrupted: stop outstanding work
    # instead of leaving worker processes running.
    pool.terminate()
    raise
else:
    # All results consumed; let the workers exit normally.
    pool.close()
finally:
    # Wait for worker processes to exit so they are not leaked across re-runs.
    pool.join()

features.to_csv('data/duration_features.csv')

100%|██████████| 30/30 [00:05<00:00,  5.17it/s]


In [9]:
# Read the saved feature table back from disk and preview the first rows.
duration_features = fma_load('data/duration_features.csv') 
duration_features.head()

feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
000005-2,-0.739629,-0.556086,-1.225924,-0.34955,-1.432164,-1.053181,-0.004125,1.206131,-1.416585,0.967784,...,0.096407,0.026804,0.016525,1.845586,0.21582,0.065991,0.059082,0.007812,1.268123,0.039375
000002-2,-1.362567,-1.187726,0.080128,0.131664,-1.207001,1.873778,-0.605126,-0.551435,-1.265933,-0.372783,...,0.104048,0.021292,0.030223,6.50565,0.351562,0.091543,0.075195,0.011719,2.477756,0.060097
000002-4,-0.638055,-1.313917,-0.503075,1.368423,-0.402402,-0.781425,-0.343968,-0.495769,-0.870274,-0.3755,...,0.137629,0.01984,0.03112,5.951344,0.351562,0.093072,0.07959,0.006836,2.187647,0.053671
000005-4,-0.970172,-0.0538,-1.285565,-0.562506,-0.134861,-1.430826,-0.691401,1.399996,-0.498214,2.961234,...,0.087989,0.037871,0.026276,1.824047,0.225586,0.064395,0.053711,0.007812,1.429513,0.046289
000005-6,-1.137293,-0.764389,-1.313448,-0.136405,-0.31478,-1.003491,-0.629377,1.320236,-0.504661,4.350224,...,0.102904,0.03395,0.027398,2.945787,0.225586,0.056057,0.043457,0.004883,1.680217,0.042505
