# Intro

In [1]:
%load_ext autoreload
%autoreload 

In [2]:
import os
import multiprocessing
import warnings
import numpy as np
from scipy import stats
import pandas as pd
import librosa
from tqdm import tqdm

In [3]:
from Project_Spotify_502 import utils, features

In [4]:
AUDIO_DIR = os.environ.get('AUDIO_DIR')

In [5]:
print(AUDIO_DIR)

/home/achot/code/a-pellissier/Project_Spotify_502/raw_data/fma_small/fma_small


filepath = utils.get_audio_path(AUDIO_DIR, 2)

x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast

x

# Defining understood functions

In [6]:
def compute_features_from_mp3(tid):

    features = pd.Series(index=columns(), dtype=np.float32, name=tid)

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    filepath = utils.get_audio_path(os.environ.get('AUDIO_DIR'), tid)
    x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast
    f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
    feature_stats('zcr', f)

    cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                n_bins=7*12, tuning=None))
    assert cqt.shape[0] == 7 * 12
    assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

    f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cqt', f)
    f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cens', f)
    f = librosa.feature.tonnetz(chroma=f)
    feature_stats('tonnetz', f)

    del cqt
    stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
    assert stft.shape[0] == 1 + 2048 // 2
    assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
    del x

    f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
    feature_stats('chroma_stft', f)

    f = librosa.feature.rms(S=stft)
    feature_stats('rmse', f)

    f = librosa.feature.spectral_centroid(S=stft)
    feature_stats('spectral_centroid', f)
    f = librosa.feature.spectral_bandwidth(S=stft)
    feature_stats('spectral_bandwidth', f)
    f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
    feature_stats('spectral_contrast', f)
    f = librosa.feature.spectral_rolloff(S=stft)
    feature_stats('spectral_rolloff', f)

    mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
    del stft
    f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
    feature_stats('mfcc', f)

    return features

In [7]:
def compute_features(tid):

    features = pd.Series(index=columns(), dtype=np.float32, name=tid)

    # Catch warnings as exceptions (audioread leaks file descriptors).
    warnings.filterwarnings('error', module='librosa')

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    try:
        filepath = utils.get_audio_path(os.environ.get('AUDIO_DIR'), tid)
        x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast

        f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
        feature_stats('zcr', f)

        cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                 n_bins=7*12, tuning=None))
        assert cqt.shape[0] == 7 * 12
        assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

        f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cqt', f)
        f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cens', f)
        f = librosa.feature.tonnetz(chroma=f)
        feature_stats('tonnetz', f)

        del cqt
        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
        assert stft.shape[0] == 1 + 2048 // 2
        assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
        del x

        f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
        feature_stats('chroma_stft', f)

        f = librosa.feature.rmse(S=stft)
        feature_stats('rmse', f)

        f = librosa.feature.spectral_centroid(S=stft)
        feature_stats('spectral_centroid', f)
        f = librosa.feature.spectral_bandwidth(S=stft)
        feature_stats('spectral_bandwidth', f)
        f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
        feature_stats('spectral_contrast', f)
        f = librosa.feature.spectral_rolloff(S=stft)
        feature_stats('spectral_rolloff', f)

        mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
        del stft
        f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
        feature_stats('mfcc', f)

    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))

    return features

In [8]:
def columns():
    feature_sizes = dict(chroma_stft=12, chroma_cqt=12, chroma_cens=12,
                         tonnetz=6, mfcc=20, rmse=1, zcr=1,
                         spectral_centroid=1, spectral_bandwidth=1,
                         spectral_contrast=7, spectral_rolloff=1)
    moments = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    columns = []
    for name, size in feature_sizes.items():
        for moment in moments:
            it = ((name, moment, '{:02d}'.format(i+1)) for i in range(size))
            columns.extend(it)

    names = ('feature', 'statistics', 'number')
    columns = pd.MultiIndex.from_tuples(columns, names=names)

    # More efficient to slice if indexes are sorted.
    return columns.sort_values()

In [9]:
def save(features, ndigits):

    # Should be done already, just to be sure.
    features.sort_index(axis=0, inplace=True)
    features.sort_index(axis=1, inplace=True)

    features.to_csv('features_new.csv', float_format='%.{}e'.format(ndigits))

In [10]:
def compute_features_from_filepath(name, filepath):

    features = pd.Series(index=columns(), dtype=np.float32, name=name)

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    filepath = filepath
    x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast
    f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
    feature_stats('zcr', f)

    cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                n_bins=7*12, tuning=None))
    assert cqt.shape[0] == 7 * 12
    assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

    f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cqt', f)
    f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cens', f)
    f = librosa.feature.tonnetz(chroma=f)
    feature_stats('tonnetz', f)

    del cqt
    stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
    assert stft.shape[0] == 1 + 2048 // 2
    assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
    del x

    f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
    feature_stats('chroma_stft', f)

    f = librosa.feature.rms(S=stft)
    feature_stats('rmse', f)

    f = librosa.feature.spectral_centroid(S=stft)
    feature_stats('spectral_centroid', f)
    f = librosa.feature.spectral_bandwidth(S=stft)
    feature_stats('spectral_bandwidth', f)
    f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
    feature_stats('spectral_contrast', f)
    f = librosa.feature.spectral_rolloff(S=stft)
    feature_stats('spectral_rolloff', f)

    mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
    del stft
    f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
    feature_stats('mfcc', f)

    return features


# Understanding main function

In [11]:
tracks = utils.load('../raw_data/fma_metadata/tracks.csv')

In [12]:
tracks.shape

(106574, 52)

In [13]:
features = pd.DataFrame(index=tracks.index, columns=columns(), dtype=np.float32)

In [14]:
nb_workers = int(1.5 * len(os.sched_getaffinity(0)))

In [15]:
nb_workers

12

In [16]:
tids = tracks[tracks['set', 'subset'] == 'small'][tracks['album','comments'] > 9].index

  tids = tracks[tracks['set', 'subset'] == 'small'][tracks['album','comments'] > 9].index


In [17]:
tids

Int64Index([ 42372,  42373,  42374,  42375,  42376,  42377,  65063,  65064,
             73760,  73761,  73762,  73763,  73764,  73765,  73766,  73767,
             73768,  73769,  73770,  73771,  73772,  73773,  73774,  73775,
             73776,  73777,  73778,  73779, 109896, 109900, 109901, 109902,
            109903, 109904, 109905, 109906],
           dtype='int64', name='track_id')

In [18]:
pool = multiprocessing.Pool(nb_workers)















In [19]:
it = pool.imap_unordered(compute_features, tids)

In [20]:
for i, row in enumerate(tqdm(it, total=len(tids))):
    features.loc[row.name] = row

    if i % 1000 == 0:
        save(features, 10)

100%|██████████| 36/36 [00:03<00:00, 10.66it/s]


# Testing

In [21]:
metrics_wedding_ring = compute_features_from_filepath('The_Wedding_Ring', '/home/achot/code/a-pellissier/Project_Spotify_502/raw_data/Harrius_-_03_-_03_The_Wedding_Ring.mp3')



In [22]:
metrics_international_hello = compute_features_from_filepath('The_Wedding_Ring', '/home/achot/code/a-pellissier/Project_Spotify_502/raw_data/Monoshock_-_02_-_International_Hello.mp3')



In [23]:
delta_wedding_ring = pd.DataFrame(dict(delta=features.loc[20818,] - metrics_wedding_ring)).transpose()

In [24]:
# comparer les normes avec deux tracks qui n'ont rien à voir pour avoir un benchmark

In [25]:
np.linalg.norm(delta_full_sample.to_numpy())

NameError: name 'delta_full_sample' is not defined

In [None]:
delta_full_sample.to_numpy().shape

In [None]:
features_categories = np.unique(features.columns.to_frame().feature)

In [None]:
features_categories

In [None]:
np.linalg.norm(delta_full_sample['mfcc'].to_numpy())

In [None]:
def delta_table(delta_df):
    deltas = []
    for cat in features_categories:
        deltas.append(np.linalg.norm(delta_df[cat].to_numpy()))

    deltas = pd.DataFrame(dict(feature_category = features_categories, delta_l2_norm = deltas))
    return deltas

In [None]:
delta_table(delta_wedding_ring)

In [None]:
delta_international_hello = pd.DataFrame(dict(delta=features.loc[20818,] - metrics_international_hello)).transpose()

In [None]:
delta_table(delta_international_hello)

In [None]:
metrics_wedding_30sec = compute_features_from_filepath('The_Wedding_Ring_30_sec', '/home/achot/code/a-pellissier/Project_Spotify_502/raw_data/fma_small/fma_small/020/020818.mp3')

In [None]:
metrics_wedding_30sec

In [None]:
delta_wedding_ring_30_sec = pd.DataFrame(dict(delta=features.loc[20818,] - metrics_wedding_30sec)).transpose()

In [None]:
delta_table(delta_wedding_ring_30_sec)

# Loading new dataset

In [26]:
features_new_extract_first30 = pd.read_csv('../raw_data/features_new_30k.csv', index_col=0, header = [0,1,2])

In [27]:
features_new_extract_lastpart = pd.read_csv('../Project_Spotify_502/features_new.csv', index_col=0, header = [0,1,2])

In [None]:
features

In [44]:
tracks = utils.load('../raw_data/fma_metadata/tracks.csv')

In [45]:
features_new_extract_first30[tracks.index < 30000][tracks['set','subset']=='small']

  features_new_extract[tracks.index < 30000][tracks['set','subset']=='small']


feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,-0.234454,-0.723562,-0.550151,-0.684790,-0.545918,0.992595,0.817805,1.003109,-0.779054,0.436601,...,0.104404,0.029344,0.037927,3.496868,0.416504,0.098366,0.078613,0.006348,1.825609,0.068404
5,-0.565442,-0.483677,-1.312427,-0.399168,-0.251954,-0.492181,-0.794685,0.155258,-0.777880,2.738015,...,0.112733,0.031024,0.027122,6.147398,0.320801,0.059331,0.047852,0.002930,2.089943,0.047805
10,-0.086666,-0.720077,0.351088,-0.277289,-0.655878,-0.764400,-0.493031,-0.049194,-0.493217,-0.715003,...,0.062708,0.025836,0.040896,2.130873,0.188477,0.081014,0.079102,0.028320,0.907677,0.020832
140,-0.269109,1.616986,0.690625,0.394412,-0.514087,-1.105226,0.848205,-0.183420,-0.466267,-1.212579,...,0.230912,0.069595,0.086311,18.315052,0.353027,0.027262,0.014648,0.000000,3.912465,0.037718
141,0.346136,-0.344943,-0.237539,-0.814522,-0.476990,-1.210633,-0.537783,-0.756438,0.816724,0.152150,...,0.205202,0.094768,0.057476,34.342533,0.330566,0.034126,0.025879,0.000000,5.083410,0.033770
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29807,-0.871498,0.166901,1.063830,-0.557027,2.718152,-1.100499,-0.372705,2.466767,-1.839830,-0.774753,...,0.065963,0.044353,0.063405,10.500811,0.204102,0.041067,0.040527,0.005859,1.883840,0.018854
29813,0.012643,-1.054969,0.296404,-0.196315,1.452351,-1.173816,-1.385333,-0.998947,-1.095647,-0.795238,...,0.109983,0.036542,0.052422,-0.488918,0.124512,0.049573,0.048828,0.005371,0.326838,0.024831
29816,-1.234842,-1.363108,-0.559725,-0.038281,0.162651,1.328081,0.451713,-0.977526,-1.057003,-1.352387,...,0.121198,0.066619,0.061851,2.935960,0.137695,0.036209,0.033203,0.002930,1.302138,0.018952
29961,-0.738321,-0.751253,-0.711987,-1.236735,-1.155765,-0.857559,-0.958115,-0.597951,-0.643578,-0.934821,...,0.135111,0.032434,0.037415,0.273363,0.274902,0.075655,0.050537,0.003906,1.231881,0.064237


In [32]:
features_new_extract_lastpart[tracks.index >= 30000][tracks['set','subset']=='small']

  features_new_extract_lastpart[tracks.index >= 30000][tracks['set','subset']=='small']


feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
30041,-0.746030,-1.649857,-1.120620,-0.980195,0.241229,-0.168819,-1.392467,0.288521,2.668109,5.155762,...,0.160262,0.045294,0.032419,0.711287,0.131836,0.032594,0.026611,0.002930,1.069415,0.021163
30043,-0.902688,1.039840,-0.889394,-0.249334,-0.242329,-0.778594,1.470717,-0.906222,0.518828,-1.052623,...,0.118261,0.031785,0.041147,10.223324,0.259277,0.030482,0.020508,0.002441,2.749270,0.031301
30050,-0.449988,-0.067020,-0.317329,-0.245840,-0.963507,-0.113025,-0.679039,0.666721,1.699054,1.463093,...,0.076344,0.028356,0.031217,3.367347,0.230469,0.055703,0.050293,0.004883,1.479655,0.031566
30056,-0.053871,-0.497621,1.081104,0.298895,-0.580378,-0.796130,-1.338921,-0.215662,-0.033094,0.030424,...,0.144374,0.040801,0.072783,2.228630,0.112305,0.026948,0.021973,0.000000,1.307655,0.015387
30058,-0.411841,0.579794,-0.748917,-1.142410,-0.962254,-0.094058,-1.038614,0.072684,-0.731750,-0.341752,...,0.117326,0.043082,0.022605,1.482603,0.115723,0.030255,0.026367,0.003418,1.237760,0.019932
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154308,-1.047428,0.136716,1.697985,-0.793889,0.649678,-0.662867,-1.362654,-0.705569,-1.064591,2.664282,...,0.188863,0.063958,0.131238,10.709531,0.324219,0.042116,0.037598,0.002441,2.681788,0.037111
154309,2.153042,-1.181223,-0.268857,-1.099372,-1.209430,-0.831267,-0.470001,7.136765,0.411933,1.636677,...,0.285518,0.054292,0.078645,6.474872,0.536133,0.078302,0.042480,0.007812,2.375333,0.086856
154413,-1.235917,-1.426050,0.055181,4.777861,-0.562822,3.312628,5.718841,8.667578,-1.528110,1.774721,...,0.458426,0.099747,0.138526,15.444253,0.176758,0.020765,0.016113,0.000977,2.908938,0.018920
154414,-0.285016,-0.617669,1.145069,-0.804947,-1.300349,-1.039903,-1.185792,1.309471,-0.258665,-0.987307,...,0.162793,0.070587,0.072798,6.249998,0.254395,0.049925,0.037109,0.008789,2.324980,0.037535


In [35]:
features_new_extract = pd.concat([features_new_extract_first30.dropna(),features_new_extract_lastpart.dropna()])

In [36]:
features_new_extract

feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,-0.234454,-0.723562,-0.550151,-0.684790,-0.545918,0.992595,0.817805,1.003109,-0.779054,0.436601,...,0.104404,0.029344,0.037927,3.496868,0.416504,0.098366,0.078613,0.006348,1.825609,0.068404
5,-0.565442,-0.483677,-1.312427,-0.399168,-0.251954,-0.492181,-0.794685,0.155258,-0.777880,2.738015,...,0.112733,0.031024,0.027122,6.147398,0.320801,0.059331,0.047852,0.002930,2.089943,0.047805
10,-0.086666,-0.720077,0.351088,-0.277289,-0.655878,-0.764400,-0.493031,-0.049194,-0.493217,-0.715003,...,0.062708,0.025836,0.040896,2.130873,0.188477,0.081014,0.079102,0.028320,0.907677,0.020832
140,-0.269109,1.616986,0.690625,0.394412,-0.514087,-1.105226,0.848205,-0.183420,-0.466267,-1.212579,...,0.230912,0.069595,0.086311,18.315052,0.353027,0.027262,0.014648,0.000000,3.912465,0.037718
141,0.346136,-0.344943,-0.237539,-0.814522,-0.476990,-1.210633,-0.537783,-0.756438,0.816724,0.152150,...,0.205202,0.094768,0.057476,34.342533,0.330566,0.034126,0.025879,0.000000,5.083410,0.033770
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154308,-1.047428,0.136716,1.697985,-0.793889,0.649678,-0.662867,-1.362654,-0.705569,-1.064591,2.664282,...,0.188863,0.063958,0.131238,10.709531,0.324219,0.042116,0.037598,0.002441,2.681788,0.037111
154309,2.153042,-1.181223,-0.268857,-1.099372,-1.209430,-0.831267,-0.470001,7.136765,0.411933,1.636677,...,0.285518,0.054292,0.078645,6.474872,0.536133,0.078302,0.042480,0.007812,2.375333,0.086856
154413,-1.235917,-1.426050,0.055181,4.777861,-0.562822,3.312628,5.718841,8.667578,-1.528110,1.774721,...,0.458426,0.099747,0.138526,15.444253,0.176758,0.020765,0.016113,0.000977,2.908938,0.018920
154414,-0.285016,-0.617669,1.145069,-0.804947,-1.300349,-1.039903,-1.185792,1.309471,-0.258665,-0.987307,...,0.162793,0.070587,0.072798,6.249998,0.254395,0.049925,0.037109,0.008789,2.324980,0.037535


In [37]:
features_new_extract.to_csv('../raw_data/features_own_small.csv')

In [49]:
tracks.loc[features_new_extract.dropna().index,:].set.subset

track_id
2        small
5        small
10       small
140      small
141      small
         ...  
29807    small
29813    small
29816    small
29961    small
29971    small
Name: subset, Length: 1216, dtype: category
Categories (3, object): ['small' < 'medium' < 'large']

In [None]:
features_new_extract[tracks.set.subset == 'small']

In [42]:
tracks[np.logical_and(tracks.index >= 30000,tracks.index < 60000)][tracks['set','subset'] == 'small']

  tracks[np.logical_and(tracks.index >= 30000,tracks.index < 60000)][tracks['set','subset'] == 'small']


Unnamed: 0_level_0,album,album,album,album,album,album,album,album,album,album,...,track,track,track,track,track,track,track,track,track,track
Unnamed: 0_level_1,comments,date_created,date_released,engineer,favorites,id,information,listens,producer,tags,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
30041,0,2010-05-14 11:07:08,2009-04-27,MC Cullah,0,6390,<p>This album is lost somewhere in between Ele...,19068,MC Cullah,"[electronica, experimental, indie]",...,,2312,,Attribution-ShareAlike,1763,,2,,"[electronica, experimental, indie]",Rocket Into The Future
30043,0,2010-05-14 11:07:08,2009-04-27,MC Cullah,0,6390,<p>This album is lost somewhere in between Ele...,19068,MC Cullah,"[electronica, experimental, indie]",...,,6730,,Attribution-ShareAlike,5130,,5,,"[electronica, experimental, indie]",Soul Hip Hop
30050,0,2010-05-14 11:29:43,NaT,,0,6391,,16069,,[],...,,4023,,Attribution-ShareAlike,3356,,2,,[],Accordion
30056,0,2010-05-14 11:29:43,NaT,,0,6391,,16069,,[],...,,2095,,Attribution-ShareAlike,1652,,8,,[],Gypsy
30058,0,2010-05-14 11:29:43,NaT,,0,6391,,16069,,[],...,,1710,,Attribution-ShareAlike,1189,,10,,[],JazzBass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59726,0,2012-02-01 16:17:31,NaT,,0,10964,<p>The Rosen Sisters (Ariana and Amberly both ...,9294,,"[rosen sisters, violin, greece, greek, swing, ...",...,,1487,,Attribution-Noncommercial-No Derivative Works ...,1246,,8,,"[rosen sisters, violin, greece, greek, swing, ...",Gravel Walk
59727,0,2012-02-01 16:17:31,NaT,,0,10964,<p>The Rosen Sisters (Ariana and Amberly both ...,9294,,"[rosen sisters, violin, greece, greek, swing, ...",...,,2194,,Attribution-Noncommercial-No Derivative Works ...,1945,,9,,"[rosen sisters, violin, greece, greek, swing, ...",In The Mood
59823,0,2012-02-03 08:42:13,NaT,,0,10978,,4129,,[],...,,2700,,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1233,,15,,[],Cumbia De Los Locos
59876,0,2012-02-03 14:24:46,NaT,,3,10984,"<p><strong><a title=""http://www.newtownradio.c...",27059,,"[glam pop, new wave]",...,,3529,,Attribution-Noncommercial-No Derivative Works ...,2881,,7,,"[glam pop, new wave]",Lovedropper


In [34]:
tracks[tracks['set','subset'] == 'small'][]

Unnamed: 0_level_0,album,album,album,album,album,album,album,album,album,album,...,track,track,track,track,track,track,track,track,track,track
Unnamed: 0_level_1,comments,date_created,date_released,engineer,favorites,id,information,listens,producer,tags,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],...,,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
5,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],...,,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,0,2008-11-26 01:45:08,2008-02-06,,4,6,,47632,,[],...,,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
140,1,2008-11-26 01:49:59,2007-05-22,,1,61,<p>Alec K. Redfearn &amp; The Eyesores: Ellen ...,1300,"Alec K. Refearn, Rob Pemberton",[],...,,1593,en,Attribution-Noncommercial-No Derivative Works ...,1299,,2,,[],Queen Of The Wires
141,0,2008-11-26 01:49:57,2009-01-16,,1,60,"<p>A full ensamble of strings, drums, electron...",1304,,[],...,,839,en,Attribution-Noncommercial-No Derivative Works ...,725,,4,,[],Ohio
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154308,0,2017-03-05 04:57:38,2017-03-05,,0,22780,,22334,Fleslit,"[fleslit, trap beat free use, trap beat free d...",...,,3371,,Attribution,2705,,17,,"[fleslit, trap beat free use, trap beat free d...",MIA
154309,0,2017-03-05 04:57:38,2017-03-05,,0,22780,,22334,Fleslit,"[fleslit, trap beat free use, trap beat free d...",...,,4525,,Attribution,3589,,18,,"[fleslit, trap beat free use, trap beat free d...",A1 Symphony
154413,0,2017-03-07 18:44:11,NaT,Ernie Indradat,0,22789,<p>A live performance at WFMU for Dark Night o...,3777,Julie Bennack,[],...,,809,,Creative Commons Attribution-NonCommercial-NoD...,676,,9,,[],Do Easy
154414,0,2017-03-07 18:44:11,NaT,Ernie Indradat,0,22789,<p>A live performance at WFMU for Dark Night o...,3777,Julie Bennack,[],...,,851,,Creative Commons Attribution-NonCommercial-NoD...,788,,10,,[],Dead Can Dance (uncensored)
