In [None]:
import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
from sklearn.neighbors import NearestNeighbors
from utils import *

import pickle

import librosa
import librosa.display

import numpy as np
from scipy import stats

from tqdm import tqdm
import multiprocessing

from datetime import datetime

# Root folder (a relative path) under which the cells below read their inputs
# and write their generated CSV / pickle files.
data_path = 'data'
# Folder holding the fma_small audio files; passed to get_all_audio_paths().
fma_small_path = f'{data_path}/fma_small'
# Folder holding the full FMA metadata CSVs; in the cells visible here it is
# only referenced by the commented-out subset-filtering recipe.
fma_meta_path = f'{data_path}/fma_metadata'

# Load Data

In [None]:
# List the audio files under the fma_small folder.
# (get_all_audio_paths is not defined in this notebook; presumably it comes from
# the `utils` star-import and returns a list -- later cells call .remove() on it.)
audio_paths = get_all_audio_paths(fma_small_path)

# One-off recipe, kept commented out for reference: restrict the full FMA
# metadata tables to the `small` subset and cache them as data/*_small.csv.

# features = fma_load(f'{fma_meta_path}/features.csv')
# tracks = fma_load(f'{fma_meta_path}/tracks.csv')
# genres = fma_load(f'{fma_meta_path}/genres.csv')
# echonest = fma_load(f'{fma_meta_path}/echonest.csv')

# small = tracks['set', 'subset'] <= 'small'
# features_small = features.loc[small]
# features_small.to_csv('data/features_small.csv')
# tracks_small = tracks.loc[small]
# tracks_small.to_csv('data/tracks_small.csv')
# genres_small = genres.loc[small]
# genres_small.to_csv('data/genres_small.csv')
# echonest_small = echonest.loc[small]
# echonest_small.to_csv('data/echonest_small.csv')
# NOTE(review): `small` is a mask built from `tracks`; confirm that `genres` and
# `echonest` share the same index before re-enabling those two filters.

# Load the cached `small` subset tables written by the recipe above.
features = fma_load(f'{data_path}/features_small.csv')
tracks = fma_load(f'{data_path}/tracks_small.csv')

# Model

In [None]:
# Shuffle the rows reproducibly (fixed seed) before fitting.
features = skl.utils.shuffle(features, random_state=42)

# Standardize features by removing the mean and scaling to unit variance.
# The transformed values are assigned back explicitly: StandardScaler(copy=False)
# only *tries* to scale in place and may hand back a scaled copy for inputs that
# are not plain NumPy arrays (e.g. a DataFrame). Discarding that return value
# would leave `features` unscaled and the neighbour model fitted on raw values.
scaler = skl.preprocessing.StandardScaler()
features = pd.DataFrame(
    scaler.fit_transform(features),
    index=features.index,
    columns=features.columns,
)

# Row position -> track id lookup; persisted so that the positional indices
# returned by the neighbour model can be mapped back to track ids.
i_to_id = features.index
with open(f'{data_path}/i_to_id.pkl', 'wb') as f:
    pickle.dump(i_to_id, f)

# Fit the neighbour index on the standardized features and persist it.
nbrs = NearestNeighbors(n_neighbors=11, algorithm='auto').fit(features)
with open(f'{data_path}/all_features_nn.pkl', 'wb') as f:
    pickle.dump(nbrs, f)

# Query

In [None]:
# Query one song: select its feature row, ask the fitted model for the
# nearest neighbours and plot neighbour track id against distance.
tid = 121346
audio_feature = features.loc[features.index == tid]
distances, indices = nbrs.kneighbors(audio_feature)
print(audio_feature.index)

# Translate the positional neighbour indices back to track ids (as strings,
# so the plot treats them as categories rather than numbers).
neighbor_ids = list(map(str, i_to_id[indices[0]]))
print(neighbor_ids)

sns.lineplot(x=neighbor_ids, y=distances[0])
plt.xlabel('index')
plt.ylabel('distance')
# Neighbour id lists, presumably recorded from earlier runs:
# ['121346', '130986', '48492', '1680', '70174', '145708', '127286', '154414', '145653', '127193', '1685']
# ['121346', '22472', '22473', '121317', '37538', '121322', '9152', '56496', '49039', '86415', '142092']

# Tempo / Beats Features

In [None]:
# Re-list the fma_small audio files (same call as in the "Load Data" cell).
audio_paths = get_all_audio_paths(fma_small_path)

def _feature_stats(name, values):
    """Summarise ``values`` as ``{name}_mean/_std/_median/_min/_max`` floats.

    Input without any elements yields 0.0 for every statistic instead of the
    NaNs / errors NumPy produces when reducing an empty array.
    (Skew and kurtosis from ``scipy.stats`` are currently not computed.)
    """
    values = np.asarray(values)
    # `size` rather than `len`: a 2-D array of shape (1, 0) has len 1 but holds
    # no data, so a `len` check would let it through to the reductions below.
    if values.size == 0:
        return {
            f'{name}{suffix}': 0.0
            for suffix in ('_mean', '_std', '_median', '_min', '_max')
        }
    return {
        f'{name}_mean': float(np.mean(values)),
        f'{name}_std': float(np.std(values)),
        f'{name}_median': float(np.median(values)),
        f'{name}_min': float(np.min(values)),
        f'{name}_max': float(np.max(values)),
    }


def compute_beats_features(path):
    """Compute tempo, beat and RMS summary features for one audio file.

    Parameters
    ----------
    path : str
        Path of the audio file. The file name up to its first ``'.'`` is used
        as the track id.

    Returns
    -------
    pandas.Series
        Named after the track id. Holds a ``tempo`` entry followed by the
        ``beats_*`` and ``rms_*`` statistics (mean, std, median, min, max), in
        that order.

    Any exception raised by librosa while loading or analysing the file
    propagates to the caller.
    """
    # basename() instead of splitting on '/' so that OS-specific path
    # separators are handled as well.
    tid = os.path.basename(path).split('.')[0]
    track_features = pd.Series(dtype=np.float32, name=tid)

    y, sr = librosa.load(path)

    # Tempo and beat positions as returned by librosa's beat tracker.
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version `tempo` is a scalar or a one-element
    # array; always store a plain float so the Series stays numeric.
    track_features['tempo'] = float(np.atleast_1d(tempo).ravel()[0])
    for key, value in _feature_stats('beats', beats).items():
        track_features[key] = value

    # RMS computed from the magnitude spectrogram.
    stft = np.abs(librosa.stft(y=y, n_fft=2048, hop_length=512))
    rms = librosa.feature.rms(S=stft)
    for key, value in _feature_stats('rms', rms).items():
        track_features[key] = value

    return track_features

In [None]:
# Column layout of the beats feature table: 'tempo' first, then one column per
# (feature, statistic) pair, grouped by feature.
stat_names = ['_mean', '_std', '_median', '_min', '_max']
feature_names = ['beats', 'rms']
col_names = [
    'tempo',
    *(f'{feature}{suffix}' for feature in feature_names for suffix in stat_names),
]

In [None]:
# One worker process per CPU reported by the OS. Every worker loads a whole
# track at a time, so memory use grows with the number of workers.
# Oversubscribing alternative (sched_getaffinity only exists on certain OSes):
# nb_workers = int(1.5 * len(os.sched_getaffinity(0)))
nb_workers = os.cpu_count() or 1  # cpu_count() may return None

print(f'Working with {nb_workers} processes.')

# Audio files that cannot be loaded.
unloadable_paths = {
    'data/fma_small/133/133297.mp3',
    'data/fma_small/099/099134.mp3',
    'data/fma_small/108/108925.mp3',
}
# Filter rather than list.remove(): remove() raises ValueError as soon as one
# of the listed files is not present in the listing.
audio_paths = [
    path for path in get_all_audio_paths(fma_small_path)
    if path not in unloadable_paths
]

# remove tracks that are already computed 
# bf = pd.read_csv(f'{data_path}/beats_features.csv', index_col=0)
# print(f'start with {len(audio_paths)} tracks')
# tids = ['{:06d}'.format(i) for i in bf.index]
# for path in [f'{fma_small_path}/{i[0:3]}/{i}.mp3' for i in tids]:
#     if path in audio_paths:
#         audio_paths.remove(path)
#     else:
#         print(f'{path} not in.')
# print(f'end with {len(audio_paths)} tracks')

# Collect the per-track Series in a list and build the frame from it; growing a
# DataFrame one `.loc` row at a time re-allocates on every insertion.
rows = []
# The context manager shuts the worker processes down even when a track fails.
with multiprocessing.Pool(nb_workers) as pool:
    it = pool.imap_unordered(compute_beats_features, audio_paths)
    for i, row in enumerate(tqdm(it, total=len(audio_paths))):
        rows.append(row)

        # Checkpoint after every 500 finished tracks (not after the very first).
        # NOTE(review): ':' in the file name is not valid on Windows.
        if (i + 1) % 500 == 0:
            pd.DataFrame(rows, columns=col_names).to_csv(
                f'{data_path}/beats_features_{datetime.now().strftime("%H:%M")}.csv'
            )

# One row per track (indexed by the Series names, i.e. the track ids).
beats_features = pd.DataFrame(rows, columns=col_names)
beats_features.to_csv(f'{data_path}/beats_features.csv')


# Beats Nearest Neighbor

In [None]:
# Load the beats feature table written by the extraction cell (track id index).
beats_features = pd.read_csv(f'{data_path}/beats_features.csv', index_col=0)

# Standardize every column. The scaled values are assigned back explicitly:
# StandardScaler(copy=False) does not guarantee in-place scaling for a
# DataFrame, and discarding the return value would leave the columns on their
# raw scales, so the large-valued ones would dominate the distances.
scaler = skl.preprocessing.StandardScaler()
beats_features = pd.DataFrame(
    scaler.fit_transform(beats_features),
    index=beats_features.index,
    columns=beats_features.columns,
)

# Row position -> track id lookup, persisted next to the model.
beats_i_to_id = beats_features.index
with open(f'{data_path}/beats_i_to_id.pkl', 'wb') as f:
    pickle.dump(beats_i_to_id, f)

# Fit the neighbour index on the standardized beats features and persist it.
beats_nbrs = NearestNeighbors(n_neighbors=11, algorithm='auto').fit(beats_features)
with open(f'{data_path}/beats_features_nn.pkl', 'wb') as f:
    pickle.dump(beats_nbrs, f)

In [None]:
# Query one song against the beats model and plot neighbour id vs. distance.
tid = 121346
audio_feature = beats_features.loc[beats_features.index == tid]
distances, indices = beats_nbrs.kneighbors(audio_feature)
print(audio_feature.index)

# Positional neighbour indices -> track ids, as strings for a categorical axis.
neighbor_ids = list(map(str, beats_i_to_id[indices[0]]))
print(neighbor_ids)

sns.lineplot(x=neighbor_ids, y=distances[0])
plt.xlabel('index')
plt.ylabel('distance')

# Timbre Feature

In [None]:
# Reload the cached `small` subset tables so this section starts from the
# unshuffled, unscaled data.
features = fma_load(f'{data_path}/features_small.csv')
tracks = fma_load(f'{data_path}/tracks_small.csv')

# train = tracks['set', 'split'] == 'training'
# test = tracks['set', 'split'] == 'test'

# y_train = tracks.loc[train, ('track', 'genre_top')]
# y_test = tracks.loc[test, ('track', 'genre_top')]

# X_train = features.loc[train]
# # X_test = features.loc[test]

# Keep only the 'mfcc' columns as the timbre features. The explicit copy
# decouples them from `features`: a plain column selection may share memory
# with the parent frame, so the in-place scaling attempted in the next cell
# could otherwise silently rewrite `features` as well.
timbre_features = features['mfcc'].copy()

# print(f'{y_train.size} training examples, {y_test.size} testing examples')
# print(f'{X_train.shape[1]} features, {np.unique(y_train).size} classes')

In [None]:
# Row position -> track id lookup for the timbre model.
timbre_i_to_id = timbre_features.index

# Standardize every column. The scaled values are assigned back explicitly:
# StandardScaler(copy=False) does not guarantee in-place scaling for a
# DataFrame (here a column selection of another frame), and discarding the
# return value could leave the model fitted on unscaled data.
scaler = skl.preprocessing.StandardScaler()
timbre_features = pd.DataFrame(
    scaler.fit_transform(timbre_features),
    index=timbre_features.index,
    columns=timbre_features.columns,
)

with open(f'{data_path}/timbre_i_to_id.pkl', 'wb') as f:
    pickle.dump(timbre_i_to_id, f)

# Fit the neighbour index on the standardized timbre features and persist it.
timbre_nbrs = NearestNeighbors(n_neighbors=11, algorithm='auto').fit(timbre_features)
with open(f'{data_path}/timbre_features_nn.pkl', 'wb') as f:
    pickle.dump(timbre_nbrs, f)

In [None]:
# Query one song against the timbre model and plot neighbour id vs. distance.
tid = 121346
audio_feature = timbre_features.loc[timbre_features.index == tid]
distances, indices = timbre_nbrs.kneighbors(audio_feature)
print(audio_feature.index)

# Positional neighbour indices -> track ids, as strings for a categorical axis.
neighbor_ids = list(map(str, timbre_i_to_id[indices[0]]))
print(neighbor_ids)

sns.lineplot(x=neighbor_ids, y=distances[0])
plt.xlabel('index')
plt.ylabel('distance')