In [None]:
import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
from sklearn.neighbors import NearestNeighbors
from utils import *

import pickle

import librosa
import librosa.display

# Load Data

In [None]:
# Root directory of the local dataset; every other location is derived from it.
data_path = 'data'
# Directory handed to the audio-path helper in the next cell.
fma_small_path = '/'.join((data_path, 'fma_small'))
# Directory holding the full metadata CSVs (features, tracks, genres, echonest).
fma_meta_path = '/'.join((data_path, 'fma_metadata'))

In [None]:
# Gather the audio file paths under the fma_small directory
# (presumably one entry per clip -- the helper lives in `utils`).
audio_paths = get_all_audio_paths(fma_small_path)

# One-off preprocessing, kept for reference: restrict the full metadata tables
# to the "small" subset and cache the results as CSVs under data/.
# Uncomment and run once if the *_small.csv files loaded below are missing.
# NOTE(review): `small` is a mask over the rows of `tracks`; genres.csv is
# presumably keyed by genre id rather than track id, so `genres.loc[small]`
# may not select what is intended -- confirm before re-enabling.

# features = fma_load(f'{fma_meta_path}/features.csv')
# tracks = fma_load(f'{fma_meta_path}/tracks.csv')
# genres = fma_load(f'{fma_meta_path}/genres.csv')
# echonest = fma_load(f'{fma_meta_path}/echonest.csv')

# small = tracks['set', 'subset'] <= 'small'
# features_small = features.loc[small]
# features_small.to_csv('data/features_small.csv')
# tracks_small = tracks.loc[small]
# tracks_small.to_csv('data/tracks_small.csv')
# genres_small = genres.loc[small]
# genres_small.to_csv('data/genres_small.csv')
# echonest_small = echonest.loc[small]
# echonest_small.to_csv('data/echonest_small.csv')

# Load the cached small-subset tables written by the recipe above.
features = fma_load(f'{data_path}/features_small.csv')
tracks = fma_load(f'{data_path}/tracks_small.csv')

# Select Features

In [None]:
# Boolean masks over the track table: rows labelled 'training' and rows labelled 'test'.
split_labels = tracks['set', 'split']
train = split_labels == 'training'
test = split_labels == 'test'

# Targets: the ('track', 'genre_top') column of the masked rows.
genre_column = ('track', 'genre_top')
y_train, y_test = tracks.loc[train, genre_column], tracks.loc[test, genre_column]

# Inputs: feature rows picked with the same masks, so `features` is expected
# to share its row index with `tracks`.
X_train, X_test = features.loc[train], features.loc[test]

# Quick sanity summary of what was selected.
print(f'{y_train.size} training examples, {y_test.size} testing examples')
print(f'{X_train.shape[1]} features, {np.unique(y_train).size} classes')

# Model

In [None]:
# Shuffle the training split with a fixed seed.
# The inputs are re-derived from `features` / `tracks` instead of the current
# `X_train` / `y_train`, so re-running this cell always produces the same row
# order rather than permuting already-shuffled data a second time (which would
# silently change the contents of the pickles written below).
X_train, y_train = skl.utils.shuffle(
    features.loc[train],
    tracks.loc[train, ('track', 'genre_top')],
    random_state=42,
)

# NOTE(review): the scaler is constructed but never fitted or applied, so the
# neighbour index below is built on raw, unscaled features. Turning it on means
# calling `scaler.fit_transform(X_train)` here *and* `scaler.transform(...)` on
# every query vector before it is passed to `nbrs.kneighbors`.
scaler = skl.preprocessing.StandardScaler(copy=False)

# Row position in the fitted matrix -> id from the feature table's index.
# `kneighbors` reports row positions, so this mapping is persisted next to the
# model to translate its results back.
i_to_id = X_train.index
with open(f'{data_path}/i_to_id.pkl', 'wb') as f:
    pickle.dump(i_to_id, f)

# Fit the neighbour index on all feature columns and persist it.
# 11 neighbours: presumably the query track itself plus its 10 nearest -- TODO confirm.
nbrs = NearestNeighbors(n_neighbors=11, algorithm='auto').fit(X_train)
with open(f'{data_path}/all_features_nn.pkl', 'wb') as f:
    pickle.dump(nbrs, f)

# Query

In [None]:
# Query one song
tid = 121346

# `.loc[[tid]]` keeps the single row two-dimensional for `kneighbors` and
# raises a KeyError naming the id when it is not in `features`, instead of
# handing an empty frame to sklearn and failing there with an unrelated error.
audio_feature = features.loc[[tid]]
distances, indices = nbrs.kneighbors(audio_feature)
print(audio_feature.index)

# Translate the returned row positions back to ids and plot id vs. distance.
# NOTE(review): seaborn's lineplot sorts by x by default, so the points are
# presumably drawn in id order rather than nearest-first -- pass `sort=False`
# if rank order is what should be shown.
neighbour_ids = [str(e) for e in i_to_id[indices[0]]]
sns.lineplot(x=neighbour_ids, y=distances[0])
plt.xlabel('index')
plt.ylabel('distance')