In [None]:
import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
from sklearn.neighbors import NearestNeighbors
from utils import *

import librosa
import librosa.display

# Load Data

In [None]:
# Dataset locations, relative to the notebook's working directory.
# fma_meta_path: directory holding the metadata CSV files.
# fma_small_path: root directory handed to get_all_audio_paths.
fma_meta_path = 'data/fma_metadata'
fma_small_path = 'data/fma_small'

In [None]:
# Audio paths for everything under the fma_small directory
# (exact return type is defined by utils.get_all_audio_paths).
audio_paths = get_all_audio_paths(fma_small_path)

# Load the four metadata tables. The calls are evaluated left to right, i.e.
# in the order features -> tracks -> genres -> echonest.
features, tracks, genres, echonest = (
    fma_load(f'{fma_meta_path}/features.csv'),
    fma_load(f'{fma_meta_path}/tracks.csv'),
    fma_load(f'{fma_meta_path}/genres.csv'),
    fma_load(f'{fma_meta_path}/echonest.csv'),
)

In [None]:
# One-off export (disabled): keep only the rows of each table that belong to the 'small' subset

# small = tracks['set', 'subset'] <= 'small'
# features_small = features.loc[small]
# features_small.to_csv('data/features_small.csv')
# tracks_small = tracks.loc[small]
# tracks_small.to_csv('data/tracks_small.csv')
# genres_small = genres.loc[small]
# genres_small.to_csv('data/genres_small.csv')
# echonest_small = echonest.loc[small]
# echonest_small.to_csv('data/echonest_small.csv')

# Train KNN Model

In [None]:
# Per-track metadata columns used to build the row masks.
subset_of_track = tracks['set', 'subset']
split_of_track = tracks['set', 'split']

# Boolean masks over the rows of `tracks`.
# NOTE(review): the `<=` comparison presumably relies on 'subset' being an
# ordered categorical produced by fma_load -- confirm.
small = subset_of_track <= 'small'
train = split_of_track == 'training'
test = split_of_track == 'test'

# Rows of the small subset that fall into each split.
train_rows = small & train
test_rows = small & test

# Labels come from the ('track', 'genre_top') column of `tracks`;
# inputs are the 'mfcc' columns of `features`, selected with the same masks.
label_col = ('track', 'genre_top')
y_train = tracks.loc[train_rows, label_col]
y_test = tracks.loc[test_rows, label_col]
X_train = features.loc[train_rows, 'mfcc']
X_test = features.loc[test_rows, 'mfcc']

# Quick sanity summary of the split sizes.
print(f'{y_train.size} training examples, {y_test.size} testing examples')
print(f'{X_train.shape[1]} features, {np.unique(y_train).size} classes')

In [None]:
# Shuffle the training rows (features and labels together) with a fixed seed
# so the result is reproducible.
X_train, y_train = skl.utils.shuffle(X_train, y_train, random_state=42)

# Standardize features by removing the mean and scaling to unit variance.
# The scaler is fitted on the training split only and then applied to the
# test split.
#
# The transformed values are assigned back explicitly. StandardScaler's
# copy=False is only a best-effort request and is not guaranteed to modify a
# pandas DataFrame in place, so discarding the return value can silently
# leave the data unscaled. Re-wrapping the result with the original
# index/columns keeps X_train / X_test usable as DataFrames by later cells.
scaler = skl.preprocessing.StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    index=X_train.index,
    columns=X_train.columns,
)
X_test = pd.DataFrame(
    scaler.transform(X_test),
    index=X_test.index,
    columns=X_test.columns,
)

# Support vector classification with scikit-learn's default hyper-parameters,
# evaluated by accuracy on the held-out test split.
clf = skl.svm.SVC()
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print('Accuracy: {:.2%}'.format(score))

In [None]:
# Unsupervised nearest-neighbour model over the training features; each query
# returns 10 neighbours, and scikit-learn picks the search algorithm ('auto').
nbrs = NearestNeighbors(algorithm='auto', n_neighbors=10)
nbrs = nbrs.fit(X_train)

# Query

In [None]:
# Query one song: use the first row of the training features as the query.
query_row = X_train.head(1)
distances, indices = nbrs.kneighbors(query_row)

# Plot the distance to each returned neighbour for that single query
# (row 0 of the result). The neighbour indices are converted to strings
# before being used as x values.
neighbour_labels = list(map(str, indices[0]))
ax = sns.lineplot(x=neighbour_labels, y=distances[0])
ax.set_xlabel('index')
ax.set_ylabel('distance')

In [None]:
# Rows of `tracks` that are in the 'small' subset and have non-null values in
# both ('track', 'genre_top') and ('track', 'date_recorded').
in_small = tracks['set', 'subset'] == 'small'
has_genre = tracks['track', 'genre_top'].notna()
has_date = tracks['track', 'date_recorded'].notna()
tracks.loc[in_small & has_genre & has_date]