In [132]:
import pandas as pd
import sklearn.manifold, sklearn.preprocessing, sklearn.decomposition
import numpy as np
import tqdm
import librosa
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import sys; sys.path.insert(0, '..')

import src
# from src.dataset.dataset import get_dataset

In [137]:
# Shared analysis settings. get_dataset() below reads these values and also
# writes 'sample_duration' and a derived 'hop_size' back into this dict.
config = {}
config['frame_size'] = 2048       # passed as n_fft to librosa.stft
config['hop_ratio'] = 1           # hop_size = frame_size // hop_ratio
config['n_coeff'] = 20            # not read by the code visible in these cells
config['sr'] = 10000              # sampling rate requested from librosa.load
config['sample_duration'] = 5     # overwritten on every get_dataset() call
config['feature'] = 'MFCC_welch'  # not read by the code visible in these cells

# Table of recordings; get_dataset() uses the 'file' and 'violin' columns.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
data = pd.read_pickle(
    '../data/processed/dataset_cremona.pkl',
)

def get_dataset(sample_duration=5, seed=None):
    """Build median log-power-spectrum features and encoded violin labels.

    For every row of the module-level ``data`` frame the audio file in
    ``row['file']`` is loaded at ``config['sr']``, converted to an STFT
    log-power spectrogram, and its frames are shuffled in time. The shuffled
    frames are then cut into consecutive groups holding ``sample_duration``
    seconds worth of frames (the last group of a file may be shorter), and
    each group is reduced to a single vector by a per-frequency-bin median.

    Side effects: ``config['sample_duration']`` and ``config['hop_size']`` are
    (re)written on every call. All audio files are re-read on every call.

    Parameters
    ----------
    sample_duration : int or float
        Amount of audio, in seconds, aggregated into one feature vector.
    seed : int or None
        Seed for the frame shuffle. ``None`` (default) keeps the original
        behaviour of shuffling with NumPy's global random state; any other
        value makes the shuffle reproducible.

    Returns
    -------
    X : numpy.ndarray
        One row per frame group, one column per STFT frequency bin.
    y : numpy.ndarray
        Integer class labels obtained by label-encoding the ``violin`` column,
        aligned with the rows of ``X``.

    Raises
    ------
    ValueError
        If ``sample_duration`` is too short to contain a single STFT frame.
    """
    config['sample_duration'] = sample_duration
    config['hop_size'] = config['frame_size'] // config['hop_ratio']

    # Number of STFT frames per group; identical for every file, so compute once.
    part_size = config['sr'] * config['sample_duration'] // config['hop_size']
    if part_size < 1:
        # np.arange(..., step=0) would otherwise fail with an opaque
        # ZeroDivisionError deep inside the loop.
        raise ValueError(
            f"sample_duration={sample_duration!r} is shorter than one STFT hop "
            f"({config['hop_size']} samples at sr={config['sr']})"
        )

    # np.random exposes shuffle() on the global state; a Generator exposes the
    # same method on an isolated, seeded state.
    rng = np.random if seed is None else np.random.default_rng(seed)

    vectors = []
    labels = []
    for _, row in data.iterrows():
        signal, _ = librosa.load(str(row['file']), sr=config['sr'])
        ltas = librosa.stft(y=signal, n_fft=config['frame_size'], hop_length=config['hop_size']).T
        # Log power spectrum; the small offset avoids log10(0).
        ltas = np.log10(np.abs(ltas) ** 2 + 1e-15)
        # Shuffle frames (rows) so each group mixes frames from the whole file.
        rng.shuffle(ltas)

        split_points = np.arange(part_size, ltas.shape[0], part_size)
        for part in np.array_split(ltas, split_points):
            vectors.append(np.median(part, axis=0))
            labels.append(row['violin'])

    X = np.vstack(vectors)
    y = sklearn.preprocessing.LabelEncoder().fit_transform(labels)

    return X, y

In [141]:
@interact(n_neighbors=(2,100,1), sample_duration=(1,30,1))
def plot(n_neighbors=5, sample_duration=5):
    """Embed the dataset in 2-D with t-SNE and scatter-plot it per violin class.

    Parameters
    ----------
    n_neighbors : int
        Only used by the commented-out Isomap alternative below; the active
        t-SNE embedding ignores it.
    sample_duration : int
        Forwarded to ``get_dataset``; seconds of audio per feature vector.
    """
    X, y = get_dataset(sample_duration=sample_duration)

    # embedding = sklearn.manifold.Isomap(n_components=2, n_neighbors=n_neighbors)
    embedding = sklearn.manifold.TSNE(n_components=2, max_iter=50000)
    # embedding = sklearn.manifold.MDS(n_components=2, max_iter=500)
    # embedding = sklearn.decomposition.PCA(n_components=2)
    X_transformed = embedding.fit_transform(X)
    # Rescale both embedding axes to [0, 1] so plots are comparable across runs.
    X_transformed = sklearn.preprocessing.MinMaxScaler().fit_transform(X_transformed)

    fig, ax = plt.subplots(figsize=(8,8))
    # ax = fig.add_subplot(projection='3d')

    # Class labels come from the returned `y`; the previous code read a
    # `features` frame that only exists inside get_dataset (NameError here).
    for violin in np.unique(y):
        ax.scatter(*X_transformed[y == violin].T,
            # NOTE(review): 16 is presumably the number of violin classes -- confirm.
            color=plt.cm.Spectral(violin / 16),
            # marker=f"${violin}$",
        )
    ax.set_title(f"t-SNE embedding (sample_duration={sample_duration} s)")
    # ax.colorbar()
    plt.show()

interactive(children=(IntSlider(value=5, description='n_neighbors', min=2), IntSlider(value=5, description='sa…