In [8]:
from sklearn.neighbors import KernelDensity
from utils import paths
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy

In [61]:
piece = 'D960'
deviations_path = os.path.join(paths.get_root_folder(), 'processed data', piece, 'deviations')
deviations_names = paths.get_files(deviations_path)
training_data = pd.DataFrame()

for deviation in deviations_names:
    data_path = os.path.join(deviations_path, deviation)
    performer = deviation.split('-')[0]
    data = pd.read_json(data_path)
    data['performer'] = performer
    training_data = pd.concat([training_data, data])

In [62]:
columns = ['time_onset', 'time_offset', 'velocity_onset', 'velocity_offset','duration']
std_columns = [name + '_standardized' for name in columns]
training_data = training_data[std_columns + ['performer']]
training_data

Unnamed: 0,time_onset_standardized,time_offset_standardized,velocity_onset_standardized,velocity_offset_standardized,duration_standardized,performer
P1-0-5,0.297720,0.401625,0.918947,-0.848752,0.346522,p8
P1-1-1,0.251078,0.532817,1.306164,2.336603,1.770084,p8
P1-1-10,0.145964,0.246683,0.992984,-0.523058,1.445022,p8
P1-1-11,0.117390,0.202265,0.520083,-0.681929,0.414236,p8
P1-1-12,0.143571,0.427097,0.508893,0.301511,0.981600,p8
...,...,...,...,...,...,...
P1-99-4,-0.138567,-0.132073,0.402911,0.390377,1.576119,p1
P1-99-5,-0.133104,-0.137414,1.391434,0.275600,-1.007807,p1
P1-99-6,-0.131065,-0.134615,0.145986,-1.294837,-0.504155,p1
P1-99-7,-0.130829,-0.135929,-1.454349,0.226273,-0.485643,p1


In [63]:
y = training_data['performer']
mi = training_data.min()
ma = training_data.max()
bandwidth = 0.1
n_samples = 100

In [64]:
ds = []
for performer in y.unique():
    mask = training_data['performer'] == performer
    data = training_data[mask]
    performer_ds = []
    print(performer)
    for column in data.columns:
        if column != 'performer':
            X = data[column].dropna().to_numpy().reshape(-1,1)
            kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
            x = np.linspace(mi[column], ma[column], n_samples)
            log = kde.score_samples(x.reshape(-1, 1))
            performer_ds.append(np.exp(log))
    ds.append(np.array(performer_ds))
ds = np.array(ds)


p8
p4
p6
p2
p0
p9
p5
p7
p3
p10
p1


In [72]:
test_performer = deviations_names[0].split('-')[0]
data_path = os.path.join(deviations_path, test_performer + '-0.json')
data = pd.read_json(data_path)
test_data = data[:20]
test_data = test_data[std_columns]
test_performer

'p8'

In [73]:
test_ds = []
for column in test_data.columns:
    X = data[column].dropna().to_numpy().reshape(-1,1)
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
    x = np.linspace(mi[column], ma[column], n_samples)
    log = kde.score_samples(x.reshape(-1, 1))
    test_ds.append(np.exp(log))
test_ds = np.array(test_ds)

In [74]:
entropies = [[scipy.stats.entropy(test_ds[i], performer[i]) for i in range(len(test_ds))] for performer in ds]
entropies

[[0.0, 0.0, 0.0, 0.0, 0.0],
 [2.3029495193126603,
  2.3006265043459875,
  0.16756758823638102,
  0.10836309693599945,
  0.05390795836005323],
 [4.115905669975254,
  4.183389059349215,
  0.07270703102610963,
  0.01016908806608825,
  0.26681588230885667],
 [0.6343564935720335,
  0.616597622969336,
  0.0684634005907207,
  0.05788223489070349,
  0.0400646942918585],
 [7.357517864369885,
  6.444772160852242,
  0.20160621358520675,
  0.011243467226391777,
  0.13607489587021657],
 [0.5712352658307402,
  0.5536894322859465,
  0.056536772741936483,
  0.03583085125230707,
  0.06277661175472128],
 [0.48645024468860426,
  0.4705819000810854,
  0.018073775986556302,
  0.035436788465685994,
  0.029118047702749852],
 [1.5867242568379671,
  1.5713008037411746,
  0.07510260883450977,
  0.010118109016032639,
  0.05596976320701887],
 [4.577264005949836,
  4.766343863259529,
  0.3703095756056464,
  0.02236063092810903,
  0.21088019584080303],
 [0.8679396679326177,
  0.8527009746837493,
  0.240324276359426

In [75]:
summed = [sum(e) for e in entropies]
summed

[0.0,
 4.933414667191082,
 8.648986730725523,
 1.4173644463146524,
 14.151214601903941,
 1.2800689338656517,
 1.039660756924682,
 3.2992155416367033,
 9.947158271583922,
 2.073068798200195,
 4.872593718019669]

In [76]:
index = np.argmin(np.array(summed))
classified_performer = y[index]
classified_performer

'p8'