In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import pandas as pd
import librosa
import tqdm
import sys
import sklearn.decomposition, sklearn.compose
import seaborn as sns
sys.path.append('../src')
from features import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
config = {
    'sr': 10000,
    'frame_size': 1024*2,
    'hop_size': 1024*2,
    'n_coeff': 100,
    'size': 10
}

In [32]:
df = pd.read_pickle('../recordings.pkl')
# df = df[df['violin'].isin([1,2,3,4,5,6])]
# df = df[df['type'] == 'scale']

In [33]:
# Features
data = []
for index, row in tqdm.tqdm(df.iterrows(), total=df.shape[0]):
    y, _    = librosa.load(str(row['file']), sr=config['sr'])
    
    for audio in np.split(y, np.arange(config['sr']*config['size'], len(y), config['sr']*config['size'])):

        features = y
        for step in pipes['MFCC_librosa']:
            features = step(features, **config)
        features_dic = {f'feature_{i}': coeff for i, coeff in enumerate(features)}

        dic = row.to_dict()
        dic['audio'] = audio
        dic.update(features_dic)

        data.append(dic)

features_df = pd.DataFrame(data)

  0%|          | 0/353 [00:00<?, ?it/s]

100%|██████████| 353/353 [00:53<00:00,  6.60it/s]


In [34]:
import sklearn.model_selection
train, test = sklearn.model_selection.train_test_split(features_df, test_size=.1)

In [41]:
import sklearn.pipeline, sklearn.preprocessing, sklearn.neighbors

pipeline = sklearn.pipeline.Pipeline([
    ('scaler', sklearn.preprocessing.StandardScaler()),
    ('classifier', sklearn.neighbors.KNeighborsClassifier())
])

param_grid = [
    {
        'classifier' : [sklearn.neighbors.KNeighborsClassifier()],
        'classifier__n_neighbors' : np.arange(1, 201, 2),
        'classifier__weights' : ['uniform', 'distance'],
    }
]

In [42]:
grid_search = sklearn.model_selection.GridSearchCV(pipeline, param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(train.loc[:, train.columns.str.startswith('feature')], train['violin'])

Fitting 5 folds for each of 200 candidates, totalling 1000 fits
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=uniform; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=distance; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=uniform; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=uniform; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=uniform; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=distance; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=distance; total time=   0.0s
[CV] END classifier=KNeighborsClassifier(), classifier__n_neighbors=1, classifier__weights=distance;

In [47]:
print(grid_search.best_estimator_)
print(grid_search.best_params_)
print(grid_search.best_score_)
results = pd.DataFrame(grid_search.cv_results_)
results.head()
# plt.plot(param_grid[0]['classifier__n_neighbors'], grid_search.cv_results_['mean_test_score'])

Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 KNeighborsClassifier(n_neighbors=53, weights='distance'))])
{'classifier': KNeighborsClassifier(), 'classifier__n_neighbors': 53, 'classifier__weights': 'distance'}
0.9943711391153942


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__n_neighbors,param_classifier__weights,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003745,0.000886,0.010081,0.005372,KNeighborsClassifier(),1,uniform,"{'classifier': KNeighborsClassifier(), 'classi...",0.954386,0.989474,0.957746,0.985915,0.982394,0.973983,0.014838,99
1,0.00468,0.001041,0.006823,0.001741,KNeighborsClassifier(),1,distance,"{'classifier': KNeighborsClassifier(), 'classi...",0.954386,0.989474,0.957746,0.985915,0.982394,0.973983,0.014838,99
2,0.004545,0.001192,0.013287,0.006161,KNeighborsClassifier(),3,uniform,"{'classifier': KNeighborsClassifier(), 'classi...",0.842105,0.891228,0.862676,0.90493,0.919014,0.883991,0.028022,102
3,0.004138,0.001021,0.005065,0.001632,KNeighborsClassifier(),3,distance,"{'classifier': KNeighborsClassifier(), 'classi...",0.954386,0.989474,0.957746,0.985915,0.982394,0.973983,0.014838,99
4,0.005084,0.001558,0.014132,0.0052,KNeighborsClassifier(),5,uniform,"{'classifier': KNeighborsClassifier(), 'classi...",0.821053,0.82807,0.795775,0.838028,0.841549,0.824895,0.016265,141
