In [1]:
from tools import OUT_PATH, open_meta_df
import pandas as pd
import numpy as np
from skopt import BayesSearchCV
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from tqdm.auto import tqdm
from skopt.space import Integer, Categorical, Real
from pprint import pprint
from warnings import filterwarnings
from joblib import dump
from KMeansCustomEstimator import KMeansCustomEstimator
from functools import reduce
from operator import mul
filterwarnings("ignore")

In [14]:
model_name = 'KNN'

In [2]:
meta_df = open_meta_df()
video_id = np.load(OUT_PATH / 'video_id.npy')
landmarks = np.load(OUT_PATH / 'landmarks.npy')
train_idx = np.load(OUT_PATH / 'train_idx.npy')
test_idx = np.load(OUT_PATH / 'test_idx.npy')
meta_df.head()

Unnamed: 0,filename,stem,pose_id,word,sinalizador,repetition
0,/media/eros/BackupMae/datasets/Minds/Sinalizad...,01AcontecerSinalizador01-1,1,Acontecer,1,1
1,/media/eros/BackupMae/datasets/Minds/Sinalizad...,01AcontecerSinalizador01-2,1,Acontecer,1,2
2,/media/eros/BackupMae/datasets/Minds/Sinalizad...,01AcontecerSinalizador01-3,1,Acontecer,1,3
3,/media/eros/BackupMae/datasets/Minds/Sinalizad...,01AcontecerSinalizador01-4,1,Acontecer,1,4
4,/media/eros/BackupMae/datasets/Minds/Sinalizad...,01AcontecerSinalizador01-5,1,Acontecer,1,5


In [3]:
landmarks.shape

(147205, 33, 3)

In [4]:
observations = reduce(mul, landmarks.shape[1:])

stacked_train_landmarks = [landmarks[video_id == i].reshape((-1, observations))  for i in train_idx]
stacked_test_landmarks = [landmarks[video_id == i].reshape((-1, observations)) for i in test_idx]

classes = meta_df['pose_id'].values

y_train = classes[train_idx]
y_test = classes[test_idx]
observations

99

In [5]:
len(y_train), len(y_test), len(y_train) + len(y_test), len(classes)

(793, 265, 1058, 1058)

In [6]:
opt = BayesSearchCV(
    KMeansCustomEstimator(
        KNeighborsClassifier,
        two_dimensions=False,
        kmeans_keys=('n_clusters', ),
        estimator_keys=('n_neighbors', 'weights'),
        n_clusters=8,
        n_neighbors=5,
        weights='uniform',
    ),
    {
        'estimator': Categorical([KNeighborsClassifier]),
        'two_dimensions': Categorical([False]),
        'kmeans_keys': Categorical([frozenset(['n_clusters'])]),
        'estimator_keys': Categorical([frozenset(['n_neighbors', 'weights'])]),
        'n_clusters': Integer(1, 50),
        'n_neighbors': Integer(1, 100),
        'weights': Categorical(['uniform', 'distance']),
    },
    n_iter=100,
    random_state=42,
    cv=3,
    n_jobs=-1,
    verbose=0,
)
opt.fit(stacked_train_landmarks, y_train)
opt.best_score_

0.7364684581665714

In [7]:
opt.best_params_

OrderedDict([('estimator',
              sklearn.neighbors._classification.KNeighborsClassifier),
             ('estimator_keys', frozenset({'n_neighbors', 'weights'})),
             ('kmeans_keys', frozenset({'n_clusters'})),
             ('n_clusters', 1),
             ('n_neighbors', 1),
             ('two_dimensions', False),
             ('weights', 'uniform')])

In [9]:
estimator = clone(opt.best_estimator_)

In [10]:
%%time
estimator.fit(stacked_train_landmarks, y_train)
train_score = estimator.score(stacked_train_landmarks, y_train) 
test_score = estimator.score(stacked_test_landmarks, y_test)

CPU times: user 25.1 s, sys: 454 ms, total: 25.6 s
Wall time: 1.64 s


In [12]:
print('Acurácia de treino:', train_score, '\nAcurácia de teste:', test_score)

Acurácia de treino: 1.0 
Acurácia de teste: 0.8188679245283019


In [13]:
opt.cv_results_

{'mean_fit_time': array([1.23480074, 2.99152446, 1.58220522, 2.50019217, 2.33190815,
        0.91289926, 2.62146219, 2.59780844, 1.48579327, 1.05034963,
        3.01482765, 0.29654638, 0.29834994, 0.29294403, 0.29708076,
        1.29625336, 2.99741379, 0.28895871, 2.71668323, 0.29262249,
        0.29144669, 0.31887396, 0.38354246, 0.29454843, 2.94306922,
        0.29488126, 0.29710539, 0.2942431 , 2.94818147, 0.29107912,
        0.29238868, 3.01382287, 2.91250571, 0.30385955, 2.96678305,
        1.2550288 , 0.48572739, 0.29571907, 0.29346498, 0.29361471,
        0.29425041, 2.94270444, 1.6209271 , 0.37858343, 2.96806407,
        0.60827645, 1.53551658, 2.7954603 , 1.90306012, 1.91735466,
        1.42145022, 2.04934295, 2.74395084, 2.79175417, 2.36548034,
        2.5220743 , 2.14684335, 2.33765133, 0.29662275, 1.993891  ,
        2.98246829, 0.29829049, 0.29467607, 0.29193099, 1.3249778 ,
        1.61173145, 1.41867224, 2.2880067 , 2.59126202, 1.97480122,
        2.17543674, 2.97595239,

In [16]:
dump(opt.cv_results_, OUT_PATH / f'scores/{model_name}_scores.h5', compress=9)
dump(estimator, OUT_PATH / f'Models/{model_name}.h5', compress=9)

['Outs/Models/KNN.h5']