Upload your `.csv` files to `data/csv` if you want to build your own dataset. File names must follow the `gesture-name_id.csv` convention (for example, `fist_0.csv`).

In [8]:
# List the CSV files currently present in `data/csv`.
!ls data/csv

extension_0.csv  extension_4.csv  fist_2.csv  flexion_0.csv  flexion_4.csv
extension_1.csv  extension_5.csv  fist_3.csv  flexion_1.csv  flexion_5.csv
extension_2.csv  fist_0.csv	  fist_4.csv  flexion_2.csv
extension_3.csv  fist_1.csv	  fist_5.csv  flexion_3.csv


# Build the manually annotated dataset

In [9]:
from constants import NPZ_DATASET_SHARDS_PATH, SAMPLE_LENGTH, SAMPLE_OVERLAP, NP_DATASET_PATH, CSV_DATASET_SHARDS_PATH
from build_ds import build_ds
import numpy as np



# Assemble the dataset with `build_ds`, report its basic properties, and persist
# it as a compressed `.npz` archive at NP_DATASET_PATH.
# NOTE(review): presumably SAMPLE_LENGTH / SAMPLE_OVERLAP control how the CSV
# recordings are windowed into samples — confirm in `build_ds`.
X, y = build_ds(
    NPZ_DATASET_SHARDS_PATH,
    SAMPLE_LENGTH,
    SAMPLE_OVERLAP,
    CSV_DATASET_SHARDS_PATH,
)

# Shapes first, then dtypes, for features and labels respectively.
dataset_summary = (X.shape, y.shape, X.dtype, y.dtype)
print(*dataset_summary)

# Per-label sample counts, to spot class imbalance at a glance.
label_distribution = np.unique(y, return_counts=True)
print(label_distribution)

np.savez_compressed(NP_DATASET_PATH, X=X, y=y)

`.gitkeep` doesn't follow the convention. A compliant file name is `gesture-name_id.csv`
(5595, 30, 9) (5595,) int16 uint8
(array([0, 1, 2, 7], dtype=uint8), array([2787,  936,  936,  936]))


# Build the k-means-annotated dataset


In [10]:
from clustering import clustering_pipeline
from constants import NP_DATASET_KMEANS_PATH
from utils import get_dataset



# Re-annotate the dataset with cluster labels: fit the clustering pipeline on the
# samples, compare the resulting label distribution with the original one, and
# save the samples together with the cluster labels at NP_DATASET_KMEANS_PATH.
X, y = get_dataset(dtype_data=np.float64)
clustering_pipeline.fit(X)

# Read the fitted labels from the last pipeline step through the public `steps`
# attribute instead of the private `_final_estimator` accessor.
labels = clustering_pipeline.steps[-1][1].labels_

print('Original labels distribution:')
print(np.unique(y, return_counts=True))
print('\n******\nKmeans labels distribution:')
print(np.unique(labels, return_counts=True))

# The saved `y` holds the cluster ids, not the original annotations; cluster ids
# are assigned by the clustering step and need not match the original label values.
np.savez_compressed(NP_DATASET_KMEANS_PATH, X=X, y=labels)

Original labels distribution:
(array([0, 1, 2, 7], dtype=uint8), array([2787,  936,  936,  936]))

******
Kmeans labels distribution:
(array([0, 1, 2, 3], dtype=int32), array([ 936, 2893,  957,  809]))


# Quick test: cross-validate an SVC on the k-means-annotated dataset

In [11]:
from utils import get_dataset
from grid_search import scoring
from pipelines import preprocessing_pipeline
from cross_validation import np2list
from constants import CROSS_VALIDATION_REPORT_PATH
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
import json
import numpy as np


# Quick sanity check: 5-fold cross-validation of a default SVC, preceded by the
# project's preprocessing pipeline, on the dataset stored at NP_DATASET_KMEANS_PATH.
X, y = get_dataset(NP_DATASET_KMEANS_PATH)
model = SVC()
pipeline = make_pipeline(preprocessing_pipeline, model)
scores = cross_validate(pipeline, X, y, cv=5, scoring=scoring, n_jobs=-1)

# NOTE(review): `np2list` presumably converts the numpy arrays in `scores` to
# plain lists in place (its return value is unused) so that `json.dump` can
# serialise them — confirm in `cross_validation`.
np2list(scores)
print(scores)

# Write the report before announcing it, so the banner never points at a file
# that failed to be written.
with open(CROSS_VALIDATION_REPORT_PATH, 'w') as f:
    json.dump(scores, f)

# Size the banner from the message itself so it stays aligned for any report path.
report_notice = f"* See `{CROSS_VALIDATION_REPORT_PATH}` for a better visualization *"
banner_rule = '*' * len(report_notice)
print(f"\n\n{banner_rule}\n{report_notice}\n{banner_rule}")

{'fit_time': [0.23417949676513672, 0.2393202781677246, 0.27869129180908203, 0.16337013244628906, 0.14156293869018555], 'score_time': [0.050717830657958984, 0.04937863349914551, 0.051511526107788086, 0.04736733436584473, 0.049300432205200195], 'test_accuracy': [0.9955317247542449, 0.9892761394101877, 0.9919571045576407, 0.9892761394101877, 0.9910634495084897], 'test_f1_macro': [0.9953060959981763, 0.9876063969787936, 0.9919490151201376, 0.9869364102074583, 0.9906565886908859], 'test_f1_micro': [0.9955317247542449, 0.9892761394101877, 0.9919571045576407, 0.9892761394101877, 0.9910634495084897]}


***************************************************
* See `cv_report.json` for a better visualization *
***************************************************
