In [13]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import pandas as pd
import numpy as np
from helpers import (
    load_data,
    EMG_CHANNELS,
    EMG_HP_CHANNELS,
    GYRO_CHANNELS,
    ACC_CHANNELS,
    IMU_CHANNELS,
)
import itertools
from feature_extraction import (
    FeatureExtractor,
    process_dataframe,
    zero_crossings,
    mean_crossings,
    spectral_entropy
)
from sklearn.decomposition import KernelPCA
from sklearn.cluster import DBSCAN
from mpl_toolkits.mplot3d import Axes3D

from matplotlib import pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# Load the recordings through the project helper. In the saved output this call
# prints a "Trimming observations to ... samples" message and yields a frame
# indexed by (body_label, rep, sample_num) with time_ms, EMG (ch0, ch1, ch0_hp,
# ch1_hp), quaternion (qx..qw), gyro (gx..gz) and accelerometer (ax..az) columns.
dataset = load_data()
# Bare expression so the notebook renders the (truncated) frame.
dataset

Trimming observations to 967 samples


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_ms,ch0,ch1,ch0_hp,ch1_hp,qx,qy,qz,qw,gx,gy,gz,ax,ay,az
body_label,rep,sample_num,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,1,0.0,1514824.503,12535249,12566283,-11889,17295,0.321960,-0.596619,-0.621826,0.392090,-1.342870,1.063105,0.503576,-0.660156,-10.003906,1.21875
0,1,1.0,1514827.496,12536264,12559246,-2063,13384,0.321960,-0.596741,-0.621826,0.392029,-1.510729,1.175011,0.000000,-0.660156,-10.003906,1.21875
0,1,2.0,1514830.493,12538584,12565279,2757,16008,0.321960,-0.596741,-0.621765,0.391968,-1.230964,1.175011,-0.279765,-0.660156,-10.003906,1.21875
0,1,3.0,1514833.500,12546745,12567024,7504,5644,0.321960,-0.596741,-0.621765,0.391968,-1.230964,1.175011,-0.279765,-0.660156,-10.003906,1.21875
0,1,4.0,1514836.498,12537375,12545467,-3855,-15893,0.321960,-0.596802,-0.621765,0.391907,-1.063105,0.839294,-0.727388,-0.660156,-10.003906,1.21875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,3,962.0,1618110.457,12136089,12370189,-31898,45780,-0.749451,-0.095520,-0.654175,0.034607,-167.187300,-729.625900,-75.424520,-15.074219,-1.500000,-11.37500
4,3,963.0,1618113.448,12165574,12340495,20709,2953,-0.749451,-0.095520,-0.654175,0.034607,-167.187300,-729.625900,-75.424520,-15.074219,-1.500000,-11.37500
4,3,964.0,1618116.437,12200077,12301532,45799,-36427,-0.725403,-0.101318,-0.679504,0.041626,-168.474210,-807.568360,-64.513700,-15.074219,-1.500000,-11.37500
4,3,965.0,1618119.442,12153545,12317293,-14411,-3002,-0.725403,-0.101318,-0.679504,0.041626,-168.474210,-807.568360,-64.513700,-15.074219,-1.500000,-11.37500


In [15]:
# Build the table of feature extractors, one list per sensor channel.

# Window settings handed to every extractor that is computed per segment. The
# saved results headers (mean.0.300, mean.200.500, ...) suggest a 300-sample
# window advanced by 200 samples -- confirm against FeatureExtractor.
segment_length = 300
interval = 200

# Keyed by the entries of the channel lists; each value is the list of
# FeatureExtractor objects registered for that key.
extractors = {}

# EMG_CHANNELS: mean and variance receive the window settings, while
# mean crossings, min and max are constructed without them.
for emg_channel in EMG_CHANNELS:
    extractors[emg_channel] = [
        FeatureExtractor(
            "mean", np.mean, interval=interval, segment_length=segment_length
        ),
        FeatureExtractor(
            "var", np.var, interval=interval, segment_length=segment_length
        ),
        FeatureExtractor("mean_xing", mean_crossings),
        FeatureExtractor("min", np.min),
        FeatureExtractor("max", np.max),
    ]


# EMG_HP_CHANNELS (presumably the ch0_hp / ch1_hp columns of the dataset).
#
# `peak_frequency` is not among the names imported in the notebook's import
# cell, so on a freshly restarted kernel this cell would stop with a NameError.
# It is imported here on the assumption that it lives in feature_extraction next
# to mean_crossings and spectral_entropy -- TODO confirm the module.
from feature_extraction import peak_frequency

extractors.update(
    {
        channel: [
            FeatureExtractor("mean", np.mean, segment_length=segment_length, interval=interval),
            FeatureExtractor("var", np.var, segment_length=segment_length, interval=interval),
            # NOTE(review): labelled "mean_xing" although the function is
            # peak_frequency. The label is kept so existing result column names
            # stay stable; rename it if nothing downstream selects on it.
            FeatureExtractor("mean_xing", peak_frequency, segment_length=segment_length, interval=interval),
            # Mean of the absolute value of the signal.
            FeatureExtractor("rmean", lambda x: np.mean(np.abs(x))),
            # spectral_entropy(256, 64) is called here; whatever it returns is
            # what FeatureExtractor receives as the feature function.
            FeatureExtractor("spec_entropy", spectral_entropy(256, 64)),
        ]
        for channel in EMG_HP_CHANNELS
    }
)

# IMU_CHANNELS: mean and variance receive the window settings; the remaining
# extractors (min, max, mean crossings, mean absolute value, peak-to-peak range)
# are constructed without them.
extractors.update(
    (
        imu_channel,
        [
            FeatureExtractor(
                "mean", np.mean, interval=interval, segment_length=segment_length
            ),
            FeatureExtractor(
                "var", np.var, interval=interval, segment_length=segment_length
            ),
            FeatureExtractor("min", np.min),
            FeatureExtractor("max", np.max),
            FeatureExtractor("mean_xing", mean_crossings),
            FeatureExtractor("rmean", lambda x: np.mean(np.abs(x))),
            FeatureExtractor("max_pk_pk", lambda x: np.max(x) - np.min(x)),
        ],
    )
    for imu_channel in IMU_CHANNELS
)

# GYRO_CHANNELS: same recipe as the other motion channels but without separate
# min/max entries (the peak-to-peak range is still included).
for gyro_channel in GYRO_CHANNELS:
    extractors[gyro_channel] = [
        FeatureExtractor(
            "mean", np.mean, interval=interval, segment_length=segment_length
        ),
        FeatureExtractor(
            "var", np.var, interval=interval, segment_length=segment_length
        ),
        FeatureExtractor("mean_xing", mean_crossings),
        FeatureExtractor("rmean", lambda x: np.mean(np.abs(x))),
        FeatureExtractor("max_pk_pk", lambda x: np.max(x) - np.min(x)),
    ]

def _acc_feature_list():
    """Return a fresh list of extractors for one accelerometer channel.

    Mean and variance receive the window settings; min, max, mean crossings,
    mean absolute value and peak-to-peak range are constructed without them.
    """
    return [
        FeatureExtractor(
            "mean", np.mean, interval=interval, segment_length=segment_length
        ),
        FeatureExtractor(
            "var", np.var, interval=interval, segment_length=segment_length
        ),
        FeatureExtractor("min", np.min),
        FeatureExtractor("max", np.max),
        FeatureExtractor("mean_xing", mean_crossings),
        FeatureExtractor("rmean", lambda x: np.mean(np.abs(x))),
        FeatureExtractor("max_pk_pk", lambda x: np.max(x) - np.min(x)),
    ]


# ACC_CHANNELS: every channel gets its own, independently constructed list.
extractors.update({acc_channel: _acc_feature_list() for acc_channel in ACC_CHANNELS})

# Cross-channel features: for every unordered pair of channels inside each
# sensor group, register a single "corr" extractor keyed by the pair tuple.
# The lambda transposes its input before np.corrcoef and reads entry [0, 1];
# this assumes the pair arrives as a two-column array -- TODO confirm against
# process_dataframe.
# Groups are visited in the order IMU, GYRO, ACC so keys are inserted in the
# same sequence as three separate update calls would produce.
for channel_group in (IMU_CHANNELS, GYRO_CHANNELS, ACC_CHANNELS):
    for q_pair in itertools.combinations(channel_group, 2):
        extractors[q_pair] = [
            FeatureExtractor("corr", lambda x: np.corrcoef(x.T)[0, 1])
        ]

In [18]:
# Run every registered extractor over the dataset. In the saved output the result
# has one row per (body_label, rep) and one column per channel/feature (plus a
# start.end suffix for the windowed features).
# NOTE(review): the saved headers read "c.h.0.mean.0.300" rather than
# "ch0.mean.0.300", while pair features read "qx.qw.corr". That looks like the
# channel key being joined character by character when it is a plain string --
# worth checking inside process_dataframe.
results = process_dataframe(dataset, extractors)
# Bare expression so the notebook renders the feature table.
results

Unnamed: 0_level_0,Unnamed: 1_level_0,c.h.0.mean.0.300,c.h.0.mean.200.500,c.h.0.mean.400.700,c.h.0.mean.600.900,c.h.0.var.0.300,c.h.0.var.200.500,c.h.0.var.400.700,c.h.0.var.600.900,c.h.0.mean_xing,c.h.0.min,...,qx.qw.corr,qy.qz.corr,qy.qw.corr,qz.qw.corr,gx.gy.corr,gx.gz.corr,gy.gz.corr,ax.ay.corr,ax.az.corr,ay.az.corr
body_label,rep,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0.0,1.0,0.802236,0.638268,0.236405,0.26967,-0.583682,-0.69024,-0.756767,-0.715512,0.241633,1.058481,...,0.564739,0.615669,-1.559685,-1.073426,0.676815,-1.233287,-1.202402,-0.512415,1.182977,-0.613005
0.0,2.0,-1.111186,0.67319,0.568211,0.90103,-0.054467,0.20905,-0.158889,-0.163269,-0.600185,0.035209,...,-1.136483,-0.465117,-0.046073,-1.79122,0.342397,-1.449769,-0.735144,0.209733,1.218637,-0.805278
0.0,3.0,0.909974,-0.095089,-1.471562,-1.831561,-0.582047,0.12781,1.725635,1.729483,-0.857407,-1.582583,...,1.151648,0.64912,-0.213078,-0.103666,1.299257,0.697247,0.644817,-1.360913,-1.105779,1.631674
1.0,1.0,1.058312,0.628681,-0.000771,-0.226596,-0.581083,-0.688467,-0.755152,-0.708372,1.784966,0.904112,...,-0.949917,0.631387,1.189335,1.059107,0.869282,-1.154457,-1.232854,-0.658567,0.462207,-0.865148
1.0,2.0,0.390894,0.619202,0.107014,0.076032,-0.555173,-0.632793,-0.693095,-0.630707,-0.436498,0.862337,...,-1.146213,-0.670633,0.49229,-1.530179,-0.385945,-1.504309,0.168067,0.018262,1.464027,-0.870535
1.0,3.0,-1.110074,-0.767111,-2.327882,-2.628022,1.413269,2.044058,2.555682,2.326417,-0.950943,-1.368386,...,1.156584,0.649825,0.281531,0.295985,1.362747,0.263542,0.266474,-1.390909,-0.852045,1.847981
2.0,1.0,0.759971,0.455385,-0.002108,-0.004502,-0.56974,-0.676004,-0.753832,-0.717485,0.943148,0.941675,...,-0.923054,0.646236,0.830822,0.725695,-0.732899,0.071992,-1.366913,-0.770998,-1.84511,1.167338
2.0,2.0,0.173244,0.671922,0.150507,0.663593,-0.549489,-0.609401,-0.44446,-0.628266,-0.950943,0.490332,...,-1.154715,-0.287502,-0.39417,-0.899412,-1.223013,1.054928,0.278099,1.9284,-0.751767,-1.180159
2.0,3.0,-2.056773,-0.814277,1.584465,0.447103,2.563827,2.403388,0.677758,1.240101,-0.69372,-1.550883,...,1.070317,0.50257,0.293352,0.458386,0.53237,1.02587,1.341562,0.011297,0.636771,0.03145
3.0,1.0,0.845345,0.144705,-0.336598,1.064602,-0.582861,-0.686727,-0.754363,-0.583,0.966532,0.81885,...,0.687685,-3.244937,-2.196248,1.567154,-0.3743,-0.416954,-1.388309,-0.163946,-0.336217,-0.099849


In [19]:
from sklearn.decomposition import PCA, KernelPCA
from sklearn.cluster import KMeans

# Eigenvalue (scree) plots of the feature table for each KernelPCA kernel.
#
# Component counts to try. The list must be non-empty or the loops below never
# execute and the cell silently does nothing. None asks KernelPCA to keep every
# non-zero component, so the full eigenvalue spectrum is plotted; add integers
# here to also inspect truncated fits.
component_counts = [None]

for kernel in ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']:
    for n_components in component_counts:
        print(f'{kernel}.{n_components}')
        pca = KernelPCA(kernel=kernel, n_components=n_components)
        pca.fit(results)
        # One labelled figure per fit so each plot can be read on its own.
        fig, ax = plt.subplots()
        ax.plot(pca.eigenvalues_)
        ax.set_title(f'KernelPCA eigenvalues: kernel={kernel}, n_components={n_components}')
        ax.set_xlabel('component index')
        ax.set_ylabel('eigenvalue')
        plt.show()