In [1]:
#Run cell to mount Google Drive (Colab only)
#The drive is mounted at /content/drive; the package install and the data/results
#paths used further down all live under /content/drive/MyDrive/EMG_gestures/
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# install package to have access to custom functions
%pip install /content/drive/MyDrive/EMG_gestures/ --use-feature=in-tree-build

Processing ./drive/MyDrive/EMG_gestures
Building wheels for collected packages: EMG-gestures
  Building wheel for EMG-gestures (setup.py) ... [?25l[?25hdone
  Created wheel for EMG-gestures: filename=EMG_gestures-0.1.0-py3-none-any.whl size=38576 sha256=03e984f3aea0f0c9eabff016bc6facd9ce6a80da78aecb197a7cabfeacc932d0
  Stored in directory: /tmp/pip-ephem-wheel-cache-fn1cbege/wheels/a2/b7/61/2147fa082a9e51bef5dcc38dd3f0898fe0554d62203c0e383e
Successfully built EMG-gestures
Installing collected packages: EMG-gestures
Successfully installed EMG-gestures-0.1.0


In [3]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
#matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later removed
#the old aliases; use the new name when this matplotlib knows it, else fall back to the old one
_white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
plt.style.use(_white_style)
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py

#import custom functions
from EMG_gestures.utils import *
from EMG_gestures.analysis import within_subject_rnn_performance


In [None]:
#define where the data files are located
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'
results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/single_subject_training/RNN/'
#create the output folder up front so a long training run cannot fail at the final save
os.makedirs(results_folder, exist_ok=True)

nsubjects = 36
#first subject to process; presumably left at 23 to resume an interrupted run - set to 1 for a full run
first_subject = 23


# User-defined parameters
lo_freq = 20 #lower bound of bandpass filter
hi_freq = 450 #upper bound of bandpass filter

win_size = 100 #define window size over which to compute time-domain features
step = win_size #keeping this parameter in case we want to re-run later with some overlap

nreps = 10
exclude = [0,7]#labels to exclude

#for RNN training
verbose = 0
epochs = 40
batch_size = 2
#number of permutations to use for training
n_shuffled_sets = 20
#performance metrics
score_list = ['f1','accuracy']


def _scores_to_frame(scores, score_names, split_type, shuffled, rep, subject_id):
    """Arrange one score matrix into a long-format results table.

    Parameters
    ----------
    scores : 2D array
        One row per split (fold) and one column per metric.
    score_names : list of str
        Metric names; column ``i`` of ``scores`` is stored as ``'<name>_score'``.
    split_type : str
        Value written to the 'Type' column ('Train' or 'Test').
    shuffled : bool
        Value written to the 'Shuffled' column (True when labels were permuted).
    rep : int
        Repetition number written to the 'Rep' column.
    subject_id : int
        Subject number written to the 'Subject' column.

    Returns
    -------
    pandas.DataFrame
        Columns Fold, Rep, Type, Shuffled, Subject, then one column per metric.
    """
    n_splits, n_scores = scores.shape
    data_dict = {'Fold':np.arange(n_splits)+1,
                 'Rep':[rep]*n_splits,
                 'Type':[split_type]*n_splits,
                 'Shuffled':[shuffled]*n_splits,
                 'Subject':[subject_id]*n_splits}
    for sidx in range(n_scores):
        data_dict['%s_score'%(score_names[sidx])] = scores[:,sidx]
    return pd.DataFrame(data_dict)


for subject_id in range(first_subject,nsubjects+1):

    subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
    print('=======================')
    print(subject_folder)

    # Process data and get features
    #get features across segments and corresponding info
    feature_matrix_sub, target_labels_sub, window_tstamps_sub, \
    block_labels_sub, series_labels_sub = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                    win_size, step)
    #for reproducibility
    #NOTE(review): this seeds NumPy only; if the RNN backend keeps its own RNG the training itself is not seeded - confirm
    np.random.seed(1)
    subject_frames = []#one dataframe per (rep, true/permuted, train/test) combination

    for rep in range(nreps):
        #true labels first, then permuted labels (same order of random draws as before)
        for shuffled in (False, True):
            print('Subject %d|Rep %d'%(subject_id, rep+1))
            if shuffled:
                print('Permuted Data')
                target_labels_sub_perm = permute_class_within_sub(target_labels_sub, block_labels_sub, np.ones((target_labels_sub.size,)), exclude)
                labels_for_run = target_labels_sub_perm
            else:
                print('True Data')
                labels_for_run = target_labels_sub

            train_scores, test_scores = within_subject_rnn_performance(feature_matrix_sub, labels_for_run, block_labels_sub,\
                                                                    series_labels_sub, exclude, score_list,\
                                                                    n_shuffled_sets = n_shuffled_sets,\
                                                                    verbose = verbose, epochs = epochs, batch_size = batch_size)

            #put training results, then testing results, in dataframes
            subject_frames.append(_scores_to_frame(train_scores, score_list, 'Train', shuffled, rep+1, subject_id))
            subject_frames.append(_scores_to_frame(test_scores, score_list, 'Test', shuffled, rep+1, subject_id))

    #results_df holds the current subject only; it is overwritten on every pass through the loop
    results_df = pd.concat(subject_frames, axis = 0)
    #save results to file (one file per subject, tagged with the number of training sets)
    results_fn = 'subject_%02d_within_subject_%d_train_sets_results.h5'%(subject_id,n_shuffled_sets)
    results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')
    



/content/drive/MyDrive/EMG_gestures/EMG_data/23
Subject 23|Rep 1
True Data
Split Count: 1
Training Model


In [None]:
#average every remaining column within each (Type, Shuffled) group
#NOTE(review): results_df appears to hold only the most recently processed subject - confirm
#before reading these numbers as across-subject averages
summary_keys = ['Type','Shuffled']
results_df.groupby(summary_keys).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Fold,Rep,Subject,f1_score,accuracy_score
Type,Shuffled,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Test,False,1.5,5.5,36.0,0.91905,0.920422
Test,True,1.5,5.5,36.0,0.137796,0.13707
Train,False,1.5,5.5,36.0,0.972319,0.972384
Train,True,1.5,5.5,36.0,0.888497,0.891476


In [None]:
#define where the data files are located
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'
results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/single_subject_training/RNN_mv/'
#create the output folder up front so a long training run cannot fail at the final save
os.makedirs(results_folder, exist_ok=True)

nsubjects = 36
#first subject to process; presumably left at 18 to resume an interrupted run - set to 1 for a full run
first_subject = 18


# User-defined parameters
lo_freq = 20 #lower bound of bandpass filter
hi_freq = 450 #upper bound of bandpass filter

win_size = 100 #define window size over which to compute time-domain features
step = win_size #keeping this parameter in case we want to re-run later with some overlap

nreps = 10
exclude = [0,7]#labels to exclude

#for RNN training
verbose = 0
epochs = 40
batch_size = 2
#number of permutations to use for training
n_shuffled_sets = 10
#forwarded as the mv argument of within_subject_rnn_performance
#NOTE(review): presumably a majority-voting window, going by the RNN_mv results folder - confirm
mv = 5
#performance metrics
score_list = ['f1','accuracy']


def _scores_to_frame(scores, score_names, split_type, shuffled, rep, subject_id):
    """Arrange one score matrix into a long-format results table.

    Parameters
    ----------
    scores : 2D array
        One row per split (fold) and one column per metric.
    score_names : list of str
        Metric names; column ``i`` of ``scores`` is stored as ``'<name>_score'``.
    split_type : str
        Value written to the 'Type' column ('Train' or 'Test').
    shuffled : bool
        Value written to the 'Shuffled' column (True when labels were permuted).
    rep : int
        Repetition number written to the 'Rep' column.
    subject_id : int
        Subject number written to the 'Subject' column.

    Returns
    -------
    pandas.DataFrame
        Columns Fold, Rep, Type, Shuffled, Subject, then one column per metric.
    """
    n_splits, n_scores = scores.shape
    data_dict = {'Fold':np.arange(n_splits)+1,
                 'Rep':[rep]*n_splits,
                 'Type':[split_type]*n_splits,
                 'Shuffled':[shuffled]*n_splits,
                 'Subject':[subject_id]*n_splits}
    for sidx in range(n_scores):
        data_dict['%s_score'%(score_names[sidx])] = scores[:,sidx]
    return pd.DataFrame(data_dict)


for subject_id in range(first_subject,nsubjects+1):

    subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
    print('=======================')
    print(subject_folder)

    # Process data and get features
    #get features across segments and corresponding info
    feature_matrix_sub, target_labels_sub, window_tstamps_sub, \
    block_labels_sub, series_labels_sub = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                    win_size, step)
    #for reproducibility
    #NOTE(review): this seeds NumPy only; if the RNN backend keeps its own RNG the training itself is not seeded - confirm
    np.random.seed(1)
    subject_frames = []#one dataframe per (rep, true/permuted, train/test) combination

    for rep in range(nreps):
        #true labels first, then permuted labels (same order of random draws as before)
        for shuffled in (False, True):
            print('Subject %d|Rep %d'%(subject_id, rep+1))
            if shuffled:
                print('Permuted Data')
                target_labels_sub_perm = permute_class_within_sub(target_labels_sub, block_labels_sub, np.ones((target_labels_sub.size,)), exclude)
                labels_for_run = target_labels_sub_perm
            else:
                print('True Data')
                labels_for_run = target_labels_sub

            train_scores, test_scores = within_subject_rnn_performance(feature_matrix_sub, labels_for_run, block_labels_sub,\
                                                                    series_labels_sub, exclude, score_list,\
                                                                    n_shuffled_sets = n_shuffled_sets,\
                                                                    verbose = verbose, epochs = epochs, batch_size = batch_size, mv = mv)

            #put training results, then testing results, in dataframes
            subject_frames.append(_scores_to_frame(train_scores, score_list, 'Train', shuffled, rep+1, subject_id))
            subject_frames.append(_scores_to_frame(test_scores, score_list, 'Test', shuffled, rep+1, subject_id))

    #results_df holds the current subject only; it is overwritten on every pass through the loop
    results_df = pd.concat(subject_frames, axis = 0)
    #save results to file (one file per subject)
    results_fn = 'subject_%02d_within_subject_results.h5'%(subject_id)
    results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')



/content/drive/MyDrive/EMG_gestures/EMG_data/18
Subject 18|Rep 1
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 1
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 2
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 2
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 3
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 3
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 4
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Subject 18|Rep 4
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
