In [1]:
#Run cell to mount Google Drive
# Mounts Google Drive at /content/drive; the package install path and the
# data/results folders used in the later cells all point into this mount,
# so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# install package to have access to custom functions
# Installs the local EMG_gestures package straight from the mounted Drive folder;
# the EMG_gestures.utils / EMG_gestures.analysis imports further down rely on it.
# NOTE(review): --use-feature=in-tree-build is a transitional pip option; newer pip
# releases build in-tree by default and may reject the flag -- confirm against the
# pip version of the runtime before re-running.
%pip install /content/drive/MyDrive/EMG_gestures/ --use-feature=in-tree-build

Processing ./drive/MyDrive/EMG_gestures
Building wheels for collected packages: EMG-gestures
  Building wheel for EMG-gestures (setup.py) ... [?25l[?25hdone
  Created wheel for EMG-gestures: filename=EMG_gestures-0.1.0-py3-none-any.whl size=45275 sha256=1b02117d413a379e84822104433b31c258750355e41e07a653736fc983e28888
  Stored in directory: /tmp/pip-ephem-wheel-cache-bqvjjkjv/wheels/a2/b7/61/2147fa082a9e51bef5dcc38dd3f0898fe0554d62203c0e383e
Successfully built EMG-gestures
Installing collected packages: EMG-gestures
Successfully installed EMG-gestures-0.1.0


In [3]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever spelling this install provides
# instead of hard-coding 'seaborn-white'.
white_style = 'seaborn-white' if 'seaborn-white' in plt.style.available else 'seaborn-v0_8-white'
plt.style.use(white_style)
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py

#import custom functions
from EMG_gestures.utils import *
from EMG_gestures.analysis import within_subject_nn_performance


In [4]:
#define hyper params for each model
# Maps model id -> {'fe_layers': layer count, 'fe_activation': activation name}.
# Model 0 has zero 'fe' layers and therefore an empty activation string.
# NOTE(review): 'fe' presumably stands for feature-extraction layers -- confirm
# against within_subject_nn_performance, which consumes these dicts.
model_dict = {
    model_idx: {'fe_layers': n_layers, 'fe_activation': activation}
    for model_idx, (n_layers, activation) in enumerate(
        [(0, ''), (1, 'tanh'), (1, 'relu'), (2, 'tanh'), (2, 'relu')]
    )
}


In [None]:
# --- Where the recordings are read from and where results are written ---
results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/single_subject_training/NN/'
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'

# --- Cohort size and the single (model, subject) pair evaluated by this cell ---
nsubjects = 36
# Fixed to one model and one subject per run; to sweep, loop over the model ids
# and over range(1, nsubjects+1) around the run code instead.
model_id, subject_id = 3, 36

# --- User-defined signal-processing parameters ---
lo_freq, hi_freq = 20, 450  # lower / upper bound of the bandpass filter
win_size = 100  # window size over which the time-domain features are computed
step = win_size  # kept as its own parameter in case of a later re-run with some overlap

# --- Evaluation protocol ---
nreps = 10
exclude = [0, 7]  # labels to exclude
score_list = ['f1', 'accuracy']  # performance metrics

# --- NN training ---
es_patience = 5
batch_size = 2
epochs = 1000
verbose = 0

def _fold_results_frames(train_scores, test_scores, train_info_dict, score_list,
                         rep, shuffled, subject_id, epochs, batch_size):
    """Tabulate the per-fold train and test scores of one repetition.

    Parameters
    ----------
    train_scores, test_scores : 2-D arrays indexed as [split, score]; column i
        holds the metric named score_list[i].
    train_info_dict : mapping with 'train_loss', 'val_loss' and 'epochs_trained'
        entries (presumably one value per split -- confirm against
        within_subject_nn_performance).
    score_list : metric names used to label the '<name>_score' columns.
    rep : zero-based repetition index (stored one-based in the 'Rep' column).
    shuffled : True when the scores come from permuted labels.
    subject_id, epochs, batch_size : run settings recorded alongside every row.

    Returns
    -------
    list of two DataFrames, [train_frame, test_frame], one row per split each.
    """
    n_splits, n_scores = train_scores.shape
    frames = []
    for split_type, scores in (('Train', train_scores), ('Test', test_scores)):
        # scalar entries are broadcast by pandas to the length of the 'Fold' array
        data_dict = {'Fold': np.arange(n_splits)+1,
                     'Rep': rep+1,
                     'Type': split_type,
                     'Shuffled': shuffled,
                     'Subject': subject_id,
                     'Epochs': epochs,
                     'Batch_Size': batch_size,
                     'Train_Loss': train_info_dict['train_loss'],
                     'Val_Loss': train_info_dict['val_loss'],
                     'Epochs_Trained': train_info_dict['epochs_trained'],
                     }
        for sidx in range(n_scores):
            data_dict['%s_score'%(score_list[sidx])] = scores[:,sidx]
        frames.append(pd.DataFrame(data_dict))
    return frames


subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
print('=======================')
print(subject_folder)

# Process data and get features
#get features across segments and corresponding info
feature_matrix_sub, target_labels_sub, window_tstamps_sub, \
block_labels_sub, series_labels_sub = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                win_size, step)

# NOTE(review): this seeds NumPy's global RNG only; if the network training draws
# from another RNG (e.g. the deep-learning backend) that is not covered -- confirm.
np.random.seed(42)#for reproducibility
rep_frames = []#per-fold dataframes collected across repetitions
for rep in range(nreps):
    print('Model %d|Subject %d|Rep %d'%(model_id,subject_id, rep+1))

    print('True Data')
    # third return value is not used in this notebook
    train_scores, test_scores, dummy, train_info_dict = within_subject_nn_performance(
        feature_matrix_sub, target_labels_sub, series_labels_sub, model_dict[model_id],
        exclude, score_list,
        verbose = verbose, epochs = epochs, batch_size = batch_size, es_patience = es_patience)
    rep_frames.extend(_fold_results_frames(train_scores, test_scores, train_info_dict, score_list,
                                           rep, False, subject_id, epochs, batch_size))

    print('Permuted Data')
    # same features, permuted labels: serves as the shuffled comparison condition
    target_labels_sub_perm = permute_class_within_sub(target_labels_sub, block_labels_sub, series_labels_sub, exclude)
    train_scores, test_scores, dummy, train_info_dict = within_subject_nn_performance(
        feature_matrix_sub, target_labels_sub_perm, series_labels_sub, model_dict[model_id],
        exclude, score_list,
        verbose = verbose, epochs = epochs, batch_size = batch_size, es_patience = es_patience)
    rep_frames.extend(_fold_results_frames(train_scores, test_scores, train_info_dict, score_list,
                                           rep, True, subject_id, epochs, batch_size))

results_df = pd.concat(rep_frames, axis = 0)
#save results to file
# create the output folder up front so the save cannot fail after the long training run
os.makedirs(results_folder, exist_ok=True)
results_fn = 'subject_%02d_model_%d_within_subject_results.h5'%(subject_id, model_id)
results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')


/content/drive/MyDrive/EMG_gestures/EMG_data/36
Model 3|Subject 36|Rep 1
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Model 3|Subject 36|Rep 2
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Model 3|Subject 36|Rep 3
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Model 3|Subject 36|Rep 4
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Model 3|Subject 36|Rep 5
True Data
S

In [None]:
# Average every remaining column within each (Shuffled, Type) combination, i.e.
# true vs. permuted labels crossed with train vs. test scores.
results_by_condition = results_df.groupby(['Shuffled','Type'])
results_by_condition.mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Fold,Rep,Subject,f1_score,accuracy_score
Shuffled,Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
False,Test,1.5,5.5,36.0,0.922077,0.923213
False,Train,1.5,5.5,36.0,0.989461,0.989475
True,Test,1.5,5.5,36.0,0.139313,0.143898
True,Train,1.5,5.5,36.0,0.701098,0.71622


In [None]:
# --- Where the recordings are read from and where results are written ---
results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/single_subject_training/NN/batch_size_comparison'
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'

# --- Cohort size and the model used for the comparison ---
nsubjects = 36
model_id = 1

# --- User-defined signal-processing parameters ---
lo_freq, hi_freq = 20, 450  # lower / upper bound of the bandpass filter
win_size = 100  # window size over which the time-domain features are computed
step = win_size  # kept as its own parameter in case of a later re-run with some overlap

# --- Evaluation protocol ---
nreps = 10
exclude = [0, 7]  # labels to exclude
score_list = ['f1', 'accuracy']  # performance metrics

# --- NN training ---
# batch_size is intentionally not set here: the loop that follows iterates over
# several batch sizes.
es_patience = 5
epochs = 1000
verbose = 0
def _fold_results_frames(train_scores, test_scores, train_info_dict, score_list,
                         rep, shuffled, subject_id, epochs, batch_size):
    """Tabulate the per-fold train and test scores of one repetition.

    Parameters
    ----------
    train_scores, test_scores : 2-D arrays indexed as [split, score]; column i
        holds the metric named score_list[i].
    train_info_dict : mapping with 'train_loss', 'val_loss' and 'epochs_trained'
        entries (presumably one value per split -- confirm against
        within_subject_nn_performance).
    score_list : metric names used to label the '<name>_score' columns.
    rep : zero-based repetition index (stored one-based in the 'Rep' column).
    shuffled : True when the scores come from permuted labels.
    subject_id, epochs, batch_size : run settings recorded alongside every row.

    Returns
    -------
    list of two DataFrames, [train_frame, test_frame], one row per split each.
    """
    n_splits, n_scores = train_scores.shape
    frames = []
    for split_type, scores in (('Train', train_scores), ('Test', test_scores)):
        # scalar entries are broadcast by pandas to the length of the 'Fold' array
        data_dict = {'Fold': np.arange(n_splits)+1,
                     'Rep': rep+1,
                     'Type': split_type,
                     'Shuffled': shuffled,
                     'Subject': subject_id,
                     'Epochs': epochs,
                     'Batch_Size': batch_size,
                     'Train_Loss': train_info_dict['train_loss'],
                     'Val_Loss': train_info_dict['val_loss'],
                     'Epochs_Trained': train_info_dict['epochs_trained'],
                     }
        for sidx in range(n_scores):
            data_dict['%s_score'%(score_list[sidx])] = scores[:,sidx]
        frames.append(pd.DataFrame(data_dict))
    return frames


# create the output folder up front so a save cannot fail after a long training run
os.makedirs(results_folder, exist_ok=True)

for batch_size in [5, 10,20]:

    for subject_id in range(1,nsubjects+1):

        subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
        print('=======================')
        print(subject_folder)

        # Process data and get features
        #get features across segments and corresponding info
        feature_matrix_sub, target_labels_sub, window_tstamps_sub, \
        block_labels_sub, series_labels_sub = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                        win_size, step)

        # Re-seeded for every (batch size, subject) pair.
        # NOTE(review): this seeds NumPy's global RNG only; if the network training
        # draws from another RNG (e.g. the deep-learning backend) that is not
        # covered -- confirm.
        np.random.seed(42)#for reproducibility
        rep_frames = []#per-fold dataframes collected across repetitions
        for rep in range(nreps):
            print('Batch Size %d|Subject %d|Rep %d'%(batch_size,subject_id, rep+1))

            print('True Data')
            # third return value is not used in this notebook
            train_scores, test_scores, dummy, train_info_dict = within_subject_nn_performance(
                feature_matrix_sub, target_labels_sub, series_labels_sub, model_dict[model_id],
                exclude, score_list,
                verbose = verbose, epochs = epochs, batch_size = batch_size, es_patience = es_patience)
            rep_frames.extend(_fold_results_frames(train_scores, test_scores, train_info_dict, score_list,
                                                   rep, False, subject_id, epochs, batch_size))

            print('Permuted Data')
            # same features, permuted labels: serves as the shuffled comparison condition
            target_labels_sub_perm = permute_class_within_sub(target_labels_sub, block_labels_sub, series_labels_sub, exclude)
            train_scores, test_scores, dummy, train_info_dict = within_subject_nn_performance(
                feature_matrix_sub, target_labels_sub_perm, series_labels_sub, model_dict[model_id],
                exclude, score_list,
                verbose = verbose, epochs = epochs, batch_size = batch_size, es_patience = es_patience)
            rep_frames.extend(_fold_results_frames(train_scores, test_scores, train_info_dict, score_list,
                                                   rep, True, subject_id, epochs, batch_size))

        results_df = pd.concat(rep_frames, axis = 0)
        #save results to file (one file per subject and batch size)
        results_fn = 'subject_%02d_batch_size_%02d_within_subject_results.h5'%(subject_id, batch_size)
        results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')


/content/drive/MyDrive/EMG_gestures/EMG_data/01
Batch Size 5|Subject 1|Rep 1
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Batch Size 5|Subject 1|Rep 2
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Batch Size 5|Subject 1|Rep 3
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Batch Size 5|Subject 1|Rep 4
True Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Permuted Data
Split Count: 1
Training Model
Evaluate Model
Split Count: 2
Training Model
Evaluate Model
Batch Size 5|Subject