In [1]:
#Run cell to mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# install package to have access to custom functions
%pip install /content/drive/MyDrive/EMG_gestures/ --use-feature=in-tree-build

Processing ./drive/MyDrive/EMG_gestures
Building wheels for collected packages: EMG-gestures
  Building wheel for EMG-gestures (setup.py) ... [?25l[?25hdone
  Created wheel for EMG-gestures: filename=EMG_gestures-0.1.0-py3-none-any.whl size=45275 sha256=9dbd7e640795a05a38f74b5a1a9266b37e7ed5dd8d908f15eb8a8e1e62f08667
  Stored in directory: /tmp/pip-ephem-wheel-cache-m3jkc9no/wheels/a2/b7/61/2147fa082a9e51bef5dcc38dd3f0898fe0554d62203c0e383e
Successfully built EMG-gestures
Installing collected packages: EMG-gestures
Successfully installed EMG-gestures-0.1.0


In [3]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
plt.style.use('seaborn-white')
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py
import pickle
from tensorflow import keras

#append repo folder to search path
#import cusotm functions
from EMG_gestures.utils import *
from EMG_gestures.analysis import *



In [19]:
 from sklearn.manifold import TSNE
from sklearn.model_selection import KFold


from tensorflow import keras
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Sequential, Model, load_model, Sequential, save_model
from tensorflow. keras.layers import Dense, Activation, Dropout, Input,  TimeDistributed, GRU, Masking, LSTM
from keras.callbacks import EarlyStopping

from tensorflow.keras.utils import to_categorical
 
 
 def nn_xsubject_joint_data_train_frac_subjects(feature_matrix, target_labels, sub_labels, block_labels, model_dict, exclude,\
                                                    score_list, n_splits = 4,\
                                                    verbose = 0, epochs = 1000, batch_size = 5, es_patience = 5,validation_split = 0.25,\
                                                mv = False, permute = False):
    """
    train and validate a vanilla neural net model using data from multiple subjects 
    train and validate model performance by splitting subjects into a train and test set
    """

    #subjects in list. there are the units over which we will do train/test split
    subs = np.unique(sub_labels)

    if permute:
        #permute while ignoring excluded blocks
        target_labels = permute_class_within_sub(target_labels, block_labels, sub_labels, exclude)


    #initialize object for k-fold cross-validation
    kf = KFold(n_splits=n_splits,shuffle = True)
    #initialize empty arrays

    n_scores = len(score_list)
    train_scores_all = np.empty((n_splits,n_scores))
    test_scores_all = np.empty((n_splits,n_scores))
    train_history = dict()

    train_info_dict = {'val_loss': np.empty((n_splits,)),\
                    'train_loss': np.empty((n_splits,)),\
                    'epochs_trained':np.empty((n_splits,))}

    for split_count, (subs_train_idxs, subs_test_idxs) in enumerate(kf.split(subs)):
        print('Split Count: %i'% (split_count+1))

        #get train and test indices
        train_subs = subs[subs_train_idxs]
        test_subs = subs[subs_test_idxs]
        train_idxs = np.where(np.isin(sub_labels,train_subs, invert = False))[0]
        test_idxs = np.where(np.isin(sub_labels,test_subs, invert = False))[0]

        #get trained model
        train_scores, trained_model, scaler, history = get_trained_model(feature_matrix, target_labels, train_idxs, model_dict, exclude,\
                                                                            score_list,\
                                                                            verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                            es_patience = es_patience,\
                                                                            validation_split = validation_split,\
                                                                            mv = mv)
        #save training details to dict
        train_info_dict['train_loss'][split_count] = history.history['loss'][-1]
        train_info_dict['val_loss'][split_count] = history.history['val_loss'][-1]
        train_info_dict['epochs_trained'][split_count] = len(history.history['val_loss'])

        #Evaluating on held-out subjects
        test_scores = evaluate_trained_nn(feature_matrix, target_labels, test_idxs, exclude, trained_model, score_list,scaler, mv = mv)

        #put scores in array
        train_scores_all[split_count,:] = train_scores
        test_scores_all[split_count,:] = test_scores


    #put in data frame
    results_df = []
    data_dict = {'Fold':np.arange(n_splits)+1,\
                    'Type':['Train' for x in range(n_splits)],\
                    'Epochs':[epochs for x in range(n_splits)],\
                'Batch_Size':[batch_size for x in range(n_splits)],\
                'Train_Loss':train_info_dict['train_loss'],\
                    'Val_Loss':train_info_dict['val_loss'],\
                    'Epochs_Trained':train_info_dict['epochs_trained'],\
                }
    for sidx in range(n_scores):
        data_dict['%s_score'%(score_list[sidx])] = train_scores_all[:,sidx]
    results_df.append(pd.DataFrame(data_dict))

    data_dict = {'Fold':np.arange(n_splits)+1,\
                    'Type':['Test' for x in range(n_splits)],\
                    'Epochs':[epochs for x in range(n_splits)],\
                'Batch_Size':[batch_size for x in range(n_splits)],\
                'Train_Loss':train_info_dict['train_loss'],\
                    'Val_Loss':train_info_dict['val_loss'],\
                    'Epochs_Trained':train_info_dict['epochs_trained'],\
                }
    for sidx in range(n_scores):
        data_dict['%s_score'%(score_list[sidx])] = test_scores_all[:,sidx]
    results_df.append(pd.DataFrame(data_dict))

    results_df = pd.concat(results_df,axis = 0)

    return results_df


def nn_xsubject_joint_data_train_all_subjects(feature_matrix, target_labels, sub_labels, block_labels, model_dict,exclude,\
                                                     score_list, verbose = 0, epochs = 40, batch_size = 2, validation_split = 0.25,\
                                              es_patience = 5, mv = False, permute = False):
    """
    train and validate a logistic regression model using data from multiple subjects 
    train on all subjects
    """

    #subjects in list. there are the units over which we will do train/test split
    subs = np.unique(sub_labels)

    if permute:
        #permute while ignoring excluded blocks
        target_labels = permute_class_within_sub(target_labels, block_labels, sub_labels, exclude)



    n_scores = len(score_list)


    train_subs = subs
    train_idxs = np.where(np.isin(sub_labels,train_subs, invert = False))[0]
    #get trained model
    train_scores, trained_model, scaler, history = get_trained_model(feature_matrix, target_labels, train_idxs, model_dict, exclude,\
                                                                        score_list,\
                                                                        verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                        es_patience = es_patience,\
                                                                        validation_split = validation_split,\
                                                                        mv = mv)


    #put in data frame
    data_dict = {'Type':'Train',\
                  'Batch_Size':batch_size,\
             'Train_Loss': history.history['loss'][-1],\
                 'Val_Loss': history.history['loss'][-1],\
                 'Epochs_Trained': len(history.history['loss'][-1]),\
                 }
    for sidx in range(n_scores):
        data_dict['%s_score'%(score_list[sidx])] = train_scores[sidx]
    results_df = pd.DataFrame(data_dict, index = [0])


    return results_df, trained_model, scaler

In [13]:
#debug here
feature_matrix = feature_matrix_all.copy()
target_labels = target_labels_all.copy()
sub_labels = subject_id_all.copy()
block_labels =  block_labels_all.copy()
model_dict = {'fe_layers':1, 'fe_activation':'tanh'}
exclude = [0,7]
score_list = ['f1']
n_splits = 4
verbose = 0
epochs = 1000
batch_size = 2
es_patience = 5
mv = None
permute = False
validation_split = 0.25

Split Count: 1
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 2
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 3
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 4
Training Model
Evaluate Model on Trained Data
Evaluate Model


In [21]:
#define hyper params for each model
model_dict = {0:{'fe_layers':0, 'fe_activation':''},\
              1:{'fe_layers':1, 'fe_activation':'tanh'},\
              2:{'fe_layers':1, 'fe_activation':'relu'},\
              3:{'fe_layers':2, 'fe_activation':'tanh'},\
              4:{'fe_layers':2, 'fe_activation':'relu'},\
              }


In [6]:
#define where the data files are located
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'

nsubjects = 36

#randomly-selected subjects to use as hold-out test data 
test_subjects = [10, 12, 20, 14, 23, 34,  0]

# User-defined parameters
lo_freq = 20 #lower bound of bandpass filter
hi_freq = 450 #upper bound of bandpass filter

win_size = 100 #define window size over which to compute time-domain features
step = win_size #keeping this parameter in case we want to re-run later with some overlap


In [7]:
#intialize empty lists
feature_matrix_all = np.empty((0,0))
target_labels_all = np.empty((0,))
window_tstamps_all = np.empty((0,))
block_labels_all  = np.empty((0,))
subject_id_all = np.empty((0,))
block_count = 0

for subject_id in range(1,nsubjects+1):
    if subject_id not in test_subjects:
        subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
        print('=======================')
        print(subject_folder)

        # Process data and get features 
        #get features across segments and corresponding info
        feature_matrix, target_labels, window_tstamps, \
        block_labels, series_labels = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                        win_size, step)

        #prevent repeat of block labels by increasing block count
        block_labels = block_labels+block_count
        block_count = np.max([block_count, np.max(block_labels)])

        # concatenate lists
        feature_matrix_all = np.vstack((feature_matrix_all,feature_matrix)) if feature_matrix_all.size else feature_matrix
        target_labels_all = np.hstack((target_labels_all,target_labels))
        window_tstamps_all = np.hstack((window_tstamps_all,window_tstamps))
        block_labels_all = np.hstack((block_labels_all,block_labels))
        subject_id_all = np.hstack((subject_id_all,np.ones((block_labels.size))*subject_id))
        

/content/drive/MyDrive/EMG_gestures/EMG_data/01
/content/drive/MyDrive/EMG_gestures/EMG_data/02
/content/drive/MyDrive/EMG_gestures/EMG_data/03
/content/drive/MyDrive/EMG_gestures/EMG_data/04
/content/drive/MyDrive/EMG_gestures/EMG_data/05
/content/drive/MyDrive/EMG_gestures/EMG_data/06
/content/drive/MyDrive/EMG_gestures/EMG_data/07
/content/drive/MyDrive/EMG_gestures/EMG_data/08
/content/drive/MyDrive/EMG_gestures/EMG_data/09
/content/drive/MyDrive/EMG_gestures/EMG_data/11
/content/drive/MyDrive/EMG_gestures/EMG_data/13
/content/drive/MyDrive/EMG_gestures/EMG_data/15
/content/drive/MyDrive/EMG_gestures/EMG_data/16
/content/drive/MyDrive/EMG_gestures/EMG_data/17
/content/drive/MyDrive/EMG_gestures/EMG_data/18
/content/drive/MyDrive/EMG_gestures/EMG_data/19
/content/drive/MyDrive/EMG_gestures/EMG_data/21
/content/drive/MyDrive/EMG_gestures/EMG_data/22
/content/drive/MyDrive/EMG_gestures/EMG_data/24
/content/drive/MyDrive/EMG_gestures/EMG_data/25
/content/drive/MyDrive/EMG_gestures/EMG_

In [23]:
# results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/xsubject_joint_data/log_reg/'
# figure_folder = '/content/drive/MyDrive/EMG_gestures/figures/training_history/xsubject_joint_data/log_reg'


#RNN training args - all other arguments are the same
verbose = 0
epochs = 1000
batch_size = 5
es_patience = 5

# experiment params
n_splits = 4
nreps = 1

#excluded labels
exclude = [0,7]
#performance metrics
score_list = ['f1','accuracy']

model_id = 1
#for model_id in range(1,5+1):
np.random.seed(1)# Set seed for replicability
results_df = []
for rep in range(nreps):
    print('Model %d | Rep %d'%(model_id, rep+1))
    print('--True Data--')

    rep_results_df = nn_xsubject_joint_data_train_frac_subjects(feature_matrix_all, target_labels_all, subject_id_all,\
                                                                                    block_labels_all, model_dict[model_id], exclude,\
                                                                                    score_list = score_list,\
                                                                                    n_splits = n_splits,\
                                                                                    verbose = 0, epochs = epochs, batch_size = batch_size,\
                                                                                    es_patience = es_patience, mv = None, permute = False)
    #add details and concatenate dataframe
    rep_results_df['Shuffled'] = False
    rep_results_df['Rep'] =  rep+1
    results_df.append(rep_results_df)




    #repeat with shuffled data
    print('Model %d | Rep %d'%(model_id, rep+1))
    print('--Permuted Data--')
    rep_results_df  = nn_xsubject_joint_data_train_frac_subjects(feature_matrix_all, target_labels_all, subject_id_all,\
                                                                                    block_labels_all, model_dict[model_id], exclude,\
                                                                                    score_list = score_list,\
                                                                                    n_splits = n_splits,\
                                                                                    verbose = 0, epochs = epochs, batch_size = batch_size,\
                                                                                    es_patience = es_patience, mv = None,permute = True)
    #add details and concatenate dataframe
    rep_results_df['Shuffled'] = True
    rep_results_df['Rep'] =  rep+1
    results_df.append(rep_results_df)

#concatenate all data frames
results_df = pd.concat(results_df,axis = 0)

# # #save results to file
# results_fn = 'model_%02d_results.h5'%(model_id)
# results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')


Model 1 | Rep 1
--True Data--
Split Count: 1
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 2
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 3
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 4
Training Model
Evaluate Model on Trained Data
Evaluate Model
Model 1 | Rep 1
--Permuted Data--
Split Count: 1
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 2
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 3
Training Model
Evaluate Model on Trained Data
Evaluate Model
Split Count: 4
Training Model
Evaluate Model on Trained Data
Evaluate Model


In [24]:
results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/xsubject_joint_data/NN/'
model_dir = '/content/drive/MyDrive/EMG_gestures/model_data/xsubject_joint_data/NN/'

#network training args 
verbose = 0
epochs = 1000
batch_size = 2
es_patience = 5
validation_split = 0.25
nreps = 10

exclude = [0,7]

score_list = ['f1','accuracy']#performance metrics

model_id = 1
results_df = []
np.random.seed(1)
for rep in range(nreps):

    
    print('Model %i || Rep %02d'%(model_id, rep+1))
    print('----True Data----')
    rep_results_df,trained_model, scaler = nn_xsubject_joint_data_train_all_subjects(feature_matrix_all, target_labels_all,\
                                                                                     subject_id_all,\
                                                                                     block_labels_all, model_dict[model_id],exclude,\
                                                                                     score_list = score_list,\
                                                                                     verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                                     es_patience = es_patience, validation_split = validation_split,\
                                                                                     mv = None, permute = False)
    #add details and concatenate dataframe
    rep_results_df['Shuffled'] = False
    rep_results_df['Rep'] =  rep+1
    results_df.append(rep_results_df)


    #save trained model
    model_fn = os.path.join(model_folder, 'trained_model_rep_%i_all_train_data.h5'%(rep))
    keras.models.save_model(trained_model, model_fn, save_format= 'h5')

    print('Model %i || Rep %02d'%(model_id, rep+1))
    print('----Permuted Data----')
    rep_results_df, b, c = nn_xsubject_joint_data_train_all_subjects(feature_matrix_all, target_labels_all,\
                                                                                     subject_id_all,\
                                                                                     block_labels_all, model_dict[model_id],exclude,\
                                                                                     score_list = score_list,\
                                                                                     verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                                     es_patience = es_patience, validation_split = validation_split,\
                                                                                     mv = None, permute = False)
    #add details and concatenate dataframe
    rep_results_df['Shuffled'] = True
    rep_results_df['Rep'] =  rep+1
    results_df.append(rep_results_df)

results_df = pd.concat(results_df, axis = 0).reset_index()

results_fn = 'nn_joint_training_results.h5'%(model_id)
results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')

#save scaler (outisde of rep loop b/c it will always be the same)
scaler_fn = 'trained_scaler_all_training_data.pkl'#
with open(os.path.join(model_dir,scaler_fn), "wb") as output_file:
    pickle.dump(scaler, output_file)

Model 1 || Rep 01
----True Data----
Training Model
Evaluate Model on Trained Data


TypeError: ignored