In [2]:
#Run cell to mount Google Drive
#(the package source and the EMG data used by later cells are read from paths under /content/drive/MyDrive)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# install package to have access to custom functions
# (installs the EMG_gestures package from its folder on the mounted Drive)
# NOTE(review): `--use-feature=in-tree-build` was an opt-in flag on older pip; in-tree builds are
# reported to be the default from pip 21.3 on, so newer pip may warn about or reject the flag -- confirm
# against the pip version of the runtime
%pip install /content/drive/MyDrive/EMG_gestures/ --use-feature=in-tree-build

Processing ./drive/MyDrive/EMG_gestures
Building wheels for collected packages: EMG-gestures
  Building wheel for EMG-gestures (setup.py) ... [?25l[?25hdone
  Created wheel for EMG-gestures: filename=EMG_gestures-0.1.0-py3-none-any.whl size=38021 sha256=7317a85bb51a8223dbb31381c631f60884cd1a128506797371cb8a7664185ffc
  Stored in directory: /tmp/pip-ephem-wheel-cache-izh82i4h/wheels/a2/b7/61/2147fa082a9e51bef5dcc38dd3f0898fe0554d62203c0e383e
Successfully built EMG-gestures
Installing collected packages: EMG-gestures
Successfully installed EMG-gestures-0.1.0


In [4]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
#matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' and 3.8 removed the old names,
#so pick whichever name this matplotlib version actually ships
plt.style.use('seaborn-white' if 'seaborn-white' in plt.style.available else 'seaborn-v0_8-white')
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py
import pickle
from tensorflow import keras

#append repo folder to search path
#import custom functions
from EMG_gestures.utils import *

In [5]:
#define hyper params for each model, keyed by model id (1-7)
#every entry uses the same 'n_grus'; entries differ only in 'n_dense_pre' and 'activation'
model_dict = {
    model_id: {'n_grus':24, 'n_dense_pre':n_dense_pre, 'activation':activation}
    for model_id, (n_dense_pre, activation) in enumerate(
        [(1,'linear'), (1,'tanh'), (1,'relu'), (2,'tanh'), (2,'relu'), (3,'tanh'), (3,'relu')],
        start = 1)
}


In [6]:
#folder holding one sub-folder of data files per subject
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'

#subject ids 1..nsubjects are visited by the loading loop
nsubjects = 5
#randomly-selected subjects to use as hold-out test data (the loading loop skips these ids)
test_subjects = [17, 23, 7, 8, 3]

# User-defined parameters
#lower and upper bound of the bandpass filter
lo_freq, hi_freq = 20, 450
#window size over which to compute time-domain features; the step is kept as its own
#parameter (currently equal to the window size) in case we want to re-run later with some overlap
win_size = step = 100

In [7]:
#initialize empty accumulators: a 0x0 array for the features, empty 1-D arrays for everything else
feature_matrix_all = np.empty((0,0))
target_labels_all, window_tstamps_all, block_labels_all, series_labels_all, subject_id_all = (
    np.empty((0,)) for _ in range(5))
#largest block label handed out so far (used to offset the next subject's block labels)
block_count = 0

for subject_id in range(1,nsubjects+1):
    #hold-out test subjects are not loaded in this cell
    if subject_id in test_subjects:
        continue

    subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
    print('=======================')
    print(subject_folder)

    # Process data and get features
    #get features across segments and corresponding info
    (feature_matrix, target_labels, window_tstamps,
     block_labels, series_labels) = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq,
                                                                        win_size, step)

    #prevent repeat of block labels by shifting them by the running block count
    #NOTE(review): if a subject's block labels start at 0, its first block would share a label with the
    #previous subject's last block -- confirm the labels returned above start at 1
    block_labels = block_labels+block_count
    block_count = np.max([block_count, np.max(block_labels)])

    #append this subject's data to the running arrays
    if feature_matrix_all.size:
        feature_matrix_all = np.vstack((feature_matrix_all,feature_matrix))
    else:
        feature_matrix_all = feature_matrix
    target_labels_all = np.append(target_labels_all,target_labels)
    window_tstamps_all = np.append(window_tstamps_all,window_tstamps)
    block_labels_all = np.append(block_labels_all,block_labels)
    series_labels_all = np.append(series_labels_all,series_labels)
    #one subject-id entry per block-label entry of this subject
    subject_id_all = np.append(subject_id_all,np.full(block_labels.size, subject_id, dtype = float))

/content/drive/MyDrive/EMG_gestures/EMG_data/01
/content/drive/MyDrive/EMG_gestures/EMG_data/02
/content/drive/MyDrive/EMG_gestures/EMG_data/04
/content/drive/MyDrive/EMG_gestures/EMG_data/05


In [11]:
#network training args
verbose = 0
epochs = 30
batch_size = 5

#validation scheme args
n_train_splits = 4
n_val_splits = 2
nreps = 2
nsets_training = 10

#excluded labels
exclude = [0,7]
#performance metrics
score_list = ['f1','accuracy']


results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/xsubject_transform_module/RNN/'
model_id = 1

#for model_id in range(1,5+1):
results_model_df = []
np.random.seed(1)#to replicate results
for rep in range(nreps):

    #each rep is run twice, in this order: true labels first, then permuted labels
    for shuffled, banner in ((False, '----True Data----'), (True, '----Permuted Data----')):
        print('Model %i || Rep %02d'%(model_id, rep+1))
        print(banner)
        rep_results_df = rnn_xsubject_transform_module_train_frac_subjects(
            feature_matrix_all, target_labels_all, subject_id_all, block_labels_all,
            series_labels_all, exclude, model_dict[model_id], score_list,
            n_train_splits = n_train_splits, n_val_splits = n_val_splits,
            nsets_training = nsets_training, verbose = verbose, epochs = epochs, batch_size = batch_size,
            permute = shuffled)
        #tag the rows with the run details and collect the dataframe
        rep_results_df['Shuffled'] = shuffled
        rep_results_df['Rep'] =  rep+1
        rep_results_df['Model'] = model_id
        results_model_df.append(rep_results_df)

#stack the per-run dataframes row-wise
results_model_df = pd.concat(results_model_df,axis = 0)
#     #save results to file
#     results_fn = 'model_%02d_results.h5'%(model_id)
#     results_model_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')
# print('***Finished!**')

Model 1 || Rep 01
----True Data----
-------Split Count: 1-------
Training: Subject 01 out of 03
Training Model
Evaluate Model on Trained Data
Training: Subject 02 out of 03
Training Model
Evaluate Model on Trained Data
Training: Subject 03 out of 03
Training Model
Evaluate Model on Trained Data
Validation: Subject 01 out of 01
-------Split Count: 2-------
Training: Subject 01 out of 03
Training Model
Evaluate Model on Trained Data
Training: Subject 02 out of 03
Training Model
Evaluate Model on Trained Data
Training: Subject 03 out of 03
Training Model
Evaluate Model on Trained Data
Validation: Subject 01 out of 01
-------Split Count: 3-------
Training: Subject 01 out of 03
Training Model
Evaluate Model on Trained Data
Training: Subject 02 out of 03
Training Model
Evaluate Model on Trained Data
Training: Subject 03 out of 03
Training Model
Evaluate Model on Trained Data
Validation: Subject 01 out of 01
-------Split Count: 4-------
Training: Subject 01 out of 03
Training Model
Evaluate M

In [13]:
#column means within each (Shuffled, Type) group
results_model_df.groupby(['Shuffled','Type']).agg('mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,Subject,Fold,f1_score,accuracy_score,Rep,Model
Shuffled,Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
False,Train,4.0,2.5,0.826343,0.834566,1.5,1.0
False,Val_Test,4.0,2.5,0.738671,0.736481,1.5,1.0
False,Val_Train,4.0,2.5,0.964272,0.964166,1.5,1.0
True,Train,4.0,2.5,0.342371,0.385475,1.5,1.0
True,Val_Test,4.0,2.5,0.303264,0.323043,1.5,1.0
True,Val_Train,4.0,2.5,0.849205,0.856673,1.5,1.0


In [8]:
#pick model 1's hyper params (presumably scratch setup for running the training function's body by hand)
#NOTE(review): this rebinds `model_dict` from the table of all models to a single entry, so the training
#cell's `model_dict[model_id]` lookup fails until the table is re-created -- consider a separate name
#the membership check makes re-running this cell a no-op instead of a KeyError
model_dict = model_dict[1] if 1 in model_dict else model_dict

In [16]:
#independent copies of the pooled arrays, bound to the names used as parameters by the training function
feature_matrix, target_labels, sub_labels, block_labels, series_labels = (
    pooled.copy() for pooled in (feature_matrix_all, target_labels_all, subject_id_all,
                                 block_labels_all, series_labels_all))

#no label permutation, no majority-voting setting
permute, mv = False, None

In [81]:
#column means within each Type group
#NOTE(review): `results_df` is not assigned at notebook level in any visible cell (only as a local inside
#the training function), so this cell presumably relies on leftover kernel state -- confirm
results_df.groupby('Type').agg('mean')

Unnamed: 0_level_0,Subject,Fold,f1_score,accuracy_score
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Train,4.0,2.5,0.81512,0.832551
Val_Test,4.0,2.5,0.722993,0.72984
Val_Train,4.0,2.5,0.968468,0.968482


Validation: Subject 01 out of 01


In [77]:
#NOTE(review): bare attribute access -- `groupby` is never called, so this cell only evaluates to the bound method;
#the table recorded as this cell's output is presumably left over from an earlier version of the cell -- confirm or remove
results_df.groupby

Unnamed: 0,Subject,Fold,Type,f1_score,accuracy_score
0,3.0,4,Train,0.713112,0.736952
1,6.0,4,Train,0.894133,0.894775
2,5.0,4,Train,0.913993,0.914367
0,2.0,4,Val_Train,0.976725,0.976744
1,2.0,4,Val_Train,0.970112,0.957746
0,2.0,4,Val_Test,0.726306,0.746479
1,2.0,4,Val_Test,0.965126,0.965116


In [9]:
#ML packages
from sklearn.linear_model import  LogisticRegression
from sklearn.metrics import accuracy_score, f1_score,make_scorer, log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.manifold import TSNE
from sklearn.model_selection import KFold


from tensorflow import keras
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Sequential, Model, load_model, Sequential, save_model
from tensorflow. keras.layers import Dense, Activation, Dropout, Input,  TimeDistributed, GRU, Masking, LSTM

from tensorflow.keras.utils import to_categorical

In [None]:
def rnn_xsubject_transform_module_train_all_subjects(feature_matrix, target_labels, sub_labels, block_labels,\
                                                         train_idxs, test_idxs, exclude, model_dict, score_list,\
                                                         figure_folder = '', model_folder = '', nsets_training = 10,\
                                                         verbose = 0, epochs = 40, batch_size = 2, mv = None, permute = False):
    """
    train and validate an RNN model with a transform module for domain adaptation.
    validate model performance by holding out indicated samples for each subject

    A single model is built once and then fit on one subject at a time (in a random subject order).
    Before each subject's fit the transform-module weights are reset from an untrained template;
    after the fit they are copied back out (and optionally saved).

    Parameters
    ----------
    feature_matrix, target_labels : forwarded to prepare_data_for_RNN to build the data cubes
        (assumes one row / one label per sample -- TODO confirm against prepare_data_for_RNN)
    sub_labels : array of subject ids, indexable with the same sample indices as target_labels
    block_labels : forwarded to permute_class_within_sub and prepare_data_for_RNN
    train_idxs : sample indices used for training
    test_idxs : sample indices held out for scoring; may be empty (array or list) to skip held-out scoring
    exclude : label values that are ignored
    model_dict : dict of hyper params; must contain 'n_dense_pre' and 'activation',
        'n_dense_post' defaults to 0 and 'n_grus' to 24. The dict passed in is not modified.
    score_list : names of the scores to compute; each becomes a '<name>_score' column
    figure_folder : if non-empty, a training-loss figure is saved there for each subject
    model_folder : if non-empty, each subject's trained transform module and the final model are saved there (h5)
    nsets_training : forwarded to prepare_data_for_RNN as `nsets` for the training cubes
    verbose, epochs, batch_size : forwarded to model.fit
    mv : if truthy, scores are computed with apply_mv_and_get_scores (majority voting) instead of get_scores
    permute : if True, target labels are permuted with permute_class_within_sub before training

    Returns
    -------
    results_df : DataFrame with columns 'Subject', 'Type' ('Train', plus 'Train_val' rows when
        test_idxs is non-empty) and one '<name>_score' column per entry of score_list
    scaler : the scaler returned by prepare_data_for_RNN
    """

    #default values -- filled in on a copy so the caller's dict is left untouched
    model_dict = dict(model_dict)
    model_dict.setdefault('n_dense_post', 0)
    model_dict.setdefault('n_grus', 24)

    #accept a plain list for test_idxs as well; its size decides whether held-out scoring runs
    test_idxs = np.asarray(test_idxs)
    has_test = test_idxs.size > 0

    subs = np.unique(sub_labels)

    if permute:
        #permute while ignoring excluded blocks
        target_labels = permute_class_within_sub(target_labels, block_labels, sub_labels, exclude)

    #indices of samples whose label is not excluded
    in_samples = np.where(np.isin(target_labels,exclude, invert = True))[0]

    #get training data cubes
    X_train_cube, Y_train_cube, scaler = prepare_data_for_RNN(feature_matrix, target_labels, train_idxs, exclude, train = True,\
                                                                block_labels = block_labels, nsets = nsets_training)
    #subject id of every non-excluded training sample
    sub_labels_train = sub_labels[np.intersect1d(train_idxs,in_samples)]

    #X and Y cubes must agree along their first two axes
    assert X_train_cube.shape[0] == Y_train_cube.shape[0]
    assert X_train_cube.shape[1] == Y_train_cube.shape[1]
    n_features, n_outputs = X_train_cube.shape[2], Y_train_cube.shape[2]

    if has_test:
        #get testing data cubes (re-using the scaler obtained from the training data)
        X_test_cube, Y_test_cube, scaler = prepare_data_for_RNN(feature_matrix, target_labels, test_idxs, exclude, train = False, scaler = scaler)
        sub_labels_test = sub_labels[np.intersect1d(test_idxs,in_samples)]

    # permute order in which subjects' data is used for training
    subs_perm = np.random.permutation(subs)

    #score tables: one row per subject (in training order), one column per score
    n_scores = len(score_list)
    train_scores = np.empty((subs.size,n_scores))
    test_scores = np.empty((subs.size,n_scores))

    # --- Training Stage ---
    # Define model architecture

    #setting timestep dimension to None
    model = get_rnn_model((None,n_features,),n_outputs,n_dense_pre=model_dict['n_dense_pre'], n_dense_post=model_dict['n_dense_post'],\
                            n_grus = model_dict['n_grus'], activation=model_dict['activation'])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Get transform module template
    #NOTE(review): the meaning of the constant 3 is defined by get_transform_module -- confirm there
    transform_module_template = get_transform_module(model, Input(shape = (None,n_features)),3 + model_dict['n_dense_post'])

    # iterate through subjects' data
    for sub_idx, train_sub in enumerate(subs_perm):
        print('Training: Subject %02d out of %02d'%(sub_idx+1, subs_perm.size))
        # get subject-specific samples
        #NOTE(review): the subject's entries are selected along axis 1 of the cubes -- confirm against prepare_data_for_RNN
        train_sub_idxs = np.where(sub_labels_train == train_sub)[0]
        X_cube_sub = X_train_cube[:,train_sub_idxs,:]
        Y_cube_sub = Y_train_cube[:,train_sub_idxs,]

        # initialize weights of the transform module
        model = tm_template_weights_to_model(transform_module_template, model)

        print('Training Model')
        # fit network
        history = model.fit(X_cube_sub, Y_cube_sub, epochs=epochs, batch_size=batch_size, verbose=verbose)
        if figure_folder:
            #plot training loss
            fig_title = 'Subject %02d'%(train_sub)
            fig_fn = os.path.join(figure_folder,'rnn_model_subject_%02d_all_train_data_permuted_%s_loss.png'%(train_sub,str(permute)))
            plot_train_loss(history, fig_title, fig_fn)

        #copy weights to a transfer module template, save if wanted
        trained_transfer_module = model_weights_to_tm_template(transform_module_template, model)
        if model_folder:
            #save trained transfer module to file
            model_fn = os.path.join(model_folder, 'transform_module_subject_%02d_all_train_data_permuted_%s.h5'%(train_sub, str(permute)))
            keras.models.save_model(trained_transfer_module, model_fn, save_format= 'h5')

        # evaluate trained network
        print('Evaluate Model on Trained Data')

        if mv:
            # get scores after applying majority voting scheme to model predictions
            train_scores[sub_idx,:]  = apply_mv_and_get_scores(feature_matrix, target_labels,\
                                                                np.intersect1d(np.where(sub_labels==train_sub)[0],train_idxs), exclude,\
                                                                scaler, model, mv, score_list, rnn = True)
            if has_test:
                test_scores[sub_idx,:] = apply_mv_and_get_scores(feature_matrix, target_labels, \
                                                                    np.intersect1d(np.where(sub_labels==train_sub)[0],test_idxs), exclude,\
                                                                    scaler, model, mv, score_list, rnn = True)
        else:
            #get score for training data
            train_scores[sub_idx,:]  = get_scores(X_cube_sub, Y_cube_sub, model, score_list, rnn = True)
            if has_test:
                #get score for test data
                test_sub_idxs = np.where(sub_labels_test == train_sub)[0]
                test_scores[sub_idx,:]  = get_scores(X_test_cube[:,test_sub_idxs,:], Y_test_cube[:,test_sub_idxs,:], model, score_list, rnn = True)

    def _scores_to_frame(type_label, scores):
        #one row per subject (in training order) with the given 'Type' label and one column per score
        data_dict = {'Subject':subs_perm,\
                     'Type':[type_label]*subs_perm.size}
        for sidx in range(n_scores):
            data_dict['%s_score'%(score_list[sidx])] = scores[:,sidx]
        return pd.DataFrame(data_dict)

    #put results in dataframe
    result_frames = [_scores_to_frame('Train', train_scores)]
    if has_test:
        result_frames.append(_scores_to_frame('Train_val', test_scores))
    results_df = pd.concat(result_frames,axis = 0)

    if model_folder:
        #save complete model to file
        model_fn = os.path.join(model_folder, 'trained_model_all_train_data_permuted_%s.h5'%(str(permute)))
        keras.models.save_model(model, model_fn, save_format= 'h5')
    return results_df, scaler