In [62]:
#Run cell to mount Google Drive
# Later cells read the package source and the EMG data from paths under /content/drive,
# so this cell has to run first on a fresh Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
# install package to have access to custom functions
# The install target is the EMG_gestures project folder inside the mounted Drive,
# so the Drive mount cell must have run before this one.
# NOTE(review): --use-feature=in-tree-build is tied to the pip version on the runtime;
# confirm it is still accepted before relying on a fresh install.
%pip install /content/drive/Othercomputers/'My MacBook Pro'/EMG_gestures/ --use-feature=in-tree-build

Processing ./drive/Othercomputers/My MacBook Pro/EMG_gestures
Building wheels for collected packages: EMG-gestures
  Building wheel for EMG-gestures (setup.py) ... [?25l[?25hdone
  Created wheel for EMG-gestures: filename=EMG_gestures-0.1.0-py3-none-any.whl size=30703 sha256=6242eb780fa92c3e04df027b0a43d6eb977e21b1daccde9218a5e1caad5085ef
  Stored in directory: /tmp/pip-ephem-wheel-cache-478vyp39/wheels/74/96/87/ceb916fceabb875209ae993e697bf574966ab592f4167a4958
Successfully built EMG-gestures
Installing collected packages: EMG-gestures
  Attempting uninstall: EMG-gestures
    Found existing installation: EMG-gestures 0.1.0
    Uninstalling EMG-gestures-0.1.0:
      Successfully uninstalled EMG-gestures-0.1.0
Successfully installed EMG-gestures-0.1.0


In [64]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
plt.style.use('seaborn-white')
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py
import pickle

#import custom functions
from EMG_gestures.utils import *
#from EMG_gestures.analysis

In [65]:
from sklearn.model_selection import KFold


from tensorflow import keras
from keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Sequential, Model, load_model, Sequential, save_model
from tensorflow. keras.layers import Dense, Activation, Dropout, Input,  TimeDistributed, GRU, Masking, LSTM

from tensorflow.keras.utils import to_categorical


from EMG_gestures.utils import *
from EMG_gestures.models import DANN, EarlyStopping_Custom

In [92]:
def get_trained_DANN(X, Y, train_idxs, test_idxs, model_dict = {}, exclude = [], score_list = ['f1'], verbose = 0, epochs = 40, batch_size = 2,\
                      es_patience = 5, validation_split = 0.25, mv = False):
    """Train a DANN label predictor on the training samples and score it on train and test samples.

    Parameters
    ----------
    X, Y : feature matrix and per-sample labels, passed through to prepare_data_for_TF.
    train_idxs, test_idxs : numpy index arrays selecting the training / testing samples.
    model_dict : dict of architecture settings. Missing keys fall back to
        fe_layers=1, activation='tanh', dp_layers=0. The dict is not modified.
    exclude : labels whose samples are dropped from both index sets.
    score_list : score names forwarded to the scoring helpers.
    verbose, epochs, batch_size : forwarded to DANN.train_label_pred.
    es_patience : patience of the EarlyStopping_Custom callback.
    validation_split : validation fraction forwarded to DANN.train_label_pred.
    mv : when truthy, scoring goes through apply_mv_and_get_scores using the
        un-filtered index arrays; otherwise the prepared data is scored with get_scores.
        (presumably a majority-voting setting - TODO confirm against EMG_gestures.utils)

    Returns
    -------
    train_scores, test_scores, dann_model, scaler
    """

    # Merge the caller's settings over the defaults in a fresh dict so neither the
    # caller's dict nor the shared default-argument dict is written to.
    model_params = {'fe_layers': 1, 'activation': 'tanh', 'dp_layers': 0}
    model_params.update(model_dict)

    #exclude indicated labels (same mask serves both index sets)
    in_samples = np.where(np.isin(Y,exclude, invert = True))[0]
    train_idxs_orig = train_idxs.copy()
    train_idxs = np.intersect1d(train_idxs,in_samples)
    test_idxs_orig = test_idxs.copy()
    test_idxs = np.intersect1d(test_idxs,in_samples)

    #get training and testing data cubes; the scaler produced by the training call is reused for the test call
    X_train_cube, Y_train_cube, scaler = prepare_data_for_TF(X,Y, train_idxs, exclude, train = True)
    X_test_cube, Y_test_cube, scaler = prepare_data_for_TF(X,Y, test_idxs, exclude, scaler = scaler)

    #test for equal number of samples
    assert X_train_cube.shape[0] == Y_train_cube.shape[0]
    n_features, n_outputs = X_train_cube.shape[1], Y_train_cube.shape[1]

    #define and compile model
    input_shape = (n_features,)
    dann_model = DANN(input_shape, n_outputs, fe_layers = model_params['fe_layers'],\
                      dp_layers = model_params['dp_layers'], activation = model_params['activation'])
    dann_model.compile(loss='categorical_crossentropy')

    #train on source labels
    print('Training on Source Subject')
    es = EarlyStopping_Custom(mode='min', min_delta=.01, patience=es_patience)
    # honour the validation_split argument (it used to be pinned to 0.25 here)
    dann_model.train_label_pred(X_train_cube, Y_train_cube, validation_split = validation_split,\
                                epochs=epochs, batch_size=batch_size, verbose=verbose, callback = es)

    # evaluate trained network
    print('Evaluate Model on Trained Data')

    if mv:
        # this path receives the raw data and the un-filtered indices, plus the fitted scaler
        train_scores = apply_mv_and_get_scores(X, Y, train_idxs_orig, exclude,\
                                                scaler, dann_model.predict_label, mv, score_list)
        test_scores = apply_mv_and_get_scores(X, Y, test_idxs_orig, exclude,\
                                                scaler, dann_model.predict_label, mv, score_list)
    else:
        #score the already-prepared training and testing data
        train_scores = get_scores(X_train_cube, Y_train_cube, dann_model.predict_label, score_list)
        test_scores = get_scores(X_test_cube, Y_test_cube, dann_model.predict_label, score_list)

    return train_scores,test_scores, dann_model, scaler

In [81]:
 data_folder = '/content/drive/Othercomputers/My MacBook Pro/EMG_gestures/EMG_data/'  # root folder; per-subject sub-folders are addressed as '%02d' % subject_id
 src_subject_id = 11  # subject whose data is loaded as the source (training) subject
 nsubjects = 3  # upper bound of the subject-id range used when looping over target subjects
 nreps = 1  # number of repetitions; the repetition loop is currently commented out in this notebook
 # User-defined parameters
 # NOTE(review): the assignments above start with one stray space while the rest of
 # the cell starts at column 0; plain Python rejects that as an unexpected indent.
 # Confirm whether the space exists in the real cell or is an export artifact.
lo_freq = 20 #lower bound of bandpass filter
hi_freq = 450 #upper bound of bandpass filter

win_size = 100 #define window size over which to compute time-domain features
step = win_size #keeping this parameter in case we want to re-run later with some overlap

exclude = [0,7]  # labels whose samples are dropped before training and scoring
score_list = ['f1']  # score names forwarded to the scoring helpers
model_dict = {'fe_layers':1,'activation':'tanh','dp_layers':1}  # keys read by get_trained_DANN

#network training args
verbose = 0
epochs = 200
batch_size = 2
es_patience = 5  # patience handed to the early-stopping callback

mv = False  # falsy -> get_trained_DANN scores the prepared data directly with get_scores
permute = False  # True -> labels are permuted with permute_class_within_sub before training

rep = 0  # repetition counter used only in the progress print while the rep loop is disabled

In [82]:
# Load the source subject's recordings and turn them into windowed features.
subject_folder = os.path.join(data_folder,'%02d'%(src_subject_id))
print('=======================')
print(subject_folder)

# Process data and get features
#get features across segments and corresponding info
# The helper returns five values: feature matrix, labels, window timestamps,
# block labels and series labels.
feature_matrix_src, target_labels_src, window_tstamps_src, \
block_labels_src, series_labels_src = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                            win_size, step)
target_labels_src_orig = target_labels_src.copy()#keep originals before permuting
train_idxs = np.arange(target_labels_src.size)  # every sample index of the source subject
np.random.seed(1)#for reproducibility

results_df = []#initialize empty array for dataframes
n_scores = len(score_list)  # number of score columns allocated per fold / repetition

/content/drive/Othercomputers/My MacBook Pro/EMG_gestures/EMG_data/11


In [83]:
# Single-repetition stand-in for the (commented-out) repetition loop below.
rep = 0
print('Subject %d|Rep %d'%(src_subject_id, rep+1))
#for rep in range(nreps):
if permute:
    #permute while ignoring excluded blocks
    # always permutes from the untouched copy of the labels, so re-running the cell does not compound permutations
    target_labels_src = permute_class_within_sub(target_labels_src_orig, block_labels_src, np.ones((target_labels_src.size,)), exclude)

Subject 11|Rep 1


In [84]:
#initialize object for k-fold cross-validation
# Folds are formed over whole series: the splitter sees the unique series labels,
# and each fold's positions are mapped back to the samples of those series.
unique_series_src = np.unique(series_labels_src)
n_splits = unique_series_src.size
kf = KFold(n_splits=n_splits,shuffle = True)

src_train_scores_all = np.empty((n_splits,n_scores))
src_test_scores_all = np.empty((n_splits,n_scores))

for split_count, (series_train, series_test) in enumerate(kf.split(unique_series_src)):
    print('Split Count: %i'% (split_count+1))
    #get train and test idxs
    # kf.split yields positions into unique_series_src, not the series labels themselves,
    # so look the labels up first and then select every sample belonging to them
    src_train_idxs = np.where(np.isin(series_labels_src, unique_series_src[series_train]))[0]
    src_test_idxs = np.where(np.isin(series_labels_src, unique_series_src[series_test]))[0]
    # NOTE(review): nothing is trained inside this loop, so only the last fold's
    # indices are still in scope once the loop finishes.

    

    

Split Count: 1
Split Count: 2


In [85]:

# Train on the source subject using the train/test indices left over from the split cell
# and store the scores in the row given by split_count (also left over from that cell).
src_train_scores, src_test_scores, trained_model, scaler = get_trained_DANN(feature_matrix_src, target_labels_src, src_train_idxs, src_test_idxs, model_dict, \
                                                                        exclude, score_list = score_list,\
                                                                verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                es_patience = es_patience, mv = mv)
src_train_scores_all[split_count,:] = src_train_scores
src_test_scores_all[split_count,:] = src_test_scores

Training on Source Subject
Early Stopping after 33 epochs
Evaluate Model on Trained Data


In [89]:
# test on all other subjects- with domain adapt
# initialize empty lists
test_scores_all = np.empty((0,0))
targ_subject_list = []
targ_subject_id = 1
#for targ_subject_id in range(1,nsubjects+1):
#    if targ_subject_id != src_subject_id:

subject_folder = os.path.join(data_folder,'%02d'%(targ_subject_id))
print('Target Subject :%s'%(subject_folder))

# Process data and get features 
#get features across segments and corresponding info
feature_matrix_targ, target_labels_targ, window_tstamps_targ, \
block_labels_targ, series_labels_targ = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                        win_size, step)


Target Subject :/content/drive/Othercomputers/My MacBook Pro/EMG_gestures/EMG_data/01


In [91]:
#initialize object for k-fold cross-validation
nsplits_targ = np.unique(series_labels_src).size
kf_targ = KFold(n_splits=nsplits_targ,shuffle = True)

targ_train_scores_all = np.empty((nsplits_targ,n_scores))
targ_test_scores_all = np.empty((nsplits_targ,n_scores))

for split_count_targ, (targ_series_train, targ_series_test) in enumerate(kf_targ.split(np.unique(series_labels_targ))):
    print('Split Count-Target: %i'% (split_count_targ+1))
    #get train and test idxs for target suject
    targ_train_idxs = np.where(series_labels_targ==targ_series_train)[0]
    targ_test_idxs = np.where(series_labels_targ==targ_series_test)[0]

Split Count-Target: 1
Split Count-Target: 2


In [96]:
# Work on copies so later experiments cannot modify the loaded source/target arrays.
X_src = feature_matrix_src.copy()
Y_src = target_labels_src.copy()
X_targ = feature_matrix_targ.copy()
Y_targ = target_labels_targ.copy()
###some fxn

In [103]:
# Prepare all four source/target train/test subsets with train = False and the
# scaler that came back from get_trained_DANN.
# NOTE(review): each call rebinds `scaler` to whatever prepare_data_for_TF returns;
# presumably the same object is handed back when train = False - TODO confirm.
src_train_X, src_train_Y, scaler = prepare_data_for_TF(X_src, Y_src, src_train_idxs, exclude, train = False, scaler = scaler)
src_test_X, src_test_Y, scaler =  prepare_data_for_TF(X_src, Y_src, src_test_idxs, exclude, train = False, scaler = scaler)
targ_train_X, targ_train_Y, scaler =  prepare_data_for_TF(X_targ, Y_targ, targ_train_idxs, exclude, train = False, scaler = scaler)
targ_test_X, targ_test_Y, scaler =  prepare_data_for_TF(X_targ, Y_targ, targ_test_idxs, exclude, train = False, scaler = scaler)

In [None]:
# Domain-adaptation step on the already-trained model, with three times the
# label-training epoch budget and early stopping in 'max' mode.
# NOTE(review): X_source_cube, Y_source_cube and X_target_cube are only assigned in a
# cell that appears further down in this notebook, so this cell fails on a fresh
# top-to-bottom run.
print('Adapting to target Domain')
es = EarlyStopping_Custom(mode='max', min_delta=.05, patience=es_patience)
trained_model.train_domain_adapt(X_source_cube, Y_source_cube, X_target_cube, validation_split = 0.25,\
                                epochs = epochs*3, batch_size = batch_size, verbose = 0, callback = es)

In [None]:
#to copy model initialize new model and transfer weights

In [61]:
# Write the trained model's weights under the relative name 'tmp' (resolved against the kernel's working directory).
trained_model.save_weights('tmp')

In [59]:
# Inspect the sub-model registered under the name 'feature_extractor'.
trained_model.get_layer('feature_extractor').summary()

Model: "feature_extractor"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        [(None, 16)]              0         
_________________________________________________________________
dense_8 (Dense)              (None, 16)                272       
_________________________________________________________________
dropout_8 (Dropout)          (None, 16)                0         
Total params: 272
Trainable params: 272
Non-trainable params: 0
_________________________________________________________________


Target Subject :/content/drive/Othercomputers/My MacBook Pro/EMG_gestures/EMG_data/01


In [21]:
# Score the trained model on the target subject and append the result to the running lists.
# NOTE(review): `test_idxs` is not assigned at top level anywhere earlier in the visible
# notebook, and the recorded run of this cell ended in a ValueError - the cell relies on
# kernel state that a fresh run will not have.
test_scores = evaluate_trained_nn(feature_matrix_targ, target_labels_targ, test_idxs, exclude, trained_model,\
                                        score_list, scaler, mv = mv)
#append to list
test_scores_all = np.vstack((test_scores_all, test_scores)) if test_scores_all.size else test_scores
targ_subject_list.append(targ_subject_id)

0.8935321733657101

ValueError: ignored

In [None]:
# Planning notes written as bare names: the four index sets the adaptation experiment needs.
# NOTE(review): none of these names is assigned in the visible notebook, so running
# this cell raises NameError.
source_train_idxs
source_test_idxs

domain_train_idxs
domain_test_idxs

In [43]:
# Body of the commented-out function below, unrolled at top level for debugging.
# NOTE(review): Y, X_source, Y_source, X_target and Y_target are not assigned at top
# level in the visible notebook (an earlier cell uses X_src / Y_src / X_targ / Y_targ),
# and test_idxs must already exist in the kernel before the first line can run.
#def adapt_and_evaluate_trained_DANN(X, Y, test_idxs, exclude, trained_model, score_list = ['f1'],scaler = None, mv = None):
    #exclude indicated labels
test_idxs_orig = test_idxs.copy()
in_samples = np.where(np.isin(Y,exclude, invert = True))[0]
test_idxs = np.intersect1d(test_idxs,in_samples)

#domain adapt
X_source_cube, Y_source_cube, scaler = prepare_data_for_TF(X_source,Y_source, train_idxs, exclude, train = False, scaler = scaler)
 # get testing data cubes
X_target_cube, Y_target_cube, scaler = prepare_data_for_TF(X_target,Y_target, test_idxs, exclude, train = False, scaler = scaler)


In [44]:
# Attempt to copy the trained model via Keras cloning.
# NOTE(review): the recorded run of this cell raised ValueError; an earlier note in this
# notebook suggests initialising a new model and transferring the weights instead.
dann_model = keras.models.clone_model(trained_model)

ValueError: ignored

Adapting to target Domain
Early Stopping after 11 epochs


In [49]:
# Score the model's label predictions on the prepared source data.
get_scores(X_source_cube, Y_source_cube, trained_model.predict_label, score_list)

0.979084444402611

In [46]:
# Score the model's label predictions on the prepared test data.
# NOTE(review): X_test_cube / Y_test_cube are only assigned inside get_trained_DANN in the
# visible notebook, so at top level they must come from leftover kernel state.
test_scores = get_scores(X_test_cube, Y_test_cube, trained_model.predict_label, score_list)

In [47]:
# Show the score computed in the previous cell.
print(test_scores)

0.5405535725104433


In [None]:
#score on source data
adapt_source_test_scores
#score on target data
adapt_target_train_scores
adapt_target_test_scores

In [None]:
    # Orphaned tail of a function whose `def` line is commented out elsewhere in this notebook.
    # NOTE(review): as a standalone cell the `return` below is outside any function, so
    # this cell cannot run until it is re-attached to a function definition.
    print('Evaluate Model')
    if mv:
         test_scores = apply_mv_and_get_scores(X, Y, test_idxs_orig, exclude,\
                                               scaler, trained_model.predict_label, mv, score_list)
    else:

       
        #get score for testing data
        test_scores = get_scores(X_test_cube, Y_test_cube, trained_model.predict_label, score_list)
    return test_scores

In [None]:
def across_subject_nn_performance(data_folder, src_subject_id, nsubjects, nreps, lo_freq, hi_freq, win_size, step, model_dict, exclude, score_list = ['f1'], \
                          verbose = 0, epochs = 40, batch_size = 2, es_patience = 5, mv = False, permute = False):
    """Train on one source subject and score the trained model on every other subject.

    For each of `nreps` repetitions the source subject's data is (optionally) label-permuted,
    a model is trained with get_trained_model, and that model is scored with
    evaluate_trained_nn on each subject id in 1..nsubjects other than `src_subject_id`.

    Parameters
    ----------
    data_folder : root folder; each subject lives in a sub-folder named '%02d' % subject_id.
    src_subject_id : id of the training subject. It may lie outside 1..nsubjects, in which
        case all `nsubjects` subjects are used as targets.
    nsubjects : target subjects are drawn from ids 1..nsubjects.
    nreps : number of train/evaluate repetitions.
    lo_freq, hi_freq, win_size, step : forwarded to get_subject_data_for_classification.
    model_dict, exclude, score_list, verbose, epochs, batch_size, es_patience, mv :
        forwarded to get_trained_model / evaluate_trained_nn.
    permute : when True, source labels are permuted with permute_class_within_sub
        before each repetition's training.

    Returns
    -------
    pandas.DataFrame with one 'Test' row per (repetition, target subject) followed by one
    'Train' row per repetition, and one '<score>_score' column per entry of score_list.

    NOTE(review): get_trained_model and evaluate_trained_nn are not defined in this
    notebook; presumably they come from the EMG_gestures star-import - confirm.
    """

    subject_folder = os.path.join(data_folder,'%02d'%(src_subject_id))
    print('=======================')
    print(subject_folder)

    # Process data and get features
    #get features across segments and corresponding info
    feature_matrix_src, target_labels_src, window_tstamps_src, \
    block_labels_src, series_labels_src = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                win_size, step)
    target_labels_src_orig = target_labels_src.copy()#keep originals before permuting
    train_idxs = np.arange(target_labels_src.size)
    np.random.seed(1)#for reproducibility

    results_df = []#one dataframe per repetition plus one for the training rows
    n_scores = len(score_list)
    train_scores_all = np.empty((nreps,n_scores))
    train_info_dict = {'val_loss': np.empty((nreps,)),\
                    'train_loss': np.empty((nreps,)),\
                    'epochs_trained':np.empty((nreps,))}
    for rep in range(nreps):
        if permute:
            #permute while ignoring excluded blocks; always start from the untouched labels
            target_labels_src = permute_class_within_sub(target_labels_src_orig, block_labels_src, np.ones((target_labels_src.size,)), exclude)

        print('Subject %d|Rep %d'%(src_subject_id, rep+1))
        train_scores, trained_model, scaler, train_history = get_trained_model(feature_matrix_src, target_labels_src, train_idxs, model_dict, \
                                                                               exclude, score_list = score_list,\
                                                                        verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                        es_patience = es_patience, mv = mv)
        print('Epochs Trained: %d'%(len(train_history.history['val_loss'])))
        train_scores_all[rep,:] = train_scores

        #save training details to dict
        train_info_dict['train_loss'][rep] = train_history.history['loss'][-1]
        train_info_dict['val_loss'][rep] = train_history.history['val_loss'][-1]
        train_info_dict['epochs_trained'][rep] = len(train_history.history['val_loss'])

        # test on all other subjects
        targ_scores = []
        targ_subject_list = []
        for targ_subject_id in range(1,nsubjects+1):
            if targ_subject_id == src_subject_id:
                continue

            subject_folder = os.path.join(data_folder,'%02d'%(targ_subject_id))
            print('Target Subject :%s'%(subject_folder))

            # Process data and get features
            #get features across segments and corresponding info
            feature_matrix_targ, target_labels_targ, window_tstamps_targ, \
            block_labels_targ, series_labels_targ = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                    win_size, step)
            test_idxs = np.arange(target_labels_targ.size)

            test_scores = evaluate_trained_nn(feature_matrix_targ, target_labels_targ, test_idxs, exclude, trained_model,\
                                                   score_list, scaler, mv = mv)
            targ_scores.append(test_scores)
            targ_subject_list.append(targ_subject_id)

        # Size every column from the targets actually evaluated: when src_subject_id is
        # outside 1..nsubjects there are nsubjects targets, not nsubjects-1.
        n_targets = len(targ_subject_list)
        # Force a (n_targets, n_scores) table so column indexing also works when there is
        # a single target or when each score comes back as a scalar.
        test_scores_all = np.reshape(np.asarray(targ_scores, dtype = float), (n_targets, n_scores))

        #put testing results in dataframe
        data_dict = {'Type':['Test']*n_targets,\
                     'Rep':[rep+1]*n_targets,\
                     'Test_Subject':targ_subject_list,\
                     'Epochs':[epochs]*n_targets,\
                     'Batch_Size':[batch_size]*n_targets,\
                     'Train_Loss':[train_info_dict['train_loss'][rep]]*n_targets,\
                     'Val_Loss':[train_info_dict['val_loss'][rep]]*n_targets,\
                     'Epochs_Trained':[train_info_dict['epochs_trained'][rep]]*n_targets,\
                     }
        for sidx in range(n_scores):
            data_dict['%s_score'%(score_list[sidx])] = test_scores_all[:,sidx]
        results_df.append(pd.DataFrame(data_dict))


    #put training results in dataframe (the source subject is recorded in the Test_Subject column)
    data_dict = {'Type':['Train' for x in range(nreps)],\
                 'Rep':[x+1 for x in range(nreps)],\
                 'Test_Subject':[src_subject_id for x in range(nreps)],\
                 'Epochs':[epochs for x in range(nreps)],\
                 'Batch_Size':[batch_size for x in range(nreps)],\
                 'Train_Loss':train_info_dict['train_loss'],\
                 'Val_Loss':train_info_dict['val_loss'],\
                 'Epochs_Trained':train_info_dict['epochs_trained'],\
                 }
    for sidx in range(n_scores):
        data_dict['%s_score'%(score_list[sidx])] = train_scores_all[:,sidx]
    results_df.append(pd.DataFrame(data_dict))


    results_df = pd.concat(results_df, axis = 0).reset_index(drop = True)

    return results_df

In [None]:
#define where the data files are located


# NOTE: this rebinds nsubjects, which an earlier configuration cell in this notebook also sets.
nsubjects = 10

#randomly-selected subjects to use as hold-out test data
# NOTE(review): the pooling loop only visits ids 1..nsubjects, so with nsubjects = 10
# only id 10 from this list is actually skipped there - confirm the intended range.
test_subjects = [10, 12, 20, 14, 23, 34,  0]



In [None]:
#intialize empty lists
feature_matrix_all = np.empty((0,0))
target_labels_all = np.empty((0,))
window_tstamps_all = np.empty((0,))
block_labels_all  = np.empty((0,))
series_labels_all  = np.empty((0,))
subject_id_all = np.empty((0,))
block_count = 0

for subject_id in range(1,nsubjects+1):
    if subject_id not in test_subjects:
        subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
        print('=======================')
        print(subject_folder)

        # Process data and get features 
        #get features across segments and corresponding info
        feature_matrix, target_labels, window_tstamps, \
        block_labels, series_labels = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                        win_size, step)

        #prevent repeat of block labels by increasing block count
        block_labels = block_labels+block_count
        block_count = np.max([block_count, np.max(block_labels)])


        # concatenate lists
        feature_matrix_all = np.vstack((feature_matrix_all,feature_matrix)) if feature_matrix_all.size else feature_matrix
        target_labels_all = np.hstack((target_labels_all,target_labels))
        window_tstamps_all = np.hstack((window_tstamps_all,window_tstamps))
        block_labels_all = np.hstack((block_labels_all,block_labels))
        series_labels_all = np.hstack((series_labels_all,series_labels))
        subject_id_all = np.hstack((subject_id_all,np.ones((block_labels.size))*subject_id))

In [None]:
#debug here
feature_matrix = feature_matrix_all.copy()
target_labels = target_labels_all.copy()
sub_labels = subject_id_all.copy()
block_labels =  block_labels_all.copy()
series_labels = series_labels_all.copy()
model_dict = {'tm_layers':0,'tm_activation':'linear','fe_layers':1, 'fe_activation':'tanh'}
exclude = [0,7]
score_list = ['f1']
n_train_splits = 4
n_val_splits = 2
verbose = 0
epochs = 1000
batch_size = 2
es_patience = 5
validation_split = 0.25
mv = None
permute = False

In [None]:
#iterate through all subjects as 

#train on source subject

# use remaining subjects as target
#train same model with domain labels of target

#held-out subjects will be used 