In [None]:
#Run cell to mount Google Drive at /content/drive (Colab only).
#The data, results and figure paths used further down all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# install the EMG_gestures package from the mounted Drive folder to have access to custom functions
# NOTE(review): --use-feature=in-tree-build looks obsolete on recent pip releases (in-tree builds
# became the default) - confirm the runtime's pip still accepts the flag
%pip install /content/drive/MyDrive/EMG_gestures/ --use-feature=in-tree-build

In [None]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
#matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later dropped the
#old names, so pick whichever spelling this runtime actually provides
_white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
plt.style.use(_white_style)
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py
import pickle
from tensorflow import keras

#append repo folder to search path
#import custom functions
from EMG_gestures.utils import *

In [None]:
#define hyper params for each model
#every model shares 'n_grus' = 24; the rows below give
#(n_dense_pre, n_dense_post, activation) in model-id order, starting at id 1
_dense_layouts = [
    (0, 0, ''),      # 1: no dense layers, no activation
    (1, 0, 'tanh'),  # 2
    (2, 0, 'tanh'),  # 3
    (3, 0, 'tanh'),  # 4
    (1, 0, 'relu'),  # 5
    (2, 0, 'relu'),  # 6
    (3, 0, 'relu'),  # 7
    (0, 1, 'tanh'),  # 8
    (0, 2, 'tanh'),  # 9
    (0, 3, 'tanh'),  # 10
    (0, 1, 'relu'),  # 11
    (0, 2, 'relu'),  # 12
    (0, 3, 'relu'),  # 13
    (2, 2, 'tanh'),  # 14
    (2, 2, 'relu'),  # 15
]
model_dict = {
    model_id: {'n_grus':24, 'n_dense_pre':n_pre, 'n_dense_post':n_post, 'activation':activation}
    for model_id, (n_pre, n_post, activation) in enumerate(_dense_layouts, start = 1)
}

In [None]:
#folder on the mounted Drive that holds one sub-folder per subject
data_folder = '/content/drive/MyDrive/EMG_gestures/EMG_data/'

#subject ids run from 1 to nsubjects
nsubjects = 36

#randomly-selected subjects to use as hold-out test data
test_subjects = [17, 23, 7, 8, 3]

# User-defined parameters
#bandpass filter bounds: (lower, upper)
lo_freq, hi_freq = 20, 450

#window size over which to compute time-domain features; step is kept as its own
#name (currently equal to the window size) in case we want to re-run later with some overlap
win_size = step = 100

In [None]:
#initialize per-subject accumulators; everything is concatenated once after the loop
#instead of re-copying the growing arrays on every iteration
feature_chunks = []
target_chunks = []
tstamp_chunks = []
block_chunks = []
subject_chunks = []
block_count = 0

for subject_id in range(1,nsubjects+1):
    #hold-out test subjects are not loaded here
    if subject_id in test_subjects:
        continue

    subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
    print('=======================')
    print(subject_folder)

    # Process data and get features
    #get features across segments and corresponding info
    #(series_labels is returned by the helper but not used in this cell)
    feature_matrix, target_labels, window_tstamps, \
    block_labels, series_labels = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                    win_size, step)

    #prevent repeat of block labels by increasing block count
    #NOTE(review): this offset only keeps labels unique across subjects if each subject's
    #block labels start at 1 (a label of 0 would collide with the previous subject's max) - confirm
    block_labels = block_labels+block_count
    block_count = np.max([block_count, np.max(block_labels)])

    #stash this subject's arrays
    feature_chunks.append(feature_matrix)
    target_chunks.append(target_labels)
    tstamp_chunks.append(window_tstamps)
    block_chunks.append(block_labels)
    subject_chunks.append(np.ones((block_labels.size))*subject_id)

#concatenate everything in a single pass
#the empty float seed reproduces the dtype promotion of the previous incremental hstack
_empty_1d = np.empty((0,))
feature_chunks = [chunk for chunk in feature_chunks if chunk.size]
feature_matrix_all = np.vstack(feature_chunks) if feature_chunks else np.empty((0,0))
target_labels_all = np.hstack([_empty_1d]+target_chunks)
window_tstamps_all = np.hstack([_empty_1d]+tstamp_chunks)
block_labels_all = np.hstack([_empty_1d]+block_chunks)
subject_id_all = np.hstack([_empty_1d]+subject_chunks)

In [None]:
#output locations on the mounted Drive
results_folder = '/content/drive/MyDrive/EMG_gestures/results_data/xsubject_joint_data/RNN/'
figure_folder = '/content/drive/MyDrive/EMG_gestures/figures/training_history/xsubject_joint_data/RNN'


#RNN training args - all other arguments are the same
verbose, epochs, batch_size = 0, 30, 5
validation_split = 0.1
# experiment params
n_splits, nsets_training, nreps = 4, 10, 10

#excluded labels
exclude = [0,7]
#performance metrics
score_list = ['f1','accuracy']

#single model / single repetition; the loops over models and reps are currently commented out
model_id = 1
rep = 0
#for model_id in range(1,5+1):
np.random.seed(1)# Set seed for replicability
results_df = []
   # for rep in range(nreps):
print('Model %d | Rep %d'%(model_id, rep+1))
print('--True Data--')

#keyword arguments for the training call (true, i.e. non-permuted, labels)
fit_kwargs = dict(
    model_dict = model_dict[model_id],
    score_list = score_list,
    n_splits = n_splits,
    nsets_training = nsets_training,
    verbose = verbose,
    epochs = epochs,
    batch_size = batch_size,
    validation_split = validation_split,
    mv = None,
    permute = False,
)
rep_results_df, train_history = log_reg_rnn_joint_data_train_frac_subjects(
    feature_matrix_all, target_labels_all, subject_id_all,
    block_labels_all, exclude,
    **fit_kwargs
)
    #     #add details and concatenate dataframe
    #     rep_results_df['Shuffled'] = False
    #     rep_results_df['Rep'] =  rep+1
    #     results_df.append(rep_results_df)


    #     #plot training history
    #     fig_title = 'Log reg model %02d; rep %i'%(model_id,rep)
    #     fig_fn = os.path.join(figure_folder,'log_reg_model_%02d_rep_%i_loss.png'%(model_id,rep))
    #     plot_training_history(train_history, fig_title,fig_fn)

    #     #repeat with shuffled data
    #     print('Model %d | Rep %d'%(model_id, rep+1))
    #     print('--Permuted Data--')
    #     rep_results_df, train_history = log_reg_xsubject_joint_data_train_frac_subjects(feature_matrix_all, target_labels_all, subject_id_all,\
    #                                                                                     block_labels_all, exclude,\
    #                                                                                     model_dict = model_dict[model_id], score_list = score_list,\
    #                                                                                     n_splits = n_splits, nsets_training = nsets_training,\
    #                                                                                     verbose = verbose, epochs = epochs, batch_size = batch_size,\
    #                                                                                     validation_split = validation_split, mv = None, permute = True)
    #     #add details and concatenate dataframe
    #     rep_results_df['Shuffled'] = True
    #     rep_results_df['Rep'] =  rep+1
    #     results_df.append(rep_results_df)

    # #concatenate all data frames
    # results_df = pd.concat(results_df,axis = 0)

    # # #save results to file
    # results_fn = 'model_%02d_results.h5'%(model_id)
    # results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')


In [None]:
def rnn_xsubject_joint_data_train_frac_subjects(feature_matrix, target_labels, sub_labels, block_labels, exclude,\
                                                    model_dict, score_list, n_splits = 4, nsets_training = 10,\
                                                    verbose = 0, epochs = 40, batch_size = 2, validation_split = 0.1, mv = False, permute = False):
    """
    train and validate a RNN model using data from multiple subjects
    train and validate model performance by splitting subjects into a train and test set

    The unique values of sub_labels are split into n_splits shuffled folds. For each fold
    a model is trained on the samples of the training subjects (get_trained_rnn_model) and
    scored on the samples of the held-out subjects (evaluate_trained_rnn).

    Parameters
    ----------
    feature_matrix, target_labels, block_labels :
        per-sample data, passed through to the training/evaluation helpers
    sub_labels : array-like
        subject id of every sample; defines the units of the train/test split
    exclude :
        labels to exclude, passed through to the helpers
    model_dict, score_list :
        model hyper params and names of the performance metrics
    n_splits : int
        number of cross-validation folds over subjects
    nsets_training, verbose, epochs, batch_size, validation_split, mv :
        forwarded to get_trained_rnn_model (mv also to evaluate_trained_rnn)
    permute : bool
        if True, target labels are permuted with permute_class_within_sub before training

    Returns
    -------
    results_df : pandas.DataFrame
        one row per fold and per Type ('Train'/'Test') with columns
        'Fold', 'Type' and one '<score>_score' column per entry of score_list
    train_history : dict
        'loss' and 'val_loss' arrays with one row per fold
        ('val_loss' stays empty when validation_split is 0)
    """
    #subjects in list. these are the units over which we will do train/test split
    subs = np.unique(sub_labels)

    if permute:
        #permute while ignoring excluded blocks
        target_labels = permute_class_within_sub(target_labels, block_labels, sub_labels, exclude)

    #initialize object for k-fold cross-validation
    #NOTE(review): KFold is not imported explicitly in this notebook; presumably it arrives via
    #the star import from EMG_gestures.utils - confirm
    kf = KFold(n_splits=n_splits,shuffle = True)

    #initialize empty arrays
    n_scores = len(score_list)
    train_scores_all = np.empty((n_splits,n_scores))
    test_scores_all = np.empty((n_splits,n_scores))
    loss_rows = []
    val_loss_rows = []

    for split_count, (subs_train_idxs, subs_test_idxs) in enumerate(kf.split(subs)):
        print('Split Count: %i'% (split_count+1))

        #get train and test indices (sample-level) from the subject-level split
        train_subs = subs[subs_train_idxs]
        test_subs = subs[subs_test_idxs]
        train_idxs = np.where(np.isin(sub_labels,train_subs, invert = False))[0]
        test_idxs = np.where(np.isin(sub_labels,test_subs, invert = False))[0]

        #get trained model
        train_scores, trained_model, scaler, history = get_trained_rnn_model(feature_matrix, target_labels, train_idxs, block_labels, nsets_training,\
                                                                            exclude, model_dict, score_list,\
                                                                            verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                            validation_split = validation_split,\
                                                                            mv = mv)
        #Evaluating on held-out subjects
        test_scores = evaluate_trained_rnn(feature_matrix, target_labels, test_idxs, exclude, trained_model, score_list,scaler, mv = mv)

        #put scores in array
        train_scores_all[split_count,:] = train_scores
        test_scores_all[split_count,:] = test_scores

        #append history (validation loss only exists when a validation split is used)
        loss_rows.append(history.history['loss'])
        if validation_split>0:
            val_loss_rows.append(history.history['val_loss'])

    #stack the per-fold loss curves, one row per fold
    train_history = dict()
    train_history['loss'] = np.vstack(loss_rows) if loss_rows else np.empty((0,0))
    train_history['val_loss'] = np.vstack(val_loss_rows) if val_loss_rows else np.empty((0,0))

    #put in data frame: one frame for the train scores, one for the test scores
    fold_ids = np.arange(n_splits)+1
    frames = []
    for type_label, scores_all in (('Train', train_scores_all), ('Test', test_scores_all)):
        data_dict = {'Fold':fold_ids,\
                     'Type':[type_label for x in range(n_splits)]}
        for sidx in range(n_scores):
            data_dict['%s_score'%(score_list[sidx])] = scores_all[:,sidx]
        frames.append(pd.DataFrame(data_dict))

    results_df = pd.concat(frames,axis = 0)

    return results_df, train_history

In [None]:
def rnn_xsubject_joint_data_train_all_subjects(feature_matrix, target_labels, sub_labels, block_labels, exclude,\
                                                    model_dict, score_list,
                                                    verbose = 0, epochs = 40, batch_size = 2, validation_split = 0.1, mv = False, permute = False,
                                                    nsets_training = 10):
    """
    train a RNN model using data from multiple subjects
    train on all subjects (no subjects are held out, so only training scores are reported)

    Parameters
    ----------
    feature_matrix, target_labels, block_labels :
        per-sample data, passed through to get_trained_rnn_model
    sub_labels : array-like
        subject id of every sample; samples of every subject found here are used for training
    exclude :
        labels to exclude, passed through to get_trained_rnn_model
    model_dict, score_list :
        model hyper params and names of the performance metrics
    verbose, epochs, batch_size, validation_split, mv :
        forwarded to get_trained_rnn_model
    permute : bool
        if True, target labels are permuted with permute_class_within_sub before training
    nsets_training : int
        forwarded to get_trained_rnn_model. Previously this was read from a notebook-level
        global; it is now an explicit keyword (appended last so positional callers are unaffected)

    Returns
    -------
    results_df : pandas.DataFrame
        single row with 'Type' == 'Train' and one '<score>_score' column per entry of score_list
    history, trained_model, scaler :
        returned unchanged from get_trained_rnn_model
    """

    #subjects in list. all of them are used for training
    subs = np.unique(sub_labels)

    if permute:
        #permute while ignoring excluded blocks
        target_labels = permute_class_within_sub(target_labels, block_labels, sub_labels, exclude)

    #sample indices belonging to the training subjects (here: every subject)
    train_subs = subs
    train_idxs = np.where(np.isin(sub_labels,train_subs, invert = False))[0]

    #get trained model
    train_scores, trained_model, scaler, history = get_trained_rnn_model(feature_matrix, target_labels, train_idxs, block_labels, nsets_training,\
                                                                        exclude, model_dict, score_list,\
                                                                        verbose = verbose, epochs = epochs, batch_size = batch_size,\
                                                                        validation_split = validation_split,\
                                                                        mv = mv)

    #put in data frame
    data_dict = {'Type':'Train'}
    for sidx, score_name in enumerate(score_list):
        data_dict['%s_score'%(score_name)] = train_scores[sidx]
    results_df = pd.DataFrame(data_dict, index = [0])

    return results_df, history, trained_model, scaler