In [None]:
# Mount Google Drive at /content/drive so that the repository and data paths under
# /content/drive/MyDrive used in later cells can be resolved.
# The google.colab package is required, so this cell is specific to a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#import necessary packages

#our workhorses
import numpy as np
import pandas as pd
import scipy

#to visualize
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#style params for figures
sns.set(font_scale = 2)
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*' and newer
# releases no longer accept the old names, so use whichever spelling this install provides
_white_style = 'seaborn-white' if 'seaborn-white' in plt.style.available else 'seaborn-v0_8-white'
plt.style.use(_white_style)
plt.rc("axes", labelweight="bold")
from IPython.display import display, HTML

#to load files
import os
import sys
import h5py
import pickle

#append repo folder to search path
sys.path.append('/content/drive/MyDrive/limb-position-EMG-Repo/')
from utils import *

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import KFold

In [None]:
def get_rnn_model(input_shape, n_outputs, n_grus = 24, n_dense_pre = 0, n_dense_post = 0, drop_prob = 0.5, activation = 'tanh', mask_value = -100):
    """
    Build a many-to-many GRU classifier that emits class probabilities at every timestep.

    Layer order: Masking -> n_dense_pre x (TimeDistributed Dense + Dropout) -> GRU -> Dropout
    -> n_dense_post x (TimeDistributed Dense + Dropout) -> TimeDistributed softmax Dense.
    The model is returned uncompiled.

    Args:
        input_shape: shape handed to Input; input_shape[1] is the width of the pre-GRU dense layers
        n_outputs: number of output classes (units of the final softmax layer)
        n_grus: number of GRU units; also the width of the post-GRU dense layers
        n_dense_pre: how many dense+dropout stages to place before the GRU
        n_dense_post: how many dense+dropout stages to place after the GRU
        drop_prob: rate given to every Dropout layer
        activation: activation of the pre- and post-GRU dense layers
        mask_value: value indicating which timepoints to mask out

    Returns:
        model: Model mapping the input tensor to the per-timestep softmax output
    """

    def dense_then_dropout(tensor, n_units):
        # one time-distributed dense layer followed by dropout
        tensor = TimeDistributed(Dense(n_units, activation = activation))(tensor)
        return Dropout(drop_prob)(tensor)

    model_input = Input(shape = input_shape)
    hidden = Masking(mask_value = mask_value)(model_input)

    # optional feature-transforming stages ahead of the recurrent layer
    for _ in range(n_dense_pre):
        hidden = dense_then_dropout(hidden, input_shape[1])

    # recurrent core, returning the full sequence so every timestep gets a prediction
    hidden = GRU(n_grus, return_sequences = True, stateful = False)(hidden)
    hidden = Dropout(drop_prob)(hidden)

    # optional stages between the recurrent layer and the classifier
    for _ in range(n_dense_post):
        hidden = dense_then_dropout(hidden, n_grus)

    class_probs = TimeDistributed(Dense(n_outputs, activation = 'softmax'))(hidden)
    return Model(inputs = model_input, outputs = class_probs)

def RNN_on_labeled_data(feature_matrix, target_labels, window_tstamps, block_labels, model, n_splits = 4,\
                       verbose = 0, epochs = 40, batch_size = 2, permute = False):
    """
    Train and evaluate an RNN model with block-wise stratified k-fold cross-validation.

    Blocks (runs of identical values in block_labels) are the units that get split into
    train and test folds; the split is stratified by the class of each block.

    Args:
        feature_matrix: 2D numpy array with data, dimensions [features, samples]
        target_labels: 1D numpy array with the class label of each sample
        window_tstamps: not used by this function (kept for interface compatibility)
        block_labels: 1D numpy array with the block id of each sample
        model: model exposing fit/get_weights/set_weights; fit is called without compiling
            here, so it has to be compiled by the caller
        n_splits: number of cross-validation folds
        verbose: verbosity passed to model.fit
        epochs: number of training epochs per fold
        batch_size: batch size passed to model.fit
        permute: if True, class labels are shuffled across blocks before training

    Returns:
        train_f1_scores: training scores for each split
        test_f1_scores: test scores for each split
    """

    #initialize empty array
    train_f1_scores = np.empty((n_splits,))
    test_f1_scores = np.empty((n_splits,))

    #get block_ids and corresponding classes in block. these are the units over which we will do train/test split
    blocks = np.array([k for k,g in groupby(block_labels)])
    classes = np.array([k for k,g in groupby(target_labels) if k!=0])

    #permute class labels, if indicated
    if permute:
        #every sample of block i receives the i-th entry of the shuffled class list
        classes_perm = np.random.permutation(classes)
        target_labels_shuffled = np.empty((0,))
        for i,b in enumerate(blocks):
            idxs = np.where(block_labels==b)[0]
            target_labels_shuffled = np.hstack((target_labels_shuffled,classes_perm[i]*np.ones((idxs.size,))))
        target_labels = target_labels_shuffled
        classes = classes_perm

    #stratify split to retain ratio of class labels
    skf = StratifiedKFold(n_splits=n_splits,shuffle = True)
    print(block_labels.shape)
    print(target_labels.shape)
    print(blocks.shape,classes.shape)

    #remember the incoming weights so that every fold starts from the same state; without this the
    #model keeps the weights learned on earlier folds, whose training data contains the current test fold
    #(optimizer state is not reset by this)
    initial_weights = model.get_weights()

    #systematically use one fold of the data as a held-out test set
    for split_count, (blocks_train_idxs, blocks_test_idxs) in enumerate(skf.split(blocks, classes)):
        print('Split Count: %i'% (split_count+1))

        #get train and test indices
        blocks_train = blocks[blocks_train_idxs]
        blocks_test = blocks[blocks_test_idxs]
        train_idxs =np.where(np.isin(block_labels,blocks_train))[0]
        test_idxs =np.where(np.isin(block_labels,blocks_test))[0]

        # select training data and pad to get an array where each sample has same number of timesteps
        X_train = feature_matrix[:,train_idxs]
        y_train = target_labels[train_idxs]
        #one-hot encoding of class labels
        y_train = to_categorical(y_train-np.min(y_train))
        #get block labels of given samples
        win_blocks_train = block_labels[train_idxs]

        #get cube (get_data_cube returns the cube's block ids as third value; not needed here)
        X_train_cube, Y_train_cube, _, scaler = get_data_cube(X_train, y_train,win_blocks_train, train = True, magic_value = -100)
        print(X_train_cube.shape, Y_train_cube.shape)

        # select test data and pad to get an array where each sample has same number of timesteps
        X_test = feature_matrix[:,test_idxs]
        y_test = target_labels[test_idxs]
        #one-hot encoding of class labels
        y_test = to_categorical(y_test-np.min(y_test))
        #get block labels of given samples
        win_blocks_test = block_labels[test_idxs]
        #get data cube, re-using the scaler fit on the training data
        X_test_cube, Y_test_cube, _, scaler = get_data_cube(X_test, y_test, win_blocks_test, train = False, scaler = scaler, magic_value = -100)
        print(X_test_cube.shape, Y_test_cube.shape)

        print('Training Model')
        #start this fold from the original weights
        model.set_weights(initial_weights)
        # fit network
        model.fit(X_train_cube, Y_train_cube, epochs=epochs, batch_size=batch_size, verbose=verbose)

        print('Evaluating Model')
        #evaluate model on train and test data
        train_f1_scores[split_count] = get_RNN_f1(X_train_cube, Y_train_cube, model)
        test_f1_scores[split_count] = get_RNN_f1(X_test_cube, Y_test_cube, model)

    return train_f1_scores, test_f1_scores

def RNN_xsubject_joint_data(feature_matrix, target_labels, sub_labels, block_labels, model_dict, n_splits = 4,\
                       verbose = 0, epochs = 40, batch_size = 2, validation_split = 0.1, permute = False):
    """
    Cross-validate an RNN across subjects: each fold trains a fresh model on the pooled data
    of the training subjects and evaluates it on the held-out subjects.

    Args:
        feature_matrix: 2D numpy array with data, dimensions [features, samples]
        target_labels: 1D numpy array with the class label of each sample
        sub_labels: 1D numpy array with the subject id of each sample
        block_labels: 1D numpy array with the block id of each sample
        model_dict: hyperparameters with keys 'n_grus', 'n_dense_pre', 'activation' and,
            optionally, 'n_dense_post' (treated as 0 when absent)
        n_splits: number of folds over subjects
        verbose: verbosity passed to model.fit
        epochs: number of training epochs per fold
        batch_size: batch size passed to model.fit
        validation_split: fraction of the training cube handed to model.fit for validation
        permute: if True, class labels are permuted within each subject before training

    Returns:
        train_f1_scores: training scores for each split
        test_f1_scores: test scores for each split
        train_history: dict with 'loss' and 'val_loss' arrays, one row per split
            ('val_loss' stays empty when fit does not report it)
    """

    #subjects in list. these are the units over which we will do train/test split
    subs = np.unique(sub_labels)
    #permute class labels, if indicated
    if permute:
        #permute labels within each subject
        class_perm = np.empty((0,))
        for s in subs:
            sub_idxs = np.where(sub_labels==s)[0]
            blocks_sub = block_labels[sub_idxs]
            class_sub = target_labels[sub_idxs]

            class_sub_perm, dummy = permute_class_blocks(blocks_sub, class_sub)
            class_perm = np.hstack((class_perm, class_sub_perm))
        target_labels = class_perm.copy()

    #initialize object for k-fold cross-validation
    kf = KFold(n_splits=n_splits,shuffle = True)
    #initialize empty array
    train_f1_scores = np.empty((n_splits,))
    test_f1_scores = np.empty((n_splits,))
    train_history = dict()
    train_history['loss'] = np.empty((0,0))
    train_history['val_loss'] = np.empty((0,0))

    for split_count, (subs_train_idxs, subs_test_idxs) in enumerate(kf.split(subs)):
        print('Split Count: %i'% (split_count+1))

        #get train and test indices
        train_subs = subs[subs_train_idxs]
        test_subs = subs[subs_test_idxs]
        train_idxs = np.where(np.isin(sub_labels,train_subs, invert = False))[0]
        test_idxs = np.where(np.isin(sub_labels,test_subs, invert = False))[0]

        # select training data and pad to get an array where each sample has same number of timesteps
        X_train = feature_matrix[:,train_idxs]
        y_train = target_labels[train_idxs]
        #one-hot encoding of class labels
        y_train = to_categorical(y_train-np.min(y_train))
        #get block labels of given samples
        win_blocks_train = block_labels[train_idxs]

        #get cube (get_data_cube returns the cube's block ids as third value; not needed here)
        X_train_cube, Y_train_cube, _, scaler = get_data_cube(X_train, y_train,win_blocks_train, train = True, magic_value = -100)
        print(X_train_cube.shape, Y_train_cube.shape)

        # select test data and pad to get an array where each sample has same number of timesteps
        X_test = feature_matrix[:,test_idxs]
        y_test = target_labels[test_idxs]
        #one-hot encoding of class labels
        y_test = to_categorical(y_test-np.min(y_test))
        #get block labels of given samples
        win_blocks_test = block_labels[test_idxs]
        #get data cube, re-using the scaler fit on the training data
        X_test_cube, Y_test_cube, _, scaler = get_data_cube(X_test, y_test, win_blocks_test, train = False, scaler = scaler, magic_value = -100)
        print(X_test_cube.shape, Y_test_cube.shape)
        n_features, n_outputs = X_train_cube.shape[2], Y_test_cube.shape[2]

        # Define model architecture
        # setting timestep dimension to None
        # 'n_dense_post' is optional so that hyperparameter dicts without it keep working
        model = get_rnn_model((None,n_features), n_outputs, n_grus = model_dict['n_grus'], n_dense_pre = model_dict['n_dense_pre'],\
                            n_dense_post =  model_dict.get('n_dense_post', 0), activation = model_dict['activation'],\
                            mask_value = -100)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        print('Training Model')
        # fit network
        history = model.fit(X_train_cube, Y_train_cube, validation_split = validation_split, epochs=epochs, batch_size=batch_size, verbose=verbose)

        print('Evaluating Model')
        #evaluate model on train and test data
        train_f1_scores[split_count] = get_RNN_f1(X_train_cube, Y_train_cube, model)
        test_f1_scores[split_count] = get_RNN_f1(X_test_cube, Y_test_cube, model)

        #append history; a curve that fit did not record (e.g. no validation data) is skipped
        for curve in ('loss', 'val_loss'):
            if curve not in history.history:
                continue
            split_curve = np.array(history.history[curve])
            train_history[curve] = np.vstack((train_history[curve],split_curve)) if train_history[curve].size else split_curve

    return train_f1_scores, test_f1_scores, train_history

def get_data_cube(X, Y, window_blocks, train = True, scaler = None, magic_value = -100):
    """
    Create data cube for use with Keras RNN. Standardize data then pad and reshape data to have
    [samples, timesteps, features] dimensions with an equal number of timesteps for each slice.
    Each block becomes one sample of the cube; shorter blocks are padded at the end.

    Args:
        X: 2D numpy array with data, dimensions [features, samples]
        Y: 2D numpy array with one row of (one-hot) labels per sample, dimensions [samples, classes]
        window_blocks: 1D numpy array indicating block of provenance for input segment values
        train: Boolean indicating whether the input data is training data. If True a new
            StandardScaler is fit on X; if False the given scaler is applied
        scaler: fitted scaler (object with a transform method); required when train is False
        magic_value: integer indicating value with which to pad samples in X (Y is padded with 0)

    Returns:
        X_cube: 3D numpy array of size [blocks, timesteps, features]
        Y_cube: 3D numpy array of size [blocks, timesteps, classes]
        sample_blocks: 1D numpy array with the block id of each slice of the cubes (sorted, unique)
        scaler: the scaler that was applied to X

    Raises:
        ValueError: if there are no samples, or if train is False and no scaler is given
    """
    window_blocks = np.asarray(window_blocks)
    if window_blocks.size == 0:
        raise ValueError('get_data_cube needs at least one sample')

    #standardize across each feature dimension
    if train:
        scaler = StandardScaler().fit(X.T)
    elif scaler is None:
        #for testing data, we want to use same transform as was fit to training data
        raise ValueError('a fitted scaler must be supplied when train is False')
    X = scaler.transform(X.T).T

    # common number of time steps = size of the largest block
    sample_blocks, block_sizes = np.unique(window_blocks, return_counts = True)
    common_timesteps = block_sizes.max()

    # get each block, pad along the time axis, and collect as [timesteps, features] slices
    X_slices = []
    Y_slices = []
    for b_idx in sample_blocks:
        member_idxs = np.where(window_blocks==b_idx)[0]
        pad_size = common_timesteps-member_idxs.size
        X_slices.append(np.pad(X[:,member_idxs].T, pad_width=((0,pad_size),(0,0)), mode='constant', constant_values= magic_value))
        Y_slices.append(np.pad(Y[member_idxs,:], pad_width=((0,pad_size),(0,0)), mode='constant', constant_values= 0))

    # stack once along a new leading axis; this also yields a 3D cube when there is a single block
    X_cube = np.stack(X_slices, axis = 0)
    Y_cube = np.stack(Y_slices, axis = 0)

    return X_cube, Y_cube, sample_blocks, scaler


In [None]:
#define hyper params for each model
# hyperparameters per model id: every variant uses 24 GRU units and differs only in the
# number of pre-GRU dense layers and their activation
model_dict = {
    model_key: {'n_grus': 24, 'n_dense_pre': n_pre_layers, 'activation': dense_activation}
    for model_key, (n_pre_layers, dense_activation) in enumerate(
        [(1, 'linear'),
         (1, 'tanh'),
         (1, 'relu'),
         (2, 'tanh'),
         (2, 'relu'),
         (3, 'tanh'),
         (3, 'relu')],
        start = 1)
}


In [None]:
# Folder that holds one sub-folder per subject (named with a zero-padded two-digit id)
data_folder = '/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/'

# Subject ids run from 1 to nsubjects (inclusive) in the loading loop
nsubjects = 36

# Randomly-selected subjects to use as hold-out test data; the loading loop skips them
test_subjects = [17, 23,  7,  8,  3]

# User-defined parameters, passed to get_subject_data_for_classification
lo_freq = 20 #lower bound of bandpass filter (presumably in Hz -- confirm against utils)
hi_freq = 450 #upper bound of bandpass filter (presumably in Hz -- confirm against utils)

win_size = 100 #window size over which to compute time-domain features (units not visible here -- confirm)
step = win_size #step equal to the window size; kept as a separate parameter in case we want to re-run later with some overlap

# Class labels whose samples are dropped from the data before training
exclude = [0,7]

In [None]:
#intialize empty lists
feature_matrix_all = np.empty((0,0))
target_labels_all = np.empty((0,))
window_tstamps_all = np.empty((0,))
block_labels_all  = np.empty((0,))
series_labels_all  = np.empty((0,))
subject_id_all = np.empty((0,))
block_count = 0

for subject_id in range(1,nsubjects+1):
    if subject_id not in test_subjects:
        subject_folder = os.path.join(data_folder,'%02d'%(subject_id))
        print('=======================')
        print(subject_folder)

        # Process data and get features 
        #get features across segments and corresponding info
        feature_matrix, target_labels, window_tstamps, \
        block_labels, series_labels = get_subject_data_for_classification(subject_folder, lo_freq, hi_freq, \
                                                                        win_size, step)

        #prevent repeat of block labels by increasing block count
        block_labels = block_labels+block_count
        block_count = np.max([block_count, np.max(block_labels)])
        #exclude indicated samples
        in_samples = np.where(np.isin(target_labels,exclude, invert = True))[0]
        feature_matrix_in = feature_matrix[in_samples,:]
        target_labels_in = target_labels[in_samples]
        window_tstamps_in = window_tstamps[in_samples]
        block_labels_in = block_labels[in_samples]
        series_labels_in = series_labels[in_samples]
        # concatenate lists
        feature_matrix_all = np.vstack((feature_matrix_all,feature_matrix_in)) if feature_matrix_all.size else feature_matrix_in
        target_labels_all = np.hstack((target_labels_all,target_labels_in))
        window_tstamps_all = np.hstack((window_tstamps_all,window_tstamps_in))
        block_labels_all = np.hstack((block_labels_all,block_labels_in))
        series_labels_all = np.hstack((series_labels_all,series_labels_in))
        subject_id_all = np.hstack((subject_id_all,np.ones((block_labels_in.size))*subject_id))
        

/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/01
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/02
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/04
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/05
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/06
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/09
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/10
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/11
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/12
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/13
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/14
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/15
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/16
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/18
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/19
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/20
/content/drive/MyDrive/limb-position-EMG-Repo/EMG_data/21
/content/drive

In [None]:
def permute_class_within_sub(target_labels, block_labels, sub_labels):
    """
    Permute class labels separately within each subject.

    For every subject, that subject's block ids and class labels are handed to
    permute_class_blocks and the permuted labels are written back to the positions of
    that subject's samples, so the output stays aligned with the inputs even when a
    subject's samples are not stored contiguously or in sorted subject order.

    Args:
        target_labels: 1D numpy array with the class label of each sample
        block_labels: 1D numpy array with the block id of each sample
        sub_labels: 1D numpy array with the subject id of each sample

    Returns:
        1D float numpy array of permuted class labels, same length as target_labels
    """
    # float output matches the dtype produced by the previous hstack-based implementation
    permuted_labels = np.empty(target_labels.shape, dtype = np.float64)
    for s in np.unique(sub_labels):
        sub_idxs = np.where(sub_labels==s)[0]
        class_sub_perm, _ = permute_class_blocks(block_labels[sub_idxs], target_labels[sub_idxs])
        # write back in place instead of appending, so sample order is preserved
        permuted_labels[sub_idxs] = class_sub_perm
    return permuted_labels
    
def get_transform_module(model,input_layer):
    """
    Build a model made of the layers that precede the first GRU layer of a clone of `model`.

    The clone comes from keras.models.clone_model. The clone's first layer is skipped and
    the following layers are applied to `input_layer` one after another until a GRU layer
    is reached.

    Args:
        model: model whose architecture is cloned
        input_layer: input tensor to which the pre-GRU layers are applied

    Returns:
        transform_module: Model from input_layer to the output of the last pre-GRU layer
    """
    cloned_model = keras.models.clone_model(model)

    # gather the layers between the clone's first layer and its first GRU
    pre_gru_layers = []
    for candidate in cloned_model.layers[1:]:
        if isinstance(candidate, GRU):
            break
        pre_gru_layers.append(candidate)

    # chain them onto the supplied input
    tensor = input_layer
    for pre_layer in pre_gru_layers:
        tensor = pre_layer(tensor)

    return Model(inputs = input_layer, outputs = tensor)

def tm_template_weights_to_model(transform_module, model):
    """
    Copy the weights of every layer of `transform_module` into the layer of `model`
    at the same index.

    Args:
        transform_module: source of the weights
        model: model whose leading layers are overwritten in place

    Returns:
        model: the same model object that was passed in
    """
    n_tm_layers = len(transform_module.layers)
    for position in range(n_tm_layers):
        source_weights = transform_module.get_layer(index = position).get_weights()
        target_layer = model.get_layer(index = position)
        target_layer.set_weights(source_weights)
    return model

def model_weights_to_tm_template(transform_module, model):
    """
    Return a clone of `transform_module` whose layers carry the weights of the layers of
    `model` at the same indices. `transform_module` itself is not modified.

    Args:
        transform_module: template that is cloned with keras.models.clone_model
        model: source of the weights

    Returns:
        the clone holding the copied weights
    """
    tm_copy = keras.models.clone_model(transform_module)
    n_copy_layers = len(tm_copy.layers)
    for position in range(n_copy_layers):
        source_weights = model.get_layer(index = position).get_weights()
        target_layer = tm_copy.get_layer(index = position)
        target_layer.set_weights(source_weights)
    return tm_copy

def RNN_xsubject_transform_module(feature_matrix, target_labels, sub_labels, block_labels, model_dict, nreps = 10, n_train_splits = 4,\
                                  n_val_splits = 2,verbose = 0, epochs = 40, batch_size = 2, permute = False):
    """
    Cross-validate the transform-module approach across subjects.

    For every fold over subjects and every repetition:
      * Training stage: one model is trained on the training subjects one after another
        (in random order); the pre-GRU layers are re-initialized from a template before each subject.
      * Validation stage: the GRU and all layers after it are frozen, and for every held-out
        subject the pre-GRU layers are re-initialized and re-trained within a stratified
        k-fold split of that subject's blocks.

    Args:
        feature_matrix: 2D numpy array with data, dimensions [features, samples]
        target_labels: 1D numpy array with the class label of each sample
        sub_labels: 1D numpy array with the subject id of each sample
        block_labels: 1D numpy array with the block id of each sample
        model_dict: hyperparameters with keys 'n_grus', 'n_dense_pre' and 'activation'
        nreps: number of repetitions per fold
        n_train_splits: number of folds over subjects
        n_val_splits: number of stratified folds over the blocks of each held-out subject
        verbose: verbosity passed to model.fit
        epochs: number of training epochs per fit
        batch_size: batch size passed to model.fit
        permute: if True, class labels are permuted within each subject before training

    Returns:
        results_df: DataFrame with columns 'F1_score', 'Subject', 'Rep', 'Fold' and 'Type'
            ('Train', 'Val_Train' or 'Val_Test')
    """

    def _scores_frame(f1_scores, subjects, rep, fold, score_type):
        #one row per score; scalar entries are broadcast over the rows
        return pd.DataFrame({'F1_score':f1_scores,\
                             'Subject':subjects,\
                             'Rep':rep+1,\
                             'Fold':fold+1,\
                             'Type':score_type,\
                             })

    results_df = []

    #subjects in list. these are the units over which we will do train/test split
    subs = np.unique(sub_labels)
    #permute class labels, if indicated
    if permute:
        #permute labels within each subject
        class_perm = np.empty((0,))
        for s in subs:
            sub_idxs = np.where(sub_labels==s)[0]
            blocks_sub = block_labels[sub_idxs]
            class_sub = target_labels[sub_idxs]

            class_sub_perm, dummy = permute_class_blocks(blocks_sub, class_sub)
            class_perm = np.hstack((class_perm, class_sub_perm))
        target_labels = class_perm.copy()

    #initialize object for k-fold cross-validation
    kf = KFold(n_splits=n_train_splits,shuffle = True)

    for split_count, (subs_train_idxs, subs_test_idxs) in enumerate(kf.split(subs)):
        print('-------Split Count: %i-------'% (split_count+1))
        #get train and test indices
        train_subs = subs[subs_train_idxs]
        test_subs = subs[subs_test_idxs]
        train_idxs = np.where(np.isin(sub_labels,train_subs, invert = False))[0]
        test_idxs = np.where(np.isin(sub_labels,test_subs, invert = False))[0]

        # select training data and pad to get an array where each sample has same number of timesteps
        X_train = feature_matrix[:,train_idxs]
        y_train = target_labels[train_idxs]
        #one-hot encoding of class labels
        y_train = to_categorical(y_train-np.min(y_train))
        #get block labels of given samples
        win_blocks_train = block_labels[train_idxs]

        #get cube
        X_train_cube, Y_train_cube, train_blocks,scaler = get_data_cube(X_train, y_train,win_blocks_train, train = True, magic_value = -100)
        print(X_train_cube.shape, Y_train_cube.shape)

        # select test data and pad to get an array where each sample has same number of timesteps
        X_test = feature_matrix[:,test_idxs]
        y_test = target_labels[test_idxs]
        #one-hot encoding of class labels
        y_test = to_categorical(y_test-np.min(y_test))
        #get block labels of given samples
        win_blocks_test = block_labels[test_idxs]
        #get data cube, re-using the scaler fit on the training data
        X_test_cube, Y_test_cube, test_blocks, scaler = get_data_cube(X_test, y_test, win_blocks_test, train = False, scaler = scaler, magic_value = -100)
        print(X_test_cube.shape, Y_test_cube.shape)
        n_features, n_outputs = X_train_cube.shape[2], Y_test_cube.shape[2]

        for rep in range(nreps):
            print('***** Rep: %i *****'% (rep+1))
            # permute order in which subjects' data is used for training
            train_subs_perm = np.random.permutation(train_subs)
            #initialize empty list
            train_f1_scores = np.empty((train_subs.size))

            # --- Training Stage ---
            # Define model architecture
            # setting timestep dimension to None
            model = get_rnn_model((None,n_features), n_outputs, n_grus = model_dict['n_grus'], n_dense_pre = model_dict['n_dense_pre'],\
                                activation = model_dict['activation'],\
                                mask_value = -100)
            model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

            # Get transform module template
            transform_module_template = get_transform_module(model, Input(shape = (None,n_features)))

            # iterate through subjects' data
            for sub_idx, train_sub in enumerate(train_subs_perm):
                print('Training: Subject %02d out of %02d'%(sub_idx+1, train_subs.size))

                # get subject-specific samples
                train_sub_idxs = np.where(sub_labels == train_sub)[0]
                train_sub_blocks = np.unique(block_labels[train_sub_idxs])
                train_sub_segment_idxs = np.where(np.isin(train_blocks,train_sub_blocks))[0]
                X_cube_sub = X_train_cube[train_sub_segment_idxs,:,:]
                Y_cube_sub = Y_train_cube[train_sub_segment_idxs,:,:]

                # initialize weights of the transform module
                model = tm_template_weights_to_model(transform_module_template, model)

                print('Training Model')
                # fit network
                model.fit(X_cube_sub, Y_cube_sub, epochs=epochs, batch_size=batch_size, verbose=verbose)

                #copy weights to a transfer module template, save if wanted (currently not used further)
                trained_transfer_module = model_weights_to_tm_template(transform_module_template, model)
                # evaluate on training data
                train_f1_scores[sub_idx] = get_RNN_f1(X_cube_sub, Y_cube_sub, model)
            #put results in dataframe
            results_df.append(_scores_frame(train_f1_scores, train_subs_perm+1, rep, split_count, 'Train'))

            # --- Validation Stage ---
            #freeze the GRU and every layer after it
            pass_flag = False
            for layer in model.layers:
                if isinstance(layer, GRU):
                    pass_flag = True
                if pass_flag:
                    layer.trainable = False
            #changes to `trainable` are only guaranteed to be honoured after compiling again,
            #so re-compile with the same settings before fine-tuning
            model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

            # iterate through test subjects
            for sub_idx, test_sub in enumerate(test_subs):
                print('Validation: Subject %02d out of %02d'%(sub_idx+1, test_subs.size))

                #get relevant subject samples
                test_sub_idxs = np.where(sub_labels == test_sub)[0]
                test_sub_blocks = np.unique(block_labels[test_sub_idxs])
                test_sub_segment_idxs = np.where(np.isin(test_blocks,test_sub_blocks))[0]

                X_cube_sub = X_test_cube[test_sub_segment_idxs,:,:]
                Y_cube_sub = Y_test_cube[test_sub_segment_idxs,:,:]
                #class of each block, read from its first timestep
                test_sub_labels = np.argmax(Y_cube_sub,2)[:,0]

                #stratify split to retain ratio of class labels
                skf = StratifiedKFold(n_splits=n_val_splits,shuffle = True)

                val_train_f1 = np.empty((n_val_splits,))
                val_test_f1 = np.empty((n_val_splits,))

                #systematically use one fold of the data as a held-out test set
                #NOTE(review): split() yields (train, test) index pairs, so the names below are swapped
                #relative to sklearn's: the model is fit on sklearn's test fold and scored on its train
                #folds. Left unchanged in case fitting on the smaller part is intended -- confirm.
                for split_count_val, (blocks_test_idxs, blocks_train_idxs) in enumerate(skf.split(test_sub_blocks, test_sub_labels)):

                    #split data cubes into train and test subsets
                    X_train_cube_sub = X_cube_sub[blocks_train_idxs,:,:]
                    Y_train_cube_sub = Y_cube_sub[blocks_train_idxs,:,:]

                    X_test_cube_sub = X_cube_sub[blocks_test_idxs,:,:]
                    Y_test_cube_sub = Y_cube_sub[blocks_test_idxs,:,:]

                    #initialize transform module
                    model = tm_template_weights_to_model(transform_module_template, model)
                    #train
                    model.fit(X_train_cube_sub, Y_train_cube_sub, epochs=epochs, batch_size=batch_size, verbose=verbose)

                    #copy weights to a transfer module template, save if wanted (currently not used further)
                    trained_transfer_module = model_weights_to_tm_template(transform_module_template, model)

                    #evaluate on training and testing
                    val_train_f1[split_count_val] = get_RNN_f1(X_train_cube_sub, Y_train_cube_sub, model)
                    val_test_f1[split_count_val] = get_RNN_f1(X_test_cube_sub, Y_test_cube_sub, model)

                #put results in dataframe
                results_df.append(_scores_frame(val_train_f1, test_sub+1, rep, split_count, 'Val_Train'))
                results_df.append(_scores_frame(val_test_f1, test_sub+1, rep, split_count, 'Val_Test'))

    results_df = pd.concat(results_df,axis = 0)

    return results_df

In [None]:
def plot_train_loss(train_history, fig_title,fig_fn):
    """
    Plot the training loss (and validation loss, if recorded) per epoch and save the figure.

    Args:
        train_history: object with a `history` dict holding 'loss' and optionally 'val_loss'
            (e.g. the return value of model.fit)
        fig_title: title placed above the axes
        fig_fn: path the figure is written to

    Returns:
        None. The figure is saved to fig_fn and then closed.
    """
    fig,ax = plt.subplots(1,1,figsize=(11,5))
    ax.plot(train_history.history['loss'], label = 'train')
    if 'val_loss' in train_history.history:
        ax.plot(train_history.history['val_loss'], label = 'validation')
    #label axes
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
    #despine the figure created above (not whatever figure happens to be current)
    sns.despine(fig= fig, left = False, right = True, top = True, bottom = True)
    fig.tight_layout()
    fig.suptitle(fig_title, y = 1.05)
    #the title sits above the figure area (y > 1) and the legend outside the axes;
    #bbox_inches='tight' grows the saved area so neither is cut off
    fig.savefig(fig_fn,dpi = 300, bbox_inches = 'tight')
    #close this specific figure to free memory when called in a loop
    plt.close(fig)

def RNN_xsubject_transform_module_model(feature_matrix, target_labels, sub_labels, block_labels, model_dict, train_idxs, test_idxs,\
                                    figure_folder, model_folder,\
                                    verbose = 0, epochs = 40, batch_size = 2, permute = False):
    """
    Train one model sequentially on every subject's training data, saving a transform module
    per subject, the loss curves, and the final model.

    Before each subject the pre-GRU layers are re-initialized from a template; after fitting,
    the model is scored on that subject's training and test segments.

    Args:
        feature_matrix: 2D numpy array with data, dimensions [features, samples]
        target_labels: 1D numpy array with the class label of each sample
        sub_labels: 1D numpy array with the subject id of each sample
        block_labels: 1D numpy array with the block id of each sample
        model_dict: hyperparameters with keys 'n_grus', 'n_dense_pre' and 'activation'
        train_idxs: indices of the samples used for training
        test_idxs: indices of the samples used for testing
        figure_folder: folder the per-subject loss figures are written to
        model_folder: folder the transform modules and the final model are written to
        verbose: verbosity passed to model.fit
        epochs: number of training epochs per subject
        batch_size: batch size passed to model.fit
        permute: if True, class labels are permuted within each subject before training

    Returns:
        train_f1_scores: score on each subject's training segments, in training order
        test_f1_scores: score on each subject's test segments, in training order
        train_subs: subject ids in the (random) order they were trained on; aligned with the scores
        scaler: scaler fit on the training data
    """

    if permute:
        target_labels = permute_class_within_sub(target_labels, block_labels, sub_labels)

    # select training data and pad to get an array where each sample has same number of timesteps
    X_train = feature_matrix[:,train_idxs]
    y_train = target_labels[train_idxs]
    #one-hot encoding of class labels
    y_train = to_categorical(y_train-np.min(y_train))
    #get block labels of given samples
    win_blocks_train = block_labels[train_idxs]

    #get cube
    X_train_cube, Y_train_cube, train_blocks,scaler = get_data_cube(X_train, y_train,win_blocks_train, train = True, magic_value = -100)
    print(X_train_cube.shape, Y_train_cube.shape)
    n_features, n_outputs = X_train_cube.shape[2], Y_train_cube.shape[2]

    # select test data and pad to get an array where each sample has same number of timesteps
    X_test = feature_matrix[:,test_idxs]
    y_test = target_labels[test_idxs]
    #one-hot encoding of class labels
    y_test = to_categorical(y_test-np.min(y_test))
    #get block labels of given samples
    win_blocks_test = block_labels[test_idxs]
    #get data cube, re-using the scaler fit on the training data
    X_test_cube, Y_test_cube, test_blocks, scaler = get_data_cube(X_test, y_test, win_blocks_test, train = False, scaler = scaler, magic_value = -100)
    print(X_test_cube.shape, Y_test_cube.shape)

    # --- Training Stage ---
    # Define model architecture
    # setting timestep dimension to None
    model = get_rnn_model((None,n_features), n_outputs, n_grus = model_dict['n_grus'], n_dense_pre = model_dict['n_dense_pre'],\
                        activation = model_dict['activation'],\
                        mask_value = -100)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Get transform module template
    transform_module_template = get_transform_module(model, Input(shape = (None,n_features)))

    # iterate through subjects' data in random order
    train_subs = np.unique(sub_labels)
    train_subs = np.random.permutation(train_subs)
    train_f1_scores = np.empty((train_subs.size,))
    test_f1_scores = np.empty((train_subs.size,))

    for sub_idx, train_sub in enumerate(train_subs):
        print('Training: Subject %02d out of %02d'%(sub_idx+1, train_subs.size))

        # get subject-specific samples
        train_sub_idxs = np.where(sub_labels == train_sub)[0]
        train_sub_blocks = np.unique(block_labels[train_sub_idxs])
        train_sub_segment_idxs = np.where(np.isin(train_blocks,train_sub_blocks))[0]
        X_cube_sub = X_train_cube[train_sub_segment_idxs,:,:]
        Y_cube_sub = Y_train_cube[train_sub_segment_idxs,:,:]

        # initialize weights of the transform module
        model = tm_template_weights_to_model(transform_module_template, model)

        print('Training Model')
        # fit network
        history = model.fit(X_cube_sub, Y_cube_sub, epochs=epochs, batch_size=batch_size, verbose=verbose)

        #plot training loss
        fig_title = 'Subject %02d'%(train_sub)
        fig_fn = os.path.join(figure_folder,'rnn_model_subject_%02d_all_train_data_permuted_%s_loss.png'%(train_sub,str(permute)))
        plot_train_loss(history, fig_title, fig_fn)

        #copy weights to a transfer module template
        trained_transfer_module = model_weights_to_tm_template(transform_module_template, model)
        #save trained transfer module to file
        model_fn = os.path.join(model_folder, 'transform_module_subject_%02d_all_train_data_permuted_%s.h5'%(train_sub, str(permute)))
        keras.models.save_model(trained_transfer_module, model_fn, save_format= 'h5')
        # evaluate on training data
        train_f1_scores[sub_idx] = get_RNN_f1(X_cube_sub, Y_cube_sub, model)

        # evaluate on the same subject's segments in the test cube
        test_sub_segment_idxs = np.where(np.isin(test_blocks,train_sub_blocks))[0]
        X_cube_sub = X_test_cube[test_sub_segment_idxs,:,:]
        Y_cube_sub = Y_test_cube[test_sub_segment_idxs,:,:]
        test_f1_scores[sub_idx] = get_RNN_f1(X_cube_sub, Y_cube_sub, model)
    #save complete model to file
    model_fn = os.path.join(model_folder, 'trained_model_all_train_data_permuted_%s.h5'%(str(permute)))
    keras.models.save_model(model, model_fn, save_format= 'h5')

    #return the full subject order (not just the last subject of the loop) so that callers
    #can pair every score with the subject it belongs to
    return train_f1_scores, test_f1_scores, train_subs, scaler


In [None]:
# Output locations on Google Drive: result tables, saved models, and training-history figures
results_folder = '/content/drive/MyDrive/limb-position-EMG-Repo/results_data/xsubject_transform_module/RNN/'
model_dir = '/content/drive/MyDrive/limb-position-EMG-Repo/model_data/xsubject_transform_module/RNN/'
figure_dir = '/content/drive/MyDrive/limb-position-EMG-Repo/figures/training_history/xsubject_transform_module/RNN/'



# key into model_dict selecting the hyperparameter set to train
model_id = 1
# number of repetitions of the whole training procedure
nreps = 10


# RNN training args, forwarded to the training function
verbose = 0
epochs = 40
batch_size = 2

# samples with series label 0 are used for training, samples with series label 1 for testing
train_idxs = np.where(series_labels_all==0)[0]
test_idxs = np.where(series_labels_all==1)[0]

np.random.seed(1)# seeds NumPy's global RNG; NOTE(review): other RNGs (e.g. the deep-learning backend's) are not seeded here -- confirm whether that matters for replicability
results_df = []# list that collects one DataFrame per result set
for rep in range(nreps):

    figure_folder = os.path.join(figure_dir,'rep_%i'%(rep))
    if not os.path.isdir(figure_folder):
        os.makedirs(figure_folder)
    model_folder = os.path.join(model_dir,'rep_%i'%(rep))
    if not os.path.isdir(model_folder):
        os.makedirs(model_folder)

    print('Rep %02d'%(rep))
    train_f1_scores, test_f1_scores, sub_order, scaler = RNN_xsubject_transform_module_model(feature_matrix_all.T, target_labels_all, subject_id_all, block_labels_all,\
                                        model_dict[model_id], train_idxs, test_idxs, figure_folder, model_folder,\
                                        verbose = verbose, epochs = epochs, batch_size = batch_size, permute = False)

    #put results in dataframe
    results_df.append(pd.DataFrame({'F1_score':train_f1_scores,\
                                    'Subject': sub_order,\
                                    'Rep':[rep+1 for x in range(train_f1_scores.size)],\
                                    'Type':['Train' for x in range(train_f1_scores.size)],\
                                    'Shuffled':[False for x in range(train_f1_scores.size)]\
                                    }))

    results_df.append(pd.DataFrame({'F1_score':test_f1_scores,\
                                    'Subject': sub_order,\
                                    'Rep':[rep+1 for x in range(test_f1_scores.size)],\
                                    'Type':['Train_val' for x in range(test_f1_scores.size)],\
                                    'Shuffled':[False for x in range(test_f1_scores.size)]\
                                    }))
    
    print('Permuted Data')
    train_f1_scores, test_f1_scores, sub_order, scaler = RNN_xsubject_transform_module_model(feature_matrix_all.T, target_labels_all, subject_id_all, block_labels_all,\
                                        model_dict[model_id], train_idxs, test_idxs, figure_folder, model_folder,\
                                        verbose = verbose, epochs = epochs, batch_size = batch_size, permute = True)


    #put results in dataframe
    results_df.append(pd.DataFrame({'F1_score':train_f1_scores,\
                                    'Subject': sub_order,\
                                    'Rep':[rep+1 for x in range(train_f1_scores.size)],\
                                    'Type':['Train' for x in range(train_f1_scores.size)],\
                                    'Shuffled':[True for x in range(train_f1_scores.size)]
                                    }))

    results_df.append(pd.DataFrame({'F1_score':test_f1_scores,\
                                    'Subject': sub_order,\
                                    'Rep':[rep+1 for x in range(test_f1_scores.size)],\
                                    'Type':['Train_val' for x in range(test_f1_scores.size)],\
                                    'Shuffled':[True for x in range(test_f1_scores.size)]\
                                    }))
    
#concatenate
results_df = pd.concat(results_df,axis = 0)

#save results to file
results_fn = 'model_all_training_data_results.h5'
results_df.to_hdf(os.path.join(results_folder,results_fn), key='results_df', mode='w')

#save scaler
scaler_fn = 'scaler_all_training_data.pkl'
with open(os.path.join(model_dir,scaler_fn), "wb") as output_file:
    pickle.dump(scaler, output_file)

Rep 00
(372, 45, 16) (372, 45, 6)
(372, 40, 16) (372, 40, 6)
Training: Subject 01 out of 31
Training Model
Training: Subject 02 out of 31
Training Model
Training: Subject 03 out of 31
Training Model
Training: Subject 04 out of 31
Training Model
Training: Subject 05 out of 31
Training Model
Training: Subject 06 out of 31
Training Model
Training: Subject 07 out of 31
Training Model
Training: Subject 08 out of 31
Training Model
Training: Subject 09 out of 31
Training Model
Training: Subject 10 out of 31
Training Model
Training: Subject 11 out of 31
Training Model
Training: Subject 12 out of 31
Training Model
Training: Subject 13 out of 31
Training Model
Training: Subject 14 out of 31
Training Model
Training: Subject 15 out of 31
Training Model
Training: Subject 16 out of 31
Training Model
Training: Subject 17 out of 31
Training Model
Training: Subject 18 out of 31
Training Model
Training: Subject 19 out of 31
Training Model
Training: Subject 20 out of 31
Training Model
Training: Subject 2

In [None]:
# preview the first five rows of the combined results table
results_df.iloc[:5]

Unnamed: 0,F1_score,Subject,Rep,Type,Shuffled
0,1.0,9.0,1,Train,False
1,1.0,9.0,1,Train,False
2,1.0,9.0,1,Train,False
3,0.980364,9.0,1,Train,False
4,0.994178,9.0,1,Train,False


In [None]:
# per-group mean of the remaining columns for every (Type, Shuffled) combination
group_keys = ['Type', 'Shuffled']
results_df.groupby(group_keys).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,F1_score,Subject,Rep
Type,Shuffled,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Train,False,0.962778,20.0,5.5
Train,True,0.371298,27.6,5.5
