In [36]:
import pickle
X_hr,y_stress,y_participant,X_time = pickle.load(open('../data/tabular_data_60_seconds_ppg_rr.p','rb'))

In [37]:
X_hr = 60/X_hr

In [38]:
from typing import List
import numpy as np
import scipy.signal as signal
import datetime
from scipy.stats import iqr
import numpy as np
from scipy.stats import iqr
from scipy import interpolate, signal
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.patches as mpatches
from collections import OrderedDict

def frequencyDomain(tmStamps,RRints, lf_bw = 0.11, hf_bw = 0.1):
    
    #Remove ectopic beats
    #RR intervals differing by more than 20% from the one proceeding it are removed
    NNs = []
    tss = []
    for c, rr in enumerate(RRints):        
        if abs(rr - RRints[c-1]) <= 0.20 * RRints[c-1]:
            NNs.append(rr)
            tss.append(tmStamps[c])
            
            
    frequency_range = np.linspace(0.001, 1, 10000)
    NNs = np.array(NNs)
    NNs = NNs - np.mean(NNs)
    result = signal.lombscargle(tss, NNs, frequency_range)
        
    #Pwelch w/ zero pad     
    fxx = frequency_range 
    pxx = result 
    
    vlf= (0.003, 0.04)
    lf = (0.04, 0.15)
    hf = (0.15, 0.4)
    
    plot_labels = ['VLF', 'LF', 'HF']
        
    df = fxx[1] - fxx[0]
    vlf_power = np.trapz(pxx[np.logical_and(fxx >= vlf[0], fxx < vlf[1])], dx = df)      
    lf_power = np.trapz(pxx[np.logical_and(fxx >= lf[0], fxx < lf[1])], dx = df)            
    hf_power = np.trapz(pxx[np.logical_and(fxx >= hf[0], fxx < hf[1])], dx = df)             
    totalPower = vlf_power + lf_power + hf_power
    
    #Normalize and take log
    vlf_NU_log = np.log((vlf_power / (totalPower - vlf_power)) + 1)
    lf_NU_log = np.log((lf_power / (totalPower - vlf_power)) + 1)
    hf_NU_log = np.log((hf_power / (totalPower - vlf_power)) + 1)
    lfhfRation_log = np.log((lf_power / hf_power) + 1)   
    
    freqDomainFeats = {'VLF_Power': vlf_NU_log, 'LF_Power': lf_NU_log,
                       'HF_Power': hf_NU_log, 'LF/HF': lfhfRation_log}
                       
    return freqDomainFeats



def ecg_feature_computation(a):
    b = np.arange(0,60,2)
    c =  [np.var(a),iqr(a),np.mean(a),np.median(a),np.percentile(a,80),np.percentile(a,20),np.median(a)]+list(frequencyDomain(b,a).values())
    return np.array(c)

from joblib import Parallel, delayed

X_feature = np.array(Parallel(n_jobs=30,verbose=1)(delayed(ecg_feature_computation)(a) for a in X_hr))

[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done 141 tasks      | elapsed:    1.5s
[Parallel(n_jobs=30)]: Done 1516 tasks      | elapsed:    2.7s
[Parallel(n_jobs=30)]: Done 7116 tasks      | elapsed:    6.7s
[Parallel(n_jobs=30)]: Done 14316 tasks      | elapsed:   11.7s
[Parallel(n_jobs=30)]: Done 23116 tasks      | elapsed:   17.8s
[Parallel(n_jobs=30)]: Done 33516 tasks      | elapsed:   25.2s
[Parallel(n_jobs=30)]: Done 42928 out of 42928 | elapsed:   31.9s finished


In [39]:
n_feature = 11
X_hr = X_feature.reshape(-1,n_feature,1)

In [40]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,LeaveOneGroupOut,LeavePGroupsOut
from sklearn.metrics import accuracy_score
from tensorflow import keras
from tensorflow.keras import backend as K 
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import numpy as np
%matplotlib notebook
from keras.layers import Conv1D,Reshape,BatchNormalization,TimeDistributed, \
Dropout,Input,MaxPooling1D,Flatten,Dense,Input, GaussianNoise,LSTM, Bidirectional, Input
from keras.models import Model, Sequential
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer,LabelEncoder
from sklearn.metrics.pairwise import euclidean_distances

In [41]:
train_participant = y_participant.copy()

In [42]:
np.unique(y_participant),train_participant.shape

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), (42928,))

In [43]:
train_x = X_hr
train_y = y_participant

In [44]:
train_x, test_x, train_y, test_y,participant_ids_train, participant_ids_test = train_test_split(X_hr,
                                                            train_participant,
                                                            y_participant,
                                                            test_size = 0.2,
                                                            random_state=41,
                                                            stratify=y_participant)
train_x, val_x, train_y, val_y, participant_ids_train, participant_ids_val  = train_test_split(train_x,
                                                            train_y,
                                                            participant_ids_train,
                                                            test_size = 0.1,
                                                            random_state=41,
                                                            stratify=participant_ids_train)
train_x.shape,test_x.shape,val_x.shape,train_y.shape,test_y.shape,val_y.shape,participant_ids_train.shape

((30907, 11, 1),
 (8586, 11, 1),
 (3435, 11, 1),
 (30907,),
 (8586,),
 (3435,),
 (30907,))

In [55]:
def get_model(input_shape=(30,1),act='relu',loss="categorical_crossentropy",opt='adam',n_classes=350):
    model =  Sequential()
    model.add(Conv1D(100,10,input_shape=input_shape,activation='linear',kernel_initializer='normal',padding='same'))
    model.add(Conv1D(100,10,input_shape=input_shape,activation='relu',kernel_initializer='normal',padding='same'))
    model.add(MaxPooling1D(2))
    model.add(BatchNormalization())
    model.add(Dropout(.2))
    model.add(Conv1D(200,10,activation='relu',kernel_initializer='normal',padding='same'))
    model.add(MaxPooling1D(2))
    model.add(BatchNormalization())
    model.add(Dropout(.2))
    model.add(Conv1D(300,10,activation='relu',kernel_initializer='normal',padding='same'))
    model.add(MaxPooling1D(2))
    model.add(BatchNormalization())
    model.add(Conv1D(50,2,activation='relu',kernel_initializer='normal',padding='same'))
    model.add(MaxPooling1D(2))
    model.add(BatchNormalization())
    model.add(Dropout(.2))
    model.add(Flatten())
    model.add(Dense(n_classes//2,activation=None))
    model.add(Dense(n_classes,activation=None))
#     model.add(Dense(n_classes,activation='softmax',kernel_initializer='normal'))
    model.add(tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))
#     model.compile(loss=loss,optimizer=opt,metrics=['acc'])

    return model

def get_model_conv_dense(input_shape=(11,),act='relu',loss="categorical_crossentropy",opt='adam',n_classes=350):
    model =  Sequential()
    model.add(Input(shape=input_shape))
    model.add(Conv1D(100,2,activation='relu',kernel_initializer='normal',padding='same'))
    model.add(Conv1D(200,2,activation='relu',kernel_initializer='normal',padding='same'))
    model.add(Flatten())
    model.add(Dense(100,activation=act))
    model.add(BatchNormalization())
    model.add(Dense(200,activation=act))
    model.add(Dropout(.2))
#     model.add(Flatten())
    model.add(Dense(n_classes//2,activation=None))
    model.add(Dense(n_classes,activation=None))
    model.add(tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))
    return model


In [56]:
n_classes = 1000
model = get_model_conv_dense(input_shape=(11,1),n_classes=n_classes)

In [57]:
model.compile(optimizer='adam',loss=tfa.losses.TripletSemiHardLoss())

In [58]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_13 (Conv1D)           (None, 11, 100)           300       
_________________________________________________________________
conv1d_14 (Conv1D)           (None, 11, 200)           40200     
_________________________________________________________________
flatten_9 (Flatten)          (None, 2200)              0         
_________________________________________________________________
dense_24 (Dense)             (None, 100)               220100    
_________________________________________________________________
batch_normalization_5 (Batch (None, 100)               400       
_________________________________________________________________
dense_25 (Dense)             (None, 200)               20200     
_________________________________________________________________
dropout_3 (Dropout)          (None, 200)              

In [59]:
from keras.models import load_model
filepath = '../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5'

In [60]:
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=40)
callbacks_list = [es,checkpoint]
history = model.fit(train_x,train_y,validation_data=(val_x,val_y), epochs=400, batch_size=200,
          callbacks=callbacks_list,shuffle=True)

Epoch 1/400
Epoch 00001: val_loss improved from inf to 0.99626, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 2/400
Epoch 00002: val_loss improved from 0.99626 to 0.98798, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 3/400
Epoch 00003: val_loss did not improve from 0.98798
Epoch 4/400
Epoch 00004: val_loss did not improve from 0.98798
Epoch 5/400
Epoch 00005: val_loss did not improve from 0.98798
Epoch 6/400
Epoch 00006: val_loss did not improve from 0.98798
Epoch 7/400
Epoch 00007: val_loss did not improve from 0.98798
Epoch 8/400
Epoch 00008: val_loss improved from 0.98798 to 0.98624, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 9/400
Epoch 00009: val_loss did not improve from 0.98624
Epoch 10/400
Epoch 00010: val_loss did not improve from 0.98624
Epoch 11/400
Epoch 00011: val_loss did not improve from 0.98624
Epoch 12/400
Epoch 00012: val_loss did not improve from 0.98624
Epoch 13/400
Epoch 0

Epoch 37/400
Epoch 00037: val_loss improved from 0.98513 to 0.98486, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 38/400
Epoch 00038: val_loss did not improve from 0.98486
Epoch 39/400
Epoch 00039: val_loss did not improve from 0.98486
Epoch 40/400
Epoch 00040: val_loss did not improve from 0.98486
Epoch 41/400
Epoch 00041: val_loss did not improve from 0.98486
Epoch 42/400
Epoch 00042: val_loss did not improve from 0.98486
Epoch 43/400
Epoch 00043: val_loss did not improve from 0.98486
Epoch 44/400
Epoch 00044: val_loss did not improve from 0.98486
Epoch 45/400
Epoch 00045: val_loss did not improve from 0.98486
Epoch 46/400
Epoch 00046: val_loss did not improve from 0.98486
Epoch 47/400
Epoch 00047: val_loss improved from 0.98486 to 0.98430, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 48/400
Epoch 00048: val_loss did not improve from 0.98430
Epoch 49/400
Epoch 00049: val_loss did not improve from 0.98430
Epoch 50/400
Epoch

Epoch 00073: val_loss did not improve from 0.98394
Epoch 74/400
Epoch 00074: val_loss did not improve from 0.98394
Epoch 75/400
Epoch 00075: val_loss did not improve from 0.98394
Epoch 76/400
Epoch 00076: val_loss did not improve from 0.98394
Epoch 77/400
Epoch 00077: val_loss did not improve from 0.98394
Epoch 78/400
Epoch 00078: val_loss did not improve from 0.98394
Epoch 79/400
Epoch 00079: val_loss did not improve from 0.98394
Epoch 80/400
Epoch 00080: val_loss did not improve from 0.98394
Epoch 81/400
Epoch 00081: val_loss did not improve from 0.98394
Epoch 82/400
Epoch 00082: val_loss improved from 0.98394 to 0.98369, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 83/400
Epoch 00083: val_loss did not improve from 0.98369
Epoch 84/400
Epoch 00084: val_loss did not improve from 0.98369
Epoch 85/400
Epoch 00085: val_loss did not improve from 0.98369
Epoch 86/400
Epoch 00086: val_loss did not improve from 0.98369
Epoch 87/400
Epoch 00087: val_loss did not

Epoch 110/400
Epoch 00110: val_loss did not improve from 0.98355
Epoch 111/400
Epoch 00111: val_loss did not improve from 0.98355
Epoch 112/400
Epoch 00112: val_loss did not improve from 0.98355
Epoch 113/400
Epoch 00113: val_loss improved from 0.98355 to 0.98275, saving model to ../model_files/base_cnn_60_seconds_ppg_hrv_wesad.hdf5
Epoch 114/400
Epoch 00114: val_loss did not improve from 0.98275
Epoch 115/400
Epoch 00115: val_loss did not improve from 0.98275
Epoch 116/400
Epoch 00116: val_loss did not improve from 0.98275
Epoch 117/400
Epoch 00117: val_loss did not improve from 0.98275
Epoch 118/400
Epoch 00118: val_loss did not improve from 0.98275
Epoch 119/400
Epoch 00119: val_loss did not improve from 0.98275
Epoch 120/400
Epoch 00120: val_loss did not improve from 0.98275
Epoch 121/400
Epoch 00121: val_loss did not improve from 0.98275
Epoch 122/400
Epoch 00122: val_loss did not improve from 0.98275
Epoch 123/400
Epoch 00123: val_loss did not improve from 0.98275
Epoch 124/400
E

Epoch 147/400
Epoch 00147: val_loss did not improve from 0.98275
Epoch 148/400
Epoch 00148: val_loss did not improve from 0.98275
Epoch 149/400
Epoch 00149: val_loss did not improve from 0.98275
Epoch 150/400
Epoch 00150: val_loss did not improve from 0.98275
Epoch 151/400
Epoch 00151: val_loss did not improve from 0.98275
Epoch 152/400
Epoch 00152: val_loss did not improve from 0.98275
Epoch 153/400
Epoch 00153: val_loss did not improve from 0.98275
Epoch 00153: early stopping


In [61]:
model = load_model(filepath)
train_embeddings = model.predict(train_x)

In [27]:
def give_rank_accuracy(train_embeddings,train_y,rank = 10):
    distance_matrix = euclidean_distances(train_embeddings)
    distance_matrix_2d = np.zeros((distance_matrix.shape[0],
                               distance_matrix.shape[0],
                               2))
    distance_matrix_2d[:,:,0] = distance_matrix
    train_y_matrix = np.concatenate([train_y.reshape(1,-1)]*distance_matrix.shape[0])
    distance_matrix_2d[:,:,1] = train_y_matrix
    match = 0
    for i in range(distance_matrix.shape[0]):
        row = distance_matrix_2d[i]
        row = row[row[:,0].argsort(),:]
        if rank==1:
            if train_y[i]==row[1,1]:
                match+=1
        else:
            row = set(row[1:(rank+1)][:,1])
            if train_y[i] in row:
                match+=1
    return match/distance_matrix.shape[0]

In [28]:
rank_1 = give_rank_accuracy(train_embeddings,train_y,rank = 1)
rank_5 = give_rank_accuracy(train_embeddings,train_y,rank = 5)
rank_10 = give_rank_accuracy(train_embeddings,train_y,rank = 10)
print(rank_1,rank_5,rank_10)

0.597210987802116 0.8237292522729479 0.894975248325622


In [29]:
test_embeddings = model.predict(test_x)

In [30]:
rank_1 = give_rank_accuracy(test_embeddings,test_y,rank = 1)
rank_5 = give_rank_accuracy(test_embeddings,test_y,rank = 5)
rank_10 = give_rank_accuracy(test_embeddings,test_y,rank = 10)
print(rank_1,rank_5,rank_10)

0.4133473095737247 0.7111576985790822 0.8232005590496156


In [62]:
test_x,y_stress,test_y,X_time = pickle.load(open('../data/tabular_data_60_seconds_ppg_rr_dalia.p','rb'))
from joblib import Parallel, delayed
test_x = np.array(Parallel(n_jobs=30,verbose=1)(delayed(ecg_feature_computation)(a) for a in test_x))

[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done 141 tasks      | elapsed:    1.5s
[Parallel(n_jobs=30)]: Done 1044 tasks      | elapsed:    2.4s
[Parallel(n_jobs=30)]: Done 1439 out of 1439 | elapsed:    3.3s finished


In [63]:
test_x = test_x.reshape(-1,n_feature,1)

In [64]:
test_embeddings = model.predict(test_x)
rank_1 = give_rank_accuracy(test_embeddings,test_y,rank = 1)
rank_5 = give_rank_accuracy(test_embeddings,test_y,rank = 5)
rank_10 = give_rank_accuracy(test_embeddings,test_y,rank = 10)
print(rank_1,rank_5,rank_10)

0.07852675469075747 0.32175121612230717 0.5357887421820708
