In [1]:
from scipy.signal import spectrogram
from statistics import mean

#Create training data from braking event EMG via these steps:
#Get segments of braking event EMG.
#Covert to PSD.
#Store PSD components of each segment in variable for training.
def createDatasetFromEMGEvents(timestamps, data, samplingRate, numberOfPSDComponents = 4):
    dt = 1/samplingRate #Time increment in seconds
    
    dt1_index = 0
    dt2_index = int(100/1000/dt) #Covert timestamps to seconds and divde by time increment to get index of datapoint at 100 ms.
    baselineCorrection_emg = mean(data[dt1_index:dt2_index+1])
    
    #Define variables to split data in first 1/2 for training and second 1/2 for validation
    brakingEvent_emg_PSD_train = []
    brakingEvent_emg_PSD_val = []

    dt = 1/samplingRate #Time increment in seconds

    for time in timestamps: #Iterate through event timestamps in milliseconds
        #index = int(time/1000/dt) #Covert timestamps to seconds and divde by time increment to get index of datapoint
        dt1_index = int((time-300)/1000/dt) #Index of datapoint 300 ms before event datapoint.
        dt2_index = int((time+1200)/1000/dt) #Index of datapoint 1200 ms after event datapoint.

        brakingEvent_emg = data[dt1_index:dt2_index+1]-baselineCorrection_emg
        #Normalize signal data WRT to max and find generate power spectral density 
        freq_data, time_data, pwr_spectral_density_data = spectrogram(
                                                            np.array([brakingEvent_emg]),
                                                            samplingRate
                                                            )
        if time < len(data)*1000*dt/2:
            brakingEvent_emg_PSD_train.append(np.sort(np.sum(pwr_spectral_density_data[0],1))[-numberOfPSDComponents:None].tolist())
            continue
        brakingEvent_emg_PSD_val.append(np.sort(np.sum(pwr_spectral_density_data[0],1))[-numberOfPSDComponents:None].tolist())
    return brakingEvent_emg_PSD_train, brakingEvent_emg_PSD_val

#Create baseline training EMG data containing no braking event EMG via these steps:
#Get 100 ms EMG segment at beginning of data to use for baseline correction.
#Get segments of EMG without braking events and subract 100 ms EMG segment.
#Covert to PSD.
#Store PSD components of each segment in variable for training.

def createDatasetFromEMGWithoutEvents(timestamps, data, samplingRate, numberOfPSDComponents=4):
    dt = 1/samplingRate #Time increment in seconds
    
    dt1_index = 0
    dt2_index = int(100/1000/dt) #Covert timestamps to seconds and divde by time increment to get index of datapoint at 100 ms.
    baselineCorrection_emg = mean(data[dt1_index:dt2_index+1])

    noEvent_emg_PSD_train = []
    noEvent_emg_PSD_val = []

    for i in range(0, len(timestamps)): #Iterate through all event timestamps in milliseconds
        if timestamps[i][0] < 4500: #Skip iteration if there is not enough time to get an emg segment between time of first datapoint and time of first event. 
            continue

        if i > 0:
            if timestamps[i][0]-timestamps[i-1][0] < 7500: #Skip iteration if there is not enough time to get emg segment between current and previous timestamps.
                continue

        numberOfSegments = int((timestamps[i][0]-timestamps[i-1][0]-6000)/2000) #Calculate how many user-specified EMG segments can fit between two events.
        
        for segmentNum in range(0, numberOfSegments):
            #Add 500 ms between each EMG segment, except for segment closest in time to event
            dt1_index = int((timestamps[i][0]-5000-(2000*segmentNum)-500)/1000/dt) #500 represents 500 ms offset between each EEG segment.
            dt2_index = int((timestamps[i][0]-3000-(2000*segmentNum))/1000/dt)
            
            noEvent_emg = data[dt1_index:dt2_index+1]-baselineCorrection_emg #Get EEG segment immediately prior to current event.
           
            #Normalize signal data WRT to max and find generate power spectral density 
            freq_data, time_data, pwr_spectral_density_data = spectrogram( 
                                                                np.array([noEvent_emg]),
                                                                samplingRate
                                                                )
            
            if timestamps[i][0] < len(data)*1000*dt/2: #Check if timestamp is less half than total time of EMG data.
                noEvent_emg_PSD_train.append(np.sort(np.sum(pwr_spectral_density_data[0],1))[-numberOfPSDComponents:None].tolist())
                continue
            noEvent_emg_PSD_val.append(np.sort(np.sum(pwr_spectral_density_data[0],1))[-numberOfPSDComponents:None].tolist())
           
        if i == len(timestamps): #If iteration reaches last event timestamp, set indices to get any possible EMG segment beyond timestamp.
            numberOfSegments = int((len(data)*1000*dt/2-timestamps[i][0])/2000) #Calculate how many user-specified EMG segments can fit between two events.

            for segmentNum in range(0, numberOfSegments):
                #Add 500 ms between each EMG segment, except for segment closest in time to event
                dt1_index = int((timestamps[i][0]+3000+(2000*segmentNum))/1000/dt)
                dt2_index = int((timestamps[i][0]+5000+(2000*segmentNum)-500)/1000/dt) #500 represents 500 ms offset between each EEG segment.

                noEvent_emg = data[dt1_index:dt2_index+1]  #Get emg segment 
                #Normalize signal data WRT to max and find generate power spectral density 
                freq_data, time_data, pwr_spectral_density_data = spectrogram( 
                                                                    np.array([noEvent_emg]),#/max(noEvent_emg)]), 
                                                                    samplingRate
                                                                    )
                if timestamps[i][0] < len(data)*1000*dt/2:
                    noEvent_emg_PSD_train.append(np.sort(np.sum(pwr_spectral_density_data[0],1))[-numberOfPSDComponents:None].tolist())
                    continue
                noEvent_emg_PSD_val.append(np.sort(np.sum(pwr_spectral_density_data[0],1))[-numberOfPSDComponents:None].tolist())
        
    return noEvent_emg_PSD_train, noEvent_emg_PSD_val

#Create datasets, train model, and evaluate trained model.

def createDatasets(brakingEvent_emg_PSD_train, noEvent_emg_PSD_train, brakingEvent_emg_PSD_val, noEvent_emg_PSD_val):
    #Label = 0 indicates no event; label = 1 indicates EMG braking event
    trainData = np.concatenate((brakingEvent_emg_PSD_train, noEvent_emg_PSD_train))
    trainLabels_event = np.ones(len(brakingEvent_emg_PSD_train),dtype=int) 
    trainLabels_noEvent = np.zeros(len(noEvent_emg_PSD_train),dtype=int) 
    trainLabels = np.concatenate((trainLabels_event, trainLabels_noEvent))

    valData = np.concatenate((brakingEvent_emg_PSD_val, noEvent_emg_PSD_val))
    valLabels_event = np.ones(len(brakingEvent_emg_PSD_val),dtype=int) 
    valLabels_noEvent = np.zeros(len(noEvent_emg_PSD_val),dtype=int) 
    valLabels = np.concatenate((valLabels_event, valLabels_noEvent))
    
    return trainData, trainLabels, valData, valLabels

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization

def trainModel(trainData, trainLabels, valData, valLabels):
    # number of input columns for autoencoder
    n_inputs = trainData.shape[1]
    # define encoder
    visible = Input(shape=(n_inputs,))
    # encoder level 1
    x = Dense(50)(visible)
    # output layer
    output = Dense(1, activation='sigmoid')(x)
    #output = tf.keras.backend.round(output)
    # define autoencoder model
    model = Model(inputs=visible, outputs=output)
    # compile autoencoder model
    model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.AUC(), tf.keras.metrics.Accuracy()])
    # fit the autoencoder model to reconstruct input
    model.fit(trainData, trainLabels, epochs=50, batch_size=1, verbose=2, validation_split = 0.2)
    valResults = evaluateModel(model, valData, valLabels)
    return valResults

def evaluateModel(model, valData, valLabels):
    valResults = model.evaluate(valData, valLabels, batch_size=1)
    return valResults

In [None]:
import numpy as np
import h5py
from glob import glob
from tqdm import tqdm
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

#Find all paths for the test subject data.
testSubjectDataFilePaths =  glob('emergencyBrakingEMG/EMG_Dataset_Haufe/*.mat')
print(testSubjectDataFilePaths)
allTestSubjectAUCs = []

numberOfPSDComponents = 129 #Maximum number of components for samplingRate = 200 hz.

pbar1 = tqdm(testSubjectDataFilePaths)
pbar1.set_description('Processing test subject data')
for path in pbar1:
    f = h5py.File(path,'r')
    '''
    Read, sort and assign experimental to variables for: 
    signal channel names: cnt.clab
    sampling frequency: cnt.fs
    time-series data: cnt.x
    '''
    cnt = f.get('cnt')
    cnt.clab = np.array(cnt['clab'])
    cnt.fs = np.array(cnt['fs'])
    cnt.x = np.array(cnt['x']) 

    samplingRate = cnt.fs[0][0] #Down-/upsample rate for all data = 200Hz.
    
    #Read data for events corresponding to experimental data.
    mrk = f.get('mrk')
    mrk.classNames = mrk['className']
    mrk.time = mrk['time']
    mrk.y = mrk['y']
    mrk.events = mrk['event']
    #Find all car braking events (brake lights of lead vehicle turn on) and store corresponding timestamps.
    carBrakeTime = []
    for i in range(0, len(mrk.y)):
        if mrk.y[i][1] == 1: #Check if car is braking, i.e. y[i] = 1
            carBrakeTime.append(mrk.time[i][0]) #Store timestamp 
            
    #Create train and validation datasets
    brakingEvent_emg_PSD_train = []
    brakingEvent_emg_PSD_val = []

    noEvent_emg_PSD_train = []
    noEvent_emg_PSD_val = []
        
    data = cnt.x[61] #Channel 61 for EMG of tibialis anterior
    event_emg_PSD_train, event_emg_PSD_val = createDatasetFromEMGEvents(carBrakeTime, 
                                                                        data, 
                                                                        samplingRate, 
                                                                        numberOfPSDComponents)
    _noEvent_emg_PSD_train, _noEvent_emg_PSD_val = createDatasetFromEMGWithoutEvents(mrk.time, 
                                                                                     data, 
                                                                                     samplingRate, 
                                                                                     numberOfPSDComponents)

    for array in event_emg_PSD_train: brakingEvent_emg_PSD_train.append(array)
    for array in event_emg_PSD_val: brakingEvent_emg_PSD_val.append(array)
    for array in _noEvent_emg_PSD_train: noEvent_emg_PSD_train.append(array)
    for array in _noEvent_emg_PSD_val: noEvent_emg_PSD_val.append(array)
        
    trainData, trainLabels, valData, valLabels = createDatasets(brakingEvent_emg_PSD_train,
                                                                noEvent_emg_PSD_train,
                                                                brakingEvent_emg_PSD_val,
                                                                noEvent_emg_PSD_val)
  
    valResults = trainModel(trainData, trainLabels, valData, valLabels)
    # plot loss
    #plt.plot(history.history['loss'], label='train')
    #plt.plot(history.history['val_loss'], label='test')
    #plt.legend()
    #plt.show()
    
    AUCaccuracy = valResults[1]
    allTestSubjectAUCs.append(AUCaccuracy)
grandAverageAUC = mean(allTestSubjectAUCs)
print("MLP model AUC accuracy: ", round(grandAverageAUC,3))

['emergencyBrakingEMG/EMG_Dataset_Haufe/VPgac.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPih.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgal.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPii.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgaa.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPsaj.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgam.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPae.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPbba.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPsal.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgab.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPja.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPbax.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPdx.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPbad.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgae.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgah.mat', 'emergencyBrakingEMG/EMG_Dataset_Haufe/VPgag.mat']


Processing test subject data:   0%|          | 0/18 [00:00<?, ?it/s]

Epoch 1/50
1224/1224 - 3s - loss: 7.9262 - auc: 0.8463 - accuracy: 0.1225 - val_loss: 3.5230e-04 - val_auc: 0.0000e+00 - val_accuracy: 0.1013 - 3s/epoch - 3ms/step
Epoch 2/50
1224/1224 - 3s - loss: 5.9050 - auc: 0.8906 - accuracy: 0.1315 - val_loss: 0.3829 - val_auc: 0.0000e+00 - val_accuracy: 0.0359 - 3s/epoch - 2ms/step
Epoch 3/50
1224/1224 - 2s - loss: 6.3605 - auc: 0.9107 - accuracy: 0.1471 - val_loss: 1.1463 - val_auc: 0.0000e+00 - val_accuracy: 0.0490 - 2s/epoch - 2ms/step
Epoch 4/50
1224/1224 - 2s - loss: 5.6500 - auc: 0.9080 - accuracy: 0.1462 - val_loss: 1.5237 - val_auc: 0.0000e+00 - val_accuracy: 0.0229 - 2s/epoch - 2ms/step
Epoch 5/50
1224/1224 - 2s - loss: 3.4174 - auc: 0.9281 - accuracy: 0.1373 - val_loss: 0.6358 - val_auc: 0.0000e+00 - val_accuracy: 0.0359 - 2s/epoch - 2ms/step
Epoch 6/50
1224/1224 - 2s - loss: 3.3264 - auc: 0.9288 - accuracy: 0.1283 - val_loss: 11.4394 - val_auc: 0.0000e+00 - val_accuracy: 0.0000e+00 - 2s/epoch - 2ms/step
Epoch 7/50
1224/1224 - 2s - los

Processing test subject data:   6%|▌         | 1/18 [02:40<45:30, 160.59s/it]

Epoch 1/50
1234/1234 - 6s - loss: 31.0639 - auc_1: 0.8238 - accuracy: 0.2374 - val_loss: 34.3192 - val_auc_1: 0.0000e+00 - val_accuracy: 0.0841 - 6s/epoch - 4ms/step
Epoch 2/50
1234/1234 - 3s - loss: 19.1622 - auc_1: 0.9320 - accuracy: 0.4619 - val_loss: 6.2932 - val_auc_1: 0.0000e+00 - val_accuracy: 0.2136 - 3s/epoch - 2ms/step
Epoch 3/50
1234/1234 - 3s - loss: 15.4731 - auc_1: 0.9281 - accuracy: 0.3436 - val_loss: 16.1902 - val_auc_1: 0.0000e+00 - val_accuracy: 0.1197 - 3s/epoch - 2ms/step
Epoch 4/50
1234/1234 - 3s - loss: 14.2640 - auc_1: 0.9354 - accuracy: 0.4246 - val_loss: 78.2426 - val_auc_1: 0.0000e+00 - val_accuracy: 0.0162 - 3s/epoch - 2ms/step
Epoch 5/50
1234/1234 - 2s - loss: 13.0969 - auc_1: 0.9428 - accuracy: 0.4133 - val_loss: 9.8935 - val_auc_1: 0.0000e+00 - val_accuracy: 0.1877 - 2s/epoch - 2ms/step
Epoch 6/50
1234/1234 - 3s - loss: 10.4814 - auc_1: 0.9506 - accuracy: 0.4303 - val_loss: 9.0657 - val_auc_1: 0.0000e+00 - val_accuracy: 0.1424 - 3s/epoch - 2ms/step
Epoch 7