In [None]:
import os as os
import numpy as np
import scipy.signal
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from tensorflow import keras
from keras.utils import to_categorical
from keras.layers import Dense,RNN,LSTM,Activation,Dropout
from keras.models import Sequential
from scipy.signal import find_peaks

In [None]:
# Function to access the Subject Gyroscopic Data
# Subject Number - 1 to 139
# Walk Type - (0-Normal) (1-Impaired)
# Run Number - 1 to 3
def path_inp(sub,walk,num,base_dir='./Smartphone1'):
    """Build the directory path of one recording.

    Parameters
    ----------
    sub : int
        Subject number. Values below 10 get a single leading '0'
        (1 -> 'subject01'); larger values are used as they are.
    walk : int
        0 selects the 'normal' walk; any other value selects 'impaired'.
    num : int
        Run number. It is always prefixed with '0' (2 -> '02').
    base_dir : str, optional
        Folder that contains the per-recording directories. Defaults to the
        location the notebook has always used, so existing calls are unaffected.

    Returns
    -------
    str
        For example './Smartphone1/subject07_impaired02'.
    """
    subject_tag = ('subject0' if sub<10 else 'subject')+str(sub)
    walk_tag = '_normal0' if walk == 0 else '_impaired0'
    return base_dir+'/'+subject_tag+walk_tag+str(num)

In [None]:
# Function to plot Graph of Accuracy Values and Loss Values of Training Data and Validation Data
# Store the Data as .png file to be shown in the presentation
def _plot_metric_curve(history, metric, label, k_fold, run):
    """Plot one train/validation metric pair, save it under ./Results and show it."""
    # Start from a fresh figure so nothing drawn earlier ends up in this plot
    plt.figure()
    for key in (metric, 'val_'+metric):
        values = history.history[key]
        # The x axis follows the values that were actually recorded
        plt.plot(range(1, len(values)+1), values)
    plt.title('Model '+metric+', K-Fold - '+str(k_fold)+', Run - '+str(run))
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.savefig('./Results/Model'+label+'_Kfold'+str(k_fold)+'_Run'+str(run)+'.png')
    plt.show()
    # On backends where show() does not dispose of the figure, close it explicitly
    plt.close()


def plot_learningCurve(history, epochs,k_fold,run):
    """Plot and save the accuracy and loss curves of one training run.

    Parameters
    ----------
    history : object
        Must expose a ``history`` dict with the keys 'accuracy',
        'val_accuracy', 'loss' and 'val_loss' (the notebook passes the return
        value of ``model.fit``).
    epochs : int
        Number of epochs requested for the run. Kept for interface
        compatibility; the curves are drawn over the number of values actually
        present in ``history`` so a run that stopped early can still be plotted.
    k_fold, run
        Values embedded (via ``str``) in the plot titles and file names.

    Side effects
    ------------
    Creates './Results' if needed and writes
    'ModelAccuracy_Kfold<k_fold>_Run<run>.png' and
    'ModelLoss_Kfold<k_fold>_Run<run>.png' into it.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs('./Results', exist_ok=True)

    # Plot training & validation accuracy values
    _plot_metric_curve(history, 'accuracy', 'Accuracy', k_fold, run)

    # Plot training & validation loss values
    _plot_metric_curve(history, 'loss', 'Loss', k_fold, run)

In [None]:
file_inp_dir = './Smartphone1'
files = os.listdir(file_inp_dir)

# Sort every subject into "complete" or "incomplete".
# A subject is complete only when all 6 recordings (2 walk types x 3 runs)
# have their folder and a Gyroscope.csv inside it.
num_subjects = 139
complete_files = []
incomplete_files = []
for subject in range(1,num_subjects+1):
    recording_dirs = [path_inp(subject,walk,run) for walk in range(0,2) for run in range(1,4)]
    has_all_recordings = all(
        os.path.exists(rec_dir) and os.path.exists(rec_dir+'/Gyroscope.csv')
        for rec_dir in recording_dirs
    )
    if has_all_recordings:
        complete_files.append(subject)
    else:
        incomplete_files.append(subject)
print('Number of Subjects with Incomplete Data - '+str(len(incomplete_files)))
print('Number of Subjects with Complete Data - '+str(len(complete_files)))

In [None]:
# Measure the average sampling interval of every recording of every complete subject.
# The resulting table is used afterwards to find a time step common to all samples.
# One row per recording; both columns of a row receive the same value.
n_recordings = 6*len(complete_files)
time_intervals = np.empty((n_recordings,2),dtype=float)
t_num = 0
for subject in complete_files:
    for walk in (0,1):
        for run in (1,2,3):
            csv_path = path_inp(subject,walk,run)+'/Gyroscope.csv'
            # Drop the first parsed row (presumably the CSV header line)
            data_input = np.genfromtxt(csv_path,delimiter=',')[1:]
            n_rows = data_input.shape[0]

            # Column 0 is used as the time stamp: total duration / number of rows
            # NOTE(review): n_rows samples span n_rows-1 intervals - confirm the divisor is intended
            t_avg = (data_input[n_rows-1,0]-data_input[0,0])/n_rows

            time_intervals[t_num] = t_avg
            t_num += 1

In [None]:
# Snap every measured sampling interval to a multiple of Time_Step_inp and use the
# largest snapped interval as the common time step for all subjects
Time_Step_inp = 0.0025
if time_intervals.shape[0] == 0:
    raise ValueError('No recordings available to derive a common time step')

measured_intervals = time_intervals[:,0]
# Number of base steps per recording: round down when the fractional part is <= 0.5, up otherwise
step_counts = measured_intervals//Time_Step_inp
step_counts = np.where((measured_intervals/Time_Step_inp)%1 <= 0.5, step_counts, step_counts+1)
time_int_avg = step_counts*Time_Step_inp

Time_Step = np.amax(time_int_avg)
print('The Choosen Time Step is : '+"{:.4f}".format(Time_Step))

# The chosen step must be a whole multiple of every recording's own step.
# The test runs on the whole-number step counts, not on seconds: a float modulo
# such as 0.3 % 0.1 is not 0, so testing seconds can reject a valid time step.
# Zero or non-finite counts can never divide the chosen step and are rejected first.
max_count = np.amax(step_counts)
check = bool(
    np.all(np.isfinite(step_counts))
    and np.all(step_counts > 0)
    and np.all(max_count%step_counts == 0)
)
if not check:
    print('Choose a different time step')

if check == True:
    print('The choosen Time Step '+"{:.4f}".format(Time_Step)+' is Acceptable')

In [None]:
# Loop through the subjects with complete data, filter them, resample them and store them in an array nnInput
# nnInput axes: (subject, walk type, run, time sample, gyroscope axis)
WINDOW_SECONDS = 20   # length of the segment kept from every recording
CUTOFF_HZ = 5         # low-pass cutoff frequency
FILTER_ORDER = 5      # Butterworth filter order
t_num = 0             # not used in this cell; kept in case another cell reads it
num_sub = 0
# The window length is computed once so the slice below always matches the allocated array
seq_len = int(WINDOW_SECONDS/Time_Step)
nnInput = np.empty((len(complete_files),2,3,seq_len,3))
for i in complete_files:
    num_sub += 1
    for j in range(0,2):
        for k in range(1,4):
            # Read the Gyroscope.csv file of the subject
            # Delete the first row as it contains Data of Axes
            rec_path = path_inp(i,j,k)+'/Gyroscope.csv'
            data_input = np.genfromtxt(rec_path,delimiter=',')
            data_input = np.delete(data_input,0,0)
            t_min = data_input[0,0]
            t_max = data_input[-1,0]
            t_avg = (t_max-t_min)/data_input.shape[0]
            # Whole number of input samples that make up one common time step
            Resample_num = round(Time_Step/t_avg)
            t_inp_tstep = (Time_Step/Resample_num)
            # Filter the Data with Lowpass Butterworth Filter of 5Hz
            # butter() takes the cutoff as a fraction of the Nyquist frequency:
            # cutoff/(fs/2) = cutoff*2*dt
            b, a = scipy.signal.butter(FILTER_ORDER, CUTOFF_HZ*2*t_inp_tstep, 'low')
            # Columns 1..3 hold the x, y, z readings; all three are filtered along the time axis
            gyr_filt = scipy.signal.filtfilt(b, a, data_input[:,1:4], axis=0)
            # Resample the Data with the common Time Step
            gyr_resampled = scipy.signal.resample(gyr_filt, int(gyr_filt.shape[0]/Resample_num), axis=0)
            # Segment the Data - Take 20 seconds sample
            # 10 seconds before and after the middle time Step
            n_resampled = gyr_resampled.shape[0]
            lim_min = int(int(n_resampled/2)-((WINDOW_SECONDS/2)/Time_Step))
            lim_max = lim_min+seq_len
            # A negative start would wrap around; a one-sample slice would then be
            # broadcast silently over the whole window, so fail loudly here
            if lim_min < 0 or lim_max > n_resampled:
                raise ValueError('Recording '+rec_path+' has '+str(n_resampled)+
                                 ' resampled samples, fewer than needed for a '+
                                 str(WINDOW_SECONDS)+' s window ('+str(seq_len)+' samples)')
            # Store the data in the array nnInput
            nnInput[num_sub-1,j,k-1] = gyr_resampled[lim_min:lim_max]

In [None]:
# Cross-validation and training settings
Num_Kfold = 10     # number of folds
Num_Epochs = 10    # epochs trained per fold

# Sequential network with two recurrent hidden layers:
# LSTM(25, returns the full sequence) -> Dropout(0.2) -> LSTM(20) -> Dense(1, sigmoid)
seqLength = nnInput.shape[3]
numFeatures = nnInput.shape[4]
model = Sequential([
    LSTM(25,input_shape=(seqLength,numFeatures),return_sequences = True),
    Dropout(0.2),
    LSTM(20),
    Dense(1,activation='sigmoid'),
])
# NOTE(review): 'mse' is paired with a sigmoid output on 0/1 labels - confirm that
# 'binary_crossentropy' was not intended
model.compile(optimizer='adam',loss='mse',metrics = ['accuracy'])
# Snapshot of the freshly initialised weights; the training cell reloads it before each fold
model.save_weights('Model_Weights.h5')

# Unshuffled K-Fold splitter over the subject indices (first axis of nnInput)
kfold = KFold(n_splits=Num_Kfold, shuffle=False, random_state=None)
data = np.array(range(nnInput.shape[0]))

In [None]:
def _build_fold_arrays(samples, subject_idx):
    """Flatten the recordings of the selected subjects into network inputs and labels.

    samples is indexed as (subject, walk type, run, time sample, feature).
    Returns (inputs, labels). inputs has shape
    (len(subject_idx)*walk_types*runs, time samples, features), ordered by
    subject, then walk type, then run. labels is a float array holding the
    walk-type index of every row.
    """
    selected = samples[subject_idx]
    n_walks, n_runs = selected.shape[1:3]
    inputs = selected.reshape((-1,)+selected.shape[3:])
    # Per subject the labels read 0,0,0,1,1,1 (walk index repeated once per run)
    labels = np.tile(np.repeat(np.arange(n_walks, dtype=float), n_runs), len(subject_idx))
    return inputs, labels


for test_num, (train, test) in enumerate(kfold.split(data), start=1):

    #Initialize the same weights for each experiment
    model.load_weights('Model_Weights.h5')
    # load_weights() restores only the layer weights. Recompiling creates a fresh
    # optimizer so its accumulated state does not carry over from the previous fold.
    # Keep these arguments identical to the compile() call in the model-definition cell.
    model.compile(optimizer='adam',loss='mse',metrics = ['accuracy'])

    # Store the Training and Test data in the respective arrays
    trainInput, trainOutput = _build_fold_arrays(nnInput, data[train])
    testInput, testOutput = _build_fold_arrays(nnInput, data[test])

    # Train the neural Network with the Training Data
    # The held-out fold is passed as validation data
    history = model.fit(trainInput,trainOutput,epochs=Num_Epochs, validation_data =(testInput,testOutput),verbose=1 )

    # Predict the results of Test Data
    pred = model.predict(testInput)

    # Plot Graph of Accuracy Values and Loss Values of Training Data and Validation Data for each K-Fold
    # Store the Graphs for use in Presentation
    plot_learningCurve(history, Num_Epochs,Num_Kfold,test_num)