In [191]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import StratifiedKFold
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.callbacks import History 
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras import backend as K
from keras import regularizers
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [192]:
path = r"C:\Users\Trinity\Documents\ANN_project\dataset-HAR-PUC-Rio.csv" 

In [193]:
dataset= pd.read_csv(path, delimiter=";", decimal = ",", low_memory=False) # Read the file
dataset.head()


Unnamed: 0,user,gender,age,how_tall_in_meters,weight,body_mass_index,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,y4,z4,Class
0,debora,Woman,46,1.62,75,28.6,-3,92,-63,-23,18,-19,5,104,-92,-150,-103,-147,sitting
1,debora,Woman,46,1.62,75,28.6,-3,94,-64,-21,18,-18,-14,104,-90,-149,-104,-145,sitting
2,debora,Woman,46,1.62,75,28.6,-1,97,-61,-12,20,-15,-13,104,-90,-151,-104,-144,sitting
3,debora,Woman,46,1.62,75,28.6,-2,96,-57,-15,21,-16,-13,104,-89,-153,-103,-142,sitting
4,debora,Woman,46,1.62,75,28.6,-1,96,-61,-13,20,-15,-13,104,-89,-153,-104,-143,sitting


In [194]:


oe = OrdinalEncoder()
oe.fit(dataset[["user","gender"]])
dataset[["user","gender"]] = oe.transform(dataset[["user","gender"]]) # Ordinal encoding of categorical input data

le = LabelEncoder()
dataset.Class = le.fit_transform(dataset.Class)# Label (integer) encoding of categorical target data
dataset.head()


Unnamed: 0,user,gender,age,how_tall_in_meters,weight,body_mass_index,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,y4,z4,Class
0,0.0,1.0,46,1.62,75,28.6,-3,92,-63,-23,18,-19,5,104,-92,-150,-103,-147,0
1,0.0,1.0,46,1.62,75,28.6,-3,94,-64,-21,18,-18,-14,104,-90,-149,-104,-145,0
2,0.0,1.0,46,1.62,75,28.6,-1,97,-61,-12,20,-15,-13,104,-90,-151,-104,-144,0
3,0.0,1.0,46,1.62,75,28.6,-2,96,-57,-15,21,-16,-13,104,-89,-153,-103,-142,0
4,0.0,1.0,46,1.62,75,28.6,-1,96,-61,-13,20,-15,-13,104,-89,-153,-104,-143,0


In [195]:
# Split the data to training and testing data 5-Fold
X = dataset.drop(["Class"], axis =1) # Input values
Y = dataset["Class"] # Target values



In [196]:
X = X.apply(lambda x: x-x.mean()) # Mean centering

scaler = MinMaxScaler()
X = scaler.fit_transform(X) # Scaling the data to [0,1]


In [197]:
kfold = StratifiedKFold(n_splits=5, shuffle =True) # Each fold has the same percentage of samples for every class


In [198]:
# Initializing lists
crossentropyList = []
accuracyList = []
mseList = []

accuracyhistoryList = []
val_acchistoryList = []
losshistoryList= []
val_losshistoryList = []
val_msehistoryList = []

# KFold loop
for i, (train, test) in enumerate(kfold.split(X,Y)):
    
    # Create model
    model = Sequential() 
    
    model.add(Dense(23, activation="relu", kernel_regularizer = regularizers.l2(0.9), input_dim=18)) #  Dense = fully connected
    model.add(Dense(5, activation="softmax", input_dim=23)) #  Softmax for crossentropy loss function

    # Compile model
    keras.optimizers.SGD(learning_rate=0.1, momentum=0.6) # Stochastic gradient descent optimizer
    model.compile(optimizer = 'sgd', loss = 'sparse_categorical_crossentropy', metrics=['accuracy','mean_squared_error']) # Sparse categorical crossentropy loss for integer encoding 

    # Callback for EarlyStopping
    es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=0, patience=10) # When accuracy maximizes it waits another 10 epochs, it stops if there is no change. 
    #If there is, it continues until the next plateau or until it reaches the epochs intilized.
    mc = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', verbose=1, save_best_only=True)# Saves the model with the best performance obserbed during training
    history = History()
    
    # Fit model
    model.fit(X[train], Y[train], validation_data =(X[test], Y[test]), batch_size= 10, epochs=30, verbose=1, callbacks= [es, mc, history]) # Validation with test dataset 
    
    # Load best model
    saved_model = load_model('best_model.h5')
    
    # Append trainig histories to lists
    accuracyhistoryList.append(history.history['accuracy'])
    val_acchistoryList.append(history.history['val_accuracy'])
    losshistoryList.append(history.history['loss'])
    val_losshistoryList.append(history.history['val_loss'])
    val_msehistoryList.append(history.history['val_mean_squared_error'])
    
    # Evaluate model
    scores = saved_model.evaluate(X[test], Y[test], verbose=0) # The "best model" is evaluated
    crossentropyList.append(scores[0])
    accuracyList.append(scores[1])
    mseList.append(scores[2])
    print("Fold :", i, " Test Loss:", scores[0], " Test Accuracy:", scores[1], " Test MSE:", scores[2])
    


Epoch 1/30
Epoch 1: val_loss improved from inf to 1.45843, saving model to best_model.h5
Epoch 2/30
Epoch 2: val_loss improved from 1.45843 to 1.44971, saving model to best_model.h5
Epoch 3/30
Epoch 3: val_loss did not improve from 1.44971
Epoch 4/30
Epoch 4: val_loss improved from 1.44971 to 1.43541, saving model to best_model.h5
Epoch 5/30
Epoch 5: val_loss did not improve from 1.43541
Epoch 6/30
Epoch 6: val_loss did not improve from 1.43541
Epoch 7/30
Epoch 7: val_loss did not improve from 1.43541
Epoch 8/30
Epoch 8: val_loss improved from 1.43541 to 1.42327, saving model to best_model.h5
Epoch 9/30
Epoch 9: val_loss did not improve from 1.42327
Epoch 10/30
Epoch 10: val_loss did not improve from 1.42327
Epoch 11/30
Epoch 11: val_loss did not improve from 1.42327
Epoch 12/30
Epoch 12: val_loss improved from 1.42327 to 1.42318, saving model to best_model.h5
Epoch 13/30
Epoch 13: val_loss improved from 1.42318 to 1.41050, saving model to best_model.h5
Epoch 14/30
Epoch 14: val_loss d

KeyboardInterrupt: 

In [177]:
# History of "average model"
avg_acc_hist =[]
avg_val_acc_hist=[]
avg_loss_hist =[]
avg_val_loss_hist =[]
avg_val_mse_history=[]
avg_acc_hist=np.mean(accuracyhistoryList, axis=0)
avg_val_acc_hist=np.mean(val_acchistoryList, axis=0)
avg_loss_hist=np.mean(losshistoryList, axis=0)
avg_val_loss_hist=np.mean(val_losshistoryList, axis=0)
avg_val_mse_hist=np.mean(val_msehistoryList, axis=0)

In [180]:
# Plot graphs for average model

# Summarize history for accuracy
plt.plot(avg_acc_hist)
plt.plot(avg_val_acc_hist)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Summarize history for loss
plt.plot(avg_loss_hist)
plt.plot(avg_val_loss_hist)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Summarize history for mse
plt.plot(val_msehistoryList[i])
plt.title('model mse')
plt.ylabel('mse')
plt.xlabel('epoch')
plt.legend(['test'], loc='upper left')
plt.show()




In [182]:
# Mean values
print("Mean Loss: ", np.mean(crossentropyList), " Mean Accuracy:", np.mean(accuracyList), " Mean MSE:", np.mean(mseList))