In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import re
from scipy import stats
from keras.models import Sequential
from keras.callbacks import History 
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
#from matplotlib import pyplot

# Load Dataset

Load Dataset, count Rows and Columns

In [2]:
# Locate the dataset folder inside the current working directory and list
# its entries in sorted order.
data_path = os.path.join(os.getcwd(), "dataset/")
data_list = sorted(os.listdir(data_path))

# Only the first entry of the sorted listing is loaded.
first_subject_file = os.path.join(data_path, data_list[0])
subject_csv = pd.read_csv(first_subject_file, delimiter=',')

# Report the size of the loaded table: shape is (rows, columns).
total_rows, total_cols = subject_csv.shape
print("Number of Rows: " + str(total_rows))
print("Number of Columns: " + str(total_cols))

Number of Rows: 1559
Number of Columns: 6


# Labels
stage (0-5, wake = 0, N1 = 1, N2 = 2, N3 = 3, REM = 5)

In [3]:
# Collapse the raw stage codes into the three classes used by the model.
# The same class ids are reused throughout the notebook:
#   Wake (raw 0)        --> 0
#   NREM (raw 1, 2, 3)  --> 1
#   REM  (raw 5)        --> 2
# Any raw code that is not a key of the mapping becomes NaN.
#
# NOTE: this cell rewrites 'Labels' in place. Running it a second time without
# reloading subject_csv would send the already-remapped REM class (2) through
# the mapping again and turn it into NREM (1).
stage_to_class = {0: 0, 1: 1, 2: 1, 3: 1, 5: 2}
subject_csv['Labels'] = subject_csv['Labels'].map(stage_to_class)

# Drop every row that contains a NaN; this removes the rows left without a label.
subject_csv.dropna(inplace=True)

# Split Dataset
Train, Validation and Test

Split the data
We'll use a (70%, 20%, 10%) split for the training, validation, and test sets. Note the data is not being randomly shuffled before splitting. This is for two reasons.

1. It ensures that chopping the data into windows of consecutive samples is still possible.
2. It ensures that the validation/test results are more realistic, being evaluated on data collected after the data the model was trained on.

***ANOTHER SPLITTING OPTION WOULD BE TO SEPARATE BY USER (create a new column with the user name, or handle each CSV separately?)***





In [4]:
# Chronological (unshuffled) split: the first 70% of the rows are used for
# training, the next 20% for validation and the remainder for testing.
PERCENTAGE_TRAIN = 0.7
PERCENTAGE_VALIDATION = 0.2

# Column name -> positional index lookup for subject_csv.
column_indices = {name: position for position, name in enumerate(subject_csv.columns)}

# Number of columns in subject_csv (the 'Labels' column is counted too).
num_features = subject_csv.shape[1]

# Row positions at which one split ends and the next one begins.
n = len(subject_csv)
train_end = int(n*PERCENTAGE_TRAIN)
val_end = int(n*(PERCENTAGE_VALIDATION + PERCENTAGE_TRAIN))

train_subject_csv = subject_csv[:train_end]
val_subject_csv = subject_csv[train_end:val_end]
test_subject_csv = subject_csv[val_end:]

# Normalize Training Data
Next, we need to normalize the features in our training data. There are various ways to normalize; whichever one is chosen, the same normalization must be applied later when feeding new data (including the validation and test sets) into the neural network, otherwise the predictions will be off. On top of the normalization we also round the four features (X, Y, Z, Heart Rate) to 4 decimals.

In [5]:
# Normalize the feature columns of all three splits.
#
# Each feature is divided by its maximum over the TRAINING split, and that same
# scale factor is then applied to the validation and test splits. Scaling only
# the training data would mean the model is validated and tested on inputs in
# different units from the ones it was fitted on.
#
# Dividing by the maximum caps the training values at 1. It does not bound
# them below: negative readings stay negative after scaling.

# Kept from the original cell: silences pandas' chained-assignment warning
# for the rest of the notebook.
pd.options.mode.chained_assignment = None  # default='warn'

FEATURE_COLUMNS = ['X', 'Y', 'Z', 'Heart Rate']
ROUND_DECIMALS = 4

# Per-feature scale factors taken from the training split only, so no
# information from the validation/test rows leaks into preprocessing.
# A maximum of exactly 0 is replaced by 1 to avoid dividing by zero.
train_feature_max = train_subject_csv[FEATURE_COLUMNS].max().replace(0, 1)


def _scale_and_round(split_df, scale):
    """Return a copy of split_df with its feature columns scaled and rounded.

    Parameters
    ----------
    split_df : pandas.DataFrame
        One of the train/validation/test splits; must contain FEATURE_COLUMNS.
    scale : pandas.Series
        Divisor for each feature, indexed by feature column name.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is left untouched, so the parent subject_csv
        is never modified through a slice.
    """
    scaled = split_df.copy()
    # DataFrame / Series aligns the Series index with the column names.
    scaled[FEATURE_COLUMNS] = (scaled[FEATURE_COLUMNS] / scale).round(ROUND_DECIMALS)
    return scaled


train_subject_csv = _scale_and_round(train_subject_csv, train_feature_max)
val_subject_csv = _scale_and_round(val_subject_csv, train_feature_max)
test_subject_csv = _scale_and_round(test_subject_csv, train_feature_max)

# Reshape Data into Segments and Prepare for Keras
The data contained in the dataframe is not yet ready to be fed into a neural network, so we need to reshape it. Let's create a function for this called `create_segments_and_labels`. In the referenced tutorial this function takes the dataframe and the label names (a constant defined at the beginning) as well as the length of each record: 80 steps, which at a 20 Hz sampling rate equals 4-second time intervals (calculation: 0.05 * 80 = 4). Besides reshaping the data, the function also separates the features (x-acceleration, y-acceleration, z-acceleration) from the labels (the associated activity in the tutorial). Note: the version below does not window the data yet; it only takes the dataframe and the label column name and separates the feature columns (X, Y, Z, Heart Rate) from the labels. https://towardsdatascience.com/human-activity-recognition-har-tutorial-with-keras-and-core-ml-part-1-8c05e365dfa0

In [6]:
def create_segments_and_labels(subject_csv,labels):
    """Separate a subject DataFrame into model inputs and targets.

    Parameters
    ----------
    subject_csv : pandas.DataFrame
        Frame containing the columns 'X', 'Y', 'Z', 'Heart Rate' and the
        label column.
    labels : str
        Name of the label column in ``subject_csv``.

    Returns
    -------
    tuple
        ``(segments, labels)``: the four feature columns as a DataFrame and
        the label column selected by ``labels``.
    """
    # The label column is looked up first, then the feature columns.
    label_column = subject_csv[labels]

    # All four features are used; narrow this list to train on a subset.
    feature_names = ['X', 'Y', 'Z', 'Heart Rate']
    feature_frame = subject_csv[feature_names]

    return feature_frame, label_column

In [7]:
# Separate each split into its features (x_*) and its labels (y_*).
# The splits are processed in the order train, validation, test.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    create_segments_and_labels(split_df, 'Labels')
    for split_df in (train_subject_csv, val_subject_csv, test_subject_csv)
)


In [8]:
# Quick sanity check on the size of the training features and labels.
train_sample_count = len(x_train)
print(train_sample_count, 'training samples')
print('x_train shape: ', x_train.shape)
print('y_train shape: ', y_train.shape)

# Last expression of the cell: displayed as the cell's output value.
train_sample_count

1091 training samples
x_train shape:  (1091, 4)
y_train shape:  (1091, 3)


1091

# Create Deep Neural Network Model in Keras


In [9]:
# Define the Keras model: one hidden layer followed by a softmax classifier.
num_classes = 3  # the remapped label ids are 0, 1 and 2

# The labels are kept as integer class ids, which is what
# 'sparse_categorical_crossentropy' expects, so no one-hot encoding
# (to_categorical) is needed. Switching the loss to 'categorical_crossentropy'
# would require one-hot encoding y_train, y_val and y_test first.

model = Sequential()
# Hidden layer uses ReLU. The original used softmax here, which forces the 60
# hidden activations to sum to 1 and badly limits what the layer can represent;
# softmax belongs on the output layer only.
model.add(Dense(60, input_dim=x_train.shape[1], activation='relu'))
# Output layer: one unit per class, softmax turns the scores into probabilities.
model.add(Dense(num_classes, activation='softmax'))

# Compile the model: SGD with momentum, integer-label cross-entropy loss.
opt = SGD(learning_rate=0.01, momentum=0.9)

model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 60)                300       
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 183       
Total params: 483
Trainable params: 483
Non-trainable params: 0
_________________________________________________________________


# Train Model

In [None]:
# Train the model on the training split, monitoring the validation split after
# every epoch. The returned History object is used later for plotting.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=36,
    verbose=1,
)

# Evaluate Model on Test Data

In [11]:
# Score the trained model on the held-out test split.
# evaluate() yields the loss followed by the metrics given to compile(),
# which here is a single accuracy value.
test_metrics = model.evaluate(x_test, y_test)
test_loss, test_acc = test_metrics

print("Test accuracy", test_acc)
print("Test loss", test_loss)

ValueError: ignored

In [None]:
# Plot the training history: one figure for accuracy, one for loss, each
# showing the training curve and the validation curve per epoch.
history_curves = (
    # (training key, validation key, figure title)
    ('accuracy', 'val_accuracy', 'model accuracy'),
    ('loss', 'val_loss', 'model loss'),
)

for train_key, val_key, figure_title in history_curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()



---



---



---




---



---



---



---



---



---



---



---



---


---



---



---



---



---


---



---



---



---




Shuffle the training set because we will be using the validation_split option later when training.

In [None]:
'''idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]