In [1]:
import pandas as pd
import numpy as np
import os
import re
from scipy import stats
from keras.models import Sequential
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense

# Load Dataset

Load Dataset, count Rows and Columns

In [2]:
data_path = os.path.join(os.getcwd(), "dataset/")
data_list = sorted(os.listdir(data_path))
data_list[0]


subject_csv = pd.read_csv(os.path.join(data_path, data_list[0]), delimiter=',')
subject_csv

#Rows and Columns
total_rows=len(subject_csv.axes[0]) #===> Axes of 0 is for a row
total_cols=len(subject_csv.axes[1]) #===> Axes of 1 is for a column
print("Number of Rows: "+str(total_rows))
print("Number of Columns: "+str(total_cols))

#subject_csv

Number of Rows: 50
Number of Columns: 6


# Labels
stage (0-5, wake = 0, N1 = 1, N2 = 2, N3 = 3, REM = 5)

In [3]:
#Show labels
# Same labels will be reused throughout the program

subject_csv['labels'].describe()
#subject_csv.hist('Heart Rate')
#subject_csv.describe()
'''
subject_csv['labels'] = subject_csv['labels'].map({0:'Wake',
                             1:'NREM',
                             2:'NREM',
                             3:'NREM',
                             5:'REM',
                             },
                             na_action=None)
'''
subject_csv['labels'] = subject_csv['labels'].map({0:0,
                             1:1,
                             2:1,
                             3:1,
                             5:2,
                             },
                             na_action=None)

#Delete non-labeled Rows
subject_csv.dropna(inplace=True)

#Not labeled values --> NaN

#subject_csv

# Split Dataset
Train, Validation and Test

Split the data
We'll use a (70%, 20%, 10%) split for the training, validation, and test sets. Note the data is not being randomly shuffled before splitting. This is for two reasons.

It ensures that chopping the data into windows of consecutive samples is still possible.
It ensures that the validation/test results are more realistic, being evaluated on data collected after the model was trained.

***ANOTHER SPLITING OPTION WOULD BE TO SEPARATE USERS (Crear nueva columna con nombre usuario?? O manejar cada CSV por separado??)***





In [4]:
column_indices = {name: i for i, name in enumerate(subject_csv.columns)}

PERCENTAGE_TRAIN = 0.7
PERCENTAGE_VALIDATION = 0.2

n = len(subject_csv)
train_subject_csv = subject_csv[0:int(n*PERCENTAGE_TRAIN)]
val_subject_csv = subject_csv[int(n*PERCENTAGE_TRAIN):int(n*(PERCENTAGE_VALIDATION + PERCENTAGE_TRAIN))]
test_subject_csv = subject_csv[int(n*(PERCENTAGE_VALIDATION + PERCENTAGE_TRAIN)):]

num_features = subject_csv.shape[1]
#test_subject_csv

# Normalize Training Data
Next, we need to normalize our features within our training data. Of course there are various ways on how to normalize. Please keep in mind that you use the same normalization algorithm later when feeding new data into your neural network. Otherwise your preditions will be off. On top of the normalization we will also apply rounding to the three features.

*CORREGIR VALORES NEGATIVOS*

In [5]:
# Normalize features for training data set (values between 0 and 1)***
# Surpress warning for next 3 operation
pd.options.mode.chained_assignment = None  # default='warn'
train_subject_csv['X'] = train_subject_csv['X'] / train_subject_csv['X'].max()
train_subject_csv['Y'] = train_subject_csv['Y'] / train_subject_csv['Y'].max()
train_subject_csv['Z'] = train_subject_csv['Z'] / train_subject_csv['Z'].max()
train_subject_csv['Heart Rate'] = train_subject_csv['Heart Rate'] / train_subject_csv['Heart Rate'].max()

# Round numbers (4 decimals)
train_subject_csv = train_subject_csv.round({'X': 4, 'Y': 4, 'Z': 4, 'Heart Rate': 4})

#train_subject_csv

# Reshape Data into Segments and Prepare for Keras
The data contained in the dataframe is not ready yet to be fed into a neural network. Therefore we need to reshape it. Let’s create another function for this called “create_segments_and_labels”. This function will take in the dataframe and the label names (the constant that we have defined at the beginning) as well as the length of each record. In our case, let’s go with 80 steps (see constant defined earlier). Taking into consideration the 20 Hz sampling rate, this equals to 4 second time intervals (calculation: 0.05 * 80 = 4). Besides reshaping the data, the function will also separate the features (x-acceleration, y-acceleration, z-acceleration) and the labels (associated activity). https://towardsdatascience.com/human-activity-recognition-har-tutorial-with-keras-and-core-ml-part-1-8c05e365dfa0

In [6]:
def create_segments_and_labels(subject_csv,labels):

    labels = subject_csv[labels]
    segments = subject_csv[['X','Y','Z','Heart Rate']]

    return segments, labels

In [7]:
# x_train --> Features
# y_train --> Labels
   
x_train, y_train = create_segments_and_labels(train_subject_csv,'labels')
x_val, y_val = create_segments_and_labels(val_subject_csv,'labels')
x_test, y_test = create_segments_and_labels(test_subject_csv,'labels')


In [8]:
print(x_train.shape[0], 'training samples')
print('x_train shape: ', x_train.shape)
print('y_train shape: ', y_train.shape)
x_train.shape[0]
#x_train.shape[1]

32 training samples
x_train shape:  (32, 4)
y_train shape:  (32,)


32

# Create Deep Neural Network Model in Keras


In [9]:
# define the keras model
model = Sequential()
model.add(Dense(20, input_dim= x_train.shape[1], activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(3, activation="softmax")) #Last layer corresponds with the number of possible outputs


In [10]:
# compile the keras model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                100       
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 168       
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 27        
Total params: 295
Trainable params: 295
Non-trainable params: 0
_________________________________________________________________


# Train Model

In [11]:
# fit the keras model on the dataset
model.fit(x_train, y_train, epochs=150, batch_size=10)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x7f154fd909d0>

# Evaluate Model

In [12]:
# evaluate the keras model
model.evaluate(x_test, y_test)



[65.0142593383789, 0.800000011920929]

# Fit the DNN Model in Keras
Next, we will train the model with our training data that we have prepared earlier. We will define an early stopping callback monitor on training accuracy: if the training fails to improve for two consecutive epochs, then the training will stop with the best model. The hyperparameter used for the training are quite simple: We will use a batch size of 400 records and will train the model for 50 epochs. For model training, we will use a 80:20 split to separate training data and validation data. It is that simple. So let’s go ahead and train our model. There are some good explanations out there on the different hyperparameters, for instance here (https://towardsdatascience.com/epoch-vs-iterations-vs-batch-size-4dfb9c7ce9c9).

In [None]:
callbacks_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', save_best_only=True),
    keras.callbacks.EarlyStopping(monitor='acc', patience=1)
]

model_m.compile(loss='categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])

# Hyper-parameters
BATCH_SIZE = 400
EPOCHS = 50

# Enable validation to use ModelCheckpoint and EarlyStopping callbacks.
history = model_m.fit(x_train,
                      y_train_hot,
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS,
                      callbacks=callbacks_list,
                      validation_split=0.2,
                      verbose=1)



---



---



---




---



---



---



---



---



---



---



---



---


---



---



---



---



---


---



---



---



---




# Standarize the data
Our timeseries are already in a single length (176). However, their values are usually in various ranges. This is not ideal for a neural network; in general we should seek to make the input values normalized. For this specific dataset, the data is already z-normalized: each timeseries sample has a mean equal to zero and a standard deviation equal to one. This type of normalization is very common for timeseries classification problems, see Bagnall et al. (2016).

Note that the timeseries data used here are univariate, meaning we only have one channel per timeseries example. We will therefore transform the timeseries into a multivariate one with one channel using a simple reshaping via numpy. This will allow us to construct a model that is easily applicable to multivariate time series.

Finally, in order to use sparse_categorical_crossentropy, we will have to count the number of classes beforehand.

Now we shuffle the training set because we will be using the validation_split option later when training.

In [None]:
idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]

Standardize the labels to positive integers. The expected labels will then be 0 and 1.

In [None]:
y_train[y_train == -1] = 0
y_test[y_test == -1] = 0

# Visualize the data
Here we visualize one timeseries example for each class in the dataset.

In [None]:
classes = np.unique(np.concatenate((y_train, y_test), axis=0))

plt.figure()
for c in classes:
    c_x_train = x_train[y_train == c]
    plt.plot(c_x_train[0], label="class " + str(c))
plt.legend(loc="best")
plt.show()
plt.close()

# Build the model


In [None]:
module_selection = ("mobilenet_v2", 224, 1280) 
handle_base, pixels, FV_SIZE = module_selection
MODULE_HANDLE ="https://tfhub.dev/google/tf2-preview/{}/feature_vector/4".format(handle_base)
IMAGE_SIZE = (pixels, pixels)
print("Using {} with input size {} and output dimension {}".format(MODULE_HANDLE, IMAGE_SIZE, FV_SIZE))

feature_extractor = hub.KerasLayer(MODULE_HANDLE,
                                   input_shape=IMAGE_SIZE + (3,), 
                                   output_shape=[FV_SIZE],
                                   trainable=False)

print("Building model with", MODULE_HANDLE)

model = tf.keras.Sequential([
        feature_extractor,
        tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.summary()

model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

EPOCHS = 15

hist = model.fit(train_batches,
                 epochs=EPOCHS,
                 validation_data=validation_batches)

# Train the model

In [None]:
epochs = 500
batch_size = 32

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)

# Evaluate model on test data

In [None]:
model = keras.models.load_model("best_model.h5")

test_loss, test_acc = model.evaluate(x_test, y_test)

print("Test accuracy", test_acc)
print("Test loss", test_loss)

# Plot the model's training and validation loss

In [None]:
metric = "sparse_categorical_accuracy"
plt.figure()
plt.plot(history.history[metric])
plt.plot(history.history["val_" + metric])
plt.title("model " + metric)
plt.ylabel(metric, fontsize="large")
plt.xlabel("epoch", fontsize="large")
plt.legend(["train", "val"], loc="best")
plt.show()
plt.close()