In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import re
import glob
from numpy import mean
from numpy import std
from scipy import stats
from keras.models import Sequential
from keras.callbacks import History 
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import ConvLSTM2D
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
#from matplotlib import pyplot

# Load Dataset

Load Dataset, concatenate data and count Rows and Columns

In [None]:
# Download dataset from Github and unzip
!wget 'https://github.com/cargilgar/Smart-Alarm-using-tinyML/raw/main/dataset/dataset-1-15.zip'

!unzip dataset-1-15.zip

In [4]:
#Create full dataframe with all subject csv's concatenated
path = r'content/output/'
all_files = glob.glob(path + "/*.csv")

li = []

for filename in all_files:
    subject_csv = pd.read_csv(filename, index_col=None, header=0)
    li.append(subject_csv)

df = pd.concat(li, axis=0, ignore_index=True)

#Save big dataframe as CSV
df.to_csv('big_df.csv')

#Rows and Columns
total_rows=len(df.axes[0]) #===> Axes of 0 is for a row
total_cols=len(df.axes[1]) #===> Axes of 1 is for a column
print("Number of Rows: "+str(total_rows))
print("Number of Columns: "+str(total_cols))

Number of Rows: 51376
Number of Columns: 6


In [2]:
# Read full dataframe CSV (without download data)
df = pd.read_csv('big_df.csv')

# Labels
Initial stages (0-5, wake = 0, N1 = 1, N2 = 2, N3 = 3, REM = 5)

Relabeled stages (0-2, wake = 0, NREM = 1, REM = 2)

In [3]:
# Show labels
# Same labels will be reused throughout the program

df['Labels'].describe()
#subject_csv.hist('Heart Rate')
#subject_csv.describe()

df['Labels'] = df['Labels'].map({0:0,1:1,2:1,3:1,5:2,},na_action=None)

#Delete non-labeled Rows
df.dropna(inplace=True)

# Normalize Training Data
Next, we need to normalize our features within our training data. Of course there are various ways on how to normalize. Please keep in mind that you use the same normalization algorithm later when feeding new data into your neural network. Otherwise your preditions will be off. On top of the normalization we will also apply rounding to the three features.

In [4]:
# Normalize features for training data set (values between 0 and 1)***
# Surpress warning for next 3 operation
pd.options.mode.chained_assignment = None  # default='warn'
df['X'] = df['X'] / df['X'].max()
df['Y'] = df['Y'] / df['Y'].max()
df['Z'] = df['Z'] / df['Z'].max()
df['Heart Rate'] = df['Heart Rate'] / df['Heart Rate'].max()

# Round numbers (4 decimals)
df = df.round({'X': 4, 'Y': 4, 'Z': 4, 'Heart Rate': 4})

# Split Dataset
Train, Validation and Test

Split the data
We'll use a (70%, 20%, 10%) split for the training, validation, and test sets. Note the data is not being randomly shuffled before splitting. This is for two reasons.

It ensures that chopping the data into windows of consecutive samples is still possible.
It ensures that the validation/test results are more realistic, being evaluated on data collected after the model was trained.

***ANOTHER SPLITING OPTION WOULD BE TO SEPARATE USERS (Crear nueva columna con nombre usuario?? O manejar cada CSV por separado??)***





In [5]:
column_indices = {name: i for i, name in enumerate(df.columns)}

PERCENTAGE_TRAIN = 0.75
PERCENTAGE_VALIDATION = 0.15

n = len(df)
train_df = df[0:int(n*PERCENTAGE_TRAIN)]
val_df = df[int(n*PERCENTAGE_TRAIN):int(n*(PERCENTAGE_VALIDATION + PERCENTAGE_TRAIN))]
test_df = df[int(n*(PERCENTAGE_VALIDATION + PERCENTAGE_TRAIN)):]

num_features = df.shape[1]

# Reshape Data into Segments and Prepare for Keras
The data contained in the dataframe is not ready yet to be fed into a neural network. Therefore we need to reshape it. Let’s create another function for this called “create_segments_and_labels”. This function will take in the dataframe and the label names.

In [6]:
def create_segments_and_labels(dfs,labels):

    labels = dfs[labels]
    #segments = dfs[['Z','Heart Rate']]
    segments = dfs[['X','Y','Z','Heart Rate']] #All features

    return segments, labels

In [7]:
# x_train --> Features
# y_train --> Labels
   
x_train, y_train = create_segments_and_labels(train_df,'Labels')
x_val, y_val = create_segments_and_labels(val_df,'Labels')
x_test, y_test = create_segments_and_labels(test_df,'Labels')


# LSTM Model

In [61]:
#Prepare data for LSTM Model
x_train = np.expand_dims(x_train,-1) 
y_train = np.expand_dims(y_train,-1) 
x_val = np.expand_dims(x_val,-1) 
y_val = np.expand_dims(y_val,-1) 
x_test = np.expand_dims(x_test,-1) 
y_test = np.expand_dims(y_test,-1) 

In [10]:
#Needed to categorical_crossentropy loss function
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)

In [71]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, valX, valy):
    verbose, epochs, batch_size = 0, 100, 360

    n_timesteps = trainX.shape[1]
    n_features = trainX.shape[2]
    n_outputs = 3

    model = Sequential()
    model.add(LSTM(72, input_shape=(n_timesteps,n_features)))
    model.add(Dropout(0.5))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
	# fit network
    model.fit(trainX, trainy, validation_data=(valX, valy), epochs=epochs, batch_size=batch_size, verbose=verbose)
	# evaluate model
    _, accuracy = model.evaluate(trainX, trainy, batch_size=batch_size, verbose=0)
    return accuracy

In [72]:
# summarize scores
def summarize_results(scores):
	print(scores)
	m, s = mean(scores), std(scores)
	print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

In [73]:
def run_experiment(repeats=3):
	# repeat experiment
	scores = list()
	for r in range(repeats):
		score = evaluate_model(x_train, y_train, x_val, y_val)
		score = score * 100.0
		print('>#%d: %.3f' % (r+1, score))
		scores.append(score)
	# summarize results
	summarize_results(scores)

In [74]:
run_experiment()

Model: "sequential_53"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_53 (LSTM)               (None, 72)                21312     
_________________________________________________________________
dropout_53 (Dropout)         (None, 72)                0         
_________________________________________________________________
dense_109 (Dense)            (None, 30)                2190      
_________________________________________________________________
dense_110 (Dense)            (None, 3)                 93        
Total params: 23,595
Trainable params: 23,595
Non-trainable params: 0
_________________________________________________________________
>#1: 70.479
Model: "sequential_54"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_54 (LSTM)               (None, 72)                21312     
______________

# Deep Neural Network Model in Keras


In [76]:
#Needed only for categorical_crossentropy loss function
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)

In [8]:
# define the keras model
num_classes = 3

model = Sequential()
model.add(Dense(60, input_dim= x_train.shape[1], activation='relu'))
model.add(Dense(60, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))#Last layer corresponds with the number of possible outputs

# compile the keras model
opt = SGD(learning_rate=0.01, momentum=0.9)

model.compile(loss= 'sparse_categorical_crossentropy', optimizer= 'adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 60)                300       
_________________________________________________________________
dense_1 (Dense)              (None, 60)                3660      
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 183       
Total params: 4,143
Trainable params: 4,143
Non-trainable params: 0
_________________________________________________________________


Train Model

In [9]:
# fit the keras model on the dataset
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50, batch_size=360, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# Predictions (Working on it)

In [21]:
predictions = model.predict(x_test)

In [None]:
index = 1
predictions[index]

In [None]:
np.argmax(predictions[index])

In [None]:
print(y_test[:5])

# Evaluate Model on Test Data

In [75]:
# evaluate themodel
test_loss, test_acc = model.evaluate(x_test, y_test)

print("Test accuracy", test_acc)
print("Test loss", test_loss)

Test accuracy 0.7587727904319763
Test loss 0.7283558249473572


In [66]:
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

NameError: ignored



---



---



---




---



---



---



---



---



---



---



---



---


---



---



---



---



---


---



---



---



---




Shuffle the training set because we will be using the validation_split option later when training.

In [None]:
'''idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]