In [None]:
#Environment Details
# !python --version
# !nvidia-smi
# !pip freeze

#Use when not running in Colab
# !pip install numpy==1.18.5
# !pip install tensorflow==2.3.0
# !pip install pandas==1.0.5
# !pip install seaborn==0.10.1
# !pip install matplotlib==3.2.2
# !pip install scikit-learn  # "sklearn" on PyPI is only a deprecated alias of scikit-learn
# !pip install scipy==1.4.1

In [None]:
# Prerequisites: gdown is the command-line tool used below to fetch the
# dataset from Google Drive.
!pip install gdown
# !pip install tensorflow==2.3.0

# Download the data set into the working directory.
# NOTE(review): the loading cell reads 'WISDM_WATCH_ACCEL.txt', so the Drive
# file is presumably saved under that name -- confirm after downloading.
# Alternative source (disabled):
# !gdown https://cdap-fmg.s3.us-east-2.amazonaws.com/WISDM_WATCH_ACCEL.txt
!gdown https://drive.google.com/uc?id=1bol4ADpBa5G7GJ3ilUxbAiRLKUh-qm4g

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters

Reading the Dataset:

In [None]:
columnNames = ['user_id', 'activity', 'timestamp', 'x_axis', 'y_axis', 'z_axis']

# The file is read without a header row; column names are supplied explicitly.
dataSet = pd.read_csv('WISDM_WATCH_ACCEL.txt', header=None, names=columnNames)

# Remove the ';' characters from the last column before converting it to float.
# This is done with a plain assignment: calling replace(inplace=True) on
# `dataSet.z_axis` is a chained in-place update, which pandas does not
# propagate back to the frame once Copy-on-Write is active.
dataSet['z_axis'] = dataSet['z_axis'].replace(regex=True, to_replace=r';', value=r'')
dataSet['z_axis'] = dataSet['z_axis'].astype(np.float64)

# Drop rows with any missing value, then the unused 'timestamp' column.
dataSet = dataSet.dropna(axis=0, how='any')
dataSet = dataSet.drop(columns='timestamp')

dataSet.head()  # Outputs the first few lines of data
# dataSet.shape  # Outputs the number of rows and columns

Splitting the data into training and test datasets:

In [None]:
# Split by subject: rows with user_id up to and including 1640 form the
# training set, rows with a larger user_id form the test set.
dataSet_train = dataSet.loc[dataSet['user_id'].le(1640)]
dataSet_test = dataSet.loc[dataSet['user_id'].gt(1640)]

In [None]:
from scipy import stats

def create_dataset(X, y, time_steps=1, step=1):
    """Cut a row-ordered signal into fixed-length, possibly overlapping windows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows, sliced positionally.
    y : pandas.Series
        One label per row of ``X``.
    time_steps : int
        Number of consecutive rows in each window.
    step : int
        Number of rows between the starts of consecutive windows.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)`` where ``Xs`` stacks the windows as
        ``(n_windows, time_steps, n_features)`` and ``ys`` has shape
        ``(n_windows, 1)``. Each label is the most frequent value of ``y``
        inside the window; ties resolve to the smallest value.
    """
    Xs, ys = [], []
    # "+ 1" keeps the final full window, which starts at len(X) - time_steps.
    for start in range(0, len(X) - time_steps + 1, step):
        stop = start + time_steps
        Xs.append(X.iloc[start:stop].values)
        # Series.mode() works for string labels and returns the modes sorted,
        # so .iloc[0] is the smallest of any tied values.
        ys.append(y.iloc[start:stop].mode().iloc[0])
    return np.array(Xs), np.array(ys).reshape(-1, 1)

# Windowing parameters: every sample covers TIME_STEPS consecutive readings,
# and consecutive windows start STEP readings apart.
TIME_STEPS = 400
STEP = 40

# X_train / Y_train: the windows and matching labels the model is fitted on.
# x_test / y_test: held-out windows and labels; they take no part in training
# and are only used to judge the quality of the trained model.

X_train, Y_train = create_dataset(
    X=dataSet_train[['x_axis', 'y_axis', 'z_axis']],
    y=dataSet_train['activity'],
    time_steps=TIME_STEPS,
    step=STEP,
)

x_test, y_test = create_dataset(
    X=dataSet_test[['x_axis', 'y_axis', 'z_axis']],
    y=dataSet_test['activity'],
    time_steps=TIME_STEPS,
    step=STEP,
)

Converting Activity names to numbers:

In [None]:
from sklearn.preprocessing import OneHotEncoder

# The encoder is created with its default (sparse) output and the result is
# densified explicitly. The `sparse=` keyword was renamed to `sparse_output=`
# in scikit-learn 1.2 and removed in 1.4, so passing either spelling would tie
# this cell to a version range; `.toarray()` works on all of them.
enc = OneHotEncoder(handle_unknown='ignore')

# Fit on the training labels only. With handle_unknown='ignore', a test label
# not seen here is encoded as an all-zero row instead of raising.
enc = enc.fit(Y_train)

Y_train = enc.transform(Y_train).toarray()
y_test = enc.transform(y_test).toarray()

In [None]:
# Sequential Keras classifier, layer by layer:
#   1. LSTM    - reads one window of sensor readings at a time.
#   2. Dropout - intended to reduce overfitting to the training data.
#   3. Dense   - interprets the features extracted by the LSTM.
#   4. Dense   - softmax output with one unit per encoded activity class.
model = keras.Sequential([
    keras.layers.LSTM(
        units=128,
        input_shape=list(X_train.shape[1:3]),
    ),
    keras.layers.Dropout(rate=0.5),  # 0 < rate < 1
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(Y_train.shape[1], activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)


In [None]:
# Training vocabulary:
# - epoch: one forward and one backward pass over all training examples.
# - batch size: number of examples in a single forward/backward pass; a larger
#   batch needs more memory.
# - iteration: one pass (forward + backward) over a single batch.
# validation_split=0.1 sets a tenth of the training arrays aside for validation.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=300,
    validation_split=0.1,
    shuffle=False,
)

In [None]:
# Score the trained model on the held-out test windows. The result is
# [loss, accuracy]; the accuracy is a fraction between 0 and 1, not a percentage.
model.evaluate(x=x_test, y=y_test)

Plotting the loss graph:

In [None]:
# 'val_loss' is computed on the validation split that model.fit() carves out
# of the training data, not on the test set, so it is labelled as validation.
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend();

Plotting the Confusion Matrix:

In [None]:
# Per-window output of the softmax layer: one probability per activity class.
y_pred = model.predict(x=x_test)

from sklearn.metrics import confusion_matrix

def plot_confMatrix(y_true, y_pred, class_names):
  """Draw an annotated confusion-matrix heatmap and show it.

  Parameters
  ----------
  y_true : array-like
      Actual class label of every sample.
  y_pred : array-like
      Predicted class label of every sample.
  class_names : array-like
      All class labels, in the order they should appear on both axes.
  """
  # Passing `labels` pins the matrix size and row/column order to
  # `class_names`, so the tick labels stay aligned even when a class is
  # missing from both y_true and y_pred.
  confMatrix = confusion_matrix(y_true, y_pred, labels=class_names)
  fig, ax = plt.subplots(figsize=(18, 16))
  sns.heatmap(
      confMatrix,
      annot=True,
      fmt="d",
      cmap=sns.diverging_palette(220, 20, n=7),
      xticklabels=class_names,
      yticklabels=class_names,
      ax=ax
  )

  ax.set_ylabel('Actual')
  ax.set_xlabel('Predicted')
  # No manual ylim adjustment: widening the limits by 0.5 was only a workaround
  # for the clipped heatmap rows of matplotlib 3.1.1; on other versions it adds
  # a blank half-row above and below the matrix.
  plt.show()

# Decode the one-hot test labels and the predicted probabilities back into
# activity names, then plot them against each other.
plot_confMatrix(
  y_true=enc.inverse_transform(y_test),
  y_pred=enc.inverse_transform(y_pred),
  class_names=enc.categories_[0],
)

Creating the model h5 file:

In [None]:
# Write the trained model to an HDF5 file.
model.save(filepath="lifestyle_model.h5")

# Read the file back into a second model object and show its architecture.
new_model = tf.keras.models.load_model(filepath='lifestyle_model.h5')
new_model.summary()

# Evaluate both models on the same test data so the scores can be compared.
print("\nmain model result ///////////////////////////////////////////////////////")
model.evaluate(x=x_test, y=y_test)  # in-memory model

print("\nsaved and loaded model result ///////////////////////////////////////////////////////")
new_model.evaluate(x=x_test, y=y_test)  # model restored from disk