In [None]:
import os
import sys

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Useful function for dealing with data, and classical machine learning
from sklearn import preprocessing
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import shuffle

# Deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.models import load_model
from tensorflow.keras.layers.experimental.preprocessing import Normalization

# Put the sibling ../src directory on the import path (once) so that the
# project-local `helpers` module can be imported below.
module_path = os.path.abspath('../src')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

%load_ext autoreload
%autoreload 2

from helpers import (  # noqa E402
    read_data,
    show_basic_dataframe_info,
    plot_activity,
    create_segments_and_labels,
    save_converted_model)

# Set some standard parameters upfront
pd.options.display.float_format = '{:.1f}'.format
sns.set()  # Default seaborn look and feel
plt.style.use('ggplot')
print('keras version ', keras.__version__)
%matplotlib inline

From https://github.com/ni79ls/har-keras-cnn

In [None]:
# Length of one time segment, counted in steps.
TIME_PERIODS = 80
# Offset, in steps, between the starts of two consecutive segments. A value
# equal to TIME_PERIODS yields non-overlapping segments; the value below is
# half of TIME_PERIODS, so consecutive segments overlap.
STEP_DISTANCE = 40

Load, inspect and transform data



In [None]:
# Column names handed to read_data for the raw WISDM file
wisdm_columns = ['user-id', 'activity', 'timestamp',
                 'x-axis', 'y-axis', 'z-axis']

# Read the complete data set into a single DataFrame
df = read_data('../data/WISDM.txt', column_names=wisdm_columns)

# Print an overview of the loaded frame
show_basic_dataframe_info(df, 20)

In [None]:
# Activity names, in sorted order.
# Series.unique() would return them in order of first appearance; sorting
# (np.unique) makes LABELS[i] the name of encoded class i, because the
# LabelEncoder used further down assigns integer codes in sorted class order.
# LABELS is later used as an index-aligned list of class names.
LABELS = np.unique(df["activity"])
print(LABELS)

In [None]:
# One bar chart per column: how many rows each activity / each user contributes
count_plots = (
    ('activity', 'Training Examples by Activity Type'),
    ('user-id', 'Training Examples by User'),
)
for column, chart_title in count_plots:
    df[column].value_counts().plot(kind='bar', title=chart_title)
    plt.show()

In [None]:
# Plot the first 180 rows recorded for every activity
for activity in np.unique(df["activity"]):
    rows_for_activity = df.loc[df["activity"] == activity]
    plot_activity(activity, rows_for_activity.head(180))

In [None]:
# Name of the DataFrame column that holds the integer-encoded activity
LABEL = "ActivityEncoded"

# Encode the activity strings as integers and store them in df[LABEL].
# Note: this adds a column to `df` in place.
le = preprocessing.LabelEncoder()
activity_names = df["activity"].values.ravel()
df[LABEL] = le.fit_transform(activity_names)

In [None]:
# Split by user: rows whose user id is a multiple of 5 form the test set,
# all remaining rows form the training set.
is_test_user = df['user-id'] % 5 == 0
df_test = df[is_test_user]
df_train = df[~is_test_user]

# Round the three axis columns of the training set to 6 decimals
df_train = df_train.round(dict.fromkeys(['x-axis', 'y-axis', 'z-axis'], 6))

In [None]:
# Round the test set's axis columns to 6 decimals
df_test = df_test.round(dict.fromkeys(['x-axis', 'y-axis', 'z-axis'], 6))

# Cut both splits into segments (with one label per segment) so that they
# can be processed by the network; both calls share the same settings.
segment_args = (TIME_PERIODS, STEP_DISTANCE, LABEL)
x_train, y_train = create_segments_and_labels(df_train, *segment_args)
x_test, y_test = create_segments_and_labels(df_test, *segment_args)

In [None]:
# Shuffle segments and labels together (same permutation for both).
# A fixed random_state makes the permutation reproducible across
# "Restart & Run All"; this matters because Keras' validation_split takes the
# validation samples from the end of the arrays passed to fit().
print(x_train.shape, y_train.shape)
x_train, y_train = shuffle(x_train, y_train, random_state=42)
print(x_train.shape, y_train.shape)

In [None]:
# Class distribution of the two splits, side by side.
# Each split gets its own axes (drawing both on one axes overlays the bars)
# and its own title.
fig, (ax_train, ax_test) = plt.subplots(1, 2, figsize=(12, 4))
df_train['activity'].value_counts().plot(
    kind='bar',
    ax=ax_train,
    title='Training Examples by Activity Type')
df_test['activity'].value_counts().plot(
    kind='bar',
    ax=ax_test,
    title='Test Examples by Activity Type')
fig.tight_layout()
plt.show()

In [None]:
# Display descriptive statistics (DataFrame.describe) for the training split
df_train.describe()

In [None]:
# Report array shapes and sample counts
n_train_samples = x_train.shape[0]
n_test_samples = x_test.shape[0]
print('x_train shape: ', x_train.shape)
print(n_train_samples, 'training samples')
print(n_test_samples, 'test samples')
print('y_train shape: ', y_train.shape)

# Input dimensions come from axes 1 and 2 of x_train; the number of output
# classes is the number of classes the label encoder has seen.
num_time_periods, num_sensors = x_train.shape[1:3]
num_classes = len(le.classes_)
print(list(le.classes_))

In [None]:
# Shape of one input sample as passed to the first layer of the network
input_shape = (TIME_PERIODS, num_sensors)

# One-hot encode the training labels with a fixed width of num_classes.
# to_categorical is used instead of pd.get_dummies because get_dummies only
# creates columns for the values that actually occur in the array, so the
# width (and column meaning) would silently change if a class were missing.
# assumes y_train holds the integer codes produced by the label encoder
# — TODO confirm against create_segments_and_labels
y_train_one_hot = keras.utils.to_categorical(y_train, num_classes=num_classes)
print('New y_train shape: ', y_train_one_hot.shape)

In [None]:
# Display the (time steps, sensors) tuple that is used as the model's input shape
input_shape

# Create neural network model

In [None]:
# Inverse-frequency class weights: (number of samples) / (samples of class c).
# Kept for optional use as fit(class_weight=weights).
weights = {c: len(y_train) / (y_train == c).sum() for c in range(num_classes)}

# NETWORK PARAMETERS
# NOTE(review): apart from `dropout`, none of the names in this group is read
# by the model definition below, which uses literal layer sizes.
trainSplitRatio = 0.7  # split ratio for test and validation
window_size = TIME_PERIODS  # Length of time slice.
# Actitrac was recorded at 20Hz
numFilters1 = 100  # number of filters in first Conv1D layer
kernelSize = 10  # kernel size of the Conv1D layer
batchSize = 10
numNueronsFCL2 = 160  # number of filters in fully connected output layer
dropout = 0.5  # dropout rate of the Dropout layer placed before the softmax

# The normalization layer is adapted to the training data, but it is not part
# of the model as long as the add() call below stays commented out.
norm_layer = Normalization()
norm_layer.adapt(x_train)

# Conv1D -> MaxPooling1D -> Conv1D -> GlobalAveragePooling1D -> Dropout
# -> softmax over num_classes
model_m = Sequential()
# model_m.add(norm_layer)
model_m.add(Conv1D(14, 10, activation='relu', input_shape=input_shape))
# model_m.add(Conv1D(100, 10, activation='relu'))
model_m.add(MaxPooling1D(3))
model_m.add(Conv1D(16, 10, activation='relu'))
# model_m.add(Conv1D(160, 10, activation='relu'))
model_m.add(GlobalAveragePooling1D())
model_m.add(Dropout(dropout))
model_m.add(Dense(num_classes, activation='softmax'))
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model_m.summary()

# ModelCheckpoint saves a file whenever val_loss improves.
# EarlyStopping monitors the *training* accuracy: if it fails to improve for
# five consecutive epochs (patience=5), training stops early.
callbacks_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', save_best_only=True),
    keras.callbacks.EarlyStopping(monitor='accuracy', patience=5)
]

model_m.compile(loss='categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])

# Hyper-parameters
BATCH_SIZE = 200
EPOCHS = 150

# Fit the model

In [None]:
# If SAVED_MODEL_PATH is set in the environment, a previously saved model is
# loaded from that path and training is skipped. Otherwise the model is
# trained; validation_split is enabled so that val_loss / val_accuracy exist
# for the ModelCheckpoint callback and for the curves plotted afterwards.
if 'SAVED_MODEL_PATH' in os.environ:
    model_m = load_model(os.environ['SAVED_MODEL_PATH'])
else:
    history = model_m.fit(
        x_train,
        y_train_one_hot,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=callbacks_list,
        validation_split=0.2,
        # class_weight=weights,
        verbose=1)

    # Accuracy (green) and loss (red) per epoch; dashed lines are training
    # data, solid lines are validation data.
    curves = (
        ('accuracy', "g--", "Accuracy of training data"),
        ('val_accuracy', "g", "Accuracy of validation data"),
        ('loss', "r--", "Loss of training data"),
        ('val_loss', "r", "Loss of validation data"),
    )
    plt.figure(figsize=(6, 4))
    for history_key, line_format, legend_label in curves:
        plt.plot(history.history[history_key], line_format,
                 label=legend_label)
    plt.title('Model Accuracy and Loss')
    plt.ylabel('Accuracy and Loss')
    plt.xlabel('Training Epoch')
    plt.ylim(0)
    plt.legend()
    plt.show()

In [None]:
# You can save the trained model by running the following:
# from tensorflow.keras.models import save_model
# save_model(model_m, '../models/cnn/saved')

# Check against test data

In [None]:
# One-hot encode the test labels with a fixed width of num_classes so that
# they always line up with the model's softmax outputs. pd.get_dummies would
# only create columns for classes that occur in the test split.
# assumes y_test holds the integer codes produced by the label encoder
# — TODO confirm against create_segments_and_labels
y_test_neural = keras.utils.to_categorical(y_test, num_classes=num_classes)

# evaluate() returns the loss followed by the compiled metrics
score = model_m.evaluate(x_test, y_test_neural, verbose=1)

print(f"\nAccuracy on test data: {score[1] * 100:.1f}%")
print(f"\nLoss on test data: {score[0]:.2f}")

In [None]:
y_pred_test = model_m.predict(x_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)

# Rows/columns of the confusion matrix follow the integer class ids, so the
# axis names must come from le.classes_ (class id -> name) rather than from a
# list in order of first appearance. Passing `labels` explicitly keeps the
# matrix num_classes x num_classes even if a class is absent from the test
# data or is never predicted.
class_ids = np.arange(len(le.classes_))
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test, max_y_pred_test, labels=class_ids),
    columns=le.classes_, index=le.classes_)

# Normalise every column (predicted class) by its column total. The axis is
# given explicitly so the result does not depend on how np.sum reduces a
# DataFrame.
ax = sns.heatmap(cf_matrix / cf_matrix.sum(axis=0), annot=True, fmt='.2%',
                 cmap='Greens')
ax.set(xlabel='Predicted activity', ylabel='True activity')

print(classification_report(y_test, max_y_pred_test))

In [None]:
def representative_dataset():
    """Yield sample inputs for the TFLite converter.

    Takes the first 100 entries of the module-level ``x_train``, one entry
    per batch, and yields each as a single-element list holding a float32
    tensor.
    """
    sample_batches = tf.data.Dataset.from_tensor_slices(x_train).batch(1)
    for sample in sample_batches.take(100):
        yield [tf.cast(sample, tf.float32)]

In [None]:
# Convert the Keras model to TFLite using int8 built-in ops, with int8 input
# and output tensors; the representative dataset provides sample inputs for
# the converter.
converter = tf.lite.TFLiteConverter.from_keras_model(model_m)
converter.representative_dataset = representative_dataset
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = converter.inference_output_type = tf.int8  # or tf.uint8
converted = converter.convert()

# NOTE(review): presumably the labels are stored in the order given here, so
# they need to be ordered by encoded class id (the model's output index) —
# confirm against save_converted_model and the definition of LABELS.
save_converted_model(converted, "wisdm_tflite", LABELS)

In [None]:
# Sanity check: load the converted model from disk and print the details of
# its input and output tensors.
interpreter = tf.lite.Interpreter(model_path="../models/wisdm_tflite")
interpreter.allocate_tensors()
for caption, details in (("Input:", interpreter.get_input_details()),
                         ("Output:", interpreter.get_output_details())):
    print(caption, details)

In [None]:
# Print the 'quantization' entry of the first input tensor, then of the
# first output tensor.
first_input = interpreter.get_input_details()[0]
first_output = interpreter.get_output_details()[0]
for tensor_details in (first_input, first_output):
    print(tensor_details['quantization'])