In [None]:
import os
import sys

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Useful functions for dealing with data, and classical machine learning
from sklearn import preprocessing
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import class_weight, shuffle

# Deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.models import load_model, save_model
from tensorflow.keras.layers.experimental.preprocessing import Normalization

# Put the project's ``src`` directory on the import path (once) so that the
# local ``helpers`` module can be imported from this notebook.
module_path = os.path.abspath('../src')
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. ``helpers``) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from helpers import (read_data,  # noqa E402
                     create_segments_and_labels,
                     save_converted_model,
                     compare)

# Set some standard parameters upfront
pd.options.display.float_format = '{:.1f}'.format
sns.set()  # Default seaborn look and feel
plt.style.use('ggplot')
print('keras version ', keras.__version__)
%matplotlib inline

In [None]:
# Number of consecutive steps (samples) that make up one time segment
TIME_PERIODS = 80
# Stride, in steps, from the start of one segment to the start of the next.
# When this equals TIME_PERIODS the segments do not overlap; any smaller value
# makes consecutive segments share samples.
STEP_DISTANCE = 40

In [None]:
# Load the two local recordings
df1 = read_data('../data/data_adem.txt')
df2 = read_data('../data/data_mathis.txt')
# Drop the 'Jogging' rows from the second recording
df2 = df2[df2['activity'] != 'Jogging']

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# the row labels of df1 and df2 are carried over unchanged and can collide,
# which makes any later label-based lookup ambiguous.
df = pd.concat([df1, df2], ignore_index=True)
df

In [None]:
# Name of the DataFrame column that will hold the integer-encoded activity
LABEL = "ActivityEncoded"

# Fit a LabelEncoder on the activity names and store the resulting integer
# codes next to them in the DataFrame
le = preprocessing.LabelEncoder()
activity_names = df["activity"].values.ravel()
df[LABEL] = le.fit_transform(activity_names)

# Class names known to the encoder, in encoded order
LABELS = le.classes_
print(f"Labels: {LABELS}")

In [None]:
# Display the frame again to inspect the newly added encoded label column
df

In [None]:
# Load the previously saved Keras model from disk; it is evaluated as-is on
# our data before any re-training happens further down.
model_m = load_model('../models/cnn/saved')

In [None]:
# Round the three axis columns to 6 decimal places
df = df.round(dict.fromkeys(('x-axis', 'y-axis', 'z-axis'), 6))

In [None]:
# Cut the DataFrame into segments and derive the label data that goes with
# them, using the window length and stride defined above.
# NOTE(review): presumably x has shape (n_segments, TIME_PERIODS, 3) and y
# holds one encoded label per segment — confirm against helpers.
x, y = create_segments_and_labels(df,
                                  TIME_PERIODS,
                                  STEP_DISTANCE,
                                  LABEL)

Test the model on our data

In [None]:
# Evaluate the loaded model on every segment of our own recordings
x_test, y_test = x, y
# One-hot encode with a fixed width of len(LABELS) columns. pd.get_dummies
# only creates columns for the classes that actually occur in y_test, so a
# class without any segment would make the target narrower than expected and
# shift the remaining columns.
y_test_neural = keras.utils.to_categorical(y_test, num_classes=len(LABELS))

# NOTE(review): assumes the loaded model was compiled with
# metrics=['accuracy'], so that score is [loss, accuracy] — confirm.
score = model_m.evaluate(x_test, y_test_neural, verbose=1)

print(f"\nAccuracy on test data: {score[1] * 100:.1f}%")
print(f"\nLoss on test data: {score[0]:.2f}")

In [None]:
y_pred_test = model_m.predict(x_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)

# Pass the full list of class ids so the matrix always has one row and one
# column per entry of LABELS, even when a class occurs neither in y_test nor
# in the predictions (otherwise the DataFrame construction below fails with
# a shape mismatch).
class_ids = np.arange(len(LABELS))
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test, max_y_pred_test, labels=class_ids),
    columns=LABELS, index=LABELS)
# Normalise every column (predicted class) by its own total. This is what
# np.sum(<DataFrame>) did implicitly; spelling out axis=0 keeps the result
# independent of the pandas version's default reduction axis.
sns.heatmap(cf_matrix / cf_matrix.sum(axis=0), annot=True, fmt='.2%',
            cmap='Greens')

# Report per-class metrics using the activity names instead of bare ids
print(classification_report(y_test, max_y_pred_test, labels=class_ids,
                            target_names=LABELS))

Train the model on our data

In [None]:
# Shuffle segments and labels together. The fixed random_state makes the
# train/test split (and therefore every score below) reproducible across
# kernel restarts.
# NOTE(review): if STEP_DISTANCE < TIME_PERIODS produces overlapping segments,
# a random split places partially identical windows in both sets — confirm
# this is acceptable for the reported test scores.
x, y = shuffle(x, y, random_state=42)

# First 80 % of the shuffled segments for training, the rest for testing
split_index = int(0.8 * len(x))
x_train2, x_test2 = x[:split_index], x[split_index:]
y_train2, y_test2 = y[:split_index], y[split_index:]
# Fixed-width one-hot targets: pd.get_dummies would only create columns for
# classes that happen to be present in this particular split.
y_train_one_hot2 = keras.utils.to_categorical(y_train2,
                                              num_classes=len(LABELS))

In [None]:
# Set this flag to True to replace the loaded model with a freshly built
# network (new, randomly initialised weights) in the model-building cell
# below; with False, training continues from the loaded model.
use_new_weights = True

In [None]:
# Balanced class weights computed from the training labels
present_classes = np.unique(y_train2)
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=present_classes,
                                                  y=y_train2)
# Key every weight by the class id it belongs to. Using the position in
# class_weights as key is only correct when the classes present in y_train2
# are exactly 0..n-1; if one class id is missing, all later weights would be
# attached to the wrong class.
dict_weights = {int(class_id): weight
                for class_id, weight in zip(present_classes, class_weights)}
dict_weights

In [None]:
# Build and compile a fresh 1D CNN when requested; otherwise the previously
# loaded model_m is kept untouched.
if use_new_weights:
    num_time_periods, num_sensors = x_train2.shape[1], x_train2.shape[2]
    num_classes = le.classes_.size
    input_shape = (TIME_PERIODS, num_sensors)

    print("\n--- Create neural network model ---\n")

    # NOTE(review): this Normalization layer is adapted to the training data
    # but never added to the model below, so the network sees raw inputs.
    # Presumably intentional (the converted model is fed raw values later) —
    # confirm, then either wire it in or delete it.
    norm_layer = Normalization()
    norm_layer.adapt(x_train2)

    # 1D CNN neural network
    model_m = Sequential()
    model_m.add(Conv1D(14, 10, activation='relu', input_shape=input_shape))
    model_m.add(MaxPooling1D(3))
    model_m.add(Conv1D(16, 10, activation='relu'))
    model_m.add(GlobalAveragePooling1D())
    model_m.add(Dropout(0.5))
    model_m.add(Dense(num_classes, activation='softmax'))
    # summary() prints the table itself and returns None; wrapping it in
    # print() would emit an extra "None" line.
    model_m.summary()

    # The training callbacks are defined in the training cell right before
    # fit(); the copy that used to live here was always overwritten there.
    model_m.compile(loss='categorical_crossentropy',
                    optimizer='adam', metrics=['accuracy'])

In [None]:
BATCH_SIZE = 50
EPOCHS = 150

# Write a checkpoint whenever the validation loss improves, and stop early
# once the training accuracy has not improved for 5 epochs
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
    monitor='val_loss', save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='accuracy',
                                                  patience=5)
callbacks_list = [checkpoint_cb, early_stopping_cb]

# Class weighting is currently switched off; pass class_weight=dict_weights
# to fit() to enable it.
history = model_m.fit(x_train2, y_train_one_hot2,
                      batch_size=BATCH_SIZE, epochs=EPOCHS,
                      callbacks=callbacks_list,
                      validation_split=0.2, verbose=1)

# Accuracy (green) and loss (red) per epoch; dashed lines are training data,
# solid lines are validation data
curves = (
    ('accuracy', "g--", "Accuracy of training data"),
    ('val_accuracy', "g", "Accuracy of validation data"),
    ('loss', "r--", "Loss of training data"),
    ('val_loss', "r", "Loss of validation data"),
)
plt.figure(figsize=(6, 4))
for history_key, line_format, legend_label in curves:
    plt.plot(history.history[history_key], line_format, label=legend_label)
plt.title('Model Accuracy and Loss')
plt.ylabel('Accuracy and Loss')
plt.xlabel('Training Epoch')
plt.ylim(0)
plt.legend()
plt.show()

In [None]:
# Fixed-width one-hot targets for the held-out split. pd.get_dummies only
# creates columns for classes present in y_test2; with a 20 % split a rare
# class can be missing, which would make the target narrower than the
# model's output layer.
y_test_neural2 = keras.utils.to_categorical(y_test2,
                                            num_classes=len(LABELS))

# model_m is compiled with metrics=['accuracy'] when it is rebuilt above, so
# score is [loss, accuracy] in that case.
score = model_m.evaluate(x_test2, y_test_neural2, verbose=1)

print(f"\nAccuracy on test data: {score[1] * 100:.1f}%")
print(f"\nLoss on test data: {score[0]:.2f}")

In [None]:
y_pred_test2 = model_m.predict(x_test2)
# Take the class with the highest probability from the test predictions
max_y_pred_test2 = np.argmax(y_pred_test2, axis=1)

# Pass the full list of class ids so the matrix always has one row and one
# column per entry of LABELS, even when a class occurs neither in y_test2 nor
# in the predictions (otherwise the DataFrame construction below fails with
# a shape mismatch).
class_ids = np.arange(len(LABELS))
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test2, max_y_pred_test2, labels=class_ids),
    columns=LABELS, index=LABELS)
# Normalise every column (predicted class) by its own total. This is what
# np.sum(<DataFrame>) did implicitly; spelling out axis=0 keeps the result
# independent of the pandas version's default reduction axis.
sns.heatmap(cf_matrix / cf_matrix.sum(axis=0), annot=True, fmt='.2%',
            cmap='Greens')

# Report per-class metrics using the activity names instead of bare ids
print(classification_report(y_test2, max_y_pred_test2, labels=class_ids,
                            target_names=LABELS))

In [None]:
# test the model on the WISDM data
df_test_3 = read_data('../data/WISDM.txt', column_names=['user-id',
                                                         'activity',
                                                         'timestamp',
                                                         'x-axis',
                                                         'y-axis',
                                                         'z-axis'])
# Reuse the encoder exactly as it was fitted on our own recordings.
# Re-fitting it here (fit_transform) would renumber the classes from the
# WISDM activity names, so the ids would no longer line up with the model's
# output units or with LABELS. Activities the encoder has never seen cannot
# be mapped by transform(), so those rows are dropped first.
known_activity = df_test_3["activity"].isin(le.classes_)
if not known_activity.all():
    print(f"Dropping {int((~known_activity).sum())} WISDM rows whose "
          f"activity is not one of {list(le.classes_)}")
df_test_3 = df_test_3[known_activity].copy()
df_test_3[LABEL] = le.transform(df_test_3["activity"].values.ravel())
df_test_3 = df_test_3.round({'x-axis': 6, 'y-axis': 6, 'z-axis': 6})

x_test_3, y_test_3 = create_segments_and_labels(df_test_3,
                                                TIME_PERIODS,
                                                STEP_DISTANCE,
                                                LABEL)

# Fixed-width one-hot targets (pd.get_dummies only creates columns for the
# classes that actually occur in y_test_3)
y_test_neural_3 = keras.utils.to_categorical(y_test_3,
                                             num_classes=len(le.classes_))

score = model_m.evaluate(x_test_3, y_test_neural_3, verbose=1)

print(f"\nAccuracy on test data: {score[1] * 100:.1f}%")
print(f"\nLoss on test data: {score[0]:.2f}")

In [None]:
y_pred_test_3 = model_m.predict(x_test_3)
# Take the class with the highest probability from the test predictions
max_y_pred_test_3 = np.argmax(y_pred_test_3, axis=1)

# Pass the full list of class ids so the matrix always has one row and one
# column per entry of LABELS, even when a class occurs neither in y_test_3
# nor in the predictions (otherwise the DataFrame construction below fails
# with a shape mismatch).
class_ids = np.arange(len(LABELS))
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test_3, max_y_pred_test_3, labels=class_ids),
    columns=LABELS, index=LABELS)
# Normalise every column (predicted class) by its own total. This is what
# np.sum(<DataFrame>) did implicitly; spelling out axis=0 keeps the result
# independent of the pandas version's default reduction axis.
sns.heatmap(cf_matrix / cf_matrix.sum(axis=0), annot=True, fmt='.2%',
            cmap='Greens')

# Report per-class metrics using the activity names instead of bare ids
print(classification_report(y_test_3, max_y_pred_test_3, labels=class_ids,
                            target_names=LABELS))

In [None]:
# Persist the trained model next to the original one (under a new name, so
# the model loaded at the top of the notebook is not overwritten)
model_m.save('../models/cnn/saved2')

In [None]:
def representative_dataset():
    """Yield calibration inputs for the TFLite converter.

    Iterates over the first 100 entries of ``x_train2`` in batches of one and
    yields each batch, cast to float32, wrapped in a single-element list.
    """
    calibration_batches = (
        tf.data.Dataset.from_tensor_slices(x_train2).batch(1).take(100)
    )
    for sample in calibration_batches:
        yield [tf.cast(sample, tf.float32)]

In [None]:
# Convert the Keras model to a TFLite model restricted to int8 built-in ops
converter = tf.lite.TFLiteConverter.from_keras_model(model_m)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# int8 tensors at the model boundary as well (input first, then output)
converter.inference_input_type = converter.inference_output_type = tf.int8
# Generator supplying the sample inputs used during conversion
converter.representative_dataset = representative_dataset

converted = converter.convert()
save_converted_model(converted, "paf", LABELS)

In [None]:
# Sanity check: the converted model can be loaded by the TFLite interpreter;
# print the description of its input and output tensors
interpreter = tf.lite.Interpreter(model_content=converted)
interpreter.allocate_tensors()
for caption, tensor_details in (
        ("Input:", interpreter.get_input_details()),
        ("Output:", interpreter.get_output_details())):
    print(caption, tensor_details)

In [None]:
# Quantization entry of the first input tensor, then of the first output
# tensor
for tensor_details in (interpreter.get_input_details(),
                       interpreter.get_output_details()):
    print(tensor_details[0]['quantization'])

In [None]:
# Hard-coded reference input: a flat list of values that is reshaped below
# into a single window of shape (1, 80, 3).
input_verif = [-8.334668,0.831072,0.524509,-19.172131,1.816622,0.948428,-17.723145,1.662144,3.489543,-12.130774,1.100511,1.718426,-7.408993,0.941243,1.472937,-1.081351,1.524430,-0.085023,-1.283731,0.783171,-0.443079,-12.764256,0.502954,0.350870,-19.416424,0.639470,5.247488,-13.894706,0.833467,4.226012,-9.366921,0.822689,2.887196,-5.401967,0.730481,3.847599,-0.404758,1.591491,1.178350,-4.155359,1.208287,1.638193,-17.365089,2.405797,3.946992,-17.921930,1.601071,6.442603,-10.030342,1.055006,4.267925,-8.123906,2.240541,3.981720,-2.559078,2.421365,1.051414,0.593965,1.398691,0.317340,-10.484198,-0.439486,0.239502,-20.066671,1.857338,-1.464554,-16.021482,-0.100591,2.371069,-8.672366,-0.337698,0.191602,-2.295626,-0.199984,1.095721,-5.714517,-2.435735,-0.646655,-11.906839,0.203577,-2.343527,-14.152170,-3.678750,0.962798,-13.462405,-1.975891,1.068179,-7.655680,-3.787723,1.781895,-5.657036,-1.057401,0.929268,-6.033054,-2.298021,-0.555645,-10.545271,-2.030977,-1.577120,-14.246774,2.463278,-0.635878,-13.765375,-5.210365,2.359094,-13.214520,-0.226329,1.277743,-5.967191,-2.768643,1.622626,-4.203259,0.283810,-0.734073,-6.252199,-3.042872,-1.723217,-11.626622,0.035925,-1.023871,-17.051342,-2.159110,-0.908910,-14.784455,-1.748364,2.070494,-11.572735,-1.330433,0.234712,-5.960006,-1.195115,0.073048,-2.860851,-0.488584,-0.243094,-5.417534,-2.947072,-2.317181,-13.610896,2.215393,-1.580713,-15.712525,-6.367159,0.041913,-15.144906,-2.245331,1.410666,-9.183702,-2.360292,1.040636,-3.923042,-0.378413,-0.366438,-4.497847,-2.838098,-0.402363,-9.849518,-2.870431,-0.754431,-17.573456,-1.240620,-2.539918,-13.907879,-5.301376,2.896776,-10.939252,-0.786764,0.373623,-5.689369,-1.478925,-2.068099,-3.026107,-0.364043,1.987866,-8.384963,-3.229684,-0.647853,-16.557968,2.420167,-1.322051,-15.367642,-4.514612,2.994972,-11.208692,-0.884960,1.448987,-7.555089,-0.979563,-2.380649,-3.993695,-0.129331,3.190166,-4.402046,-3.356620,0.057480,-13.699512,-0.087418,-2.941084,-17.459692,-3.982918,-0.402363,-12.815749,
-2.074087,3.271597,-9.194480,-0.385598,-1.741179,-4.334985,-0.946033,-0.186812,-4.257147,-2.180665,1.574725,-12.394226,-3.100353,-0.192799,-17.444124,-4.382886,-0.676593,-12.507990,-2.948269,4.605623,-9.508227,-1.677711,0.311353,-3.860771,-2.405797,0.219144,-4.925358,-2.301614,-0.247885,-12.507990,-0.043110,-1.356779,-12.484039,-3.518284,0.428708,-8.776549,-2.054927,1.304088,]
# One window of 80 rows with 3 values each
input_verif = np.array(input_verif).reshape((1, 80, 3))
# Show the first five rows as a quick visual check
print(input_verif[0, :5])
# NOTE(review): output_verif is not used anywhere in the visible cells;
# presumably these are the expected output values for input_verif — confirm
# where they come from, or compare against them explicitly.
output_verif = [0.964844,0.000000,0.000000,0.000000,0.027344,0.007812,]

print(LABELS)
# NOTE(review): compare() comes from helpers; presumably it feeds the same
# input to the Keras model and to the TFLite interpreter — confirm.
compare(input_verif, model_m, interpreter)

In [None]:
# Build one (1, 80, 3) window from 80 consecutive 'Walking' rows, skipping
# the first 1000 of them, and run it through compare()
axis_columns = ["x-axis", "y-axis", "z-axis"]
walking_rows = df[df["activity"] == "Walking"]
input_test = walking_rows[1000:][:80][axis_columns].to_numpy().reshape(
    (1, 80, 3))
print(input_test[0, :2])
print(LABELS)
comparison = compare(input_test, model_m, interpreter)
print("Comparison:", comparison)

# Prediction of the Keras model for the first 80 rows of the frame
first_window = df[0:80][axis_columns].values.reshape((-1, 80, 3), order='C')
model_m.predict(first_window)


In [None]:
# Show the 80 entries of x starting at position 280
x[280:360]

In [None]:
# Predict a class for every segment in x
r = model_m.predict(x)
# Keep and show the predicted class ids; previously the argmax result was
# computed and thrown away because it was not the last expression of the cell
predicted_classes = np.argmax(r, axis=1)
print(predicted_classes)
# df.head must be called: printing the bare attribute shows the bound-method
# repr instead of the first rows of the frame
print(df.head(), x[0])