In [None]:
import os
import sys

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Useful function for dealing with data, and classical machine learning
from sklearn import preprocessing
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import class_weight, shuffle

# Deep learning
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers.experimental.preprocessing import Normalization

module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path)

%load_ext autoreload
%autoreload 2

from helpers import (read_data,  # noqa E402
                     create_segments_and_labels,
                     save_converted_model,
                     compare,
                     plot_activity)

# Set some standard parameters upfront
pd.options.display.float_format = '{:.1f}'.format
sns.set()  # Default seaborn look and feel
plt.style.use('ggplot')
print('keras version ', keras.__version__)
%matplotlib inline

In [None]:
# Load the two additional recordings and tag every row of each one with a
# fixed user-id.
# NOTE(review): if WISDM.txt already contains user ids 34/35, these rows
# become indistinguishable from those users - confirm the ids are free.
df1 = read_data('../data/data_adem.txt')
df1['user-id'] = 34

df2 = read_data('../data/data_mathis.txt')
# Drop the 'Jogging' rows of this recording. The explicit .copy() makes df2
# an independent frame, so the column assignment below does not write into
# a slice of the original (pandas SettingWithCopyWarning).
df2 = df2[df2['activity'] != 'Jogging'].copy()
df2['user-id'] = 35

# The WISDM file is read with an explicit column list
df3 = read_data('../data/WISDM.txt', ['user-id',
                                      'activity',
                                      'timestamp',
                                      'x-axis',
                                      'y-axis',
                                      'z-axis'])

# ignore_index=True gives the combined frame one unique, contiguous index
# instead of three overlapping per-file indexes
df = pd.concat([df1, df2, df3], ignore_index=True)
df

In [None]:
# Compare the first 1000 'Walking' samples of the three data sources:
# the two extra recordings and user 20 of the WISDM data
df1W = df1[df1["activity"] == 'Walking'][:1000]
df2W = df2[df2["activity"] == 'Walking'][:1000]
df3W = df3[(df3["activity"] == 'Walking') & (df3["user-id"] == 20)][:1000]
# One plot per source (the first call previously re-plotted df2W, so df1W
# was never shown)
plot_activity("Walking", df1W)
plot_activity("Walking", df2W)
plot_activity("Walking", df3W)

In [None]:
# Compare the first 1000 'Downstairs' samples of the three data sources.
# The df1W/df2W/df3W names are reused from the walking comparison and now
# hold 'Downstairs' data.
df1W = df1[df1["activity"] == 'Downstairs'][:1000]
df2W = df2[df2["activity"] == 'Downstairs'][:1000]
df3W = df3[(df3["activity"] == 'Downstairs') & (df3["user-id"] == 20)][:1000]
# One plot per source (the first call previously re-plotted df2W, so df1W
# was never shown)
plot_activity("Downstairs", df1W)
plot_activity("Downstairs", df2W)
plot_activity("Downstairs", df3W)

In [None]:
# Length of one time segment, counted in steps
TIME_PERIODS = 80
# Stride between the starts of two consecutive segments. A stride equal to
# TIME_PERIODS means the segments do not overlap; a smaller one makes them
# overlap.
STEP_DISTANCE = 40

In [None]:
# Collect the distinct activity names. They are sorted so that position i in
# LABELS is the activity that LabelEncoder maps to the integer i (the encoder
# sorts its classes); the plain unique() order is first-appearance order and
# would mislabel anything indexed by encoded class id.
LABELS = np.sort(df["activity"].unique())
print(LABELS)

In [None]:
# Name of the DataFrame column that receives the integer-encoded activity
LABEL = "ActivityEncoded"

# Fit a LabelEncoder on the activity strings, then store the resulting
# integer codes as a new column of the existing DataFrame
activity_names = df["activity"].values.ravel()
le = preprocessing.LabelEncoder()
le.fit(activity_names)
df[LABEL] = le.transform(activity_names)
df

In [None]:
# Fixed seed so that the shuffle, and therefore the train/test split and all
# metrics derived from it, is reproducible across runs
RANDOM_SEED = 42
# Fraction of the segments used for training; the rest is the test set
TRAIN_FRACTION = 0.8

# Cut the recordings into segments with one label per segment
x, y = create_segments_and_labels(
    df,
    TIME_PERIODS,
    STEP_DISTANCE,
    LABEL)
# NOTE(review): with STEP_DISTANCE < TIME_PERIODS neighbouring segments
# presumably share samples, so a random split can place overlapping windows
# in both train and test - confirm against create_segments_and_labels.
x, y = shuffle(x, y, random_state=RANDOM_SEED)

# Split once at a single index so x and y are always cut at the same place
split_at = int(TRAIN_FRACTION * len(x))
x_train, x_test = x[:split_at], x[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

# One-hot encode with a fixed number of columns (one per encoder class).
# pd.get_dummies would size the matrix from the classes that happen to be
# present in y_train, which can disagree with the model's output layer.
y_train_one_hot = keras.utils.to_categorical(
    y_train, num_classes=len(le.classes_))

In [None]:
# Switch for the model-building cells: when True, a fresh model with newly
# initialised (random) weights is created and trained
use_new_weights = True

In [None]:
# Build and compile a fresh 1D CNN classifier.
# NOTE(review): when use_new_weights is False nothing in this cell creates
# model_m, so the training/evaluation cells need a model from elsewhere.
if use_new_weights:
    # Take the input shape from the training tensor itself so the model
    # always matches the data it is trained on
    num_time_periods, num_sensors = x_train.shape[1], x_train.shape[2]
    num_classes = le.classes_.size
    input_shape = (num_time_periods, num_sensors)

    print("\n--- Create neural network model ---\n")

    # NOTE(review): the normalization layer is adapted to the training data
    # but is not part of the model (the add() call below is commented out)
    norm_layer = Normalization()
    norm_layer.adapt(x_train)

    # 1D CNN: two convolutions, max pooling, a third convolution, global
    # average pooling, dropout and a softmax output with one unit per class
    model_m = Sequential()
    # model_m.add(norm_layer)
    model_m.add(Conv1D(100, 10, activation='relu', input_shape=input_shape))
    model_m.add(Conv1D(100, 10, activation='relu'))
    model_m.add(MaxPooling1D(3))
    model_m.add(Conv1D(160, 10, activation='relu'))
    model_m.add(GlobalAveragePooling1D())
    model_m.add(Dropout(0.5))
    model_m.add(Dense(num_classes, activation='softmax'))
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print()
    model_m.summary()

    # The training callbacks are defined in the cell that calls fit()

    model_m.compile(loss='categorical_crossentropy',
                    optimizer='adam', metrics=['accuracy'])

In [None]:
# Compute 'balanced' class weights from the training labels so that rare
# activities are not drowned out by frequent ones during training
class_ids = np.unique(y_train)
# Keyword arguments are required here: scikit-learn >= 1.0 rejects
# positional `classes` / `y`
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=class_ids,
                                                  y=y_train)
# Map each class id (not its position in the weight array) to its weight,
# using plain int keys
dict_weights = {int(class_id): weight
                for class_id, weight in zip(class_ids, class_weights)}
dict_weights

In [None]:
BATCH_SIZE = 50
EPOCHS = 5

# Checkpoint the model whenever the validation loss improves, and stop
# training as soon as the training accuracy fails to improve for one epoch
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
    monitor='val_loss', save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='accuracy',
                                                  patience=1)
callbacks_list = [checkpoint_cb, early_stopping_cb]

# Train with class weighting; 20% of the training data is held out by Keras
# for validation
history = model_m.fit(
    x_train,
    y_train_one_hot,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=callbacks_list,
    validation_split=0.2,
    verbose=1,
    class_weight=dict_weights,
)

# Plot accuracy (green) and loss (red) per epoch; dashed lines are the
# training curves, solid lines the validation curves
fig, ax = plt.subplots(figsize=(6, 4))
curves = (
    ('accuracy', "g--", "Accuracy of training data"),
    ('val_accuracy', "g", "Accuracy of validation data"),
    ('loss', "r--", "Loss of training data"),
    ('val_loss', "r", "Loss of validation data"),
)
for metric, line_format, caption in curves:
    ax.plot(history.history[metric], line_format, label=caption)
ax.set_title('Model Accuracy and Loss')
ax.set_ylabel('Accuracy and Loss')
ax.set_xlabel('Training Epoch')
ax.set_ylim(0)
ax.legend()
plt.show()

In [None]:
y_pred_test = model_m.predict(x_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)

# Encoded class ids and their names, both in LabelEncoder order, so the
# matrix axes and the report rows are labelled with the right activity
class_ids = np.arange(len(le.classes_))
class_names = le.classes_

# Passing labels= keeps the matrix square with one row/column per class even
# if a class is missing from y_test or from the predictions
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test, max_y_pred_test, labels=class_ids),
    columns=class_names, index=class_names)

# Normalise each row explicitly: a cell is the share of segments of the true
# activity (row) predicted as the column activity. np.sum() on a DataFrame
# is avoided because its reduction axis depends on the pandas version.
# Rows without any samples are divided by 1 to avoid 0/0.
row_totals = cf_matrix.sum(axis=1).replace(0, 1)
ax = sns.heatmap(cf_matrix.div(row_totals, axis=0), annot=True, fmt='.2%',
                 cmap='Greens')
ax.set_xlabel('Predicted activity')
ax.set_ylabel('True activity')
plt.show()

print(classification_report(y_test, max_y_pred_test,
                            labels=class_ids, target_names=class_names))

In [None]:
# Split by user instead of at random: users with an id above the threshold
# are used for training, the remaining users are held out for testing
MAX_TEST_USER_ID = 10
df_train = df[df['user-id'] > MAX_TEST_USER_ID]
df_test = df[df['user-id'] <= MAX_TEST_USER_ID]

x_train, y_train = create_segments_and_labels(df_train,
                                              TIME_PERIODS,
                                              STEP_DISTANCE,
                                              LABEL)
# Segment the held-out users as well. Without this, evaluation would reuse
# the x_test/y_test of the earlier random split, which contain segments of
# the training users.
x_test, y_test = create_segments_and_labels(df_test,
                                            TIME_PERIODS,
                                            STEP_DISTANCE,
                                            LABEL)

# One-hot encode with a fixed number of columns (one per encoder class)
# rather than one per class that happens to occur in y_train
y_train_one_hot = keras.utils.to_categorical(
    y_train, num_classes=len(le.classes_))

In [None]:
# Compute 'balanced' class weights from the training labels so that rare
# activities are not drowned out by frequent ones during training
class_ids = np.unique(y_train)
# Keyword arguments are required here: scikit-learn >= 1.0 rejects
# positional `classes` / `y`
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=class_ids,
                                                  y=y_train)
# Map each class id (not its position in the weight array) to its weight,
# using plain int keys
dict_weights = {int(class_id): weight
                for class_id, weight in zip(class_ids, class_weights)}
dict_weights

In [None]:
# Build and compile a fresh 1D CNN classifier for the current x_train.
# NOTE(review): when use_new_weights is False this cell creates nothing and
# model_m keeps whatever it was bound to before.
if use_new_weights:
    # Take the input shape from the training tensor itself so the model
    # always matches the data it is trained on
    num_time_periods, num_sensors = x_train.shape[1], x_train.shape[2]
    num_classes = le.classes_.size
    input_shape = (num_time_periods, num_sensors)

    print("\n--- Create neural network model ---\n")

    # NOTE(review): the normalization layer is adapted to the training data
    # but is not part of the model (the add() call below is commented out)
    norm_layer = Normalization()
    norm_layer.adapt(x_train)

    # 1D CNN: two convolutions, max pooling, a third convolution, global
    # average pooling, dropout and a softmax output with one unit per class
    model_m = Sequential()
    # model_m.add(norm_layer)
    model_m.add(Conv1D(100, 10, activation='relu', input_shape=input_shape))
    model_m.add(Conv1D(100, 10, activation='relu'))
    model_m.add(MaxPooling1D(3))
    model_m.add(Conv1D(160, 10, activation='relu'))
    model_m.add(GlobalAveragePooling1D())
    model_m.add(Dropout(0.5))
    model_m.add(Dense(num_classes, activation='softmax'))
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print()
    model_m.summary()

    # The training callbacks are defined in the cell that calls fit()

    model_m.compile(loss='categorical_crossentropy',
                    optimizer='adam', metrics=['accuracy'])

In [None]:
BATCH_SIZE = 50
EPOCHS = 5

# Checkpoint the model whenever the validation loss improves, and stop
# training as soon as the training accuracy fails to improve for one epoch
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
    monitor='val_loss', save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='accuracy',
                                                  patience=1)
callbacks_list = [checkpoint_cb, early_stopping_cb]

# Train with class weighting; 20% of the training data is held out by Keras
# for validation
history = model_m.fit(
    x_train,
    y_train_one_hot,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=callbacks_list,
    validation_split=0.2,
    verbose=1,
    class_weight=dict_weights,
)

# Plot accuracy (green) and loss (red) per epoch; dashed lines are the
# training curves, solid lines the validation curves
fig, ax = plt.subplots(figsize=(6, 4))
curves = (
    ('accuracy', "g--", "Accuracy of training data"),
    ('val_accuracy', "g", "Accuracy of validation data"),
    ('loss', "r--", "Loss of training data"),
    ('val_loss', "r", "Loss of validation data"),
)
for metric, line_format, caption in curves:
    ax.plot(history.history[metric], line_format, label=caption)
ax.set_title('Model Accuracy and Loss')
ax.set_ylabel('Accuracy and Loss')
ax.set_xlabel('Training Epoch')
ax.set_ylim(0)
ax.legend()
plt.show()

In [None]:
y_pred_test = model_m.predict(x_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)

# Encoded class ids and their names, both in LabelEncoder order, so the
# matrix axes and the report rows are labelled with the right activity
class_ids = np.arange(len(le.classes_))
class_names = le.classes_

# Passing labels= keeps the matrix square with one row/column per class even
# if a class is missing from y_test or from the predictions
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test, max_y_pred_test, labels=class_ids),
    columns=class_names, index=class_names)

# Normalise each row explicitly: a cell is the share of segments of the true
# activity (row) predicted as the column activity. np.sum() on a DataFrame
# is avoided because its reduction axis depends on the pandas version.
# Rows without any samples are divided by 1 to avoid 0/0.
row_totals = cf_matrix.sum(axis=1).replace(0, 1)
ax = sns.heatmap(cf_matrix.div(row_totals, axis=0), annot=True, fmt='.2%',
                 cmap='Greens')
ax.set_xlabel('Predicted activity')
ax.set_ylabel('True activity')
plt.show()

print(classification_report(y_test, max_y_pred_test,
                            labels=class_ids, target_names=class_names))