In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import  roc_auc_score
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras_tuner import RandomSearch
import keras.backend as K


# LSTM implementation
Apply a deep learning approach which embeds the temporal dimension that was not

explicitly discussed in Chapter 8 (e.g. an LSTM, TCN, or some other state-of-the-art

approach) in the same setting you have used in (5). Provide a rationale on the choice

of your algorithm, hyperparameter settings, discuss your results, and compare your

results to those found under (5).

In [2]:
df = pd.read_csv('combined_data.csv')

# Handle missing values if any
df.fillna(method='ffill', inplace=True)

label_encoder = LabelEncoder()
df['activity'] = label_encoder.fit_transform(df['activity'])

# Scale the features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df.drop(columns=['date_time', 'activity']))

X = []
y = []

# This line sets the number of time steps that the LSTM will use to make predictions.
time_steps = 10 # This values is worth experimenting with

for i in range(time_steps, len(scaled_features)):
    X.append(scaled_features[i-time_steps:i])
    y.append(df['activity'].iloc[i])

X, y = np.array(X), np.array(y)

# One-hot encode the target variable
y = to_categorical(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [3]:
# Define custom F1-score metric
def f1_m(y_true, y_pred):
    precision = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) / (K.sum(K.round(K.clip(y_pred, 0, 1))) + K.epsilon())
    recall = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) / (K.sum(K.round(K.clip(y_true, 0, 1))) + K.epsilon())
    f1 = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return f1

In [4]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=50))
model.add(Dense(units=y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', f1_m])

In [5]:
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [6]:
val_loss, val_accuracy, val_f1 = model.evaluate(X_test, y_test)
print(f'Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}, Validation F1: {val_f1}')

roc_auc = roc_auc_score(y_test, model.predict(X_test), multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

Validation Loss: 0.0016593772452324629, Validation Accuracy: 0.9992607831954956, Validation F1: 0.9993551969528198
ROC AUC Score: 0.9999991709618502


In [7]:
def build_model(hp):
    model = Sequential()
    model.add(LSTM(units=hp.Int('units_1', min_value=32, max_value=128, step=32), 
                   return_sequences=True, 
                   input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units=hp.Int('units_2', min_value=32, max_value=128, step=32)))
    model.add(Dense(units=y_train.shape[1], activation='softmax'))
    
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', f1_m])
    return model

# Set up the tuner
tuner = RandomSearch(
    build_model,
    objective='accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='hyperparameter_tuning',
    project_name='lstm_tuning'
)

# Perform the hyperparameter search
tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Get the best model and hyperparameters
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best hyperparameters: ", best_hyperparameters.values)

Trial 10 Complete [00h 08m 10s]
accuracy: 0.9998767971992493

Best accuracy So Far: 0.9999383687973022
Total elapsed time: 00h 56m 59s
Best hyperparameters:  {'units_1': 128, 'units_2': 96, 'learning_rate': 0.001}


In [8]:
# Train the best model
history = best_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [9]:
# Evaluate the best model
val_loss, val_accuracy, val_f1 = best_model.evaluate(X_test, y_test)
print(f'Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}, Validation F1: {val_f1}')

roc_auc = roc_auc_score(y_test, best_model.predict(X_test), multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

Validation Loss: 0.004953647032380104, Validation Accuracy: 0.9992607831954956, Validation F1: 0.9992647171020508
ROC AUC Score: 0.9999962902112727
