In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, Flatten, Dropout, BatchNormalization
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras_tuner import RandomSearch

# LSTM implementation
Apply a deep learning approach which embeds the temporal dimension that was not

explicitly discussed in Chapter 8 (e.g. an LSTM, TCN, or some other state-of-the-art

approach) in the same setting you have used in (5). Provide a rationale on the choice

of your algorithm, hyperparameter settings, discuss your results, and compare your

results to those found under (5).

In [2]:
rand = 0xC0FFEE

# Load the dataset
df = pd.read_csv('combined_data.csv')

# Handle missing values if any
df.fillna(method='ffill', inplace=True)

# Encode the target variable
label_encoder = LabelEncoder()
df['activity'] = label_encoder.fit_transform(df['activity'])

# Scale the features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df.drop(columns=['date_time', 'activity']))

X = []
y = []

# This line sets the number of time steps that the LSTM will use to make predictions.
time_steps = 10 # This values is worth experimenting with

for i in range(time_steps, len(scaled_features)):
    X.append(scaled_features[i-time_steps:i])
    y.append(df['activity'].iloc[i])

X, y = np.array(X), np.array(y)

# One-hot encode the target variable
y = to_categorical(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rand, stratify=y)

In [3]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=50))
model.add(Dense(units=y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [4]:
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [5]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy*100:.2f}%')

Test Accuracy: 99.96%


In [6]:
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

# Decode the predicted and true classes
predicted_labels = label_encoder.inverse_transform(predicted_classes)
true_labels = label_encoder.inverse_transform(true_classes)

# Print some predictions
for i in range(10):
    print(f'Predicted: {predicted_labels[i]}, True: {true_labels[i]}')

Predicted: cycling, True: cycling
Predicted: walking, True: walking
Predicted: walking, True: walking
Predicted: walking, True: walking
Predicted: cycling, True: cycling
Predicted: cycling, True: cycling
Predicted: cycling, True: cycling
Predicted: running, True: running
Predicted: cycling, True: cycling
Predicted: walking, True: walking


In [9]:
# Define the model building function
def build_model(hp):
    model = Sequential()
    model.add(LSTM(units=hp.Int('units_1', min_value=32, max_value=128, step=32), 
                   return_sequences=True, 
                   input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units=hp.Int('units_2', min_value=32, max_value=128, step=32)))
    model.add(Dense(units=y_train.shape[1], activation='softmax'))
    
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Set up the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='hyperparameter_tuning',
    project_name='lstm_tuning'
)

# Perform the hyperparameter search
tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Get the best model and hyperparameters
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best hyperparameters: ", best_hyperparameters.values)

Trial 10 Complete [00h 02m 34s]
val_accuracy: 0.9996303915977478

Best val_accuracy So Far: 0.9996303915977478
Total elapsed time: 00h 33m 54s
Best hyperparameters:  {'units_1': 96, 'units_2': 96, 'learning_rate': 0.001}


In [10]:
# Train the best model
history = best_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
# Evaluate the best model
val_loss, val_accuracy = best_model.evaluate(X_test, y_test)
print(f'Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

Validation Loss: 0.00728636933490634, Validation Accuracy: 0.9994455575942993
