In [None]:
import joblib
import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense, Dropout, Input
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
# Read the training, testing and ADL CSV files (paths are relative to the working directory).
df_train, df_test, df_adl = (
    pd.read_csv(csv_name)
    for csv_name in (
        'raw182_Training_Relabeled_Auto_25.csv',
        'raw91_Testing_Relabeled_Auto_25.csv',
        'Raw_Data_90ADL.csv',
    )
)


In [None]:
# Show the header of each dataset exactly as read from disk, so that naming
# problems (such as stray whitespace) are visible before any cleaning.
for caption, frame in (
    ("Training Data Columns:", df_train),
    ("Testing Data Columns:", df_test),
    ("ADL Data Columns:", df_adl),
):
    print(caption, frame.columns)

Training Data Columns: Index([' ms_accelerometer_x', ' ms_accelerometer_y', ' ms_accelerometer_z',
       'outcome'],
      dtype='object')
Testing Data Columns: Index([' ms_accelerometer_x', ' ms_accelerometer_y', ' ms_accelerometer_z',
       'outcome'],
      dtype='object')
ADL Data Columns: Index([' ms_accelerometer_x', ' ms_accelerometer_y', ' ms_accelerometer_z',
       'outcome'],
      dtype='object')


In [None]:
# The raw headers carry leading spaces; strip surrounding whitespace on every dataset
for frame in (df_train, df_test, df_adl):
    frame.columns = frame.columns.str.strip()

# Confirm what the headers look like after stripping
print("Training Data Columns:", df_train.columns)
print("Testing Data Columns:", df_test.columns)
print("ADL Data Columns:", df_adl.columns)

# Split every dataset into features (the three accelerometer axes) and labels ('outcome')
feature_columns = ['ms_accelerometer_x', 'ms_accelerometer_y', 'ms_accelerometer_z']

X_train, y_train = df_train[feature_columns].values, df_train['outcome'].values
X_test, y_test = df_test[feature_columns].values, df_test['outcome'].values
X_adl, y_adl = df_adl[feature_columns].values, df_adl['outcome'].values

Training Data Columns: Index(['ms_accelerometer_x', 'ms_accelerometer_y', 'ms_accelerometer_z',
       'outcome'],
      dtype='object')
Testing Data Columns: Index(['ms_accelerometer_x', 'ms_accelerometer_y', 'ms_accelerometer_z',
       'outcome'],
      dtype='object')
ADL Data Columns: Index(['ms_accelerometer_x', 'ms_accelerometer_y', 'ms_accelerometer_z',
       'outcome'],
      dtype='object')


In [None]:
# Fit the scaler on the training features only, then apply the same fitted
# statistics to the test and ADL features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled, X_adl_scaled = scaler.transform(X_test), scaler.transform(X_adl)

# Insert a length-1 timestep axis so each array becomes
# (samples, timesteps, features), the layout the LSTM input expects.
X_train_scaled = X_train_scaled[:, np.newaxis, :]
X_test_scaled = X_test_scaled[:, np.newaxis, :]
X_adl_scaled = X_adl_scaled[:, np.newaxis, :]

In [None]:
# Define the class labels
class_labels = [0, 1]  # Update this based on your labels

# Fit the encoder on the full list of known labels (not on any single split) so
# that every split is mapped to the same integer codes.
label_encoder = LabelEncoder()
label_encoder.fit(class_labels)

encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_adl = label_encoder.transform(y_adl)

# Pin the one-hot width to the number of known classes. Without num_classes,
# to_categorical infers the width from the largest code present in the array,
# so a split that happens to lack the highest class would get fewer columns
# than the model's output layer (which has len(class_labels) units).
n_classes = len(class_labels)
one_hot_y_train = to_categorical(encoded_y_train, num_classes=n_classes)
one_hot_y_test = to_categorical(encoded_y_test, num_classes=n_classes)
one_hot_y_adl = to_categorical(encoded_y_adl, num_classes=n_classes)

In [None]:
def create_model(input_shape=None, num_classes=None):
    """Build and compile a fresh two-layer LSTM classifier.

    Args:
        input_shape: ``(timesteps, features)`` of one sample. When omitted it is
            taken from the module-level ``X_train_scaled`` (its axes 1 and 2).
        num_classes: Number of output units. When omitted it is
            ``len(class_labels)`` from the module-level ``class_labels``.

    Returns:
        A compiled ``Sequential`` model with untrained weights (Adam optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    if input_shape is None:
        input_shape = (X_train_scaled.shape[1], X_train_scaled.shape[2])
    if num_classes is None:
        num_classes = len(class_labels)

    model = Sequential()
    # Declare the input with an explicit Input layer instead of passing
    # input_shape to the first LSTM; recent Keras versions emit a warning for
    # the latter.
    model.add(Input(shape=input_shape))
    model.add(LSTM(50, return_sequences=True))  # emits the full sequence for the next LSTM
    model.add(Dropout(0.2))
    model.add(LSTM(50))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))  # one unit per class
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [None]:
# Cross-validation configuration: 5 shuffled folds with a fixed seed so the
# split is the same on every run.
scores = []  # filled with one accuracy per fold by the cross-validation loop
k_folds = 5
k_fold = KFold(random_state=42, shuffle=True, n_splits=k_folds)


In [None]:
# Start from an empty list inside this cell so that re-running it does not
# append to accuracies left over from a previous run and skew the mean.
scores = []

for train_indices, test_indices in k_fold.split(X_train_scaled):
    # Slice features and one-hot labels with the same index arrays so they stay aligned
    X_train_cv, X_test_cv = X_train_scaled[train_indices], X_train_scaled[test_indices]
    y_train_cv, y_test_cv = one_hot_y_train[train_indices], one_hot_y_train[test_indices]

    model = create_model()  # Reinitialize model for each fold so no weights carry over

    # Train the model on the current fold; validation_split holds out a further
    # 20% of this fold's training portion for per-epoch validation metrics
    history = model.fit(X_train_cv, y_train_cv, epochs=20, batch_size=16, validation_split=0.2, verbose=1)

    # Evaluate the model on the held-out part of the current fold
    _, accuracy = model.evaluate(X_test_cv, y_test_cv)
    scores.append(accuracy)

  super().__init__(**kwargs)


Epoch 1/20
[1m1361/1361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5ms/step - accuracy: 0.8664 - loss: 0.4186 - val_accuracy: 0.8696 - val_loss: 0.3738
Epoch 2/20
[1m1361/1361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8791 - loss: 0.3412 - val_accuracy: 0.8699 - val_loss: 0.3723
Epoch 3/20
[1m1361/1361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8746 - loss: 0.3480 - val_accuracy: 0.8705 - val_loss: 0.3720
Epoch 4/20
[1m1361/1361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8769 - loss: 0.3399 - val_accuracy: 0.8677 - val_loss: 0.3689
Epoch 5/20
[1m1361/1361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8686 - loss: 0.3527 - val_accuracy: 0.8731 - val_loss: 0.3580
Epoch 6/20
[1m1361/1361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.8730 - loss: 0.3500 - val_accuracy: 0.8707 - val_loss: 0.3605
Epoch 7/20


In [None]:
# Summarise cross-validation as the average of the per-fold accuracies
mean_accuracy = np.asarray(scores).mean()
print(f"Mean accuracy across {k_folds}-fold cross-validation: {mean_accuracy * 100:.2f}%")

# Build a fresh model and fit it on the complete training set
# (validation_split keeps 20% of it aside for per-epoch validation metrics)
model = create_model()
model.fit(
    X_train_scaled,
    one_hot_y_train,
    batch_size=16,
    epochs=20,
    validation_split=0.2,
    verbose=1,
)

# Score the fitted model on the separate test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, one_hot_y_test)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

Mean accuracy across 5-fold cross-validation: 88.06%
Epoch 1/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - accuracy: 0.8643 - loss: 0.4068 - val_accuracy: 0.8695 - val_loss: 0.3703
Epoch 2/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8748 - loss: 0.3465 - val_accuracy: 0.8749 - val_loss: 0.3644
Epoch 3/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.8745 - loss: 0.3459 - val_accuracy: 0.8760 - val_loss: 0.3584
Epoch 4/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8774 - loss: 0.3390 - val_accuracy: 0.8746 - val_loss: 0.3510
Epoch 5/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8776 - loss: 0.3360 - val_accuracy: 0.8724 - val_loss: 0.3519
Epoch 6/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.8791 - loss: 0.3315 -

In [None]:
# Predict on ADL data: take the most probable class per sample, and recover the
# integer-encoded true labels from their one-hot representation.
y_adl_pred = model.predict(X_adl_scaled)
y_adl_pred = np.argmax(y_adl_pred, axis=1)
y_adl_true = np.argmax(one_hot_y_adl, axis=1)

# The label encoder maps the known classes to codes 0..n-1. Passing that full
# set explicitly keeps the report and the matrix at a fixed size: without it,
# classification_report raises when a class is missing from both the truth and
# the predictions (target_names length mismatch), and the confusion matrix
# changes shape.
report_labels = list(range(len(class_labels)))

# Classification report for ADL data
print("Classification Report for ADL Data:")
print(classification_report(
    y_adl_true,
    y_adl_pred,
    labels=report_labels,
    target_names=[str(label) for label in class_labels],
))

print("Confusion Matrix for ADL Data:")
print(confusion_matrix(y_adl_true, y_adl_pred, labels=report_labels))

[1m1461/1461[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step
Classification Report for ADL Data:
              precision    recall  f1-score   support

           0       0.66      0.77      0.71     29191
           1       0.48      0.35      0.41     17561

    accuracy                           0.61     46752
   macro avg       0.57      0.56      0.56     46752
weighted avg       0.60      0.61      0.60     46752

Confusion Matrix for ADL Data:
[[22494  6697]
 [11343  6218]]
