In [1]:
# Import necessary libraries for data processing, modeling, and evaluation
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

In [2]:
# Colab-specific setup: expose Google Drive under /content/drive so the CSV files can be read
from google.colab import drive

drive.mount('/content/drive')

# Read the training and testing splits from the mounted Drive folder
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/My Drive/archive/train.csv',
        '/content/drive/My Drive/archive/test.csv',
    )
)

Mounted at /content/drive


In [3]:
# Separate the 'Activity' label column from the remaining (feature) columns of each split.
# NOTE(review): only 'Activity' is dropped; if the CSVs also carry a non-sensor
# identifier column it stays in the feature matrix - confirm against the data.
X_train, X_test = (
    frame.drop(columns=['Activity']) for frame in (train_data, test_data)
)
y_train, y_test = (
    frame['Activity'] for frame in (train_data, test_data)
)

In [4]:
# Map the activity labels to numerical values
activity_mapping = {'LAYING': 0, 'WALKING': 1, 'WALKING_UPSTAIRS': 2,
                    'WALKING_DOWNSTAIRS': 3, 'SITTING': 4, 'STANDING': 5}

# Series.map silently turns every value that is missing from the mapping into NaN.
# Encode into temporaries and validate before rebinding y_train / y_test, so an
# unexpected label (or an accidental re-run of this cell on already-encoded
# labels) fails loudly here and leaves the existing variables untouched.
encoded_splits = {}
for split_name, raw_labels in (('train', y_train), ('test', y_test)):
    encoded_labels = raw_labels.map(activity_mapping)
    unmapped = encoded_labels.isna()
    if unmapped.any():
        unknown_values = sorted(set(raw_labels[unmapped].astype(str)))
        raise ValueError(
            f"{split_name} labels contain values missing from activity_mapping: "
            f"{unknown_values}. If this cell was re-run, re-run the label "
            "extraction cell first."
        )
    encoded_splits[split_name] = encoded_labels

y_train, y_test = encoded_splits['train'], encoded_splits['test']

In [5]:
# One-hot encode the integer activity codes of both splits (6 classes)
y_train, y_test = (
    to_categorical(label_codes, num_classes=6)
    for label_codes in (y_train, y_test)
)

In [6]:
# Conv1D expects (samples, steps, channels): append a trailing channel axis of size 1.
# Note: this rebinds X_train / X_test from DataFrames to arrays, so the cell
# cannot be re-run without first re-running the feature extraction cell.
X_train = np.expand_dims(X_train.values, axis=-1)
X_test = np.expand_dims(X_test.values, axis=-1)


In [7]:
# Define the CNN model architecture with Conv1D layers
def build_cnn_model(input_length=None, num_classes=6):
    """Build (but do not compile) the 1D-CNN classifier.

    The network stacks three Conv1D blocks (64, 128 and 256 filters, kernel
    size 3, ReLU), each followed by batch normalisation and max pooling, with
    dropout after the first two blocks, then a 128-unit dense layer with
    dropout and a softmax output layer.

    Args:
        input_length: Number of steps per sample, i.e. the size of axis 1 of
            the input array. When None (the default) it is read from the
            notebook-level ``X_train`` array, which matches the previous
            behaviour of this function.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``Sequential`` model taking inputs of shape
        ``(input_length, 1)``.
    """
    if input_length is None:
        # Backward-compatible default: fall back to the global training array
        input_length = X_train.shape[1]

    model = Sequential([
        Input(shape=(input_length, 1)),  # Input layer
        Conv1D(64, kernel_size=3, activation='relu'),  # First Conv Layer
        BatchNormalization(),  # BatchNormalization after Conv Layer
        MaxPooling1D(pool_size=2),  # MaxPooling layer
        Dropout(0.3),  # Dropout for regularization

        Conv1D(128, kernel_size=3, activation='relu'),  # Second Conv Layer
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.4),

        Conv1D(256, kernel_size=3, activation='relu'),  # Third Conv Layer
        BatchNormalization(),
        MaxPooling1D(pool_size=2),

        Flatten(),  # Flatten the output for Dense layers
        Dense(128, activation='relu'),  # Fully connected layer
        Dropout(0.5),  # Dropout for regularization
        Dense(num_classes, activation='softmax')  # Output layer, one unit per class
    ])
    return model


In [8]:
# Ensemble size and the shared 5-fold splitter (shuffled with a fixed seed so the
# fold assignment is reproducible)
num_models = 3
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Accumulators filled while training: trained models, per-fold validation
# accuracies, and a dictionary reserved for training histories
models, cnn_fold_accuracies = [], []
history_dict = dict()

In [None]:
# Train the ensemble. Each ensemble member is cross-validated with K-Fold, using
# a freshly initialised network for every fold so that a fold's validation data
# has never been seen by the model being evaluated on it. (Re-using one model
# instance across folds would leak earlier training folds into later validation
# folds and inflate the reported accuracies.)

# Reset the accumulators in place so re-running this cell does not append
# duplicate entries (the later reshape expects num_models * n_splits values).
models.clear()
cnn_fold_accuracies.clear()
history_dict.clear()

for i in range(num_models):
    print(f"\nTraining CNN Model {i+1}")
    best_fold_model, best_fold_accuracy = None, -1.0

    # Perform K-Fold Cross-Validation
    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        print(f"\nCNN Model {i+1}, Fold {fold + 1}")
        X_tr, X_val = X_train[train_index], X_train[val_index]
        y_tr, y_val = y_train[train_index], y_train[val_index]

        # Fresh weights and optimizer state for every fold
        cnn_model = build_cnn_model()
        cnn_model.compile(optimizer=Adam(learning_rate=0.001),
                          loss='categorical_crossentropy',  # multi-class classification
                          metrics=['accuracy'])

        # Early stopping and learning rate reduction callbacks
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)

        # Train the model on the current fold
        history = cnn_model.fit(X_tr, y_tr, validation_data=(X_val, y_val),
                                epochs=30, batch_size=32,
                                callbacks=[early_stopping, reduce_lr])
        # Keep the per-epoch metrics of every run, keyed by (model index, fold index)
        history_dict[(i, fold)] = history.history

        # Evaluate the model on the validation data
        val_loss, val_accuracy = cnn_model.evaluate(X_val, y_val, verbose=0)
        cnn_fold_accuracies.append(val_accuracy)
        print(f"Model {i+1}, Fold Validation Accuracy: {val_accuracy:.4f}")

        # Remember the fold model that validated best for this ensemble member
        if val_accuracy > best_fold_accuracy:
            best_fold_model, best_fold_accuracy = cnn_model, val_accuracy

    # One model per ensemble member, as before: the best-validating fold model
    models.append(best_fold_model)



Training CNN Model 1

CNN Model 1, Fold 1
Epoch 1/30
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 213ms/step - accuracy: 0.5576 - loss: 1.5918 - val_accuracy: 0.1360 - val_loss: 17.3251 - learning_rate: 0.0010
Epoch 2/30
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 191ms/step - accuracy: 0.8160 - loss: 0.4033 - val_accuracy: 0.5303 - val_loss: 3.6572 - learning_rate: 0.0010
Epoch 3/30
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 184ms/step - accuracy: 0.8473 - loss: 0.3428 - val_accuracy: 0.8314 - val_loss: 0.3951 - learning_rate: 0.0010
Epoch 4/30
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 189ms/step - accuracy: 0.8701 - loss: 0.3086 - val_accuracy: 0.9443 - val_loss: 0.1381 - learning_rate: 0.0010
Epoch 5/30
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 193ms/step - accuracy: 0.8869 - loss: 0.2586 - val_accuracy: 0.9694 - val_loss: 0.1022 - learning_rate: 0.0010
Epoch 6/30


In [None]:
# Function to predict with an ensemble of models by averaging their predictions
def model_ensemble_predict(models, X):
    """Average the predictions of several models on the same input.

    Args:
        models: Iterable of objects exposing ``predict(X)``; every model must
            return an array of the same shape for ``X``.
        X: Input batch forwarded unchanged to each model's ``predict``.

    Returns:
        The element-wise mean of the individual prediction arrays.

    Raises:
        ValueError: If ``models`` is empty. Averaging nothing would otherwise
            yield a NaN scalar and only fail later, far from the cause.
    """
    models = list(models)
    if not models:
        raise ValueError("model_ensemble_predict needs at least one model to average")

    # Get predictions from each model and average them across the model axis
    predictions = [model.predict(X) for model in models]
    return np.mean(predictions, axis=0)

In [None]:
# Run every trained model on the test inputs and keep the averaged class probabilities
ensemble_predictions = model_ensemble_predict(models=models, X=X_test)

In [None]:
# Test accuracy: fraction of samples whose most probable predicted class equals
# the class encoded in the one-hot test labels
predicted_classes = np.argmax(ensemble_predictions, axis=1)
actual_classes = np.argmax(y_test, axis=1)
ensemble_test_accuracy = np.mean(predicted_classes == actual_classes)
print(f"Ensemble Test Accuracy: {ensemble_test_accuracy:.4f}")

In [None]:
# Predict on the test set with the ensemble and report its accuracy.
# NOTE(review): this cell repeats the prediction and accuracy computation of
# the two preceding cells; consider removing one of the copies.
ensemble_predictions = model_ensemble_predict(models, X_test)
is_correct = ensemble_predictions.argmax(axis=1) == y_test.argmax(axis=1)
ensemble_test_accuracy = np.mean(is_correct)
print(f"Ensemble Test Accuracy: {ensemble_test_accuracy:.4f}")

In [None]:
# Classification Report for Ensemble
# Derive the class indices in this cell: they were previously only defined in a
# later cell, so a top-to-bottom run raised NameError here.
y_pred_ensemble = ensemble_predictions.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Order the display names by their integer code so row i of the report is
# labelled with the activity encoded as i, independent of dict insertion order.
activity_names = sorted(activity_mapping, key=activity_mapping.get)

print("\nClassification Report for Ensemble:")
print(classification_report(y_true, y_pred_ensemble, target_names=activity_names))

In [None]:
# Turn averaged probabilities and one-hot labels into class indices, then count
# (actual, predicted) pairs into the ensemble's confusion matrix
y_pred_ensemble = np.argmax(ensemble_predictions, axis=1)
y_true = np.argmax(y_test, axis=1)
cm_ensemble = confusion_matrix(y_true, y_pred_ensemble)


In [None]:
# Draw the ensemble confusion matrix as an annotated heatmap (rows: actual, columns: predicted)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm_ensemble,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=activity_mapping.keys(),
    yticklabels=activity_mapping.keys(),
    ax=ax,
)
ax.set_title('Ensemble Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# Per-fold validation accuracy of every ensemble member.
# (numpy and matplotlib are already imported in the first cell.)

# Take the fold count from the splitter used during training instead of
# repeating the literal, so the reshape cannot drift out of sync with KFold.
num_folds = kf.get_n_splits()

# Reshape accuracies to (num_models, num_folds); this fails loudly if the
# number of recorded accuracies does not match num_models * num_folds.
cnn_fold_accuracies_reshaped = np.array(cnn_fold_accuracies).reshape(num_models, num_folds)

# Calculate mean and standard deviation across folds for each model
mean_accuracies = np.mean(cnn_fold_accuracies_reshaped, axis=1)
std_accuracies = np.std(cnn_fold_accuracies_reshaped, axis=1)

# Plotting Accuracy across Folds for Each Model
plt.figure(figsize=(10, 6))
fold_numbers = range(1, num_folds + 1)
for i in range(num_models):
    # A single errorbar call draws line, markers and bars in one colour;
    # separate plot/errorbar calls would each take a different colour from the
    # cycle, so a model's line and its error bars would not match.
    plt.errorbar(fold_numbers, cnn_fold_accuracies_reshaped[i], yerr=std_accuracies[i],
                 fmt='-o', capsize=5, label=f"Model {i + 1}")

plt.title("Accuracy across Folds for Each Model in Ensemble")
plt.xlabel("Fold")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True)
plt.show()