In [8]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import LeaveOneGroupOut
import warnings

# NOTE(review): this silences every warning for the rest of the session,
# including any scikit-learn convergence or undefined-metric warnings.
warnings.filterwarnings("ignore")

# Root of the extracted UCI HAR Dataset. The four file paths below are built
# from it so the location only has to be edited in one place; the resulting
# strings are identical to the previously hard-coded ones.
dataset_dir = r"C:\Users\Jeet\Desktop\IIT_Gandhinagar\Dataset_Folder\UCI HAR Dataset\UCI HAR Dataset"

train_features_path = rf"{dataset_dir}\train\X_train.txt"
train_labels_path = rf"{dataset_dir}\train\y_train.txt"
test_features_path = rf"{dataset_dir}\test\X_test.txt"
test_labels_path = rf"{dataset_dir}\test\y_test.txt"

# The files are whitespace-delimited and have no header row.
# `sep=r"\s+"` is the supported spelling of `delim_whitespace=True`, which
# pandas deprecated in 2.2 (the warning was hidden by the filter above).
# `.squeeze()` collapses the label frame to a Series (presumably one column).
X_train = pd.read_csv(train_features_path, sep=r"\s+", header=None)
y_train = pd.read_csv(train_labels_path, sep=r"\s+", header=None).squeeze()
X_test = pd.read_csv(test_features_path, sep=r"\s+", header=None)
y_test = pd.read_csv(test_labels_path, sep=r"\s+", header=None).squeeze()

# Classifiers under comparison, keyed by the name printed in the reports.
# All of them are seeded with random_state=42.
models = dict([
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Logistic Regression", LogisticRegression(random_state=42, max_iter=500)),
    ("AdaBoost", AdaBoostClassifier(random_state=42)),
])

# Scoring callables keyed by the label printed in the reports. The insertion
# order here is the order in which the metrics are computed and printed.
metrics = dict([
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
])

# Shuffled 5-fold splitter with a fixed seed.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

print("K-Fold Cross-Validation Results:\n")
for model_name, model in models.items():
    # One list of per-fold values for each reported metric.
    scores = {
        metric_name: []
        for metric_name in ("Accuracy", "Precision", "Recall", "F1 Score")
    }

    for train_idx, test_idx in kfold.split(X_train, y_train):
        # Positional row selection for this fold's fit and hold-out parts.
        X_ktrain, y_ktrain = X_train.iloc[train_idx], y_train.iloc[train_idx]
        X_ktest, y_ktest = X_train.iloc[test_idx], y_train.iloc[test_idx]

        # fit() returns the estimator itself, so the call can be chained.
        y_pred = model.fit(X_ktrain, y_ktrain).predict(X_ktest)

        for metric_name, metric_func in metrics.items():
            # Accuracy takes no averaging argument; every other metric is
            # requested with average="weighted".
            extra_kwargs = {} if metric_name == "Accuracy" else {"average": "weighted"}
            scores[metric_name].append(metric_func(y_ktest, y_pred, **extra_kwargs))

    # Report the mean of each metric across the folds.
    print(f"{model_name}:\n")
    for metric_name in metrics:
        print(f"  {metric_name}: {np.mean(scores[metric_name]):.4f}")
    print("\n")


# Subject identifiers used as the grouping key for the splitter below
# (presumably one id per row of X_train -- verify against the dataset).
subject_train_path = r"C:\Users\Jeet\Desktop\IIT_Gandhinagar\Dataset_Folder\UCI HAR Dataset\UCI HAR Dataset\train\subject_train.txt"
# `sep=r"\s+"` replaces `delim_whitespace=True`, which pandas deprecated in 2.2.
subjects = pd.read_csv(subject_train_path, sep=r"\s+", header=None).squeeze()
# Each split holds out every row belonging to one group (here: one subject).
logo = LeaveOneGroupOut()

print("Leave-One-Subject-Out Cross-Validation Results:\n")
for model_name, model in models.items():
    # Per-fold values for each reported metric.
    scores = {
        "Accuracy": [],
        "Precision": [],
        "Recall": [],
        "F1 Score": []
    }
    for train_idx, test_idx in logo.split(X_train, y_train, groups=subjects):
        X_ltrain, X_ltest = X_train.iloc[train_idx], X_train.iloc[test_idx]
        y_ltrain, y_ltest = y_train.iloc[train_idx], y_train.iloc[test_idx]

        # Refit on the remaining subjects, then predict the held-out one.
        model.fit(X_ltrain, y_ltrain)
        y_pred = model.predict(X_ltest)

        for metric_name, metric_func in metrics.items():
            # Accuracy takes no averaging argument; the other metrics are
            # requested with average="weighted".
            if metric_name == "Accuracy":
                scores[metric_name].append(metric_func(y_ltest, y_pred))
            else:
                scores[metric_name].append(metric_func(y_ltest, y_pred, average="weighted"))

    # Report the unweighted mean of each metric over the held-out subjects.
    print(f"{model_name}:\n")
    for metric_name in metrics.keys():
        print(f"  {metric_name}: {np.mean(scores[metric_name]):.4f}")
    print("\n")

K-Fold Cross-Validation Results:

Random Forest:

  Accuracy: 0.9810
  Precision: 0.9810
  Recall: 0.9810
  F1 Score: 0.9810


Decision Tree:

  Accuracy: 0.9368
  Precision: 0.9370
  Recall: 0.9368
  F1 Score: 0.9368


Logistic Regression:

  Accuracy: 0.9834
  Precision: 0.9835
  Recall: 0.9834
  F1 Score: 0.9834


AdaBoost:

  Accuracy: 0.5449
  Precision: 0.3502
  Recall: 0.5449
  F1 Score: 0.4093


Leave-One-Subject-Out Cross-Validation Results:

Random Forest:

  Accuracy: 0.9132
  Precision: 0.9242
  Recall: 0.9132
  F1 Score: 0.9046


Decision Tree:

  Accuracy: 0.8515
  Precision: 0.8658
  Recall: 0.8515
  F1 Score: 0.8459


Logistic Regression:

  Accuracy: 0.9397
  Precision: 0.9467
  Recall: 0.9397
  F1 Score: 0.9352


AdaBoost:

  Accuracy: 0.5444
  Precision: 0.3486
  Recall: 0.5444
  F1 Score: 0.4085




In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import LeaveOneGroupOut
import warnings

# NOTE(review): this silences every warning for the rest of the session,
# including any scikit-learn convergence or undefined-metric warnings.
warnings.filterwarnings("ignore")

# Root of the extracted UCI HAR Dataset. The four file paths below are built
# from it so the location only has to be edited in one place; the resulting
# strings are identical to the previously hard-coded ones.
dataset_dir = r"C:\Users\Jeet\Desktop\IIT_Gandhinagar\Dataset_Folder\UCI HAR Dataset\UCI HAR Dataset"

train_features_path = rf"{dataset_dir}\train\X_train.txt"
train_labels_path = rf"{dataset_dir}\train\y_train.txt"
test_features_path = rf"{dataset_dir}\test\X_test.txt"
test_labels_path = rf"{dataset_dir}\test\y_test.txt"

# The files are whitespace-delimited and have no header row.
# `sep=r"\s+"` is the supported spelling of `delim_whitespace=True`, which
# pandas deprecated in 2.2 (the warning was hidden by the filter above).
# `.squeeze()` collapses the label frame to a Series (presumably one column).
X_train = pd.read_csv(train_features_path, sep=r"\s+", header=None)
y_train = pd.read_csv(train_labels_path, sep=r"\s+", header=None).squeeze()
X_test = pd.read_csv(test_features_path, sep=r"\s+", header=None)
y_test = pd.read_csv(test_labels_path, sep=r"\s+", header=None).squeeze()

# Baseline classifiers keyed by the name printed in the reports; the tuned
# AdaBoost model is added once the grid search below has finished.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(random_state=42, max_iter=500),
}

# Hyperparameter tuning for AdaBoost
base_estimator = DecisionTreeClassifier(random_state=42)

# scikit-learn renamed AdaBoost's `base_estimator` argument to `estimator`
# (deprecated in 1.2, removed in 1.4). Use whichever name the installed
# version exposes so the cell runs on both old and new releases; the same name
# is the prefix for the nested tree parameter in the grid.
ada_estimator_param = (
    "estimator"
    if "estimator" in AdaBoostClassifier().get_params(deep=False)
    else "base_estimator"
)

ada_boost_model = AdaBoostClassifier(**{ada_estimator_param: base_estimator}, random_state=42)

# 3 x 3 x 3 = 27 candidate settings.
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.5],
    f'{ada_estimator_param}__max_depth': [1, 3, 5]
}

# GridSearchCV for AdaBoost hyperparameter tuning
# With cv=5 every candidate is fitted five times, so expect this to be the
# slowest step of the cell.
grid_search = GridSearchCV(ada_boost_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# `best_estimator_` is the winning configuration refit on all of X_train.
# The dictionary stores a reference to that same object, not a copy, so any
# later fit() through `models["AdaBoost"]` also changes `best_adaboost`.
best_adaboost = grid_search.best_estimator_
models["AdaBoost"] = best_adaboost

# Scoring callables keyed by the label printed in the reports. The insertion
# order here is the order in which the metrics are computed and printed.
metrics = dict([
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
])

# K-Fold Cross-Validation
# Shuffled 5-fold splitter with a fixed seed.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

print("K-Fold Cross-Validation Results:\n")
for model_name, model in models.items():
    # Per-fold values for each reported metric.
    scores = {"Accuracy": [], "Precision": [], "Recall": [], "F1 Score": []}

    for train_idx, test_idx in kfold.split(X_train, y_train):
        # Positional row selection for this fold's fit and hold-out parts.
        X_ktrain = X_train.iloc[train_idx]
        X_ktest = X_train.iloc[test_idx]
        y_ktrain = y_train.iloc[train_idx]
        y_ktest = y_train.iloc[test_idx]

        model.fit(X_ktrain, y_ktrain)
        y_pred = model.predict(X_ktest)

        for metric_name, metric_func in metrics.items():
            # Accuracy takes no averaging argument; the other metrics are
            # requested with average="weighted".
            fold_score = (
                metric_func(y_ktest, y_pred)
                if metric_name == "Accuracy"
                else metric_func(y_ktest, y_pred, average="weighted")
            )
            scores[metric_name].append(fold_score)

    # Report the mean of each metric across the folds.
    print(f"{model_name}:\n")
    for metric_name, fold_scores in scores.items():
        print(f"  {metric_name}: {np.mean(fold_scores):.4f}")
    print("\n")

# Leave-One-Subject-Out Cross-Validation
# Use the same absolute dataset location as the feature/label files loaded in
# this cell; the previous working-directory-relative path only resolved when
# the notebook happened to be started from the right folder.
subject_train_path = r"C:\Users\Jeet\Desktop\IIT_Gandhinagar\Dataset_Folder\UCI HAR Dataset\UCI HAR Dataset\train\subject_train.txt"
# `sep=r"\s+"` replaces `delim_whitespace=True`, which pandas deprecated in 2.2.
# The ids are the grouping key below (presumably one per row of X_train).
subjects = pd.read_csv(subject_train_path, sep=r"\s+", header=None).squeeze()
# Each split holds out every row belonging to one group (here: one subject).
logo = LeaveOneGroupOut()

print("Leave-One-Subject-Out Cross-Validation Results:\n")
for model_name, model in models.items():
    # Per-fold values for each reported metric.
    scores = {
        "Accuracy": [],
        "Precision": [],
        "Recall": [],
        "F1 Score": []
    }
    for train_idx, test_idx in logo.split(X_train, y_train, groups=subjects):
        X_ltrain, X_ltest = X_train.iloc[train_idx], X_train.iloc[test_idx]
        y_ltrain, y_ltest = y_train.iloc[train_idx], y_train.iloc[test_idx]

        # Refit on the remaining subjects, then predict the held-out one.
        # Note that this retrains the estimator object in place.
        model.fit(X_ltrain, y_ltrain)
        y_pred = model.predict(X_ltest)

        for metric_name, metric_func in metrics.items():
            # Accuracy takes no averaging argument; the other metrics are
            # requested with average="weighted".
            if metric_name == "Accuracy":
                scores[metric_name].append(metric_func(y_ltest, y_pred))
            else:
                scores[metric_name].append(metric_func(y_ltest, y_pred, average="weighted"))

    # Report the unweighted mean of each metric over the held-out subjects.
    print(f"{model_name}:\n")
    for metric_name in metrics.keys():
        print(f"  {metric_name}: {np.mean(scores[metric_name]):.4f}")
    print("\n")


# `best_adaboost` is the very object stored in `models["AdaBoost"]`, and the
# cross-validation loops call fit() on it for every fold. By this point it is
# therefore trained on the last fold's subset only, not on the full training
# set. Refit on all training rows so the held-out test scores describe the
# tuned model trained on everything available.
best_adaboost.fit(X_train, y_train)

print("Evaluation on Test Set with Best AdaBoost Model:\n")
y_pred = best_adaboost.predict(X_test)
# Precision, recall and F1 use average='weighted', matching the CV reports.
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}")
print(f"Recall: {recall_score(y_test, y_pred, average='weighted'):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred, average='weighted'):.4f}")
