In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, KFold, LeaveOneGroupOut
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression


In [2]:
# File paths (adjust according to the location of the files)
train_features_path = r'C:\Users\ASUS\Desktop\Intern Task\UCI HAR Dataset\UCI HAR Dataset\train\X_train.txt'
train_labels_path = r'C:\Users\ASUS\Desktop\Intern Task\UCI HAR Dataset\UCI HAR Dataset\train\y_train.txt'
subject_train_path = r'C:\Users\ASUS\Desktop\Intern Task\UCI HAR Dataset\UCI HAR Dataset\train\subject_train.txt'

X_train = pd.read_csv(train_features_path, delim_whitespace=True, header=None)
y_train = pd.read_csv(train_labels_path, delim_whitespace=True, header=None).values.ravel()
subjects = pd.read_csv(subject_train_path, delim_whitespace=True, header=None).values.ravel()


  X_train = pd.read_csv(train_features_path, delim_whitespace=True, header=None)
  y_train = pd.read_csv(train_labels_path, delim_whitespace=True, header=None).values.ravel()
  subjects = pd.read_csv(subject_train_path, delim_whitespace=True, header=None).values.ravel()


In [3]:
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "AdaBoost": AdaBoostClassifier(n_estimators=100, algorithm='SAMME', random_state=42)
}

# Hyperparameters to tune for each model
param_grid = {
    "Decision Tree": {"max_depth": [3, 5, 10, None], "min_samples_split": [2, 10, 20]},
    "Random Forest": {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10], "min_samples_split": [2, 10]},
    "Logistic Regression": {"C": [0.01, 0.1, 1, 10]},
    "AdaBoost": {"n_estimators": [50, 100, 150], "learning_rate": [0.01, 0.1, 1]}
}


In [None]:
# K-Fold Cross-Validation Setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Grid search for each model
best_models = {}
for name, model in models.items():
    print(f"Running GridSearchCV for {name}...")
    grid_search = GridSearchCV(model, param_grid[name], cv=kf, scoring="accuracy")
    grid_search.fit(X_train, y_train)
    best_models[name] = grid_search.best_estimator_
    print(f"Best hyperparameters for {name}: {grid_search.best_params_}")


Running GridSearchCV for Decision Tree...


In [None]:
# Leave-One-Subject-Out Cross-Validation
logo = LeaveOneGroupOut()

# Evaluate each model with LOSO-CV
for name, model in best_models.items():
    scores = cross_val_score(model, X_train, y_train, groups=subjects, cv=logo, scoring="accuracy")
    print(f"{name} - LOSO-CV Accuracy: {scores.mean():.4f} ± {scores.std():.4f}")


In [None]:
# Load test data
test_features_path = r'C:\Users\ASUS\Desktop\Intern Task\UCI HAR Dataset\UCI HAR Dataset\test\X_test.txt'
test_labels_path = r'C:\Users\ASUS\Desktop\Intern Task\UCI HAR Dataset\UCI HAR Dataset\test\y_test.txt'

X_test = pd.read_csv(test_features_path, delim_whitespace=True, header=None)
y_test = pd.read_csv(test_labels_path, delim_whitespace=True, header=None).values.ravel()

# Evaluate on test set
for name, model in best_models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"--- {name} Test Set Performance ---")
    print(classification_report(y_test, y_pred))


In [None]:
import matplotlib.pyplot as plt

# Store results for visualization
model_names = list(best_models.keys())
kf_accuracies = [cross_val_score(best_models[name], X_train, y_train, cv=kf, scoring="accuracy").mean() for name in model_names]
loso_accuracies = [cross_val_score(best_models[name], X_train, y_train, groups=subjects, cv=logo, scoring="accuracy").mean() for name in model_names]

# Plotting
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# K-Fold CV Plot
ax[0].bar(model_names, kf_accuracies, color='skyblue')
ax[0].set_title('K-Fold CV Accuracy')
ax[0].set_ylabel('Accuracy')

# LOSO CV Plot
ax[1].bar(model_names, loso_accuracies, color='salmon')
ax[1].set_title('LOSO CV Accuracy')
ax[1].set_ylabel('Accuracy')

plt.tight_layout()
plt.show()
