In [2]:
pip install --user xgboost

Collecting xgboostNote: you may need to restart the kernel to use updated packages.

  Using cached xgboost-2.0.2-py3-none-win_amd64.whl (99.8 MB)
Installing collected packages: xgboost
Successfully installed xgboost-2.0.2


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [6]:
# Feature tables written by the preprocessing step (train first, then test).
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ("../Data/X_train_cleaned.csv", "../Data/X_test_cleaned.csv")
)

# Matching target columns: only the 'activity' column of each label file is kept.
y_train, y_test = (
    pd.read_csv(csv_path)["activity"]
    for csv_path in ("../Data/y_train_cleaned.csv", "../Data/y_test_cleaned.csv")
)

In [7]:
# Hyperparameter search space: three candidate values for each of five
# XGBoost settings; the grid search evaluates every combination.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.8, 0.9, 1.0],
    colsample_bytree=[0.8, 0.9, 1.0],
)

In [8]:
# Base estimator for the search. n_jobs=1 keeps every individual XGBoost fit on
# a single thread: the grid search below already runs candidate fits in
# parallel on all cores (n_jobs=-1), and letting both layers parallelise at
# once makes them compete for the same CPU threads.
classifier = XGBClassifier(random_state=42, n_jobs=1)

# Exhaustive search over `param_grid`, scoring each combination by 5-fold
# cross-validated accuracy. With GridSearchCV's default refit=True, the best
# combination is refit on all of X_train afterwards and is available as
# `grid_search.best_estimator_`.
grid_search = GridSearchCV(classifier, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)


GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, device=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     feature_types=None, gamma=None,
                                     grow_policy=None, importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=None,...
                                     max_leaves=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     multi_strategy=None, n_estimators=None,
                                     n_jobs=None, num

In [9]:
# The fitted search exposes its top-scoring parameter combination as a dict;
# keep it under its own name and show it.
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

Best Hyperparameters: {'colsample_bytree': 1.0, 'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 200, 'subsample': 0.8}


In [10]:
# Reuse the model the grid search has already trained. GridSearchCV defaults to
# refit=True, i.e. after the search it refits the base estimator with the best
# parameters on the complete training set. Building and fitting a second
# XGBClassifier(random_state=42, **best_params) would only repeat that work.
best_classifier = grid_search.best_estimator_
best_classifier

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=1.0, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.2, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=7, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=200, n_jobs=None,
              num_parallel_tree=None, objective='multi:softprob', ...)

In [12]:
# Predict a label for every row of the test features with the tuned model;
# these predictions are what the evaluation cell compares against y_test.
y_test_pred = best_classifier.predict(X_test)

In [13]:
# Score the predictions against the true test labels and report the accuracy
# rounded to two decimal places.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f"\nTest Accuracy after Hyperparameter Tuning: {test_accuracy:.2f}")


Test Accuracy after Hyperparameter Tuning: 0.92


In [None]:


# Stand-alone variant of the workflow above that starts from a single raw CSV:
# encode, split, scale, tune and evaluate. It reuses `param_grid` from the
# earlier cell and rebinds X_train / X_test / y_train / y_test, grid_search,
# best_params and best_classifier, so re-run the cells above to restore them.

# Load the dataset
file_path = "path/to/your/dataset.csv"  # placeholder: point at a real CSV before running
df = pd.read_csv(file_path)

# Separate features (X) and target variable (y)
X = df.drop('activity', axis=1)
y = df['activity']

# Encode the target variable (convert activity labels to numerical values).
# The encoder is fitted on the complete label column *before* splitting, so a
# class that happens to land only in the test split can no longer make
# transform() fail with an "unseen label" error. Labels are not features, so
# this does not leak information into training.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets. stratify keeps the class
# proportions approximately equal in both parts, which avoids a split where a
# rare activity is missing from the training data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Standardize the features (optional, but often recommended). The scaler is
# fitted on the training part only and then applied unchanged to the test part.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create the XGBoost classifier. n_jobs=1 keeps each fit single-threaded because
# the grid search below already parallelises across all cores (n_jobs=-1).
classifier = XGBClassifier(random_state=42, n_jobs=1)

# Perform grid search with 5-fold cross-validation, scored by accuracy
grid_search = GridSearchCV(classifier, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# GridSearchCV (default refit=True) has already refit the best parameter
# combination on the full training set, so reuse that model instead of
# training an identical one a second time.
best_classifier = grid_search.best_estimator_

# Make predictions on the test data
y_test_pred = best_classifier.predict(X_test)

# Evaluate the model on the test data. The encoded class ids and their original
# activity names are passed explicitly so the report shows readable labels and
# the confusion matrix has a fixed row/column order covering every class.
class_ids = list(range(len(label_encoder.classes_)))
class_names = [str(label) for label in label_encoder.classes_]

test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"\nTest Accuracy after Hyperparameter Tuning: {test_accuracy:.2f}")
print("Classification Report (Test Data):")
print(classification_report(y_test, y_test_pred, labels=class_ids, target_names=class_names))
print("Confusion Matrix (Test Data):")
print(confusion_matrix(y_test, y_test_pred, labels=class_ids))
