# Imports - XGBoost Final Tuned Model

In [2]:
# Standard Library
from pathlib import Path

# Core Libraries
import numpy as np
import joblib

# Model
from xgboost import XGBRegressor

# Evaluation Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

# Load Preprocessed Data

In [3]:
# Load the preprocessed train / validation / test splits.
#
# Every artefact lives in the same directory, so the location is declared
# once here instead of being repeated in each call.
# NOTE(review): this is still a machine-specific absolute path; point
# DATA_DIR at your own copy of the data before running elsewhere.
# NOTE: joblib.load unpickles its input and can therefore execute arbitrary
# code; only load files written by this project's own preprocessing step.
DATA_DIR = Path(r"D:\Python\SmartSignalAI\data")

X_train = joblib.load(DATA_DIR / "X_train.pkl")
y_train = joblib.load(DATA_DIR / "y_train.pkl")

X_val   = joblib.load(DATA_DIR / "X_val.pkl")
y_val   = joblib.load(DATA_DIR / "y_val.pkl")

X_test  = joblib.load(DATA_DIR / "X_test.pkl")
y_test  = joblib.load(DATA_DIR / "y_test.pkl")

# Fail fast if a feature matrix and its targets got out of sync on disk,
# rather than surfacing the problem later inside model fitting.
assert X_train.shape[0] == len(y_train), "X_train / y_train row counts differ"
assert X_val.shape[0] == len(y_val), "X_val / y_val row counts differ"
assert X_test.shape[0] == len(y_test), "X_test / y_test row counts differ"

print("Data loaded successfully!")
print("Train shape:", X_train.shape)

Data loaded successfully!
Train shape: (21647, 10)


# Combine Train + Validation for Tuning

In [4]:
# Merge the train and validation splits into one set for cross-validated tuning.
#
# Features and targets are both stacked along axis 0 (rows). np.concatenate
# with an explicit axis works whether the targets are 1-D vectors or (n, 1)
# columns, whereas np.hstack would try to join column vectors side by side
# and raise when the two splits have different lengths.
X_train_val = np.concatenate((X_train, X_val), axis=0)
y_train_val = np.concatenate((y_train, y_val), axis=0)

# Every feature row must still have exactly one target after merging.
assert X_train_val.shape[0] == y_train_val.shape[0], (
    f"Row mismatch after combining: X has {X_train_val.shape[0]} rows, "
    f"y has {y_train_val.shape[0]}"
)

print("Combined Train + Val shape:", X_train_val.shape)

Combined Train + Val shape: (26286, 10)


# Define Hyperparameter Grid

In [5]:
# Search space for the grid search: 2 * 3 * 2 * 2 * 2 = 48 candidate settings
param_grid = dict(
    n_estimators=[200, 300],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.05],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)

# Initialize XGBoost Model

In [6]:
# Base XGBoost regressor handed to the hyperparameter search.
#
# n_jobs stays at 1 because the GridSearchCV step already fans the candidate
# fits out over every core (its own n_jobs=-1). Letting each XGBoost fit also
# start one thread per core oversubscribes the CPU; the XGBoost documentation
# warns that this kind of thread contention slows both levels down.
# The refit best estimator inherits this setting; call
# set_params(n_jobs=...) on it afterwards if multi-threaded prediction is needed.
# random_state fixes XGBoost's random seed so repeated runs are reproducible.
xgb = XGBRegressor(
    random_state=42,
    n_jobs=1
)

# GridSearchCV

In [7]:
# Exhaustive search over `param_grid`, each candidate scored by 3-fold CV.
# scikit-learn maximises scores, so MSE is supplied in its negated form.
grid_search = GridSearchCV(
    xgb,
    param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1,
)

# Fit every candidate on the combined train + validation data
grid_search.fit(X=X_train_val, y=y_train_val)

# Report the winning combination and its mean cross-validated score
print("Best Hyperparameters:", grid_search.best_params_)
print("Best CV Score (neg MSE):", grid_search.best_score_)

Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best Hyperparameters: {'colsample_bytree': 1.0, 'learning_rate': 0.05, 'max_depth': 7, 'n_estimators': 300, 'subsample': 0.8}
Best CV Score (neg MSE): -55.68626769166887


# Test Set Evaluation

In [8]:
# Take the estimator that the grid search refit with the best parameters
xgb_best = grid_search.best_estimator_

# Predict the test split, which was not part of the data used for tuning
y_test_pred = xgb_best.predict(X_test)

# Error metrics on the test split; RMSE is the square root of the MSE
mae_test = mean_absolute_error(y_test, y_test_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test, y_test_pred)

# Print each metric rounded to three decimals
print("Tuned XGBoost - Test Metrics")
for metric_label, metric_value in (
    ("MAE :", mae_test),
    ("RMSE:", rmse_test),
    ("R²  :", r2_test),
):
    print(metric_label, round(metric_value, 3))

Tuned XGBoost - Test Metrics
MAE : 5.406
RMSE: 7.426
R²  : 0.565


# Save Final Tuned XGBoost Model

In [9]:
# Persist the tuned estimator to disk with joblib
model_path = r"D:\Python\SmartSignalAI\data\xgb_final_model.pkl"
joblib.dump(xgb_best, model_path)

print("Tuned XGBoost model saved successfully!")

Tuned XGBoost model saved successfully!
