In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from xgboost import XGBRegressor
import warnings
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

# Plot appearance. The style sheet is applied first and the palette second:
# both write the colour cycle, so the palette must come last to take effect.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette(palette="husl")

In [None]:
print("Model 4: Gradient Boosting Regressor")

# Hyperparameter search space: 2 * 3 * 3 * 2 * 2 = 72 candidate settings.
# With cv=5 that is 360 fits, plus one final refit of the winner.
gb_param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

# Exhaustive grid search scored by R^2 under 5-fold cross-validation.
# random_state is fixed so repeated runs build the same trees.
gb_model = GradientBoostingRegressor(random_state=42)
gb_grid = GridSearchCV(gb_model, gb_param_grid,
                      cv=5, scoring='r2',
                      n_jobs=-1, verbose=1)

print("Hyperparameter tuning in progress...")
gb_grid.fit(X_train, y_train)

# best_estimator_ is the winning configuration refit on all of X_train
# (GridSearchCV's default refit=True).
gb_best = gb_grid.best_estimator_
y_pred_gb = gb_best.predict(X_test)

# Hold-out evaluation on the test split.
r2_gb = r2_score(y_test, y_pred_gb)
mae_gb = mean_absolute_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mean_squared_error(y_test, y_pred_gb))

# Mean 5-fold CV R^2 of the winning configuration. The search above already
# computed exactly this (same data, cv=5, scoring='r2', same random_state),
# so read it from best_score_ instead of refitting the model five more times.
# NOTE: the hyperparameters were chosen on these same folds, so this score is
# optimistically biased; the test-set metrics above are the unbiased ones.
cv_gb = gb_grid.best_score_

In [None]:
# Summary of the tuned Gradient Boosting model: chosen hyperparameters,
# hold-out metrics, cross-validated R^2 and the change versus the baseline.
print("\n Gradient Boosting Model Trained")
print(f"   Best Parameters: {gb_grid.best_params_}")
print(f"   R² Score: {r2_gb:.4f}")
print(f"   MAE: {mae_gb:.4f} percentage points")
print(f"   RMSE: {rmse_gb:.4f} percentage points")
print(f"   5-Fold CV R²: {cv_gb:.4f}")
# The '+' sign option prints an explicit sign for both directions, so a model
# that is worse than the baseline shows "-0.0123" rather than "+-0.0123".
print(f"   Improvement over baseline: {r2_gb - r2_baseline:+.4f} R²")

# Feature Importance (Gradient Boosting)
# One row per feature, sorted from most to least important; 'Percent' is the
# same importance scaled by 100 and rounded to two decimals.
# NOTE(review): assumes X.columns lists the features in the same order the
# model was fit on (i.e. X_train was split from X unchanged) - TODO confirm.
gb_feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': gb_best.feature_importances_,
    'Percent': (gb_best.feature_importances_ * 100).round(2)
}).sort_values('Importance', ascending=False)

print("\n📊 Gradient Boosting Feature Importance:")
print(gb_feature_importance.to_string(index=False))