In [None]:
from src.utils import load_data, calculate_rmse, cross_validate
from src.gradient_boosting import GradientBoosting
from src.hyperparameter_tuning import grid_search

# Load data
X, y = load_data("california_housing")
print(X.shape, y.shape)

# Run every experiment below on a small prefix of the data (presumably to keep
# the run time manageable). Change N_SAMPLES to scale the experiment.
# NOTE(review): this slice takes the FIRST N_SAMPLES rows, not a random
# sample -- confirm that load_data does not return rows in a meaningful order.
N_SAMPLES = 500
X_sample, y_sample = X[:N_SAMPLES], y[:N_SAMPLES]

# Baseline: 3-fold cross-validation of a small, explicitly configured model
# (these are hand-picked values, not the constructor defaults).
# NOTE(review): recorded runs of this same baseline produced different fold
# scores, so some step appears to be non-deterministic -- fix a seed if the
# project API exposes one.
model = GradientBoosting(n_estimators=10, learning_rate=0.1, max_depth=3, verbose=True)
mean_score, std_score = cross_validate(model, X_sample, y_sample, k=3, task="regression")
print(f"Baseline RMSE: {mean_score:.2f} ± {std_score:.2f}")

# Hyperparameter tuning
param_grid = {
    "n_estimators": [50, 100],
    "learning_rate": [0.1, 0.05],
    # Search dimensions below are disabled (presumably to keep the grid small);
    # uncomment a line to include that parameter in the search.
    # "max_depth": [3, 5],
    # "reg_lambda": [1.0, 0.5],
    # "reg_alpha": [0.0, 0.1],
    # "colsample": [0.8, 1.0]
}
print("second stage- parameter grid")

# all_scores is not used in this cell; it is kept for later inspection.
best_params, best_score, all_scores = grid_search(X_sample, y_sample, param_grid, task='regression')
print("Best Parameters:", best_params)
print("Best Cross-Validation RMSE:", best_score)

# Train the final model with the best parameters on the same N_SAMPLES-row
# sample used for tuning (NOT the full dataset).
final_model = GradientBoosting(**best_params)
final_model.fit(X_sample, y_sample)

(20640, 8) (20640,)
Training fold 1/3...
Fitting estimator 1/10
Fitting estimator 2/10
Fitting estimator 3/10
Fitting estimator 4/10
Fitting estimator 5/10
Fitting estimator 6/10
Fitting estimator 7/10
Fitting estimator 8/10
Fitting estimator 9/10
Fitting estimator 10/10
Fold 1, time elapsed: 12.05 seconds
Score: 0.6781870206648357
Training fold 2/3...
Fitting estimator 1/10
Fitting estimator 2/10
Fitting estimator 3/10
Fitting estimator 4/10
Fitting estimator 5/10
Fitting estimator 6/10
Fitting estimator 7/10
Fitting estimator 8/10
Fitting estimator 9/10
Fitting estimator 10/10
Fold 2, time elapsed: 12.52 seconds
Score: 0.38556323394157394
Training fold 3/3...
Fitting estimator 1/10
Fitting estimator 2/10
Fitting estimator 3/10
Fitting estimator 4/10
Fitting estimator 5/10
Fitting estimator 6/10
Fitting estimator 7/10
Fitting estimator 8/10
Fitting estimator 9/10
Fitting estimator 10/10
Fold 3, time elapsed: 5.81 seconds
Score: 0.7496598493485896
Baseline RMSE: 0.60 ± 0.16
second stag

In [1]:
from src.utils import load_data, calculate_rmse, cross_validate
from src.gradient_boosting import GradientBoosting
from src.hyperparameter_tuning import grid_search

# Load data
X, y = load_data("california_housing")
print(X.shape, y.shape)

# Run every experiment below on a small prefix of the data (presumably to keep
# the run time manageable). Change N_SAMPLES to scale the experiment.
# NOTE(review): this slice takes the FIRST N_SAMPLES rows, not a random
# sample -- confirm that load_data does not return rows in a meaningful order.
N_SAMPLES = 500
X_sample, y_sample = X[:N_SAMPLES], y[:N_SAMPLES]

# Baseline: 3-fold cross-validation of a small, explicitly configured model
# (these are hand-picked values, not the constructor defaults).
# NOTE(review): recorded runs of this same baseline produced different fold
# scores, so some step appears to be non-deterministic -- fix a seed if the
# project API exposes one.
model = GradientBoosting(n_estimators=10, learning_rate=0.1, max_depth=3, verbose=True)
mean_score, std_score = cross_validate(model, X_sample, y_sample, k=3, task="regression")
print(f"Baseline RMSE: {mean_score:.2f} ± {std_score:.2f}")

# Hyperparameter tuning
param_grid = {
    "n_estimators": [50, 100],
    "learning_rate": [0.1, 0.05],
    "max_depth": [3, 5],
    # Search dimensions below are disabled (presumably to keep the grid small);
    # uncomment a line to include that parameter in the search.
    # "reg_lambda": [1.0, 0.5],
    # "reg_alpha": [0.0, 0.1],
    # "colsample": [0.8, 1.0]
}
print("Second stage - parameter grid")

# all_scores is not used in this cell; it is kept for later inspection.
best_params, best_score, all_scores = grid_search(X_sample, y_sample, param_grid, task='regression')
print("Best Parameters:", best_params)
print("Best Cross-Validation RMSE:", best_score)

# Train the final model with the best parameters on the same N_SAMPLES-row
# sample used for tuning (NOT the full dataset).
final_model = GradientBoosting(**best_params)
final_model.fit(X_sample, y_sample)

# Evaluate the final model on the very rows it was fitted on. This
# training-set RMSE is an optimistic figure; the cross-validated best_score
# printed above is the better guide to performance on unseen data.
final_predictions = final_model.predict(X_sample)
final_rmse = calculate_rmse(y_sample, final_predictions)
print(f"Final RMSE on training data: {final_rmse:.2f}")

(20640, 8) (20640,)
Training fold 1/3...
Fitting estimator 1/10
Fitting estimator 2/10
Fitting estimator 3/10
Fitting estimator 4/10
Fitting estimator 5/10
Fitting estimator 6/10
Fitting estimator 7/10
Fitting estimator 8/10
Fitting estimator 9/10
Fitting estimator 10/10
Fold 1, time elapsed: 4.86 seconds
Score: 0.6780629079829527
Training fold 2/3...
Fitting estimator 1/10
Fitting estimator 2/10
Fitting estimator 3/10
Fitting estimator 4/10
Fitting estimator 5/10
Fitting estimator 6/10
Fitting estimator 7/10
Fitting estimator 8/10
Fitting estimator 9/10
Fitting estimator 10/10
Fold 2, time elapsed: 5.27 seconds
Score: 0.6199861427852384
Training fold 3/3...
Fitting estimator 1/10
Fitting estimator 2/10
Fitting estimator 3/10
Fitting estimator 4/10
Fitting estimator 5/10
Fitting estimator 6/10
Fitting estimator 7/10
Fitting estimator 8/10
Fitting estimator 9/10
Fitting estimator 10/10
Fold 3, time elapsed: 3.39 seconds
Score: 0.5854129337580068
Baseline RMSE: 0.63 ± 0.04
Second stage -