In [None]:
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
import numpy as np

# Randomized hyperparameter search for a CatBoost regressor, followed by an
# evaluation of the refitted best model on the held-out test split.
# X_train, y_train, X_test, y_test and mean_squared_error are not defined in
# this cell -- presumably they come from earlier cells; verify before running
# this cell in isolation.

# Search space: each key is a CatBoostRegressor parameter name and each list
# holds the candidate values the random search draws from.
param_dist = dict(
    iterations=[1000, 1500, 2000, 2500],       # boosting iterations
    learning_rate=[0.01, 0.05, 0.1, 0.2],      # step size
    depth=[4, 6, 8, 10],                       # tree depth
    l2_leaf_reg=[1, 3, 5, 7, 9],               # L2 regularization coefficient
    bagging_temperature=[0, 0.2, 0.5, 1.0],    # bagging temperature
    border_count=[32, 64, 128],                # number of feature splits
    grow_policy=['SymmetricTree', 'Depthwise', 'Lossguide'],  # tree growing policy
)

# Base estimator; the search clones it and overrides the sampled parameters.
# NOTE(review): fit() below is called without an eval_set, so confirm that
# early_stopping_rounds actually has any effect in this setup.
catboost_model = CatBoostRegressor(
    loss_function='RMSE',
    eval_metric='RMSE',
    random_seed=42,
    verbose=100,
    early_stopping_rounds=500,
)

# Search configuration: 20 sampled candidates x 3 CV folds, scored by negated
# MSE (higher is better), with the search fanned out over all processors.
_search_kwargs = {
    'n_iter': 20,
    'scoring': 'neg_mean_squared_error',
    'cv': 3,
    'verbose': 3,
    'random_state': 42,
    'n_jobs': -1,
}
random_search = RandomizedSearchCV(catboost_model, param_dist, **_search_kwargs)

# Run the hyperparameter search on the training split.
random_search.fit(X_train, y_train)

# best_score_ is reported under the negated-MSE scorer, so flip the sign and
# take the square root to express it on the RMSE scale.
_cv_rmse = (-random_search.best_score_) ** 0.5
print(f"Best parameters found: {random_search.best_params_}")
print(f"Best RMSE score: {_cv_rmse:.4f}")

# The search exposes the model refitted with the winning parameters.
best_catboost_model = random_search.best_estimator_

# Score that model on the test split and report its RMSE.
y_pred_reg_best = best_catboost_model.predict(X_test)
mse_best = mean_squared_error(y_test, y_pred_reg_best)
rmse_best = mse_best ** 0.5
print(f'Regression Model RMSE with Best Parameters: {rmse_best:.4f}')


Fitting 3 folds for each of 20 candidates, totalling 60 fits
