In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import (
    GridSearchCV,
    RandomizedSearchCV,
    cross_val_score,
    train_test_split,
)

In [2]:
# Load the training CSV and hold out 20% of the rows for validation.
target = 'SalePrice'
df = pd.read_csv('../data/train_enc.csv')

# A fixed random_state keeps the shuffled train/validation partition repeatable.
train, validation = train_test_split(df, test_size=0.2, random_state=200, shuffle=True)

# Separate the feature columns from the target column in each partition.
X_train, y_train = train.drop(columns=[target]), train[target]
X_validation, y_validation = validation.drop(columns=[target]), validation[target]

In [3]:
# Grid Search

# Exhaustive search over the number of trees, the learning rate, and the tree
# depth of an XGBoost regressor, scored by R^2 with 3-fold cross-validation.
param_grid_extended = {
    'n_estimators': [100, 125, 150, 175, 200, 225, 250, 275, 300],
    # Each rate is listed exactly once: a repeated value would make
    # GridSearchCV refit identical candidates without exploring anything new.
    'learning_rate': [0.01, 0.05, 0.1, 0.5, 1],
    'max_depth': [3, 5, 7]
}

grid_search = GridSearchCV(
    xgb.XGBRegressor(),
    param_grid_extended,
    cv=3,
    scoring='r2',
    return_train_score=True,  # keep train scores in cv_results_ for inspection
)

# Fit the GridSearchCV
grid_search.fit(X_train, y_train)

# Save the best estimator (refit on all of X_train by GridSearchCV's default refit=True)
gbr_model = grid_search.best_estimator_

# Get the per-candidate results of the grid search as a DataFrame
results = pd.DataFrame(grid_search.cv_results_)

# Print the best parameters and corresponding mean cross-validated score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

Best parameters:  {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 300}
Best score:  0.8791048409186314


In [11]:
# Randomized Grid Search

# Candidate values that RandomizedSearchCV samples from.
param_dist = {
    'n_estimators': np.arange(100, 801, 10),     # 100 to 800 inclusive, in steps of 10
    'learning_rate': np.linspace(0.01, 1, 100),  # 100 evenly spaced rates from 0.01 to 1
    'max_depth': np.arange(3, 18, 1)             # 3 to 17 inclusive
}

# Initialize the RandomizedSearchCV: 100 sampled candidates, 3-fold CV, R^2 scoring.
random_search = RandomizedSearchCV(
    xgb.XGBRegressor(),
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    scoring='r2',
    random_state=42,          # fixes which candidates are sampled
    return_train_score=True,  # keep train scores in cv_results_ for inspection
    n_jobs=-1,                # use all available cores for the CV fits
)

# Fit the RandomizedSearchCV
random_search.fit(X_train, y_train)

# Save the best estimator
xgb_model_random = random_search.best_estimator_

# Get the per-candidate results of the randomized search as a DataFrame
results_random = pd.DataFrame(random_search.cv_results_)

# Print the best parameters and corresponding mean cross-validated score
print("Best parameters: ", random_search.best_params_)
print("Best score: ", random_search.best_score_)

Best parameters:  {'n_estimators': 610, 'max_depth': 3, 'learning_rate': 0.09}
Best score:  0.8804541321842239


Tried running the randomized search (RandomizedSearchCV) in many variations and could not get a score over 0.89.