# Model 5
Hyperparameter tuning for XGBoost using grid search

## Restult
Best Model: n_estimators =1500, learning_rate=0.05, max_depth=3
Test data score: 0.72199 (low frequency targets removed)
0.72158

max_dept = 3 gave worse results. Lowest RMSE:  0.7227608322274359. Test Score: 0.72195

Next is max_depth = 10


## Future:


In [59]:
# reading data from google drive 
# data paths whether notebook is run locally or google colab
import os
try:
    from google.colab import drive
    drive.mount('/content/drive')
    COMPETETION_PATH = "/content/drive/MyDrive/30-days-of-ml-competition1"
    TRAIN_DATA_PATH = "/content/drive/MyDrive/30-days-of-ml-competition1/data/train.csv"
    TEST_DATA_PATH = "/content/drive/MyDrive/30-days-of-ml-competition1/data/test.csv"
    OUTPUT_PATH = "/content/drive/MyDrive/30-days-of-ml-competition1/output"
except:
    TRAIN_DATA_PATH = os.path.join("data", "train.csv")
    TEST_DATA_PATH = os.path.join("data", "test.csv")

print(f"Training Path {TRAIN_DATA_PATH}")
print(f"Testing Path {TEST_DATA_PATH}")




Training Path data/train.csv
Testing Path data/test.csv


In [60]:

# library imports
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

# preprocessing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# modeling
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [61]:
def read_organise_data(train=TRAIN_DATA_PATH):
    """read the data from a path and splitting to features and target

    Args:
        train (path, optional): The path of training data file to. Defaults to TRAIN_DATA_PATH.

    Returns:
        X, y: X for features and y for target
    """
    full_df = pd.read_csv(train, index_col="id")
    print(f"Shape of Dataset: {full_df.shape}")
    return full_df

df= read_organise_data()
X_test = pd.read_csv(TEST_DATA_PATH, index_col='id')
print(f"Shape of Test set {X_test.shape}")

Shape of Dataset: (300000, 25)
Shape of Test set (200000, 24)


## Understanding Data

In [62]:
#df = df[(df.target>=6.9) & (df.target<10.4)]
df.shape

(300000, 25)

In [41]:
# checking for null values in the full dataset
df.isnull().any().sum() # no null value in dataset

0

In [63]:
y = df.target.copy()
X = df.drop('target', axis=1).copy()

In [64]:
# categorical columns start with cat
cat_cnames = [cname for cname in X.columns if 'cat' in cname]
# numerical columns starts with cont
num_cnames = [cname for cname in X.columns if 'cont' in cname]

In [65]:
ordinal_encoder = OrdinalEncoder()
X[cat_cnames] = ordinal_encoder.fit_transform(X[cat_cnames])
X_test[cat_cnames] = ordinal_encoder.transform(X_test[cat_cnames])


In [66]:
# splitting the data
X_train, X_valid, y_train, y_valid = train_test_split(X,y, test_size=0.1,
                                                      random_state= 1)

In [67]:

# checking the cardinality of the categorical columns
for cname in cat_cnames:
    num_unique = X[cname].nunique()
    print(f"{cname} has {num_unique}")
    if num_unique > 10:
        print(f"\t{cname} has a high cardinality")

# eventhough cat9 col has more than 10 unique values, we will still use the OnehotEncoder 

cat0 has 2
cat1 has 2
cat2 has 2
cat3 has 4
cat4 has 4
cat5 has 4
cat6 has 8
cat7 has 8
cat8 has 7
cat9 has 15
	cat9 has a high cardinality


In [68]:
X_train.head()

Unnamed: 0_level_0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
213795,0.0,0.0,0.0,2.0,1.0,1.0,0.0,4.0,2.0,6.0,...,0.686119,0.430419,0.150978,0.287723,0.226348,0.402948,0.322723,0.449588,0.594781,0.741928
329677,1.0,0.0,0.0,2.0,1.0,1.0,0.0,4.0,2.0,8.0,...,0.89798,0.4008,1.02308,0.205725,0.319528,0.339233,0.277261,0.437804,0.415667,0.392423
333144,1.0,0.0,0.0,2.0,1.0,3.0,0.0,1.0,0.0,2.0,...,0.287527,0.743913,0.396216,0.780235,0.343993,0.662579,0.450997,0.693391,0.968415,0.875109
382830,1.0,1.0,0.0,2.0,1.0,1.0,0.0,4.0,2.0,0.0,...,0.324665,0.420632,0.534724,0.864813,0.436404,0.299311,0.71612,0.176346,0.416044,0.284034
329789,0.0,1.0,0.0,2.0,1.0,1.0,0.0,4.0,0.0,11.0,...,0.280364,0.156929,0.341584,0.375121,0.818845,0.241667,0.753128,0.269681,0.486466,0.251513


In [49]:
# parameters to search 
params = { 'max_depth': [3],
           'learning_rate': [0.01, 0.05],
           'n_estimators': [300, 600, 900, 1200, 1500]}


def param_tuning(params):

        model = XGBRegressor(n_estimators=1000, max_depth=3, 
                        learning_rate=0.01, objective='reg:squarederror', random_state=1)
        grid_search = GridSearchCV(estimator=model, 
                        param_grid=params,
                        scoring='neg_mean_squared_error', 
                        verbose=3,
                        cv=2)
        grid_search.fit(X, y)
        print("Best parameters:", grid_search.best_params_)
        print("Lowest RMSE: ", (-grid_search.best_score_)**0.5)
        return grid_search

In [50]:
grid_search_results = param_tuning(params)

Fitting 2 folds for each of 10 candidates, totalling 20 fits
[CV] learning_rate=0.01, max_depth=3, n_estimators=300 ...............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.01, max_depth=3, n_estimators=300, score=-0.664, total=  49.9s
[CV] learning_rate=0.01, max_depth=3, n_estimators=300 ...............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   49.9s remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=3, n_estimators=300, score=-0.662, total=  48.6s
[CV] learning_rate=0.01, max_depth=3, n_estimators=600 ...............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  1.6min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=3, n_estimators=600, score=-0.515, total= 1.7min
[CV] learning_rate=0.01, max_depth=3, n_estimators=600 ...............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=600, score=-0.514, total= 1.5min
[CV] learning_rate=0.01, max_depth=3, n_estimators=900 ...............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=900, score=-0.511, total= 1.8min
[CV] learning_rate=0.01, max_depth=3, n_estimators=900 ...............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=900, score=-0.511, total= 1.8min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1200 ..............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=1200, score=-0.509, total= 2.5min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1200 ..............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=1200, score=-0.509, total= 2.5min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1500 ..............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=1500, scor

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed: 45.9min finished


Best parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 1500}
Lowest RMSE:  0.7053222498605214


In [51]:
grid_search_results

GridSearchCV(cv=2, error_score=nan,
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None, gamma=None,
                                    gpu_id=None, importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.01, max_delta_step=None,
                                    max_depth=3, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimator...
                                    random_state=1, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                    subsample=None, tree_method=None,
                                    validate_parameters=None, verbosity=None),

In [69]:
print(f"Best Model {grid_search_results.best_estimator_}")
best_model =  grid_search_results.best_estimator_

Best Model XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.05, max_delta_step=0, max_depth=3,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=1500, n_jobs=0, num_parallel_tree=1,
             objective='reg:squarederror', random_state=1, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)


In [70]:
best_model.fit(X,y)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.05, max_delta_step=0, max_depth=3,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=1500, n_jobs=0, num_parallel_tree=1,
             objective='reg:squarederror', random_state=1, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [77]:
grid_search_results.predict(X_valid)

array([8.218746, 8.227535, 8.113871, ..., 8.464844, 8.390709, 8.352545],
      dtype=float32)

In [71]:
prediction_valid = best_model.predict(X_valid)
print(mean_squared_error(y_valid, prediction_valid)**0.5)

0.7091410079672386


In [79]:
predictions = grid_search_results.predict(X_test)
OUTPUT_PATH = 'output'
def output_submission(prediction, file_name):
    """creating a kaggle submission file

    Args:
        prediction (array): an array of predictions of the test dataset
        file_name (string): a string for the name without the extension
    """
    my_submission = pd.DataFrame({'target': predictions},
                                 index=X_test.index)
    #my_submission.set_index('id')
    file_path = os.path.join(OUTPUT_PATH,file_name)
    my_submission.to_csv(f'{file_path}.csv')
    print(f'A submission file has been made at {file_path}')

In [80]:
output_submission(predictions, "Submission5-4")

A submission file has been made at output/Submission5-4


In [53]:
X

Unnamed: 0_level_0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,1.0,1.0,2.0,1.0,1.0,0.0,4.0,2.0,13.0,...,0.610706,0.400361,0.160266,0.310921,0.389470,0.267559,0.237281,0.377873,0.322401,0.869850
2,1.0,1.0,0.0,0.0,1.0,3.0,0.0,5.0,0.0,14.0,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
3,0.0,0.0,0.0,2.0,1.0,3.0,0.0,3.0,0.0,5.0,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
4,1.0,1.0,0.0,2.0,1.0,3.0,0.0,4.0,2.0,10.0,...,0.284667,0.668980,0.239061,0.732948,0.679618,0.574844,0.346010,0.714610,0.540150,0.280682
6,0.0,0.0,0.0,2.0,1.0,3.0,0.0,4.0,0.0,13.0,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499993,1.0,1.0,0.0,0.0,1.0,3.0,0.0,4.0,0.0,8.0,...,0.307883,0.769792,0.450538,0.934360,1.005077,0.853726,0.422541,1.063463,0.697685,0.506404
499996,0.0,1.0,0.0,2.0,1.0,1.0,0.0,4.0,4.0,5.0,...,0.736713,0.528056,0.508502,0.358247,0.257825,0.433525,0.301015,0.268447,0.577055,0.823611
499997,1.0,1.0,0.0,2.0,1.0,2.0,0.0,4.0,6.0,5.0,...,0.277074,0.688747,0.372425,0.364936,0.383224,0.551825,0.661007,0.629606,0.714139,0.245732
499998,0.0,1.0,0.0,2.0,1.0,1.0,0.0,4.0,4.0,8.0,...,0.805963,0.344404,0.424243,0.382028,0.468819,0.351036,0.288768,0.611169,0.380254,0.332030


In [81]:

# parameters to search 
params2 = { 'max_depth': [6],
           'learning_rate': [0.01, 0.05],
           'n_estimators': [300, 600, 900, 1200, 1500]}

def param_tuning(params):

        model = XGBRegressor(n_estimators=1000, max_depth=3, 
                        learning_rate=0.01, objective='reg:squarederror', random_state=1)
        grid_search = GridSearchCV(estimator=model, 
                        param_grid=params,
                        scoring='neg_mean_squared_error', 
                        verbose=3,
                        cv=2)
        grid_search.fit(X, y)
        print("Best parameters:", grid_search.best_params_)
        print("Lowest RMSE: ", (-grid_search.best_score_)**0.5)
        return grid_search

grid_search_results2 = param_tuning(params2)

Fitting 2 folds for each of 10 candidates, totalling 20 fits
[CV] learning_rate=0.01, max_depth=6, n_estimators=300 ...............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.01, max_depth=6, n_estimators=300, score=-0.683, total= 1.1min
[CV] learning_rate=0.01, max_depth=6, n_estimators=300 ...............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.1min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=6, n_estimators=300, score=-0.681, total= 1.1min
[CV] learning_rate=0.01, max_depth=6, n_estimators=600 ...............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  2.2min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=6, n_estimators=600, score=-0.531, total= 2.2min
[CV] learning_rate=0.01, max_depth=6, n_estimators=600 ...............
[CV]  learning_rate=0.01, max_depth=6, n_estimators=600, score=-0.531, total= 2.1min
[CV] learning_rate=0.01, max_depth=6, n_estimators=900 ...............
[CV]  learning_rate=0.01, max_depth=6, n_estimators=900, score=-0.527, total= 3.2min
[CV] learning_rate=0.01, max_depth=6, n_estimators=900 ...............
[CV]  learning_rate=0.01, max_depth=6, n_estimators=900, score=-0.528, total= 3.2min
[CV] learning_rate=0.01, max_depth=6, n_estimators=1200 ..............
[CV]  learning_rate=0.01, max_depth=6, n_estimators=1200, score=-0.525, total= 4.4min
[CV] learning_rate=0.01, max_depth=6, n_estimators=1200 ..............
[CV]  learning_rate=0.01, max_depth=6, n_estimators=1200, score=-0.526, total= 4.3min
[CV] learning_rate=0.01, max_depth=6, n_estimators=1500 ..............
[CV]  learning_rate=0.01, max_depth=6, n_estimators=1500, scor

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed: 72.1min finished


Best parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 600}
Lowest RMSE:  0.7227608322274359


In [82]:
predictions = grid_search_results2.predict(X_test)
OUTPUT_PATH = 'output'
def output_submission(prediction, file_name):
    """creating a kaggle submission file

    Args:
        prediction (array): an array of predictions of the test dataset
        file_name (string): a string for the name without the extension
    """
    my_submission = pd.DataFrame({'target': predictions},
                                 index=X_test.index)
    #my_submission.set_index('id')
    file_path = os.path.join(OUTPUT_PATH,file_name)
    my_submission.to_csv(f'{file_path}.csv')
    print(f'A submission file has been made at {file_path}')

In [83]:
output_submission(predictions, "submission5-4")

A submission file has been made at output/submission5-4


In [84]:
## for max_depth =10

# parameters to search 
params10 = { 'max_depth': [10],
           'learning_rate': [0.01, 0.05],
           'n_estimators': [300, 600, 900, 1200, 1500]}


def param_tuning(params):

        model = XGBRegressor(n_estimators=1000, max_depth=3, 
                        learning_rate=0.01, objective='reg:squarederror', random_state=1)
        grid_search = GridSearchCV(estimator=model, 
                        param_grid=params,
                        scoring='neg_mean_squared_error', 
                        verbose=3,
                        cv=2)
        grid_search.fit(X, y)
        print("Best parameters:", grid_search.best_params_)
        print("Lowest RMSE: ", (-grid_search.best_score_)**0.5)
        return grid_search

In [85]:
grid_search_results10 = param_tuning(params10)

Fitting 2 folds for each of 10 candidates, totalling 20 fits
[CV] learning_rate=0.01, max_depth=10, n_estimators=300 ..............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.01, max_depth=10, n_estimators=300, score=-0.681, total= 2.0min
[CV] learning_rate=0.01, max_depth=10, n_estimators=300 ..............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.0min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=10, n_estimators=300, score=-0.680, total= 1.8min
[CV] learning_rate=0.01, max_depth=10, n_estimators=600 ..............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  3.7min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=10, n_estimators=600, score=-0.529, total= 4.0min
[CV] learning_rate=0.01, max_depth=10, n_estimators=600 ..............
[CV]  learning_rate=0.01, max_depth=10, n_estimators=600, score=-0.529, total= 3.9min
[CV] learning_rate=0.01, max_depth=10, n_estimators=900 ..............
[CV]  learning_rate=0.01, max_depth=10, n_estimators=900, score=-0.527, total= 6.1min
[CV] learning_rate=0.01, max_depth=10, n_estimators=900 ..............
[CV]  learning_rate=0.01, max_depth=10, n_estimators=900, score=-0.527, total= 6.2min
[CV] learning_rate=0.01, max_depth=10, n_estimators=1200 .............
[CV]  learning_rate=0.01, max_depth=10, n_estimators=1200, score=-0.527, total= 8.6min
[CV] learning_rate=0.01, max_depth=10, n_estimators=1200 .............
[CV]  learning_rate=0.01, max_depth=10, n_estimators=1200, score=-0.527, total= 7.8min
[CV] learning_rate=0.01, max_depth=10, n_estimators=1500 .............
[CV]  learning_rate=0.01, max_depth=10, n_estimators=150

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed: 142.6min finished


Best parameters: {'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 1200}
Lowest RMSE:  0.7257199867083844


In [87]:
# parameters to search 
params3 = { 'max_depth': [3],
           'learning_rate': [0.01, 0.05],
           'n_estimators': [1600, 1700, 1900]}

def param_tuning(params):

        model = XGBRegressor(n_estimators=1000, max_depth=3, 
                        learning_rate=0.01, objective='reg:squarederror', random_state=1)
        grid_search = GridSearchCV(estimator=model, 
                        param_grid=params,
                        scoring='neg_mean_squared_error', 
                        verbose=3,
                        cv=2)
        grid_search.fit(X, y)
        print("Best parameters:", grid_search.best_params_)
        print("Lowest RMSE: ", (-grid_search.best_score_)**0.5)
        return grid_search

In [88]:
grid_search_results3 = param_tuning(params3)

Fitting 2 folds for each of 6 candidates, totalling 12 fits
[CV] learning_rate=0.01, max_depth=3, n_estimators=1600 ..............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.01, max_depth=3, n_estimators=1600, score=-0.530, total= 3.3min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1600 ..............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.3min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=3, n_estimators=1600, score=-0.531, total= 3.2min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1700 ..............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  6.5min remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=3, n_estimators=1700, score=-0.530, total= 3.4min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1700 ..............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=1700, score=-0.530, total= 3.3min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1900 ..............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=1900, score=-0.529, total= 3.7min
[CV] learning_rate=0.01, max_depth=3, n_estimators=1900 ..............
[CV]  learning_rate=0.01, max_depth=3, n_estimators=1900, score=-0.529, total= 3.7min
[CV] learning_rate=0.05, max_depth=3, n_estimators=1600 ..............
[CV]  learning_rate=0.05, max_depth=3, n_estimators=1600, score=-0.521, total= 2.8min
[CV] learning_rate=0.05, max_depth=3, n_estimators=1600 ..............
[CV]  learning_rate=0.05, max_depth=3, n_estimators=1600, score=-0.520, total= 2.8min
[CV] learning_rate=0.05, max_depth=3, n_estimators=1700 ..............
[CV]  learning_rate=0.05, max_depth=3, n_estimators=1700, 

[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed: 39.6min finished


Best parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 1900}
Lowest RMSE:  0.7210604314934709


In [89]:
grid_search_results

GridSearchCV(cv=2, error_score=nan,
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None, gamma=None,
                                    gpu_id=None, importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.01, max_delta_step=None,
                                    max_depth=3, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimator...
                                    random_state=1, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                    subsample=None, tree_method=None,
                                    validate_parameters=None, verbosity=None),