In [1]:
import pandas as pa
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

from xgboost import XGBClassifier

In [2]:
RANDOM_STATE = 73

In [3]:
data = load_iris()

X = data.data
y = data.target

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [5]:
pipe = Pipeline(
    [("scaler", StandardScaler()), ("xgb", XGBClassifier(random_state=RANDOM_STATE))],
    memory="tmp",
)

In [6]:
randomized_search_params = {
    "n_estimators": np.arange(100, 1000, 100),
    "max_depth": np.arange(1, 100, 1),
    "learning_rate": np.arange(0.001, 0.1, 0.001),
}

In [7]:
randomized_search = RandomizedSearchCV(
    XGBClassifier(),
    randomized_search_params,
    n_iter=1000,
    scoring="accuracy",
    n_jobs=-1,
    cv=3,
    verbose=1,
)

In [8]:
randomized_search.fit(X_train, y_train)

print(randomized_search.best_params_)

Fitting 3 folds for each of 1000 candidates, totalling 3000 fits


{'n_estimators': 100, 'max_depth': 81, 'learning_rate': 0.014000000000000002}


In [16]:
grid_serach_params = {
    "n_estimators": np.arange(
        randomized_search.best_params_["n_estimators"] - 100
        if randomized_search.best_params_["n_estimators"] > 100
        else 1,
        randomized_search.best_params_["n_estimators"] + 100,
        10,
    ),
    "max_depth": np.arange(
        randomized_search.best_params_["max_depth"] - 10
        if randomized_search.best_params_["max_depth"] > 10
        else 1,
        randomized_search.best_params_["max_depth"] + 10,
        1,
    ),
    "learning_rate": np.arange(
        randomized_search.best_params_["learning_rate"] - 0.01,
        randomized_search.best_params_["learning_rate"] + 0.01,
        0.001,
    ),
}

In [17]:
grid_search = GridSearchCV(
    XGBClassifier(),
    grid_serach_params,
    scoring="accuracy",
    n_jobs=-1,
    cv=3,
    verbose=1,
    return_train_score=True,
)

In [18]:
grid_search.fit(X_train, y_train)

grid_search.best_params_

Fitting 3 folds for each of 8000 candidates, totalling 24000 fits


{'learning_rate': 0.005000000000000002, 'max_depth': 71, 'n_estimators': 161}

In [19]:
grid_search.score(X_test, y_test)

0.9666666666666667

In [20]:
grid_search_results = pa.DataFrame(grid_search.cv_results_)

In [21]:
grid_search_results.to_csv("grid_search_results.csv")

In [22]:
grid_search_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,0.052002,0.009574,0.002440,0.000873,0.004,71,1,"{'learning_rate': 0.004000000000000002, 'max_d...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9625,1.0,0.9625,0.975000,0.017678
1,0.066937,0.036821,0.001392,0.000436,0.004,71,11,"{'learning_rate': 0.004000000000000002, 'max_d...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9625,1.0,0.9625,0.975000,0.017678
2,0.052017,0.028058,0.002465,0.001423,0.004,71,21,"{'learning_rate': 0.004000000000000002, 'max_d...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9750,1.0,0.9625,0.979167,0.015590
3,0.017959,0.000142,0.002666,0.000943,0.004,71,31,"{'learning_rate': 0.004000000000000002, 'max_d...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9750,1.0,0.9625,0.979167,0.015590
4,0.023450,0.001189,0.001694,0.000454,0.004,71,41,"{'learning_rate': 0.004000000000000002, 'max_d...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9625,1.0,0.9625,0.975000,0.017678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,0.067653,0.004921,0.001667,0.000472,0.023,90,151,"{'learning_rate': 0.023, 'max_depth': 90, 'n_e...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9875,1.0,0.9750,0.987500,0.010206
7996,0.083868,0.011538,0.001334,0.000471,0.023,90,161,"{'learning_rate': 0.023, 'max_depth': 90, 'n_e...",0.95,0.85,0.925,0.908333,0.042492,3321,0.9875,1.0,0.9875,0.991667,0.005893
7997,0.078992,0.009563,0.001805,0.001401,0.023,90,171,"{'learning_rate': 0.023, 'max_depth': 90, 'n_e...",0.95,0.85,0.950,0.916667,0.047140,3141,0.9875,1.0,0.9875,0.991667,0.005893
7998,0.080869,0.005624,0.002083,0.000088,0.023,90,181,"{'learning_rate': 0.023, 'max_depth': 90, 'n_e...",0.95,0.85,0.950,0.916667,0.047140,3141,0.9875,1.0,0.9875,0.991667,0.005893
