In [1]:
# create sklearn pipeline
from sklearn.pipeline import Pipeline
# preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
# evaluation metrics
from sklearn.metrics import accuracy_score
# grid search
from sklearn.model_selection import train_test_split, GridSearchCV

In [2]:
from xgboost import XGBClassifier

In [3]:
# import the iris dataset loader
from sklearn.datasets import load_iris

In [4]:
# fetch the iris dataset, then pull out the feature matrix and the labels
iris = load_iris()
X = iris.data
y = iris.target

In [5]:
# show the feature matrix taken from iris.data
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [6]:
# show the target vector taken from iris.target
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [7]:
# preprocessing: standardize each of the 4 feature columns (indices 0..3)
preprocessor = ColumnTransformer(
    transformers=[('scaler', StandardScaler(), list(range(4)))]
)

In [8]:
# define model
# Pin the seed so the fit stays reproducible even if stochastic options
# (e.g. row/column subsampling) are enabled later; 42 is the same seed
# this notebook passes to train_test_split.
model = XGBClassifier(random_state=42)

In [9]:
# assemble the pipeline: run the preprocessor first, then the classifier
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', model),
    ]
)

In [10]:
# hyperparameter grid for the XGBoost step; the `model__` prefix routes each
# entry to the pipeline step named 'model'
param_grid = dict(
    # number of trees in the ensemble
    model__n_estimators=[100, 200, 300],
    # max depth of individual trees
    model__max_depth=[4, 6, 8],
    # learning rate of the algorithm
    model__learning_rate=[0.1, 0.01, 0.001],
)

In [11]:
# grid search over param_grid, evaluating the pipeline with 5-fold cross-validation
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

In [14]:
# hold out 20% of the samples for testing; stratify on y and fix the seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [15]:
# run the grid search (with cross-validation) on the training split
grid_search.fit(X=X_train, y=y_train)

In [16]:
# Report the hyperparameter combination selected by the grid search
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Pull out each tuned hyperparameter and print it right away
best_n_estimators = best_params['model__n_estimators']
print("Best n_estimators:", best_n_estimators)

best_max_depth = best_params['model__max_depth']
print("Best max_depth:", best_max_depth)

best_learning_rate = best_params['model__learning_rate']
print("Best learning_rate:", best_learning_rate)

Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 4, 'model__n_estimators': 300}
Best n_estimators: 300
Best max_depth: 4
Best learning_rate: 0.01


In [17]:
# get best model: the estimator selected by the grid search; since the search
# was built over `pipeline`, this is the whole pipeline (preprocessor + model)
best_model = grid_search.best_estimator_

In [18]:
# get predictions for the held-out test split; X_test is passed in raw because
# best_model is the pipeline and applies its own preprocessor step
y_pred = best_model.predict(X_test)

In [19]:
# evaluation: compare the test-set predictions against the true labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9666666666666667
