In [1]:
from warnings import filterwarnings
filterwarnings('ignore')

import numpy as np
import pandas as pd

import xgboost as xgb
from sklearn.model_selection import GridSearchCV, train_test_split

from sklearn.metrics import mean_squared_error, classification_report

In [2]:
data = np.loadtxt('./dermatology.data', delimiter=',', converters={33: lambda x:int(x == '?'), 34: lambda x:int(x) - 1})

In [3]:
X = data[:, :33]
Y = data[:, 34]

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [5]:
xgb_model = xgb.XGBClassifier(
    objective= 'binary:logistic',
    nthread=1,
    seed=42,
    eval_metric='merror'
)

In [6]:
xgb_model.fit(X_train, Y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eval_metric='merror', gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=1, nthread=1,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=42, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              seed=42, subsample=1, tree_method='exact', ...)

In [7]:
xgb_predict = xgb_model.predict(X_test)

In [8]:
xgb_error = np.sqrt(mean_squared_error(Y_test, xgb_predict))

xgb_error

0.3813850356982369

In [9]:
gs_parameters = {
    'kernel':[
        'rbf',
        'linear',
        'poly'
    ],
    'C':[
        1,
        5,
        10,
        15,
        20
    ]
}

gs = GridSearchCV(xgb_model, gs_parameters, cv = 5, verbose = 2)
gs.fit(X_train, Y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ....................................C=1, kernel=rbf; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ....................................C=1, kernel=rbf; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
 

[CV] END ....................................C=5, kernel=rbf; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ....................................C=5, kernel=rbf; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ....................................C=5, kernel=rbf; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by lan

[CV] END ................................C=10, kernel=linear; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ................................C=10, kernel=linear; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ................................C=10, kernel=linear; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by lan

[CV] END ................................C=15, kernel=linear; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ................................C=15, kernel=linear; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ..................................C=15, kernel=poly; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by lan

[CV] END ..................................C=20, kernel=poly; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ..................................C=20, kernel=poly; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[CV] END ..................................C=20, kernel=poly; total time=   0.0s
Parameters: { "C", "kernel" } might not be used.

  This could be a false alarm, with some parameters getting used by lan

GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                     colsample_bylevel=1, colsample_bynode=1,
                                     colsample_bytree=1,
                                     enable_categorical=False,
                                     eval_metric='merror', gamma=0, gpu_id=-1,
                                     importance_type=None,
                                     interaction_constraints='',
                                     learning_rate=0.300000012,
                                     max_delta_step=0, max_depth=6,
                                     min_child_weight=1, missing=nan,
                                     monotone_constraints='()',
                                     n_estimators=100, n_jobs=1, nthread=1,
                                     num_parallel_tree=1,
                                     objective='multi:softprob',
                                     predict

In [10]:
gs.best_params_

{'C': 1, 'kernel': 'rbf'}

In [11]:
gs.best_estimator_

XGBClassifier(C=1, base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eval_metric='merror', gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', kernel='rbf',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=1, nthread=1, num_parallel_tree=1,
              objective='multi:softprob', predictor='auto', random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=None, seed=42, ...)

In [12]:
gs_best_predict = gs.best_estimator_.predict(X_test)

In [13]:
print(classification_report(Y_test, gs_best_predict))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        40
         1.0       0.83      0.94      0.88        16
         2.0       1.00      1.00      1.00        19
         3.0       0.92      0.80      0.86        15
         4.0       1.00      1.00      1.00        16
         5.0       1.00      1.00      1.00         4

    accuracy                           0.96       110
   macro avg       0.96      0.96      0.96       110
weighted avg       0.97      0.96      0.96       110

