# XGBoost Grid Search

In [1]:
import pandas as pd
import xgboost
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error, make_scorer
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
%run -i ./Model_Eval.ipynb

In [2]:
# Load the full training table and shuffle its rows. The shuffle is seeded so
# that the row order -- and with it the later train/test split and the
# cross-validation folds, which both select rows by position -- is the same
# on every fresh run of the notebook.
training_data = pd.read_csv('../Datasets/training_data_full.csv').sample(frac=1, random_state=42)

In [3]:
# get_numerical_features is not defined in this notebook; it is expected to be
# brought into the namespace by the `%run -i ./Model_Eval.ipynb` line in the
# import cell.
# NOTE(review): presumably returns the labels of the numeric columns of the
# frame -- confirm against Model_Eval.ipynb.
numerical_features = get_numerical_features(training_data)

In [4]:
# Separate the regression target from the model inputs.
# NOTE(review): confirm that numerical_features does not include the target
# column itself; if it does, the label leaks into the inputs.
target_column = 'DIABETES_3Y_Change_Percentage'
labels = training_data[target_column]
features = training_data[numerical_features]

# Reserve 20% of the rows as a hold-out set. The fixed seed makes the row
# selection deterministic for a given row order of `training_data`.
holdout_split = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, Y_train, Y_test = holdout_split

In [5]:
# Base estimator that is handed to the grid search as its `estimator`.
# NOTE(review): XGBoost documents `use_rmm` as a global configuration option
# (xgboost.set_config / xgboost.config_context) -- confirm that passing it as an
# estimator keyword has the intended effect.
xgb_model = XGBRegressor(use_rmm=True)

In [6]:
# Hyper-parameter grid: 4 tree depths x 3 learning rates x 3 ensemble sizes
# = 36 candidate settings.
parameters = dict(
    max_depth=[1, 3, 5, 7],
    eta=[0.1, 0.3, 0.5],
    n_estimators=[50, 200, 400],
)

# Exhaustive search over the grid with 5-fold cross-validation
# (36 candidates x 5 folds = 180 fits), run as 8 parallel jobs.
# No `scoring` is passed, so candidates are ranked with the estimator's own
# `score` method.
grid_search = GridSearchCV(xgb_model, parameters, cv=5, n_jobs=8, verbose=2)

In [None]:
# Tune on the training split only. Fitting on the complete `features`/`labels`
# would let the rows reserved in X_test / Y_test influence both model selection
# and the final refit, so any later score on the hold-out set would be
# optimistic.
grid_search.fit(X_train, Y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV] END .............eta=0.1, max_depth=1, n_estimators=200; total time=   2.8s
[CV] END ............eta=0.1, max_depth=1, n_estimators=1000; total time=  25.9s
[CV] END ............eta=0.1, max_depth=3, n_estimators=1000; total time=  57.5s
[CV] END ............eta=0.1, max_depth=5, n_estimators=1000; total time= 1.7min
[CV] END .............eta=0.1, max_depth=7, n_estimators=400; total time=  40.7s
[CV] END ............eta=0.1, max_depth=7, n_estimators=1000; total time= 2.0min
[CV] END .............eta=0.1, max_depth=9, n_estimators=400; total time= 1.3min
[CV] END .............eta=0.3, max_depth=1, n_estimators=400; total time=   6.2s
[CV] END ............eta=0.3, max_depth=1, n_estimators=1000; total time=   8.5s
[CV] END ............eta=0.3, max_depth=1, n_estimators=1000; total time=  27.6s
[CV] END ..............eta=0.3, max_depth=3, n_estimators=50; total time=   1.2s
[CV] END ..............eta=0.3, max_depth=3, n_

In [None]:
# Display the parameter combination that the fitted search selected as best.
grid_search.best_params_

In [None]:
# Disabled manual experiment, kept for reference: fit a single XGBRegressor with
# hand-picked hyper-parameters on the training split and report R^2 on the
# hold-out split. Uncommenting it rebinds `xgb_model`; run all four lines together.
# xgb_model = XGBRegressor(use_rmm=True, eta=0.01, max_depth=10, n_estimators=5000)
# xgb_model.fit(X_train, Y_train)
# test_pred = xgb_model.predict(X_test)
# r2_score(Y_test, test_pred)