Model Reference: https://www.datacamp.com/tutorial/xgboost-in-python and https://www.datatechnotes.com/2019/06/regression-example-with-xgbregressor-in.html


Tuning Reference: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning

In [9]:
import pandas as pd
import numpy as np
import pickle

# Models
import xgboost as xgb

#Tuning and Cross Validation
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, accuracy_score
from sklearn.model_selection import cross_val_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

import warnings
warnings.filterwarnings("ignore")

#### Importing Data

In [2]:
x_train = pd.read_csv('../../Data Files/Training Data/x_train.csv')
x_test = pd.read_csv('../../Data Files/Training Data/x_test.csv')
y_train = pd.read_csv('../../Data Files/Training Data/y_train.csv')
y_test = pd.read_csv('../../Data Files/Training Data/y_test.csv')

#### Defining XGBoost Architecture

In [3]:
xgbr = xgb.XGBRegressor(verbosity=0) 

print(xgbr)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)


#### Training XGBoost

In [4]:
xgbr.fit(x_train, y_train)

In [5]:
y_pred = xgbr.predict(x_test)
mae = mean_absolute_error(y_test, y_pred)
print("mean_absolute_error = {:.3}".format(mae)) 

mean_absolute_error = 1.11


In [6]:
score = xgbr.score(x_train, y_train)  

print("Training score: ", score)

Training score:  0.5268115671434962


In [7]:
scores = cross_val_score(xgbr, x_train, y_train, cv=5)
print("Mean cross-validation score: %.2f" % scores.mean())

Mean cross-validation score: 0.36


#### Tuning for Hyperparameters

In [10]:
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 1500,
        'seed': 0
    }

In [11]:
def objective(space):
    tuning=xgb.XGBRegressor(
                    n_estimators =space['n_estimators'], max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))
    
    evaluation = [(x_train, y_train), (x_test, y_test)]
    
    tuning.fit(x_train, y_train,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=25,verbose=False)
    

    pred = tuning.predict(x_test)
    accuracy = accuracy_score(y_test, pred>0.5)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

In [12]:
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)



SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     


0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     


0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     


0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       


SCORE:                                                                                                                     
0.008977236293683874                                                                                                       




SCORE:                                                                                                                     
0.008977236293683874                                                                                                       
100%|███████████████████████████████████████████████| 100/100 [16:02<00:00,  9.62s/trial, best loss: -0.008977236293683874]


In [13]:
print("The best hyperparameters are : ","\n")
print(best_hyperparams)

The best hyperparameters are :  

{'colsample_bytree': 0.9991054651345141, 'gamma': 6.34759019586084, 'max_depth': 5.0, 'min_child_weight': 2.0, 'reg_alpha': 146.0, 'reg_lambda': 0.14687871034108724}


#### Generating Predictions

In [28]:
best_hyperparams["max_depth"] = int(best_hyperparams["max_depth"])

In [35]:
tuned_xgboost = xgb.XGBRegressor(**best_hyperparams)
tuned_xgboost.fit(x_train, y_train)
y_pred_xgb = tuned_xgboost.predict(x_test)

score = xgbr.score(x_train, y_train)
print("Training score: ", score)

Training score:  0.5268115671434962


In [36]:
scores = cross_val_score(tuned_xgboost, x_train, y_train,cv=5)
print("Mean cross-validation score: %.2f" % scores.mean())

Mean cross-validation score: 0.37


In [37]:
mae_after_tuning = mean_absolute_error(y_test, y_pred)
print("mean_absolute_error = {:.3}".format(mae_after_tuning)) 

mean_absolute_error = 1.11


#### Saving Model File and Predictions

In [38]:
save_path = '../../Data Files/'
pickle.dump(tuned_xgboost, open(save_path + 'Model Files/' + 'xgb.pkl', 'wb'))
np.savetxt(save_path + 'Predictions/' + 'xgboost_output.csv', y_pred_xgb, delimiter=",")