<a href="https://colab.research.google.com/github/hjn14133/Machine-Learning/blob/main/XGBoost_Classifier_Wholesale_customers_data_Hyperparameters_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import pandas for data wrangling
import pandas as pd

# import numpy for Scientific computations
import numpy as np

# import machine learning libraries
import xgboost as xgb
from sklearn.metrics import accuracy_score

# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

In [2]:
df = pd.read_csv("Wholesale customers data.csv")

In [3]:
X = df.drop('Channel', axis=1)

y = df['Channel']

y[y == 2] = 0

y[y == 1] = 1

In [4]:
X.head()

Unnamed: 0,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen
0,3,12669,9656,7561,214,2674,1338
1,3,7057,9810,9568,1762,3293,1776
2,3,6353,8808,7684,2405,3516,7844
3,3,13265,1196,4221,6404,507,1788
4,3,22615,5410,7198,3915,1777,5185


In [5]:
y.head()

0    0
1    0
2    0
3    1
4    0
Name: Channel, dtype: int64

In [6]:
# Split data into separate training and test set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = 0.3,
                                                    random_state = 0)

In [7]:
# Bayesian Optimization with HYPEROPT

# Initialize domain space for range of values

space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }

In [8]:
# Define Objective function

def objective(space):
    clf=xgb.XGBClassifier(
                    n_estimators =space['n_estimators'], 
                    max_depth = int(space['max_depth']), 
                    gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),
                    min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))
    
    evaluation = [( X_train, y_train), ( X_test, y_test)]
    
    clf.fit(X_train, y_train,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=10,verbose=False)
    

    pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, pred>0.5)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

In [9]:
# Optimization algorithm
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)

SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.6515151515151515
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.6515151515151515
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.8712121212121212
SCORE:
0.3484848484848485
SCORE:
0.8712121212121212
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.8712121212121212
SCORE:
0.3484848484848485
SCORE:
0.8712121212121212
SCORE:
0.6515151515151515
SCORE:
0.3484848484848485
SCORE:
0.6515151515151515
SCORE:
0.8712121212121212
SCORE:
0.8712121212121212
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.8712121212121212
SCORE:
0.6515151515151515
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.3484848484848485
SCORE:
0.8712121212121212
SCORE:
0.3484848484848485
SCORE:
0.6515151515151515
SCORE:
0.348

In [10]:
print("The best hyperparameters are : ","\n")
print(best_hyperparams)

The best hyperparameters are :  

{'colsample_bytree': 0.748380944328856, 'gamma': 4.776022454086848, 'max_depth': 9.0, 'min_child_weight': 2.0, 'reg_alpha': 67.0, 'reg_lambda': 0.17725432207300074}
