In [6]:
import warnings

import numpy as np
import xgboost as xgb
from hyperopt import fmin, tpe, STATUS_OK, STATUS_FAIL, Trials
from hyperopt import hp
from hyperopt import space_eval
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split, cross_val_score
from xgboost import XGBClassifier

In [3]:
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix and the target vector.
data = load_breast_cancer()
X, y = data['data'], data['target']

In [9]:
# Hold out 25% of the samples for the final evaluation. The split is seeded so
# that re-running the notebook yields the same train/test partition (and hence
# comparable scores) every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [4]:
# Hyperparameter search space passed to hyperopt's fmin.
#   hp.choice(label, options)        -> one of the listed options
#   hp.quniform(label, low, high, q) -> a value between low and high,
#                                       quantised to steps of q
# NOTE: for hp.choice parameters the dict returned by fmin stores the index of
# the chosen option rather than the option itself.
space = dict(
    max_depth=hp.choice('max_depth', range(5, 30)),
    learning_rate=hp.quniform('learning_rate', 0.01, 0.5, 0.01),
    n_estimators=hp.choice('n_estimators', range(20, 205, 5)),
    gamma=hp.quniform('gamma', 0, 0.50, 0.01),
    min_child_weight=hp.quniform('min_child_weight', 1, 10, 1),
    subsample=hp.quniform('subsample', 0.1, 1, 0.01),
    colsample_bytree=hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
)

In [26]:
def objective(space, x, y):
    """Hyperopt objective: 3-fold cross-validated ROC AUC of an XGBoost classifier.

    Parameters
    ----------
    space : dict
        One sampled point of the search space. Must provide the keys
        'n_estimators', 'max_depth', 'learning_rate', 'gamma',
        'min_child_weight', 'subsample' and 'colsample_bytree'.
    x : array-like
        Feature matrix the candidate model is cross-validated on.
    y : array-like
        Target labels aligned with ``x``.

    Returns
    -------
    dict
        ``{'loss': 1 - mean ROC AUC, 'status': STATUS_OK}``; the loss is what
        fmin minimises, so a higher AUC gives a lower loss.
    """
    # Silences every warning for the rest of the process, not just this call.
    warnings.filterwarnings(action='ignore')

    classifier = XGBClassifier(
        # Tree count and depth must be integers; coerce both so the function
        # also accepts float-valued samples.
        n_estimators=int(space['n_estimators']),
        max_depth=int(space['max_depth']),
        learning_rate=space['learning_rate'],
        gamma=space['gamma'],
        min_child_weight=space['min_child_weight'],
        subsample=space['subsample'],
        colsample_bytree=space['colsample_bytree'],
        use_label_encoder=False,
        eval_metric="logloss",
    )

    # Applying k-Fold Cross Validation on the data passed in by the caller.
    # cross_val_score fits a fresh clone of the estimator on every fold, so a
    # separate fit on the full data beforehand would not influence the score.
    rocauc = cross_val_score(estimator=classifier, X=x, y=y, cv=3, scoring="roc_auc")
    CrossValMean = rocauc.mean()

    print(f"CrossValMean: {CrossValMean}")

    return {'loss': 1 - CrossValMean, 'status': STATUS_OK}

In [27]:
def cost(x):
    """Single-argument objective for fmin.

    ``x`` is the hyperparameter sample; it is forwarded to ``objective``
    together with the training split.
    """
    return objective(x, x=X_train, y=y_train)

In [28]:
# Trials records every evaluation made during the search.
trials = Trials()

# Run 50 evaluations of the TPE search over `space`, minimising `cost`.
# NOTE(review): no `rstate` is passed, so the sampled configurations differ
# from run to run; the accepted `rstate` type depends on the installed
# hyperopt version, so confirm it before seeding.
best = fmin(fn=cost,
            space=space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trials)

# For hp.choice parameters `best` stores the index of the selected option, not
# the option itself. Decode it through the space so the printed values are the
# actual hyperparameters; `best` is left untouched for later cells.
best_params = space_eval(space, best)
print("Best: ", best_params)

 # Fitting XGBoost to the Training set



CrossValMean: 0.9924490591000308
CrossValMean: 0.9914981410984215
CrossValMean: 0.9965689033911835
CrossValMean: 0.9910028607048242
CrossValMean: 0.8011554130768718
CrossValMean: 0.9950430616379324
CrossValMean: 0.9864547296244351
CrossValMean: 0.9743207933009576
CrossValMean: 0.9788928483969558
CrossValMean: 0.9838938820034793
CrossValMean: 0.9880639754733123
CrossValMean: 0.9946325449300805
CrossValMean: 0.9905092817865809
CrossValMean: 0.9870812621448767
CrossValMean: 0.9910647260306646
CrossValMean: 0.9718508632999717
CrossValMean: 0.9954805793285056
CrossValMean: 0.9950362239332385
CrossValMean: 0.9919708650173487
CrossValMean: 0.992343440414969
CrossValMean: 0.9952990780229811
CrossValMean: 0.9904125521176214
CrossValMean: 0.9953633365454643
CrossValMean: 0.9921304936186722
CrossValMean: 0.995472374082873
CrossValMean: 0.9955489404738053
CrossValMean: 0.9949685465584084
CrossValMean: 0.9954040129375734
CrossValMean: 0.9946000737836042
CrossValMean: 0.9929976656394208
CrossValMean

In [None]:
# `best` (returned by fmin) stores the index of the selected option for the
# hp.choice parameters ('max_depth', 'n_estimators'), not the option itself.
# Map it back through the search space to obtain the real hyperparameters.
best_params = space_eval(space, best)

# Same estimator configuration as the one scored during the search.
classifier = XGBClassifier(n_estimators=int(best_params['n_estimators']),
                           max_depth=int(best_params['max_depth']),
                           learning_rate=best_params['learning_rate'],
                           gamma=best_params['gamma'],
                           min_child_weight=best_params['min_child_weight'],
                           subsample=best_params['subsample'],
                           colsample_bytree=best_params['colsample_bytree'],
                           use_label_encoder=False,
                           eval_metric="logloss")

# Fitting XGBoost to the Training set
classifier.fit(X_train, y_train)

# Applying k-Fold Cross Validation (default scorer of the classifier, i.e.
# accuracy). cross_val_score works on clones, so the fitted model above is
# not modified by it.
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)

CrossValMean = accuracies.mean()
CrossValSTD = accuracies.std()

print("Final CrossValMean: ", CrossValMean)
print("Final CrossValSTD: ", CrossValSTD)

# Predicting the Test set results and scoring them on the held-out split.
y_pred = classifier.predict(X_test)
y_proba = classifier.predict_proba(X_test)[:, 1]  # probability of class 1

print("Test accuracy: ", (y_pred == y_test).mean())
print("Test ROC AUC: ", roc_auc_score(y_test, y_proba))