# XGBOOST

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the pre-processed training and validation tables from the working directory.
data_train = pd.read_csv('training_processed.csv')
data_valid = pd.read_csv('validation_processed.csv')

# Split each table into its feature columns and the 'ACTIVE' column,
# which holds the ground-truth labels.
features_train = data_train.drop(columns=['ACTIVE'])
labels_train = data_train['ACTIVE']
features_valid = data_valid.drop(columns=['ACTIVE'])
labels_valid = data_valid['ACTIVE']

In [None]:
# Hyper-parameter grid that the search below evaluates exhaustively
# (5 learning rates x 8 depths x 5 ensemble sizes).
xgb_parameters = {
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'n_estimators': [100, 200, 300, 400, 500],
}
xgb = XGBClassifier()

# NOTE(review): KFold is used here without shuffling or stratification. If the
# rows are ordered or 'ACTIVE' is imbalanced, folds may have skewed class
# ratios -- confirm this is intended (StratifiedKFold is the usual alternative).
cross_val = KFold(n_splits=5)

# Every candidate is scored on ROC AUC and accuracy; ROC AUC is the metric
# used to pick the best candidate and refit it.
xgb_grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=xgb_parameters,
    scoring=['roc_auc', 'accuracy'],
    refit='roc_auc',
    cv=cross_val,
)
xgb_grid_result = xgb_grid_search.fit(features_train, labels_train)

# Summarize results: best candidate first.
print("Best: %f using %s" % (xgb_grid_result.best_score_, xgb_grid_result.best_params_))
# NOTE(review): the two "Average" figures are means of the per-candidate mean
# CV scores over the entire grid, not the score of the best candidate.
print("Average AUC for XGBoost: ", np.mean(xgb_grid_result.cv_results_['mean_test_roc_auc']))
print("Average Accuracy for XGBoost: ", np.mean(xgb_grid_result.cv_results_['mean_test_accuracy']))

In [None]:
# Tabulate the per-candidate cross-validation results (one row per
# hyper-parameter combination) so they can be plotted below.
results = pd.DataFrame.from_dict(xgb_grid_result.cv_results_)

In [None]:
# Mean cross-validated ROC AUC per tree depth, one bar colour per learning rate.
plot = sns.barplot(
    data=results,
    x='param_max_depth',
    y='mean_test_roc_auc',
    hue='param_learning_rate',
    palette='Pastel2_r',
)
# Zoom the y-axis into the 0.6-1.0 band.
plot.set_ylim(0.6, 1)
plot.set(title='ROC_AUC of XGBoost with K-Fold-Cross Validation\n')

In [None]:
# Mean cross-validated ROC AUC per number of estimators, one bar colour per learning rate.
plot = sns.barplot(
    data=results,
    x='param_n_estimators',
    y='mean_test_roc_auc',
    hue='param_learning_rate',
    palette='Pastel2_r',
)
# Zoom the y-axis into the 0.6-1.0 band.
plot.set_ylim(0.6, 1)
plot.set(title='ROC_AUC of XGBoost with K-Fold-Cross Validation\n')

In [None]:
# Mean cross-validated accuracy per tree depth, one bar colour per learning rate.
plot = sns.barplot(
    data=results,
    x='param_max_depth',
    y='mean_test_accuracy',
    hue='param_learning_rate',
    palette='Pastel2_r',
)
# Zoom the y-axis into the 0.6-1.0 band.
plot.set(ylim=(0.6, 1))
# Title names XGBoost: `results` comes from the XGBoost grid search, not a neural network.
plot.set(title='Accuracy of XGBoost with K-Fold-Cross Validation\n')

In [None]:
# Mean cross-validated accuracy per number of estimators, one bar colour per learning rate.
plot = sns.barplot(
    data=results,
    x='param_n_estimators',
    y='mean_test_accuracy',
    hue='param_learning_rate',
    palette='Pastel2_r',
)
# Zoom the y-axis into the 0.6-1.0 band.
plot.set(ylim=(0.6, 1))
# Title names XGBoost: `results` comes from the XGBoost grid search, not a neural network.
plot.set(title='Accuracy of XGBoost with K-Fold-Cross Validation\n')