From 0cb88b2ba7b039552da4ac8d779f708993c1d072 Mon Sep 17 00:00:00 2001
From: Vladimir Iglovikov <viglovikov@trueaccord.com>
Date: Tue, 25 Oct 2016 11:56:34 -0700
Subject: [PATCH 1/3] Added method points_to_csv that saves known data points
 to csv file

---
 bayes_opt/bayesian_optimization.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/bayes_opt/bayesian_optimization.py b/bayes_opt/bayesian_optimization.py
index 4c74334e2..06c7eb128 100644
--- a/bayes_opt/bayesian_optimization.py
+++ b/bayes_opt/bayesian_optimization.py
@@ -327,3 +327,17 @@ def maximize(self,
         # Print a final report if verbose active.
         if self.verbose:
             self.plog.print_summary()
+
+    def points_to_csv(self, file_name):
+        """
+        Save all points with a known target value (from both initialization
+        and optimization) to a csv file: ``self.keys`` columns plus ``target``.
+
+        :param file_name: name of the csv file to write (no index column is added)
+
+        :return: None
+        """
+        import pandas as pd
+        points_df = pd.DataFrame(self.X, columns=self.keys)
+        points_df['target'] = self.Y
+        points_df.to_csv(file_name, index=False)
\ No newline at end of file

From eda6bed4ee3970c5c927bf24332fc737deb7d512 Mon Sep 17 00:00:00 2001
From: Vladimir Iglovikov <viglovikov@trueaccord.com>
Date: Tue, 25 Oct 2016 17:33:16 -0700
Subject: [PATCH 2/3] Added xgboost example

---
 examples/xgb_example.py | 79 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 examples/xgb_example.py

diff --git a/examples/xgb_example.py b/examples/xgb_example.py
new file mode 100644
index 000000000..b638cf756
--- /dev/null
+++ b/examples/xgb_example.py
@@ -0,0 +1,79 @@
+"""
+Bayesian hyperparameter optimization [https://github.com/fmfn/BayesianOptimization]
+for Mean Absolute Error objective
+on default features for https://www.kaggle.com/c/allstate-claims-severity
+"""
+
+__author__ = "Vladimir Iglovikov"
+
+import pandas as pd
+import xgboost as xgb
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import mean_absolute_error
+from bayes_opt import BayesianOptimization
+from tqdm import tqdm
+
+
+def xgb_evaluate(min_child_weight,
+                 colsample_bytree,
+                 max_depth,
+                 subsample,
+                 gamma,
+                 alpha):
+    """Return the negated 5-fold CV test MAE for the given hyperparameters."""
+    # Relies on the module-level params, xgtrain, num_rounds and random_state.
+    params['min_child_weight'] = int(min_child_weight)
+    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
+    params['max_depth'] = int(max_depth)
+    params['subsample'] = max(min(subsample, 1), 0)
+    params['gamma'] = max(gamma, 0)
+    params['alpha'] = max(alpha, 0)
+
+    # Negated because the optimizer maximizes, while a lower MAE is better.
+    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
+                       seed=random_state,
+                       callbacks=[xgb.callback.early_stop(50)])
+    return -cv_result['test-mae-mean'].values[-1]
+
+
+def prepare_data():
+    """Load the training csv and return it as an ``xgb.DMatrix``."""
+    train = pd.read_csv('../input/train.csv')
+    categorical_columns = train.select_dtypes(include=['object']).columns
+
+    # Integer-encode each object-dtype column with its own LabelEncoder.
+    for column in tqdm(categorical_columns):
+        le = LabelEncoder()
+        train[column] = le.fit_transform(train[column])
+
+    y = train['loss']
+    X = train.drop(['loss', 'id'], axis=1)  # positional axis fails on pandas 2.0+
+    xgtrain = xgb.DMatrix(X, label=y)
+    return xgtrain
+
+
+if __name__ == '__main__':
+    # These module-level names are read by xgb_evaluate on every call.
+    num_rounds = 3000
+    random_state = 2016
+    num_iter = 25
+    init_points = 5
+    params = {
+        'eta': 0.1,
+        'silent': 1,
+        'eval_metric': 'mae',
+        'verbose_eval': True,
+        'seed': random_state
+    }
+    xgtrain = prepare_data()
+
+    # Search box: one (lower, upper) pair per tuned hyperparameter.
+    search_space = {'min_child_weight': (1, 20),
+                    'colsample_bytree': (0.5, 1),
+                    'max_depth': (5, 15),
+                    'subsample': (0.5, 1),
+                    'gamma': (0, 10),
+                    'alpha': (0, 10)}
+    xgbBO = BayesianOptimization(xgb_evaluate, search_space)
+    xgbBO.maximize(init_points=init_points, n_iter=num_iter)
+

From 0e95b604efae17157786f9980b912a34633791c4 Mon Sep 17 00:00:00 2001
From: Vladimir Iglovikov <viglovikov@trueaccord.com>
Date: Tue, 25 Oct 2016 11:56:34 -0700
Subject: [PATCH 3/3] Added method points_to_csv that saves known data points
 to csv file

---
 bayes_opt/bayesian_optimization.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bayes_opt/bayesian_optimization.py b/bayes_opt/bayesian_optimization.py
index 06c7eb128..eda86e139 100644
--- a/bayes_opt/bayesian_optimization.py
+++ b/bayes_opt/bayesian_optimization.py
@@ -337,7 +337,7 @@ def points_to_csv(self, file_name):
 
         :return: None
         """
-        import pandas as pd
-        points_df = pd.DataFrame(self.X, columns=self.keys)
-        points_df['target'] = self.Y
-        points_df.to_csv(file_name, index=False)
\ No newline at end of file
+        # comments='' stops np.savetxt from prefixing the header row with '# '.
+        points = np.hstack((self.X, np.expand_dims(self.Y, axis=1)))
+        header = ','.join(list(self.keys) + ['target'])
+        np.savetxt(file_name, points, header=header, delimiter=',', comments='')
\ No newline at end of file