diff --git a/bayes_opt/bo/GP.py b/bayes_opt/GP.py
similarity index 98%
rename from bayes_opt/bo/GP.py
rename to bayes_opt/GP.py
index d7e6ad0e1..e674b5cd1 100644
--- a/bayes_opt/bo/GP.py
+++ b/bayes_opt/GP.py
@@ -11,7 +11,7 @@
 Fernando Nogueira
 '''

-# Python 2.7 users.
+
 from __future__ import print_function
 from __future__ import division
@@ -22,10 +22,7 @@
 from scipy.optimize import minimize
 from math import exp, fabs, sqrt, log, pi

-from ..support.objects import covariance, sample_covariance, kernels, acquisition, print_info
-#from help_functions import covariance, sample_covariance, kernels, acquisition, print_info
-
-
+from support.objects import covariance, sample_covariance, kernels, acquisition, print_info
diff --git a/bayes_opt/README b/bayes_opt/README
new file mode 100644
index 000000000..504cf1de3
--- /dev/null
+++ b/bayes_opt/README
@@ -0,0 +1,18 @@
+Working Python implementation of global optimization with Gaussian processes.
+
+Under (constant) development! (See the wiki for more information.)
+
+This is a constrained global optimization package built upon Bayesian inference and Gaussian processes that attempts to find the maximum value of an unknown function in as few iterations as possible. This technique is particularly suited for the optimization of high-cost functions and for situations where the balance between exploration and exploitation is important.
+
+This package was motivated by hyper-parameter optimization of machine learning algorithms when performing cross validation. Some of the design choices were clearly made with this setting in mind, and ultimately an out-of-the-box cross-validation optimization object will be implemented (soon).
+
+Disclaimer: This project is under active development; some of its functionality and syntax is bound to change, sometimes dramatically. If you find a bug, or anything that needs correction, please let me know.
+
+Basic dependencies are SciPy and NumPy. The examples additionally depend on matplotlib and scikit-learn.
+
+References for this implementation can be found in:
+
+http://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
+http://arxiv.org/pdf/1012.2599v1.pdf
+http://www.gaussianprocess.org/gpml/
+https://www.youtube.com/watch?v=vz3D36VXefI&index=10&list=PLE6Wd9FR--EdyJ5lbFl8UuGjecvVw66F6
diff --git a/bayes_opt/__init__.py b/bayes_opt/__init__.py
index 261acec26..84345577c 100644
--- a/bayes_opt/__init__.py
+++ b/bayes_opt/__init__.py
@@ -1,4 +1 @@
-from .bo import bayes_opt
-from .bo import GP
-#from .cv import magic_box
-from .cv.magic_box import magic_box_classifier
+import bayes_opt
\ No newline at end of file
diff --git a/bayes_opt/bo/bayes_opt.py b/bayes_opt/bayes_opt.py
similarity index 99%
rename from bayes_opt/bo/bayes_opt.py
rename to bayes_opt/bayes_opt.py
index 097beac43..529eb4412 100644
--- a/bayes_opt/bo/bayes_opt.py
+++ b/bayes_opt/bayes_opt.py
@@ -12,7 +12,6 @@
 Fernando Nogueira
'''

-# Python 2.7 users.
from __future__ import print_function from __future__ import division @@ -25,10 +24,8 @@ from math import exp, fabs, sqrt, log, pi -from .GP import GP -from ..support.objects import covariance, sample_covariance, kernels, acquisition, print_info -#from help_functions import covariance, sample_covariance, kernels, acquisition, print_info - +from GP import GP +from support.objects import covariance, sample_covariance, kernels, acquisition, print_info diff --git a/bayes_opt/bo/__init__.py b/bayes_opt/bo/__init__.py deleted file mode 100644 index c08d36ac9..000000000 --- a/bayes_opt/bo/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .bayes_opt import bayes_opt -from .GP import GP diff --git a/bayes_opt/cv/__init__.py b/bayes_opt/cv/__init__.py deleted file mode 100644 index fbb77a113..000000000 --- a/bayes_opt/cv/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .magic_box import magic_box_classifier diff --git a/bayes_opt/cv/magic_box.py b/bayes_opt/cv/magic_box.py deleted file mode 100644 index ce5735dfd..000000000 --- a/bayes_opt/cv/magic_box.py +++ /dev/null @@ -1,323 +0,0 @@ -''' -Black-box, self optimizing, classification and regression tool. - -Still under construction! -''' - - - -import numpy -from ..bo.bayes_opt import bayes_opt - -from sklearn import metrics -from sklearn.cross_validation import cross_val_score - -from sklearn.linear_model import LogisticRegression -from sklearn.svm import SVC -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import GradientBoostingClassifier - - -classification_scores = {'accuracy' : metrics.accuracy_score,\ - 'average_precision' : metrics.average_precision_score,\ - 'f1' : metrics.f1_score,\ - 'precision' : metrics.precision_score,\ - 'recall' : metrics.recall_score,\ - 'roc_auc' : metrics.roc_auc_score,} - - - -class magic_box_classifier: - - - # --------------------------------------------- // --------------------------------------------- # - # --------------------------------------------- // --------------------------------------------- # - def __init__(self, score_metric, n_folds = 10, n_jobs = 1, iterations = 50): - - self.sm = score_metric - self.folds = n_folds - self.njobs = n_jobs - self.it = iterations - - self.logit_argmax = {'C' : 1, 'intercept_scaling' : 1} - self.svm_argmax = {'C' : 1, 'gamma' : 0.0} - self.randf_argmax = {'n_estimators' : 10, 'min_samples_split' : 2, 'min_samples_leaf' : 1,\ - 'max_depth' : 3, 'max_features' : 'sqrt'} - self.gbt_argmax = {'n_estimators' : 10, 'min_samples_split' : 2, 'min_samples_leaf' : 1,\ - 'max_depth' : 3, 'max_features' : 'sqrt'} - - self.coefs = {'c_logit' : 0.25, 'c_svm' : 0.25, 'c_randf' : 0.25, 'c_gbt' : 0.25} - - self.logit_model = 0 - self.svm_model = 0 - self.randf_model = 0 - self.gbt_model = 0 - - - # --------------------------------------------- // --------------------------------------------- # - def get_parameters(self): - - models = ['logit', 'svm', 'rf', 'gbt'] - paras = [self.logit_argmax, self.svm_argmax, self.randf_argmax, self.gbt_argmax] - - return dict(zip(models, paras)) - - def get_coefficients(self): - - return self.coefs - - # --------------------------------------------- // --------------------------------------------- # - def logit_cv(self, C, intercept_scaling): - return numpy.mean(cross_val_score(estimator = LogisticRegression(C = C, intercept_scaling = intercept_scaling),\ - X = self.x, y = self.y, scoring = self.sm, cv = self.folds, n_jobs = self.njobs)) - - def svm_cv(self, C, gamma): - return numpy.mean(cross_val_score(estimator = SVC(C = C, 
gamma = gamma, probability = True, random_state = 0),\ - X = self.x, y = self.y, scoring = self.sm, cv = self.folds, n_jobs = self.njobs)) - - def randf_cv(self, n_estimators, min_samples_split, min_samples_leaf, max_depth, max_features): - - estima = RandomForestClassifier(n_estimators = int(n_estimators),\ - min_samples_split = int(min_samples_split),\ - min_samples_leaf = int(min_samples_leaf),\ - max_depth= int(max_depth),\ - max_features = max_features,\ - random_state = 1) - - return numpy.mean(cross_val_score(estimator = estima,\ - X = self.x,\ - y = self.y,\ - scoring = self.sm,\ - cv = self.folds,\ - n_jobs = self.njobs)) - - - def gbt_cv(self, n_estimators, min_samples_split, min_samples_leaf, max_depth, max_features): - - estima = GradientBoostingClassifier(n_estimators = int(n_estimators),\ - min_samples_split = int(min_samples_split),\ - min_samples_leaf = int(min_samples_leaf),\ - max_depth= int(max_depth),\ - max_features = max_features,\ - random_state = 2) - - return numpy.mean(cross_val_score(estimator = estima,\ - X = self.x,\ - y = self.y,\ - scoring = self.sm,\ - cv = self.folds,\ - n_jobs = self.njobs)) - - # --------------------------------------------- // --------------------------------------------- # - def ensemble(self, c1, c2, c3, c4, pred1, pred2, pred3, pred4, y, scoring): - psum = c1 + c2 + c3 + c4 - return scoring(y, (c1/psum)*pred1 + (c2/psum)*pred2 + (c3/psum)*pred3 + (c4/psum)*pred4) - - # --------------------------------------------- // --------------------------------------------- # - def skf_proba(self, clf_logit, clf_svm, clf_randf, clf_gbt): - - from sklearn.cross_validation import StratifiedKFold - skfold = StratifiedKFold(self.y, n_folds = self.folds) - - prediction = numpy.zeros((len(self.y), 4)) - - for itr, ite in skfold: - xtrain, xtest = self.x[itr], self.x[ite] - ytrain, ytest = self.y[itr], self.y[ite] - - clf_logit.fit(xtrain, ytrain) - clf_svm.fit(xtrain, ytrain) - clf_randf.fit(xtrain, ytrain) - clf_gbt.fit(xtrain, ytrain) - - prediction[ite, 0] = clf_logit.predict_proba(xtest)[:, 1] - prediction[ite, 1] = clf_svm.predict_proba(xtest)[:, 1] - prediction[ite, 2] = clf_randf.predict_proba(xtest)[:, 1] - prediction[ite, 3] = clf_gbt.predict_proba(xtest)[:, 1] - - - return prediction - - - def skf(self, clf_logit, clf_svm, clf_randf, clf_gbt): - - from sklearn.cross_validation import StratifiedKFold - skfold = StratifiedKFold(self.y, n_folds = self.folds) - - prediction = numpy.zeros((len(self.y), 4)) - - for itr, ite in skfold: - xtrain, xtest = self.x[itr], self.x[ite] - ytrain, ytest = self.y[itr], self.y[ite] - - clf_logit.fit(xtrain, ytrain) - clf_svm.fit(xtrain, ytrain) - clf_randf.fit(xtrain, ytrain) - clf_gbt.fit(xtrain, ytrain) - - prediction[ite, 0] = clf_logit.predict(xtest) - prediction[ite, 1] = clf_svm.predict(xtest) - prediction[ite, 2] = clf_randf.predict(xtest) - prediction[ite, 3] = clf_gbt.predict(xtest) - - return prediction - - # --------------------------------------------- // --------------------------------------------- # - def model_opt(self, model, params, name, log_max = False): - - print('Optimizing %s' % name) - - bo = bayes_opt(model, params) - if log_max: - max_val, argmax = bo.log_maximize(restarts = 100, init_points = 10, verbose = 1, num_it = self.it) - else: - max_val, argmax = bo.maximize(restarts = 100, init_points = 10, verbose = 1, num_it = self.it) - - print('Best score found with %s: %f' % (name, max_val)) - - #Return intergers for trees - if 'n_estimators' in params.keys(): - argmax['n_estimators'] = 
int(argmax['n_estimators']) - argmax['min_samples_split'] = int(argmax['min_samples_split']) - argmax['min_samples_leaf'] = int(argmax['min_samples_leaf']) - argmax['max_depth'] = int(argmax['max_depth']) - - return argmax - - # --------------------------------------------- // --------------------------------------------- # - def best_args(self, x, y): - - self.x = x - self.y = y - - logit_params = {'C' : (0.0001, 100), 'intercept_scaling' : (0.0001, 10)} - svm_params = {'C' : (0.0001, 100), 'gamma' : (0.00001, 10)} - randf_params = {'n_estimators' : (10, 1000),\ - 'min_samples_split' : (2, 200),\ - 'min_samples_leaf' : (1, 100),\ - 'max_depth' : (1, 20),\ - 'max_features' : (0.1, 0.999)} - gbt_params = {'n_estimators' : (10, 1000),\ - 'min_samples_split' : (2, 200),\ - 'min_samples_leaf' : (1, 100),\ - 'max_depth' : (1, 20),\ - 'max_features' : (0.1, 0.999)} - - self.logit_argmax = self.model_opt(self.logit_cv, logit_params, 'Logistic Regression', log_max = True) - self.svm_argmax = self.model_opt(self.svm_cv, svm_params, 'SVM', log_max = True) - self.randf_argmax = self.model_opt(self.randf_cv, randf_params, 'Random Forest') - self.gbt_argmax = self.model_opt(self.gbt_cv, gbt_params, 'Gradient Boosted Trees') - - - # --------------------------------------------- // --------------------------------------------- # - def make_ensemble(self): - - print('Ensemble time') - self.logit_model = LogisticRegression(**self.logit_argmax) - self.svm_model = SVC(**self.svm_argmax) - self.randf_model = RandomForestClassifier(**self.randf_argmax) - self.gbt_model = GradientBoostingClassifier(**self.gbt_argmax) - - self.svm_model.set_params(random_state = 0) - self.randf_model.set_params(random_state = 1) - self.gbt_model.set_params(random_state = 2) - - coefs_range = {'c_logit' : (1e-15, 1), 'c_svm' : (1e-15, 1), 'c_randf' : (1e-15, 1), 'c_gbt' : (1e-15, 1)} - - if self.sm == 'roc_auc': - self.svm_model.set_params(probability = True) - - pred = self.skf_proba(self.logit_model, self.svm_model, self.randf_model, self.gbt_model) - bo_ense = bayes_opt(lambda c_logit, c_svm, c_randf, c_gbt: self.ensemble(\ - c1 = c_logit,\ - c2 = c_svm,\ - c3 = c_randf,\ - c4 = c_gbt,\ - pred1 = pred[:, 0],\ - pred2 = pred[:, 1],\ - pred3 = pred[:, 2],\ - pred4 = pred[:, 3],\ - y = self.y,\ - scoring = classification_scores[self.sm]), coefs_range) - - best_ensemble, self.coefs = bo_ense.maximize(restarts = 100, init_points = 10, verbose = 1, num_it = 50) - - else: - pred = self.skf(self.logit_model, self.svm_model, self.randf_model, self.gbt_model) - bo_ense = bayes_opt(lambda c_logit, c_svm, c_randf, c_gbt: self.ensemble(\ - c1 = c_logit,\ - c2 = c_svm,\ - c3 = c_randf,\ - c4 = c_gbt,\ - pred1 = pred[:, 0],\ - pred2 = pred[:, 1],\ - pred3 = pred[:, 2],\ - pred4 = pred[:, 3],\ - y = self.y,\ - scoring = classification_scores[self.sm]), coefs_range) - - best_ensemble, self.coefs = bo_ense.maximize(restarts = 100, init_points = 10, verbose = 1, num_it = 50) - - print('Best ensemble score: %f' % best_ensemble) - - -# --------------------------------------------- // --------------------------------------------- # -# --------------------------------------------- // --------------------------------------------- # - def fit(self, x, y): - '''Try some basic ones and return best + score - like NB, LR, RF, that kind of stuff''' - - - self.best_args(x, y) - - self.make_ensemble() - - - print('Fitting the models at last.') - - self.logit_model.fit(self.x, self.y) - self.svm_model.fit(self.x, self.y) - self.randf_model.fit(self.x, self.y) 
-        self.gbt_model.fit(self.x, self.y)
-
-        print('Done, best single model and parameters...')
-
-
-# --------------------------------------------- // --------------------------------------------- #
-# --------------------------------------------- // --------------------------------------------- #
-    def predict_proba(self, x):
-        print('starting prob prediction')
-
-        logit_pred = self.logit_model.predict_proba(x)
-        svm_pred = self.svm_model.predict_proba(x)
-        randf_pred = self.randf_model.predict_proba(x)
-        gbt_pred = self.gbt_model.predict_proba(x)
-
-
-        wsum = self.coefs['c_logit'] + self.coefs['c_svm'] + self.coefs['c_randf'] + self.coefs['c_gbt']
-
-        ense = (self.coefs['c_logit']/wsum) * logit_pred +\
-               (self.coefs['c_svm']/wsum) * svm_pred +\
-               (self.coefs['c_randf']/wsum) * randf_pred +\
-               (self.coefs['c_gbt']/wsum) * gbt_pred
-
-        return ense
-
-'''
-    def predict(self, x):
-        print('starting prediction')
-
-        logit_pred = self.logit_model.predict(x)
-        svm_pred = self.svm_model.predict(x)
-        randf_pred = self.randf_model.predict(x)
-
-
-        wsum = self.coefs['c_logit'] + self.coefs['c_svm'] + self.coefs['c_randf']
-
-        ense = (self.coefs['c_logit']/wsum) * logit_pred +\
-               (self.coefs['c_svm']/wsum) * svm_pred +\
-               (self.coefs['c_randf']/wsum) * randf_pred
-
-        return ense
-'''
-
-
diff --git a/bayes_opt/examples/sklearn_example.py b/bayes_opt/examples/sklearn_example.py
new file mode 100644
index 000000000..c6c830125
--- /dev/null
+++ b/bayes_opt/examples/sklearn_example.py
@@ -0,0 +1,97 @@
+# Python 2.7 users.
+from __future__ import print_function
+from __future__ import division
+
+#Testing on the Iris data set
+from sklearn.datasets import load_iris
+from sklearn.cross_validation import KFold
+
+#Classifiers to compare
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.svm import SVC
+#Score metric
+from sklearn.metrics import precision_score
+#Bayes optimization
+from bayes_opt.bayes_opt import bayes_opt
+
+
+# Load data set and target values
+data = load_iris()['data']
+target = load_iris()['target']
+
+# LogisticRegression CV
+def LR_CV(c, ints, kf):
+
+    clf = LogisticRegression(C = c, intercept_scaling = ints, random_state = 2)
+    res = 0
+
+    for itr, ite in kf:
+        xtrain, xtest = data[itr], data[ite]
+        ytrain, ytest = target[itr], target[ite]
+
+        clf.fit(xtrain, ytrain)
+        pred = clf.predict(xtest)
+
+        res += precision_score(ytest, pred)
+
+    return res/5
+
+# SVM CV
+def SVR_CV(c, g, e):
+
+    clf = SVC(C = c, gamma = g, tol = e, random_state = 2)
+    res = 0
+
+    for itr, ite in kf:
+        xtrain, xtest = data[itr], data[ite]
+        ytrain, ytest = target[itr], target[ite]
+
+        clf.fit(xtrain, ytrain)
+        pred = clf.predict(xtest)
+
+        res += precision_score(ytest, pred)
+
+    return res/5
+
+#Random Forest CV
+def RF_CV(trees, split, leaf):
+
+    clf = RandomForestClassifier(\
+        n_estimators = int(trees),\
+        min_samples_split = int(split),\
+        min_samples_leaf = int(leaf),\
+        max_features = None,\
+        random_state = 3,\
+        n_jobs = -1)
+
+    res = 0
+
+    for itr, ite in kf:
+        xtrain, xtest = data[itr], data[ite]
+        ytrain, ytest = target[itr], target[ite]
+
+        clf.fit(xtrain, ytrain)
+        pred = clf.predict(xtest)
+
+        res += precision_score(ytest, pred)
+
+    return res/5
+
+
+if __name__ == "__main__":
+
+    kf = KFold(len(target), n_folds = 5, shuffle = True, random_state = 1)
+
+    # Search for a good set of parameters for logistic regression
+    bo_LR = bayes_opt(lambda c, ints: LR_CV(c, ints, kf), {'c' : (0.001, 100), 'ints' : (0.001, 100)})
+    ylr, xlr = bo_LR.log_maximize(num_it = 25)
+
+    # Search for a good set of parameters for support vector machine
+    bo_SVR = bayes_opt(SVR_CV, {'c' : (0.001, 100), 'g' : (0.0001, 1), 'e' : (0.001, 10)})
+    bo_SVR.initialize({'c' : 0.1, 'g' : .01, 'e' : 0.005})
+    ysvr, xsvr = bo_SVR.log_maximize(init_points = 5, restarts = 15, num_it = 25)
+
+    # Search for a good set of parameters for random forest.
+    bo_RF = bayes_opt(RF_CV, {'trees' : (10, 200), 'split' : (2, 20), 'leaf' : (1, 10)})
+    yrf, xrf = bo_RF.maximize(init_points = 5, restarts = 15, num_it = 25)
diff --git a/bayes_opt/examples/visualization.py b/bayes_opt/examples/visualization.py
new file mode 100644
index 000000000..11ec1e859
--- /dev/null
+++ b/bayes_opt/examples/visualization.py
@@ -0,0 +1,235 @@
+# Python 2.7 users.
+from __future__ import print_function
+from __future__ import division
+
+import numpy
+import matplotlib.pyplot as plt
+from math import log, fabs, sqrt, exp
+
+from bayes_opt.bayes_opt import bayes_opt
+from bayes_opt.GP import GP
+from bayes_opt.support.objects import acquisition
+
+# ------------------------------ // ------------------------------ // ------------------------------ #
+# ------------------------------ // ------------------------------ // ------------------------------ #
+def my_function1(x):
+    return 2.5*exp(-(x - 2)**2) + 5*exp(-(x - 5)**2)+\
+           3*exp(-(x/2 - 4)**2)+ 8*exp(-2*(x - 11)**2)
+
+def my_function2(x):
+    return exp(-(x - 2)**2) + 5*exp(-(x - 5)**2) +\
+           3*exp(-(x/2 - 4)**2) + \
+           8*exp(-10*(x - 0.1)**2) - exp(-2*(x - 9)**2)
+
+# ------------------------------ // ------------------------------ // ------------------------------ #
+# ------------------------------ // ------------------------------ // ------------------------------ #
+def show_functions(grid, log_grid):
+    data1 = numpy.asarray([my_function1(x) for x in grid])
+    data2 = numpy.asarray([my_function2(x) for x in numpy.arange(0.01,13,0.01)])
+
+    ax1 = plt.subplot(1, 1, 1)
+    ax1.grid(True, color='k', linestyle='--', linewidth=1, alpha = 0.5)
+    p1, = ax1.plot(grid, data1)
+    plt.show()
+
+    ax2 = plt.subplot(2, 1, 1)
+    ax2.grid(True, color='k', linestyle='--', linewidth=1, alpha = 0.5)
+    p2, = ax2.plot(grid, data2)
+
+    ax3 = plt.subplot(2, 1, 2)
+    ax3.grid(True, color='k', linestyle='--', linewidth=1, alpha = 0.5)
+    p3, = ax3.plot(log_grid, data2)
+
+    plt.show()
+
+# ------------------------------ // ------------------------------ // ------------------------------ #
+def gp1(grid):
+    x = numpy.asarray([3,6,8,10]).reshape((4, 1))
+    y = numpy.asarray([my_function1(x[0]) for x in x])
+
+    gp = GP(kernel = 'squared_exp', theta = 1, l = 1)
+    gp.fit(x, y)
+
+    mean, var = gp.fast_predict(grid.reshape((len(grid), 1)))
+
+    # ------------------------------ // ------------------------------ #
+    ax1 = plt.subplot(2, 1, 1)
+    ax1.grid(True, color='k', linestyle='--', linewidth= 0.8, alpha = 0.4)
+
+
+    p1, = ax1.plot(x, y, 'b-', marker='o', color = 'k')
+    p2, = ax1.plot(grid, [(mean[i] + 2*sqrt(fabs(var[i]))) for i in range(len(mean))])
+    p3, = ax1.plot(grid, [(mean[i] - 2*sqrt(fabs(var[i]))) for i in range(len(mean))])
+    p4, = ax1.plot(grid, numpy.asarray([my_function1(x) for x in grid]), 'm--')
+    p5, = ax1.plot(grid, [mean[i] for i in range(len(mean))], 'y')
+
+    p1.set_linestyle(' ')
+    ax1.legend([p1, p2, p3, p4, p5],\
+               ['Data','Upper 95% bound','Lower 95% bound', 'True function', 'Predicted mean'], loc = 2)
+
+    # ------------------------------ // ------------------------------ #
+    x = numpy.arange(0, 14, 1).reshape((14, 1))
+    y = 
numpy.asarray([my_function1(x[0]) for x in x]) + + gp = GP(kernel = 'squared_exp', theta = 0.5, l = .9) + gp.fit(x, y) + + mean, var = gp.fast_predict(grid.reshape((len(grid), 1))) + + # ------------------------------ // ------------------------------ # + ax2 = plt.subplot(2, 1, 2) + ax2.grid(True, color='k', linestyle='--', linewidth=.8, alpha = 0.4) + + p12, = ax2.plot(x, y, 'b-', marker='o', color = 'k') + p22, = ax2.plot(grid, [(mean[i] + 2*sqrt(fabs(var[i]))) for i in range(len(mean))]) + p32, = ax2.plot(grid, [(mean[i] - 2*sqrt(fabs(var[i]))) for i in range(len(mean))]) + p42, = ax2.plot(grid, numpy.asarray([my_function1(x) for x in grid]), 'm--') + p52, = ax2.plot(grid, [mean[i] for i in range(len(mean))], 'y') + + p12.set_linestyle(' ') + ax2.legend([p12, p22, p32, p42, p52],\ + ['Data','Upper 95% bound','Lower 95% bound', 'True function', 'Predicted mean'], loc = 2) + + plt.show() + + +# ------------------------------ // ------------------------------ // ------------------------------ # +def find_max(grid): + + + bo = bayes_opt(my_function1, {'x' : (0, 13)}) + ymax, xmax, y, x = bo.maximize(init_points = 5, full_out = True) + + ax = plt.subplot(1,1,1) + ax.grid(True, color='k', linestyle='--', linewidth=.8, alpha = 0.4) + + p1, = ax.plot(x, y, 'b-', marker='o', color = 'k') + p1.set_linestyle(' ') + p2, = ax.plot(grid, numpy.asarray([my_function1(x) for x in grid]), 'm-') + + ax.legend([p1, p2],\ + ['Sampled points','Target function'], loc = 2) + + plt.show() + + return x + +# ------------------------------ // ------------------------------ // ------------------------------ # +def gp2(grid, sampled_x): + x = sampled_x + y = numpy.asarray([my_function1(x) for x in x]) + + gp = GP(kernel = 'squared_exp') + gp.best_fit(x, y) + + mean, var = gp.fast_predict(grid.reshape((len(grid), 1))) + ymax = y.max() + + ac = acquisition() + ucb = ac.full_UCB(mean, var) + ei = ac.full_EI(ymax, mean, var) + poi = ac.full_PoI(ymax, mean, var) + + # ------------------------------ // ------------------------------ # + ax1 = plt.subplot(2, 1, 1) + ax1.grid(True, color='k', linestyle='--', linewidth= 0.8, alpha = 0.4) + + p1, = ax1.plot(x, y, 'b-', marker='o', color = 'k') + p2, = ax1.plot(grid, [(mean[i] + 2*sqrt(fabs(var[i]))) for i in range(len(mean))]) + p3, = ax1.plot(grid, [(mean[i] - 2*sqrt(fabs(var[i]))) for i in range(len(mean))]) + p4, = ax1.plot(grid, numpy.asarray([my_function1(x) for x in grid]), 'm--') + p5, = ax1.plot(grid, [mean[i] for i in range(len(mean))], 'y') + + p1.set_linestyle(' ') + ax1.legend([p1, p2, p3, p4, p5],\ + ['Data','Upper 95% bound','Lower 95% bound', 'True function', 'Predicted mean'], loc = 3) + + + ax2 = plt.subplot(2,1,2) + ax2.grid(True, color='k', linestyle='--', linewidth= 0.8, alpha = 0.4) + p21, = ax2.plot(grid, ucb/ucb.max(), 'r') + p22, = ax2.plot(grid, ei/(ei.max() + 1e-6), 'orange') + p23, = ax2.plot(grid, poi, 'green') + ax2.legend([p21, p22, p23], ['Upper Confidence Bound', 'Expected Improvement', 'Probability of Improvement'], loc = 3) + + plt.show() + + +# ------------------------------ // ------------------------------ // ------------------------------ # +def find_max_log(grid, log_grid): + + bo = bayes_opt(my_function2, {'x' : (0.01, 13)}) + ymax, xmax, y, x = bo.log_maximize(init_points = 5, full_out = True) + + ax = plt.subplot(1,1,1) + ax.grid(True, color='k', linestyle='--', linewidth=.8, alpha = 0.4) + + p1, = ax.plot(numpy.log10(x/0.01) / log(13/0.01, 10), y, 'b-', marker='o', color = 'k') + p1.set_linestyle(' ') + p2, = ax.plot(log_grid, 
numpy.asarray([my_function2(x) for x in grid]), 'm-') + + ax.legend([p1, p2],\ + ['Sampled points','Target function'], loc = 2) + + plt.show() + + return x + +# ------------------------------ // ------------------------------ // ------------------------------ # +def gp3_log(grid, log_grid, sampled_x): + '''This is broken, something wrong with the GP and plots, fix it!''' + x = sampled_x + y = numpy.asarray([my_function2(x) for x in x])#numpy.asarray([my_function2(0.01 * (10 ** (x * log(13/0.01, 10)))) for x in x]) + + gp = GP(kernel = 'squared_exp') + gp.best_fit(x, y) + + mean, var = gp.fast_predict(grid.reshape((len(log_grid), 1))) + ymax = y.max() + + ac = acquisition() + ucb = ac.full_UCB(mean, var) + ei = ac.full_EI(ymax, mean, var) + poi = ac.full_PoI(ymax, mean, var) + + # ------------------------------ // ------------------------------ # + ax1 = plt.subplot(2, 1, 1) + ax1.grid(True, color='k', linestyle='--', linewidth= 0.8, alpha = 0.4) + + p1, = ax1.plot(numpy.log10(x/0.01) / log(13/0.01, 10), y, 'b-', marker='o', color = 'k') + p2, = ax1.plot(log_grid, [(mean[i] + 2*sqrt(fabs(var[i]))) for i in range(len(mean))]) + p3, = ax1.plot(log_grid, [(mean[i] - 2*sqrt(fabs(var[i]))) for i in range(len(mean))]) + p4, = ax1.plot(log_grid, numpy.asarray([my_function2(x) for x in grid]), 'm--') + p5, = ax1.plot(log_grid, [mean[i] for i in range(len(mean))], 'y') + + p1.set_linestyle(' ') + ax1.legend([p1, p2, p3, p4, p5],\ + ['Data','Upper 95% bound','Lower 95% bound', 'True function', 'Predicted mean'], loc = 3) + + + ax2 = plt.subplot(2,1,2) + ax2.grid(True, color='k', linestyle='--', linewidth= 0.8, alpha = 0.4) + p21, = ax2.plot(log_grid, ucb/ucb.max(), 'r') + p22, = ax2.plot(log_grid, ei/(ei.max() + 1e-6), 'orange') + p23, = ax2.plot(log_grid, poi, 'green') + ax2.legend([p21, p22, p23], ['Upper Confidence Bound', 'Expected Improvement', 'Probability of Improvement'], loc = 3) + + plt.show() + + +# ------------------------------ // ------------------------------ // ------------------------------ # +# ------------------------------ // ------------------------------ // ------------------------------ # +if __name__ == "__main__": + + grid = numpy.arange(0.01,13,0.01) + log_grid = numpy.log10(numpy.arange(0.01,13,0.01)/0.01)/log(13/0.01, 10) + + # ------------------------------ // ------------------------------ # + show_functions(grid, log_grid) + gp1(grid) + + sampled_x = find_max(grid) + gp2(grid, sampled_x) + + sampled_x_log = find_max_log(grid, log_grid) + gp3_log(grid, log_grid, sampled_x_log)
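Below is a minimal usage sketch, not part of the patch, illustrating the flat layout this diff introduces: bayes_opt.bayes_opt and bayes_opt.GP replace the old bayes_opt.bo subpackage. The target function and bounds are made up for illustration; only the bayes_opt constructor and the maximize() call, with its (best value, best arguments) return, are taken from the examples in the patch itself.

# Hypothetical quick check of the renamed modules (assumed import path after this rename).
from bayes_opt.bayes_opt import bayes_opt

# Toy target; any expensive black-box function of the named parameters fits here.
def target(x, y):
    return -(x - 1)**2 - (y - 2)**2

# Bounds are passed as a dict keyed by the target's argument names,
# mirroring examples/sklearn_example.py.
bo = bayes_opt(target, {'x': (-5, 5), 'y': (-5, 5)})

# maximize() returns the best value found and the corresponding arguments;
# log_maximize() is the variant used when a parameter spans orders of magnitude.
ymax, argmax = bo.maximize(init_points=5, restarts=15, num_it=25)
print(ymax, argmax)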