In [2]:
!pip install bayesian-optimization

Collecting bayesian-optimization
  Downloading bayesian-optimization-1.2.0.tar.gz (14 kB)
Building wheels for collected packages: bayesian-optimization
  Building wheel for bayesian-optimization (setup.py) ... [?25l[?25hdone
  Created wheel for bayesian-optimization: filename=bayesian_optimization-1.2.0-py3-none-any.whl size=11685 sha256=0c9da020baa66b92f7e72c186de4349e577141426a5543df68b7da600e9a14f3
  Stored in directory: /root/.cache/pip/wheels/fd/9b/71/f127d694e02eb40bcf18c7ae9613b88a6be4470f57a8528c5b
Successfully built bayesian-optimization
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.2.0


In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.svm import SVC

from bayes_opt import BayesianOptimization
from bayes_opt.util import Colours

In [4]:
def get_data():
    """Build the synthetic binary classification dataset.

    Returns the ``(data, targets)`` pair produced by ``make_classification``
    for 1000 samples with 45 features, of which 12 are informative and 7 are
    redundant. The ``random_state`` is fixed, so repeated calls use the same
    generator seed.
    """
    dataset_spec = {
        "n_samples": 1000,
        "n_features": 45,
        "n_informative": 12,
        "n_redundant": 7,
        "random_state": 134985745,
    }
    features, labels = make_classification(**dataset_spec)
    return features, labels

In [5]:
def svc_cv(C, gamma, data, targets):
    """Cross-validate an SVC and return its mean ROC AUC.

    An ``SVC`` is built from ``C`` and ``gamma`` (with a fixed
    ``random_state``) and scored on ``data``/``targets`` using 4-fold cross
    validation with the ``roc_auc`` metric. The mean of the per-fold scores
    is returned; higher is better, so this value is what the optimizer
    should maximize.
    """
    fold_scores = cross_val_score(
        SVC(C=C, gamma=gamma, random_state=2),
        data,
        targets,
        scoring='roc_auc',
        cv=4,
    )
    return fold_scores.mean()

In [6]:
def rfc_cv(n_estimators, min_samples_split, max_features, data, targets):
    """Cross-validate a random forest and return its mean negated log loss.

    A random forest classifier is built from ``n_estimators``,
    ``min_samples_split`` and ``max_features`` (with a fixed
    ``random_state``) and scored on ``data``/``targets`` using 4-fold cross
    validation with the ``neg_log_loss`` metric. The mean of the per-fold
    scores is returned.

    Because the score is the *negated* log loss, larger values are better:
    maximizing this return value is how the log loss gets minimized.
    """
    forest_params = {
        "n_estimators": n_estimators,
        "min_samples_split": min_samples_split,
        "max_features": max_features,
        "random_state": 2,
    }
    fold_scores = cross_val_score(
        RFC(**forest_params),
        data,
        targets,
        scoring='neg_log_loss',
        cv=4,
    )
    return fold_scores.mean()


In [7]:
def optimize_svc(data, targets):
    """Run Bayesian Optimization over the SVC hyperparameters.

    The search is carried out over base-10 exponents of ``C`` and ``gamma``
    rather than over the raw values. Progress is logged by the optimizer
    (``verbose=2``) and the best point found is printed at the end; nothing
    is returned.
    """
    # Search box, expressed in log10 space: C in [1e-3, 1e2], gamma in
    # [1e-4, 1e-1].
    exponent_bounds = {"expC": (-3, 2), "expGamma": (-4, -1)}

    def objective(expC, expGamma):
        """Map the exponents back to regular scale and score the SVC.

        The parameter names must match the keys of the search bounds,
        because the optimizer passes each sampled point by keyword.
        """
        return svc_cv(
            C=10 ** expC,
            gamma=10 ** expGamma,
            data=data,
            targets=targets,
        )

    optimizer = BayesianOptimization(
        f=objective,
        pbounds=exponent_bounds,
        random_state=1234,
        verbose=2,
    )
    optimizer.maximize(n_iter=10)

    print("Final result:", optimizer.max)

In [8]:
def optimize_rfc(data, targets):
    """Run Bayesian Optimization over the Random Forest hyperparameters.

    Progress is logged by the optimizer (``verbose=2``) and the best point
    found is printed at the end; nothing is returned.
    """
    search_bounds = {
        "n_estimators": (10, 250),
        "min_samples_split": (2, 25),
        "max_features": (0.1, 0.999),
    }

    def objective(n_estimators, min_samples_split, max_features):
        """Adapt the optimizer's float suggestions for the random forest.

        The optimizer proposes real-valued points, so ``n_estimators`` and
        ``min_samples_split`` are truncated to integers, and
        ``max_features`` is clamped to [1e-3, 0.999] so it always stays
        strictly inside the (0, 1) range.
        """
        tree_count = int(n_estimators)
        split_threshold = int(min_samples_split)
        feature_fraction = max(min(max_features, 0.999), 1e-3)
        return rfc_cv(
            n_estimators=tree_count,
            min_samples_split=split_threshold,
            max_features=feature_fraction,
            data=data,
            targets=targets,
        )

    optimizer = BayesianOptimization(
        f=objective,
        pbounds=search_bounds,
        random_state=1234,
        verbose=2,
    )
    optimizer.maximize(n_iter=10)

    print("Final result:", optimizer.max)

In [9]:
# Generate the synthetic binary classification dataset once. The optimization
# cells below both receive this same (data, targets) pair.
data, targets = get_data()

In [11]:
# Print a yellow banner, then run the SVC search. optimize_svc logs each
# iteration and prints the best point itself; it returns nothing.
print(Colours.yellow("--- Optimizing SVM ---"))
optimize_svc(data, targets)

[93m--- Optimizing SVM ---[0m
|   iter    |  target   |   expC    | expGamma  |
-------------------------------------------------
| [0m 1       [0m | [0m 0.882   [0m | [0m-2.042   [0m | [0m-2.134   [0m |
| [95m 2       [0m | [95m 0.9142  [0m | [95m-0.8114  [0m | [95m-1.644   [0m |
| [95m 3       [0m | [95m 0.9402  [0m | [95m 0.8999  [0m | [95m-3.182   [0m |
| [0m 4       [0m | [0m 0.9174  [0m | [0m-1.618   [0m | [0m-1.594   [0m |
| [0m 5       [0m | [0m 0.9345  [0m | [0m 1.791   [0m | [0m-1.372   [0m |
| [0m 6       [0m | [0m 0.9007  [0m | [0m 2.0     [0m | [0m-4.0     [0m |
| [0m 7       [0m | [0m 0.9091  [0m | [0m-3.0     [0m | [0m-1.0     [0m |
| [0m 8       [0m | [0m 0.8354  [0m | [0m-0.4191  [0m | [0m-4.0     [0m |
| [95m 9       [0m | [95m 0.9716  [0m | [95m 2.0     [0m | [95m-2.542   [0m |
| [95m 10      [0m | [95m 0.9729  [0m | [95m 1.468   [0m | [95m-2.346   [0m |
| [0m 11      [0m | [0m 0.971

In [12]:
# Print a green banner, then run the Random Forest search. optimize_rfc logs
# each iteration and prints the best point itself; it returns nothing.
print(Colours.green("--- Optimizing Random Forest ---"))
optimize_rfc(data, targets)

[92m--- Optimizing Random Forest ---[0m
|   iter    |  target   | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------
| [0m 1       [0m | [0m-0.3777  [0m | [0m 0.2722  [0m | [0m 16.31   [0m | [0m 115.1   [0m |
| [95m 2       [0m | [95m-0.3648  [0m | [95m 0.806   [0m | [95m 19.94   [0m | [95m 75.42   [0m |
| [0m 3       [0m | [0m-0.3774  [0m | [0m 0.3485  [0m | [0m 20.44   [0m | [0m 240.0   [0m |
| [95m 4       [0m | [95m-0.3557  [0m | [95m 0.8875  [0m | [95m 10.23   [0m | [95m 130.2   [0m |
| [0m 5       [0m | [0m-0.3664  [0m | [0m 0.7144  [0m | [0m 18.39   [0m | [0m 98.86   [0m |
| [0m 6       [0m | [0m-0.6459  [0m | [0m 0.999   [0m | [0m 2.0     [0m | [0m 10.0    [0m |
| [95m 7       [0m | [95m-0.3528  [0m | [95m 0.5371  [0m | [95m 8.287   [0m | [95m 127.9   [0m |
| [95m 8       [0m | [95m-0.3504  [0m | [95m 0.999   [0m | [95m 2.0     [0m | [95m 249.6   [0m 