In [1]:
import numpy as np
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.svm import SVC
from bayes_opt import BayesianOptimization
from bayes_opt.util import Colours
from bayes_opt import UtilityFunction
import copy
from functools import partial

In [2]:
def get_data(random_state=None):
    """Generate a synthetic multi-class classification dataset.

    Calls ``make_classification`` with 5000 samples, 100 features (80 of them
    informative) and 5 target classes. ``n_redundant`` is left at the
    library default.

    Parameters
    ----------
    random_state : optional
        Forwarded unchanged to ``make_classification``. The default ``None``
        matches the previous behaviour, where no seed was passed at all.
        Pass an int to make the generated dataset repeatable across runs.

    Returns
    -------
    tuple
        ``(data, targets)`` exactly as returned by ``make_classification``.
    """
    data, targets = make_classification(
        n_samples=5000,
        n_features=100,
        n_informative=80,
        n_classes=5,
        random_state=random_state,
    )
    return data, targets


def svc_cv(expC, expGamma, X, Y):
    """Mean cross-validated score of an SVC given log-scale hyperparameters.

    ``expC`` and ``expGamma`` are base-10 exponents: the classifier is built
    with ``C = 10 ** expC`` and ``gamma = 10 ** expGamma``. Searching over the
    exponents rather than the raw values is not strictly required, but it
    makes the search space much friendlier for the optimizer.

    The classifier is scored on ``(X, Y)`` via ``cross_val_score`` with
    ``scoring='f1_weighted'`` and ``cv=4``; the mean of the returned scores is
    the value handed back, so larger is better.
    """
    classifier = SVC(
        C=pow(10, expC),
        gamma=pow(10, expGamma),
        random_state=2,
    )
    fold_scores = cross_val_score(
        classifier, X, Y, scoring='f1_weighted', cv=4
    )
    return fold_scores.mean()


def rfc_cv(n_estimators, min_samples_split, max_features, X, Y):
    """Mean cross-validated score of a random forest classifier.

    ``n_estimators`` and ``min_samples_split`` are converted with ``int()``
    before being passed to the classifier, so fractional values are accepted
    and truncated. ``max_features`` is passed through unchanged.

    The forest is scored on ``(X, Y)`` via ``cross_val_score`` with
    ``scoring='f1_weighted'`` and ``cv=4``; the mean of the returned scores is
    the value handed back, so larger is better.
    """
    forest_params = {
        "n_estimators": int(n_estimators),
        "min_samples_split": int(min_samples_split),
        "max_features": max_features,
        "random_state": 2,
    }
    forest = RFC(**forest_params)
    fold_scores = cross_val_score(forest, X, Y, scoring='f1_weighted', cv=4)
    return fold_scores.mean()


In [None]:
# Run Bayesian hyperparameter optimisation for each model on one shared dataset.
X, Y = get_data()
# The loop below covers every entry of black_box_funs, not only the SVM.
print(Colours.yellow("--- Optimizing SVM and random forest ---"))
n_iter = 10

# Three parallel lists, one entry per model: the objective function, its
# search bounds, and a hand-picked point to evaluate before the search starts.
black_box_funs = [svc_cv, rfc_cv]
pbounds_lst = [
    {
        "expC": (-3, 2),
        "expGamma": (-4, -1),
    },
    {
        "n_estimators": (10, 250),
        "min_samples_split": (2, 25),
        "max_features": (0.1, 0.999),
    },
]
probe_points_lst = [
    {"expC": -1.002, "expGamma": -2.002},
    {"n_estimators": 123, "min_samples_split": 21, "max_features": 0.8888},
]
# zip() silently truncates to the shortest list, so fail loudly on a mismatch.
assert len(black_box_funs) == len(pbounds_lst) == len(probe_points_lst)

# Not consumed by maximize() in this cell; kept defined for a manual
# optimizer.suggest(utility) / optimizer.register(...) workflow.
utility = UtilityFunction(kind="ucb", kappa=2.5, xi=0.0)

# Best result per model, keyed by objective function name, so earlier results
# survive `optimizer` being rebound on the next iteration.
best_by_model = {}

for black_box_fptr, pbounds, probe_point in zip(
        black_box_funs, pbounds_lst, probe_points_lst):
    # Print the function name rather than its repr, which embeds a memory
    # address that changes from run to run.
    print("---------- Optimizing {}--------------".format(black_box_fptr.__name__))
    optimizer = BayesianOptimization(
        f=partial(black_box_fptr, X=X, Y=Y),
        pbounds=pbounds,
        verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
        random_state=65535,
    )
    # lazy=True: the point is not evaluated here; it is evaluated once
    # maximize() runs.
    optimizer.probe(params=probe_point, lazy=True)
    optimizer.maximize(init_points=30, n_iter=n_iter)
    best_by_model[black_box_fptr.__name__] = optimizer.max
    print(optimizer.max)


[93m--- Optimizing SVM ---[0m
---------- Optimizing <function svc_cv at 0x7f347f0ab2f0>--------------
|   iter    |  target   |   expC    | expGamma  |
-------------------------------------------------


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


| [0m 1       [0m | [0m 0.06716 [0m | [0m-1.002   [0m | [0m-2.002   [0m |


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
