In [11]:
# Install Sckit-learn Optimization Library
!pip install scikit-optimize

Collecting scikit-optimize
[?25l  Downloading https://files.pythonhosted.org/packages/8b/03/be33e89f55866065a02e515c5b319304a801a9f1027a9b311a9b1d1f8dc7/scikit_optimize-0.8.1-py2.py3-none-any.whl (101kB)
[K     |███▎                            | 10kB 10.7MB/s eta 0:00:01[K     |██████▌                         | 20kB 1.8MB/s eta 0:00:01[K     |█████████▊                      | 30kB 2.2MB/s eta 0:00:01[K     |█████████████                   | 40kB 2.6MB/s eta 0:00:01[K     |████████████████▏               | 51kB 2.0MB/s eta 0:00:01[K     |███████████████████▍            | 61kB 2.3MB/s eta 0:00:01[K     |██████████████████████▊         | 71kB 2.5MB/s eta 0:00:01[K     |██████████████████████████      | 81kB 2.8MB/s eta 0:00:01[K     |█████████████████████████████▏  | 92kB 2.9MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 2.3MB/s 
Collecting pyaml>=16.9
  Downloading https://files.pythonhosted.org/packages/15/c4/1310a054d33abc318426a956e7d6df0df76a6ddf

In [16]:
import pandas as pd
import numpy as np
import warnings

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn import datasets
from functools import partial
from skopt import space,gp_minimize

warnings.filterwarnings("ignore")

In [5]:
 #--------- Custom Dataset -----------#

# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Features: only the first two columns of the data matrix; targets: the class labels.
X, y = iris.data[:, :2], iris.target

 #--------- Custom Dataset -----------#

## Bayesian Optimization using Gaussian Process

In [8]:
def optimize(params,param_names,x,y):
  """Objective function: negated mean accuracy over a 5-fold stratified CV.

  Args:
    params: Hyperparameter values, ordered to line up with `param_names`.
    param_names: Keyword names that are paired positionally with `params`.
    x: Feature array; it is indexed with the row-index arrays produced by
      the splitter.
    y: Target array, indexed the same way as `x`.

  Returns:
    -1.0 times the mean of the per-fold accuracy scores, so that a
    minimiser driving this value down is driving accuracy up.
  """
  hyperparams = {name: value for name, value in zip(param_names, params)}

  #--------- Custom Code/Model -----------#

  model = RandomForestClassifier(**hyperparams)
  kf = StratifiedKFold(n_splits=5)

  #--------- Custom Code/Model -----------#

  def score_fold(train_idx, val_idx):
    # Fit on the training rows of this fold, then score on the held-out rows.
    model.fit(x[train_idx], y[train_idx])
    return accuracy_score(y[val_idx], model.predict(x[val_idx]))

  # One accuracy value per cross-validation split.
  accuracies = [
      score_fold(train_idx, val_idx)
      for train_idx, val_idx in kf.split(X=x, y=y)
  ]

  return -1.0 * np.mean(accuracies)

In [15]:
# Search space: one skopt dimension per keyword argument that `optimize`
# forwards to RandomForestClassifier.
param_space = [
    space.Integer(3, 15, name="max_depth"),
    space.Integer(100, 600, name="n_estimators"),
    space.Categorical(["gini", "entropy"], name="criterion"),
    space.Real(0.01, 1, prior="uniform", name="max_features"),
]

# Take the names straight from the dimensions so the name list can never
# drift out of order or out of sync with the search space.
param_names = [dimension.name for dimension in param_space]

# Bind everything except the hyperparameter vector, leaving a one-argument
# callable for the optimiser to evaluate.
optimization_function = partial(optimize, param_names=param_names, x=X, y=y)

In [18]:
# Minimise the objective over `param_space` with skopt's Gaussian-process optimiser.
# `n_initial_points` replaces `n_random_starts`, which is deprecated since
# scikit-optimize 0.8; `verbose` is a boolean flag, so pass True rather than 10.
result = gp_minimize(optimization_function,
                     dimensions=param_space,
                     n_calls=15,
                     n_initial_points=10,
                     verbose=True)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 2.8340
Function value obtained: -0.8067
Current minimum: -0.8067
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.7087
Function value obtained: -0.7400
Current minimum: -0.8067
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.8642
Function value obtained: -0.7467
Current minimum: -0.8067
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.8689
Function value obtained: -0.7200
Current minimum: -0.8067
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 1.4288
Function value obtained: -0.7400
Current minimum: -0.8067
Iteration No: 6 started. 

In [20]:
# Pair each hyperparameter name with the matching entry of `result.x`
# (the best point found, per the skopt result object docs) and print the mapping.
print({name: value for name, value in zip(param_names, result.x)})

{'max_depth': 4, 'n_estimators': 429, 'criterion': 'entropy', 'max_features': 0.7333104175258193}
