In [18]:
# create dataset and model sklearn
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

In [2]:
from sklearn.ensemble import RandomForestClassifier

In [3]:
def fitting_model(parameters):
    N_SAMPLES = 1000
    x, y = make_classification(n_samples=N_SAMPLES, n_features=100, n_classes=2, random_state=123)
    
    n_estimators = parameters.get('n_estimators')
    max_depth = parameters.get('max_depth')
    min_samples_split = parameters.get('min_samples_split')
    min_samples_leaf = parameters.get('min_samples_leaf')
    
    rfc = RandomForestClassifier(n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf)
    rfc.fit(x, y)
    y_pred_proba = rfc.predict_proba(x)
    
    y_pred = y_pred_proba.argmax(1)
    print("Accuracy: ", accuracy_score(y, y_pred))

    # evaluate
    y_onehot = OneHotEncoder().fit_transform(y.reshape(-1, 1))
    loss = np.power(y_onehot.toarray() - y_pred_proba, 2)
    mse_loss = np.mean(loss)
    sem = loss.std() / np.sqrt(N_SAMPLES)
    
    return mse_loss, sem

In [4]:
from ax.core.search_space import SearchSpace
from ax import RangeParameter, ParameterType

In [5]:
n_estimators = RangeParameter(name="n_estimators",
                               parameter_type=ParameterType.INT,
                               lower=10, 
                               upper=100, 
                               log_scale=False)

max_depth = RangeParameter(name="max_depth",
                           parameter_type=ParameterType.INT,
                           lower=1, upper=5, 
                           log_scale=False)

min_samples_split = RangeParameter(name="min_samples_split",
                                   parameter_type=ParameterType.INT,
                                   lower=2,
                                   upper=5,
                                   log_scale=False)

min_samples_leaf = RangeParameter(name="min_samples_leaf",
                                  parameter_type=ParameterType.INT,
                                  lower=1,
                                  upper=5,
                                  log_scale=False)

search_space = SearchSpace([n_estimators, max_depth, min_samples_split, min_samples_leaf])

In [6]:
def evaluate(parameters):
    return {"objective": fitting_model(parameters)}

In [7]:
from ax.core.simple_experiment import SimpleExperiment

In [8]:
exp = SimpleExperiment(
    name="simple_experiment",
    search_space=search_space,
    evaluation_function=evaluate,
    objective_name="objective",
    minimize=True
)

In [9]:
from ax.modelbridge.registry import Models

In [10]:
sobol = Models.SOBOL(exp.search_space)

In [11]:
# initiallization trials
for i in range(1):
    exp.new_trial(generator_run=sobol.gen(1))

In [12]:
for i in range(10):
    print(f"Running GP+EI optimization trial {i+1}/25...")
    # Reinitialize GP+EI model at each step with updated data.
    gpei = Models.BOTORCH(experiment=exp, data=exp.eval())
    batch = exp.new_trial(generator_run=gpei.gen(1))
    
print("Done!")

Running GP+EI optimization trial 1/25...
Accuracy:  0.966


[INFO 01-29 05:32:35] ax.modelbridge.transforms.standardize_y: Outcome objective is constant, within tolerance.


Running GP+EI optimization trial 2/25...
Accuracy:  0.993
Running GP+EI optimization trial 3/25...
Accuracy:  0.988
Running GP+EI optimization trial 4/25...
Accuracy:  0.974
Running GP+EI optimization trial 5/25...
Accuracy:  0.991
Running GP+EI optimization trial 6/25...
Accuracy:  0.983
Running GP+EI optimization trial 7/25...
Accuracy:  0.995
Running GP+EI optimization trial 8/25...
Accuracy:  0.991
Running GP+EI optimization trial 9/25...
Accuracy:  0.988
Running GP+EI optimization trial 10/25...
Accuracy:  0.992
Done!


In [19]:
d_trial = pd.DataFrame([])
for idx, trial in exp.trials.items():
    trial_data = exp.eval_trial(trial)
    d_trial = pd.concat((d_trial, trial_data.df), axis=0)    

Accuracy:  0.991


In [20]:
d_trial

Unnamed: 0,arm_name,metric_name,mean,sem,trial_index
0,0_0,objective,0.193959,0.000727,0
0,1_0,objective,0.055459,0.001352,1
0,2_0,objective,0.055164,0.001709,2
0,3_0,objective,0.085462,0.002008,3
0,4_0,objective,0.069883,0.001366,4
0,5_0,objective,0.080039,0.001928,5
0,6_0,objective,0.073261,0.001405,6
0,7_0,objective,0.053331,0.001432,7
0,8_0,objective,0.094133,0.001707,8
0,9_0,objective,0.071654,0.00154,9


In [22]:
d_trial.sort_values('mean')

Unnamed: 0,arm_name,metric_name,mean,sem,trial_index
0,7_0,objective,0.053331,0.001432,7
0,2_0,objective,0.055164,0.001709,2
0,1_0,objective,0.055459,0.001352,1
0,10_0,objective,0.068096,0.001352,10
0,4_0,objective,0.069883,0.001366,4
0,9_0,objective,0.071654,0.00154,9
0,6_0,objective,0.073261,0.001405,6
0,5_0,objective,0.080039,0.001928,5
0,3_0,objective,0.085462,0.002008,3
0,8_0,objective,0.094133,0.001707,8
