In [1]:
# create dataset and model sklearn
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

In [2]:
from sklearn.ensemble import RandomForestClassifier

In [3]:
def fitting_model(parameters):
    N_SAMPLES = 1000
    x, y = make_classification(n_samples=N_SAMPLES, n_features=100, n_classes=2, random_state=123)
    
    n_estimators = parameters.get('n_estimators')
    max_depth = parameters.get('max_depth')
    min_samples_split = parameters.get('min_samples_split')
    min_samples_leaf = parameters.get('min_samples_leaf')
    
    rfc = RandomForestClassifier(n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf)
    rfc.fit(x, y)
    y_pred_proba = rfc.predict_proba(x)
    
    y_pred = y_pred_proba.argmax(1)

    # evaluate
    y_onehot = OneHotEncoder().fit_transform(y.reshape(-1, 1))
    loss = np.power(y_onehot.toarray() - y_pred_proba, 2)
    mse_loss = np.mean(loss)
    sem = loss.std() / np.sqrt(N_SAMPLES)
    
    return mse_loss, sem

In [4]:
from ax.core.search_space import SearchSpace
from ax import RangeParameter, ParameterType

In [5]:
n_estimators = RangeParameter(name="n_estimators",
                               parameter_type=ParameterType.INT,
                               lower=10, 
                               upper=100, 
                               log_scale=False)

max_depth = RangeParameter(name="max_depth",
                           parameter_type=ParameterType.INT,
                           lower=1, upper=5, 
                           log_scale=False)

min_samples_split = RangeParameter(name="min_samples_split",
                                   parameter_type=ParameterType.INT,
                                   lower=2,
                                   upper=5,
                                   log_scale=False)

min_samples_leaf = RangeParameter(name="min_samples_leaf",
                                  parameter_type=ParameterType.INT,
                                  lower=1,
                                  upper=5,
                                  log_scale=False)

search_space = SearchSpace([n_estimators, max_depth, min_samples_split, min_samples_leaf])

In [6]:
def evaluate(parameters):
    return {"objective": fitting_model(parameters)}

In [7]:
from ax.core.simple_experiment import SimpleExperiment

In [8]:
exp = SimpleExperiment(
    name="simple_experiment",
    search_space=search_space,
    evaluation_function=evaluate,
    objective_name="objective",
    minimize=True
)

In [9]:
from ax.modelbridge.registry import Models

In [10]:
sobol = Models.SOBOL(exp.search_space)

In [11]:
# initiallization trials
for i in range(5):
    exp.new_trial(generator_run=sobol.gen(1))

In [18]:
exp.eval().df

Unnamed: 0,arm_name,metric_name,mean,sem,trial_index
0,0_0,objective,0.05218,0.001301,0
1,1_0,objective,0.187331,0.000844,1
2,2_0,objective,0.144746,0.001481,2
3,3_0,objective,0.169322,0.001174,3
4,4_0,objective,0.075209,0.001284,4


In [19]:
for i in range(10):
    print(f"Running GP+EI optimization trial {i+1}/10...")
    # Reinitialize GP+EI model at each step with updated data.
    gpei = Models.BOTORCH(experiment=exp, data=exp.eval())
    batch = exp.new_trial(generator_run=gpei.gen(1))
    
print("Done!")

Running GP+EI optimization trial 1/10...
Running GP+EI optimization trial 2/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 3/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 4/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 5/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 6/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 7/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 8/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 9/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Running GP+EI optimization trial 10/10...



The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.



Done!


In [None]:
d_trial = pd.DataFrame([])
for idx, trial in exp.trials.items():
    trial_data = exp.eval_trial(trial)
    d_trial = pd.concat((d_trial, trial_data.df), axis=0)    

In [None]:
d_trial

In [None]:
d_trial.sort_values('mean')