In [23]:
import pandas as pd

train_df = pd.read_csv('../data/train_fp.csv', index_col='Unnamed: 0')
test_df = pd.read_csv('../data/test_fp.csv', index_col='Unnamed: 0')

In [24]:
print("Train DF shape: {}".format(train_df.shape),
      "Test DF shape: {}".format(test_df.shape))

Train DF shape: (514, 4097) Test DF shape: (128, 4097)


In [25]:
from xgboost import XGBRegressor

In [26]:
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.service.utils.report_utils import exp_to_df
from ax.utils.notebook.plotting import init_notebook_plotting, render

In [27]:
ax_client = AxClient()

[INFO 10-25 16:56:48] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.


In [28]:
# Create an experiment with required arguments: name, parameters, and objective_name.
ax_client.create_experiment(
    name="xgb_hyperparameter_search",  # The name of the experiment.
    parameters=[
        {
            "name": "learning_rate",  # The name of the parameter.
            "type": "range",  # The type of the parameter ("range", "choice" or "fixed").
            "bounds": [1e-3, 1],  # The bounds for range parameters. 
            # "values" The possible values for choice parameters .
            # "value" The fixed value for fixed parameters.
            "value_type": "float",  # Optional, the value type ("int", "float", "bool" or "str"). Defaults to inference from type of "bounds".
            "log_scale": False,  # Optional, whether to use a log scale for range parameters. Defaults to False.
            # "is_ordered" Optional, a flag for choice parameters.
        },
        {
            "name": "max_depth",
            "type": "range",
            "bounds": [1, 6],
            "value_type": "int"
        },
        {
            "name": "colsample_bytree",
            "type": "range",
            "bounds": [0, 1],
            "value_type": "float"
        },
        {
            "name": "reg_alpha",
            "type": "range",
            "bounds": [1e-6, 10],
            "value_type": "float",
            "log_scale": True
        }
    ],
    objectives={"r2": ObjectiveProperties(minimize=False)},  # The objective name and minimization setting.
    # parameter_constraints: Optional, a list of strings of form "p1 >= p2" or "p1 + p2 <= some_bound".
    # outcome_constraints: Optional, a list of strings of form "constrained_metric <= some_bound".
    overwrite_existing_experiment=True,
)

[INFO 10-25 16:56:49] ax.service.utils.instantiation: Created search space: SearchSpace(parameters=[RangeParameter(name='learning_rate', parameter_type=FLOAT, range=[0.001, 1.0]), RangeParameter(name='max_depth', parameter_type=INT, range=[1, 6]), RangeParameter(name='colsample_bytree', parameter_type=FLOAT, range=[0.0, 1.0]), RangeParameter(name='reg_alpha', parameter_type=FLOAT, range=[1e-06, 10.0], log_scale=True)], parameter_constraints=[]).


[INFO 10-25 16:56:49] ax.modelbridge.dispatch_utils: Using Models.BOTORCH_MODULAR since there is at least one ordered parameter and there are no unordered categorical parameters.
[INFO 10-25 16:56:49] ax.modelbridge.dispatch_utils: Calculating the number of remaining initialization trials based on num_initialization_trials=None max_initialization_trials=None num_tunable_parameters=4 num_trials=None use_batch_trials=False
[INFO 10-25 16:56:49] ax.modelbridge.dispatch_utils: calculated num_initialization_trials=8
[INFO 10-25 16:56:49] ax.modelbridge.dispatch_utils: num_completed_initialization_trials=0 num_remaining_initialization_trials=8
[INFO 10-25 16:56:49] ax.modelbridge.dispatch_utils: `verbose`, `disable_progbar`, and `jit_compile` are not yet supported when using `choose_generation_strategy` with ModularBoTorchModel, dropping these arguments.
[INFO 10-25 16:56:49] ax.modelbridge.dispatch_utils: Using Bayesian Optimization generation strategy: GenerationStrategy(name='Sobol+BoTorc

In [29]:
X_train = train_df.iloc[:,:-1]
y_train = train_df.label.values

X_test = test_df.iloc[:,:-1]
y_test = test_df.label.values

In [30]:
def train_evaluate(parametrization):
    xgb_model = XGBRegressor(random_state=42, n_jobs=-1, **parametrization)
    xgb_model.fit(X_train, y_train)
    return xgb_model.score(X_test, y_test)

In [31]:
ax_client.attach_trial(
    parameters={"learning_rate":0.1,
                "max_depth": 2,
                "colsample_bytree": 0.5,
                "reg_alpha": 0.1}
)

[INFO 10-25 16:56:49] ax.core.experiment: Attached custom parameterizations [{'learning_rate': 0.1, 'max_depth': 2, 'colsample_bytree': 0.5, 'reg_alpha': 0.1}] as trial 0.


({'learning_rate': 0.1,
  'max_depth': 2,
  'colsample_bytree': 0.5,
  'reg_alpha': 0.1},
 0)

In [32]:
baseline_parameters = ax_client.get_trial_parameters(trial_index=0)
ax_client.complete_trial(trial_index=0, raw_data=train_evaluate(baseline_parameters))

[INFO 10-25 16:56:54] ax.service.ax_client: Completed trial 0 with data: {'r2': (0.751956, None)}.


In [33]:
for i in range(25):
    parameters, trial_index = ax_client.get_next_trial()
    # Local evaluation here can be replaced with deployment to external system.
    ax_client.complete_trial(trial_index=trial_index, raw_data=train_evaluate(parameters))


Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 10-25 16:56:54] ax.service.ax_client: Generated new trial 1 with parameters {'learning_rate': 0.574489, 'max_depth': 6, 'colsample_bytree': 0.060488, 'reg_alpha': 9e-06} using model Sobol.
[INFO 10-25 16:56:57] ax.service.ax_client: Completed trial 1 with data: {'r2': (0.784741, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 10-25 16:56:57] ax.service.ax_client: Generated new trial 2 with parameters {'learning_rate': 0.219636, 'max_depth': 1, 'colsample_bytree': 0.723913, 'reg_alpha': 3.66897} using model Sobol.
[INFO 10-25 16:57:00] ax.service.ax_client: Completed trial 2 with data: {'r2': (0.724724, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 10-25 16:57:00] ax.service.ax_client: Generated new trial 3 with parameters {'learning_rate': 0

In [34]:
ax_client.get_trials_data_frame()



Unnamed: 0,trial_index,arm_name,trial_status,generation_method,r2,learning_rate,max_depth,colsample_bytree,reg_alpha
0,0,0_0,COMPLETED,Manual,0.751956,0.1,2,0.5,0.1
1,1,1_0,COMPLETED,Sobol,0.784741,0.574489,6,0.060488,9e-06
2,2,2_0,COMPLETED,Sobol,0.724724,0.219636,1,0.723913,3.66897
3,3,3_0,COMPLETED,Sobol,0.870054,0.451961,5,0.322058,0.000849
4,4,4_0,COMPLETED,Sobol,0.795635,0.849713,3,0.89203,0.108189
5,5,5_0,COMPLETED,Sobol,0.632996,0.986875,4,0.52832,0.012206
6,6,6_0,COMPLETED,Sobol,0.857541,0.337849,3,0.193089,8.4e-05
7,7,7_0,COMPLETED,Sobol,0.871079,0.117475,5,0.852757,1.138117
8,8,8_0,COMPLETED,Sobol,0.794878,0.715798,1,0.42334,2e-06
9,9,9_0,COMPLETED,BoTorch,0.862812,0.306009,5,0.389098,0.000106


In [35]:
best_parameters, values = ax_client.get_best_parameters()
best_parameters

{'learning_rate': 0.13295750580281185,
 'max_depth': 6,
 'colsample_bytree': 0.8723563799338128,
 'reg_alpha': 0.0037713319149620733}

In [36]:
mean, covariance = values
mean

{'r2': 0.879350265191172}

In [37]:
render(ax_client.get_contour_plot(param_x="learning_rate", param_y="colsample_bytree", metric_name="r2"))

[INFO 10-25 16:58:43] ax.service.ax_client: Retrieving contour plot with parameter 'learning_rate' on X-axis and 'colsample_bytree' on Y-axis, for metric 'r2'. Remaining parameters are affixed to the middle of their range.


In [38]:
render(
    ax_client.get_optimization_trace()
)  

In [39]:
ax_client.get_trials_data_frame().to_csv("../data/optimization_results/XGB_optimization.csv")

