In [1]:
# execute this in command line on all machines to be used as workers before initiating the hyperparamer search 
# ! pip install -U clearml-agent==0.15.0
# ! clearml-agent daemon --queue default

# pip install with locked versions
! pip install -U pandas==1.0.3
! pip install -U clearml>=0.16.2
! pip install -U optuna==2.0.0



In [2]:
from clearml.automation import UniformParameterRange, UniformIntegerParameterRange
from clearml.automation import HyperParameterOptimizer
from clearml.automation.optuna import OptimizerOptuna

from clearml import Task

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Register this notebook run with ClearML as a task of type "optimizer".
task = Task.init(
    project_name='Hyperparameter Optimization with Optuna',
    task_name='Hyperparameter Search',
    task_type=Task.TaskTypes.optimizer,
)


ClearML Task: created new task id=bbe932f1f53b45889eda5a09eccb136d
2022-07-11 23:42:30,630 - clearml.Task - INFO - No repository found, storing script code instead
ClearML results page: https://clearml-app.art.azure.cse-cst.gc.ca/projects/28770c0a8a46400ebbdb429d170a2a5a/experiments/bbe932f1f53b45889eda5a09eccb136d/output/log
ClearML results page: https://clearml-app.art.azure.cse-cst.gc.ca/projects/28770c0a8a46400ebbdb429d170a2a5a/experiments/bbe932f1f53b45889eda5a09eccb136d/output/log
ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring


In [4]:
# Id of the experiment to optimize; it is passed to the optimizer as `base_task_id`.
# NOTE: the value below is only a default placeholder — replace it with your own task id.
TEMPLATE_TASK_ID = "4bc2b595cb2c45e88f69a8e4d20b24f2"

In [5]:
optimizer = HyperParameterOptimizer(
    # --- what to optimize ---------------------------------------------------
    base_task_id=TEMPLATE_TASK_ID,  # the experiment we want to optimize
    hyper_parameters=[
        # search space: one range object per hyper-parameter of the template experiment
        UniformIntegerParameterRange('number_of_epochs', min_value=2, max_value=6, step_size=2),
        UniformIntegerParameterRange('batch_size', min_value=8, max_value=16, step_size=2),
        UniformIntegerParameterRange('ngrams', min_value=2, max_value=4, step_size=1),
        UniformParameterRange('base_lr', min_value=0.4, max_value=1.0, step_size=0.2),
    ],

    # --- objective: the reported scalar (title/series) and its direction ------
    objective_metric_title='accuracy',
    objective_metric_series='total',
    objective_metric_sign='max',  # 'max' to maximize, 'min' to minimize

    # --- search strategy ------------------------------------------------------
    # clearml supports GridSearch, RandomSearch, OptimizerBOHB and OptimizerOptuna
    optimizer_class=OptimizerOptuna,

    # --- scheduling -----------------------------------------------------------
    execution_queue='queue-dev',  # queue the experiments are scheduled on
    max_number_of_concurrent_tasks=2,  # experiments running at the same time

    # --- budget ---------------------------------------------------------------
    total_max_jobs=20,  # maximum number of experiments for the whole optimization
                        # (converted to total number of iterations for OptimizerBOHB)
    optimization_time_limit=60.0,  # time limit for the optimization process
    compute_time_limit=120,  # compute time limit (sum of execution time on all machines)
    min_iteration_per_job=15000,  # minimum iterations per experiment before early stopping
    max_iteration_per_job=150000,  # maximum iterations per experiment
)



In [None]:
optimizer.set_report_period(1)  # time gap between two consecutive progress reports
optimizer.start()
try:
    optimizer.wait()  # block until the optimization process is done
finally:
    # Always stop the background optimization, even if wait() raises or the
    # cell is interrupted; otherwise it would keep running after this cell.
    optimizer.stop()

Progress report #0 completed, sleeping for 0.25 minutes
2022-07-11 23:43:40,458 - clearml.automation.optimization - INFO - Creating new Task: {'number_of_epochs': 4, 'batch_size': 14, 'ngrams': 3, 'base_lr': 0.6000000000000001}
2022-07-11 23:43:40,720 - clearml.automation.optimization - INFO - Creating new Task: {'number_of_epochs': 2, 'batch_size': 16, 'ngrams': 4, 'base_lr': 0.6000000000000001}
Progress report #1 completed, sleeping for 1.0 minutes
Progress report #2 completed, sleeping for 1.0 minutes
Progress report #3 completed, sleeping for 1.0 minutes
Progress report #4 completed, sleeping for 1.0 minutes
Progress report #5 completed, sleeping for 1.0 minutes
Progress report #6 completed, sleeping for 1.0 minutes
Progress report #7 completed, sleeping for 1.0 minutes
Progress report #8 completed, sleeping for 1.0 minutes
Progress report #9 completed, sleeping for 1.0 minutes
Progress report #10 completed, sleeping for 1.0 minutes
Progress report #11 completed, sleeping for 1.0 m

[33m[W 2022-07-11 23:55:41,393][0m Trial 0 failed, because the returned value from the objective function cannot be cast to float. Returned value is: None[0m


OptunaObjective result metric=None, iteration None
2022-07-11 23:55:41,549 - clearml.automation.optimization - INFO - Creating new Task: {'number_of_epochs': 4, 'batch_size': 12, 'ngrams': 3, 'base_lr': 0.4}


[33m[W 2022-07-11 23:55:41,739][0m Trial 1 failed, because the returned value from the objective function cannot be cast to float. Returned value is: None[0m


OptunaObjective result metric=None, iteration None
2022-07-11 23:55:41,880 - clearml.automation.optimization - INFO - Creating new Task: {'number_of_epochs': 4, 'batch_size': 12, 'ngrams': 2, 'base_lr': 0.4}
Progress report #13 completed, sleeping for 1.0 minutes
Progress report #14 completed, sleeping for 1.0 minutes
Progress report #15 completed, sleeping for 1.0 minutes
Progress report #16 completed, sleeping for 1.0 minutes


In [None]:
# Optimization is completed: print the ids of the top-performing experiments.
k = 3
top_exp = optimizer.get_top_experiments(top_k=k)
print('Top {} experiments are:'.format(k))
for n, t in enumerate(top_exp, 1):
    # An experiment that never reported the 'accuracy'/'total' scalar has no such
    # entry; show result=None for it instead of raising KeyError mid-ranking.
    accuracy = t.get_last_scalar_metrics().get('accuracy', {}).get('total', {}).get('last')
    print('Rank {}: task id={} |result={}'.format(n, t.id, accuracy))