'''
Created on 12 May 2022

@author: Alessandro Lovo

description
-----------
This module optimizes the hyperparameters of the neural network. It is based on optuna, a
hyperparameter optimization framework, and is a wrapper around Learn2_new.py.

usage
-----
First you need to move the code to a desired folder by running

    python hyperparameter_optimization.py <folder>

This will copy this code and its dependencies to your desired location and will create a config file from the default
values of the functions specified in this module (just like for Learn2_new.py).
`cd` into your folder and have a look at the config file: modify all the parameters you want BEFORE the first run,
because AFTER the first successful run the config file becomes read-only. There is a reason for that, so don't try to modify it anyway!

When running the code you can specify some parameters to deviate from their default values, for example

    python hyperparameter_optimization.py n_trials=10

will run the code with all parameters at their default values except `n_trials`: optuna will be given only 10 trials to optimize the hyperparameters.

Other parameters include:
    study_name: (string)
        The name of the study, which tells optuna how to name the file storing the trials. We recommend only one study per folder, otherwise the way optuna labels the runs (IDs) is not consistent with `runs.json`.
    count_pruned: (bool)
        Whether optuna counts the runs that were pruned, i.e. the runs that were stopped early because they did not look promising.

config.json recommendations which overwrite the Learn2_new.py defaults:
------------------------------------------------------------------------
config["run_kwargs"]["k_fold_cross_val_kwargs"]["load_from"] = False
config["run_kwargs"]["k_fold_cross_val_kwargs"]["prune_threshold"] = 0.25
config["run_kwargs"]["k_fold_cross_val_kwargs"]["min_folds_before_pruning"] = 2
config["run_kwargs"]["k_fold_cross_val_kwargs"]["train_model_kwargs"]["enable_early_stopping"] = True
config["run_kwargs"]["k_fold_cross_val_kwargs"]["train_model_kwargs"]["early_stopping_kwargs"]["patience"] = 5
config["run_kwargs"]["k_fold_cross_val_kwargs"]["optimal_checkpoint_kwargs"]["collective"] = False
'''
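# Programmatic usage (a minimal sketch built from the objects defined below; the command line
# interface in main() is the intended entry point, and the kwarg values here are illustrative):
#
#   trainer = ln.Trainer(config='./config.json', upon_failed_run='continue')
#   so = ScoreOptimizer(trainer=trainer, study_name='study', common_kwargs={})
#   so.optimize(n_trials=10)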
from ast import literal_eval
import Learn2_new as ln
logger = ln.logger
ut = ln.ut
np = ln.np
keras = ln.keras
pd = ln.pd
import optuna
# log to stdout
import logging
import sys
import os
if __name__ == '__main__':
    logging.getLogger().level = logging.INFO
    logging.getLogger().handlers = [logging.StreamHandler(sys.stdout)]
class ScoreOptimizer():
    """
    Optimizes the hyperparameters of the machine learning model using the Optuna library.

    The class takes a trainer object, a study name and a dictionary of common kwargs.
    The trainer object is used to train the machine learning model and evaluate its performance.
    The study name is used to name the Optuna study, which stores the results of the optimization
    process. The common kwargs are additional arguments that are passed to the trainer when training the model.
    """
    def __init__(self, trainer, study_name='', common_kwargs=None, repetitions=1):
        self.trainer = trainer
        self.common_kwargs = common_kwargs or {}
        name_kwargs = {k: v for k, v in self.common_kwargs.items() if not k.startswith('prune')} # ignore kwargs related to pruning in the name of the study
        name = ln.make_run_name(study_name, **name_kwargs)
        self.study = optuna.create_study(study_name=name, storage=f'sqlite:///{name}.db', load_if_exists=True)
        self._pruned_trials = 0 # number of pruned trials in the last optimize run
        self.repetitions = repetitions
        if self.repetitions > 1:
            self.trainer.skip_existing_run = False # otherwise repeated runs with identical kwargs would be skipped
    def objective(self, trial):
        """
        Defines the objective function for the Optuna study.
        This method takes a trial object from Optuna and uses it to suggest hyperparameters for the machine learning model.
        These hyperparameters are then passed to the trainer object, which trains the model and evaluates its performance.
        The performance score is returned as the result of the objective function.
        """
        #### select hyperparameters ####
        hyp = {}
        # uncomment the portion of the code below that you would like to use for the optimization
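        # After the active block below runs, hyp holds one concrete value per hyperparameter,
        # e.g. (hypothetical numbers): {'lr': 0.001, 'lr_min': 1e-06, 'epoch_tol': 3, 'decay': 0.1,
        # 'warmup': True, 'batch_size': 128, 'rnn_units': [64, 64, 64], ...}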
        # optimizing learning rate, batch size and regularization:
        """lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True) # learning rate
        lr = literal_eval(f'{lr:.7f}') # limit the resolution of the learning rate
        lr_min = trial.suggest_float('lr_min', 1e-7, lr, log=True)
        lr_min = literal_eval(f'{lr_min:.7f}')
        hyp['lr'] = lr
        hyp['lr_min'] = lr_min
        hyp['epoch_tol'] = trial.suggest_int('epoch_tol', 1, 5)
        hyp['decay'] = literal_eval(f"{trial.suggest_float('decay', 0.05, 0.5, log=True):.5f}")
        hyp['warmup'] = trial.suggest_categorical('warmup', [True, False])
        ResNet_trial = trial.suggest_categorical('ResNet', [True, False])
        hyp['ResNet'] = ResNet_trial
        if ResNet_trial:
            hyp['conv_skip'] = [[0,2], [3,5], [6,8]]
        else:
            hyp['conv_skip'] = None
        hyp['batch_size'] = trial.suggest_int('batch_size', 32, 512, log=True)
        hyp['batch_normalizations'] = trial.suggest_categorical('batch_normalizations', [True, False])
        hyp['conv_dropouts'] = literal_eval(f"{trial.suggest_float('conv_dropouts', 0, 0.8, step=0.08):.2f}")
        hyp['conv_l2coef'] = literal_eval(f"{trial.suggest_float('conv_l2coef', 1e-6, 1e6, log=True):.7f}")
        hyp['conv_activations'] = trial.suggest_categorical('conv_activations', ['relu', 'elu', 'LeakyRelu'])
        conv_channel_first_trial = trial.suggest_categorical('conv_channel_first', [4, 8, 16, 32])
        hyp['conv_channel_first'] = conv_channel_first_trial
        hyp['conv_channels'] = list(conv_channel_first_trial*np.array([32,32,32, 64,64,64, 128,128,128, 256,256]))
        hyp['dense_units'] = []
        hyp['dense_dropouts'] = []
        hyp['dense_l2coef'] = []
        hyp['dense_activations'] = []
        for i in range(1): # a single hidden dense layer; the output layer is appended below
            if i == 0:
                dense_units_trial = trial.suggest_categorical(f'dense_units_{i+1}', [32, 64, 128, 256])
                dense_dropouts_trial = literal_eval(f"{trial.suggest_float(f'dense_dropouts_{i+1}', 0, 0.8, step=0.08):.2f}")
                dense_l2coef_trial = literal_eval(f"{trial.suggest_float(f'dense_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
                dense_activations_trial = trial.suggest_categorical(f'dense_activations_{i+1}', ['relu', 'elu', 'LeakyRelu'])
            hyp['dense_units'].append(dense_units_trial)
            hyp['dense_dropouts'].append(dense_dropouts_trial)
            hyp['dense_l2coef'].append(dense_l2coef_trial)
            hyp['dense_activations'].append(dense_activations_trial)
        hyp['dense_units'].append(2) # output layer
        hyp['dense_dropouts'].append(None)
        hyp['dense_l2coef'].append(None)
        hyp['dense_activations'].append(None)"""
        #hyp['time_start'] = trial.suggest_int('time_start', 31-15, 31-1)
        #hyp['Z_DIM'] = trial.suggest_int('Z_DIM', 2, 256)
        n_rnn_layers = 3 # trial.suggest_int('n_rnn_layers', 1, 4)
        lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True) # learning rate
        lr = literal_eval(f'{lr:.7f}') # limit the resolution of the learning rate
        lr_min = trial.suggest_float('lr_min', 1e-7, lr, log=True)
        lr_min = literal_eval(f'{lr_min:.7f}')
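        # (literal_eval on the formatted string effectively rounds the float to 7 decimal places,
        # keeping the suggested values at a finite resolution)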
        hyp['lr'] = lr
        hyp['lr_min'] = lr_min
        hyp['epoch_tol'] = trial.suggest_int('epoch_tol', 1, 5)
        hyp['decay'] = literal_eval(f"{trial.suggest_float('decay', 0.05, 0.5, log=True):.5f}")
        hyp['warmup'] = trial.suggest_categorical('warmup', [True, False])
        hyp['batch_size'] = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512])
        hyp['rnn_units'] = []
        hyp['rnn_dropouts'] = []
        hyp['rnn_l2coef'] = []
        hyp['rnn_activations'] = []
        hyp['rnn_return_sequences'] = []
        unique_layers = False # if True, each layer gets its own suggested value; if False, the value suggested for the first layer is reused for all layers
        for i in range(n_rnn_layers):
            if unique_layers or i == 0:
                rnn_units_trial = trial.suggest_int(f'rnn_units_{i+1}', 8, 256)
                rnn_dropouts_trial = literal_eval(f"{trial.suggest_float(f'rnn_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}")
                rnn_l2coef_trial = literal_eval(f"{trial.suggest_float(f'rnn_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
                rnn_activations_trial = trial.suggest_categorical(f'rnn_activations_{i+1}', ['relu', 'elu', 'LeakyRelu'])
            hyp['rnn_units'].append(rnn_units_trial)
            hyp['rnn_dropouts'].append(rnn_dropouts_trial)
            hyp['rnn_l2coef'].append(rnn_l2coef_trial)
            hyp['rnn_activations'].append(rnn_activations_trial)
            hyp['rnn_return_sequences'].append(i < n_rnn_layers - 1) # every layer but the last returns sequences
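        # With the defaults above (n_rnn_layers=3, unique_layers=False) this yields, e.g.,
        # rnn_units == [u, u, u] for a single suggested value u, and rnn_return_sequences == [True, True, False].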
"""# optimizing learning rate and batch size:
lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True) # learning rate
lr = literal_eval(f'{lr:.7f}') # limit the resolution of the learning rate
lr_min = trial.suggest_float('lr_min', 1e-7, lr, log=True)
lr_min = literal_eval(f'{lr_min:.7f}')
hyp['lr'] = lr
hyp['lr_min'] = lr_min
hyp['epoch_tol'] = trial.suggest_int('epoch_tol', 1, 5)
hyp['decay'] = (literal_eval(f"{trial.suggest_float(f'decay', 0.01, 1, log=True):.05f}"))
hyp['warmup'] = trial.suggest_categorical(f'warmup', [True, False])
#hyp['batch_size'] = trial.suggest_int('batch_size', 128, 2048, log=True)"""
""" # optimizing batch normalization, l2 coefs and dropouts layerwise:
hyp['batch_normalizations'] = []
hyp['conv_dropouts'] = []
hyp['conv_l2coef'] = []
conv_channels = ut.extract_nested(self.trainer.config_dict,'conv_channels')
for i in range(len(conv_channels)):
hyp['batch_normalizations'].append(trial.suggest_categorical(f'batch_normalizations_{i+1}', [True, False]))
hyp['conv_dropouts'].append(literal_eval(f"{trial.suggest_float(f'conv_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}"))
hyp['conv_l2coef'].append(literal_eval(f"{trial.suggest_float(f'conv_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}"))
hyp['conv_l2coef'].append(literal_eval(f"{trial.suggest_float(f'conv_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}"))
hyp['dense_units'] = []
hyp['dense_dropouts'] = []
hyp['dense_l2coef'] = []
ense_units = ut.extract_nested(self.trainer.config_dict,'dense_units')
for i in range(len(dense_units)-1):
hyp['dense_dropouts'].append(literal_eval(f"{trial.suggest_float(f'dense_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}"))
hyp['dense_l2coef'].append(literal_eval(f"{trial.suggest_float(f'dense_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}"))
hyp['dense_units'].append(2)
hyp['dense_dropouts'].append(False)
"""
"""# Optimizing number of layers, kernel sizes, strides and dense units
n_conv_layers = trial.suggest_int('n_conv_layers', 1, 6)
hyp['conv_channels'] = []
hyp['kernel_sizes'] = []
hyp['strides'] = []
hyp['padding'] = ['same']*(n_conv_layers - 1) + ['valid']
for i in range(n_conv_layers):
hyp['conv_channels'].append(trial.suggest_int(f'conv_channels_{i+1}', 32, 128))
hyp['kernel_sizes'].append(trial.suggest_int(f'kernel_sizes_{i+1}', 2, 10))
hyp['strides'].append(trial.suggest_int(f'strides_{i+1}', 1, 2))
# fully connected layers
n_dense_layers = trial.suggest_int('n_dense_layers', 1, 3)
hyp['dense_units'] = []
hyp['dense_dropouts'] = [0.3]*(n_dense_layers - 1) + [None]
hyp['dense_activations'] = ['relu']*(n_dense_layers - 1) + [None]
hyp['dense_l2coef'] = [0.001]*(n_dense_layers - 1) + [None]
for i in range(n_dense_layers - 1):
hyp['dense_units'].append(trial.suggest_int(f'dense_units_{i+1}', 8, 128))
hyp['dense_units'].append(2)"""
""" # Optimizing number of dense layers and regularizers (in this study we skip conv layers)
hyp['Z_DIM'] = trial.suggest_int('Z_DIM', 2, 256)
n_dense_layers = trial.suggest_int('n_dense_layers', 1, 5)
hyp['dense_units'] = []
hyp['dense_dropouts'] = []
hyp['dense_l2coef'] = []
hyp['dense_activations'] = []
unique_layers = False # controls whether to reuse the same value for each layer
for i in range(n_dense_layers-1):
if unique_layers or i == 0:
dense_units_trial = trial.suggest_int(f'dense_units_{i+1}', 8, 256)
dense_dropouts_trial = literal_eval(f"{trial.suggest_float(f'dense_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}")
dense_l2coef_trial = literal_eval(f"{trial.suggest_float(f'dense_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
dense_activations_trial = trial.suggest_categorical(f'dense_activations_{i+1}', ['relu', 'elu'])
hyp['dense_units'].append(dense_units_trial)
hyp['dense_dropouts'].append(dense_dropouts_trial)
hyp['dense_l2coef'].append(dense_l2coef_trial)
hyp['dense_activations'].append(dense_activations_trial)
hyp['dense_units'].append(2)
hyp['dense_dropouts'].append(None)
hyp['dense_l2coef'].append(None)
hyp['dense_activations'].append(None)"""
"""# Optimizing filter sizes, kernel sizes, weight decay
unique_layers = False # controls whether to reuse the same value for each layer
n_conv_layers = trial.suggest_int('n_conv_layers', 1, 6)
hyp['n_conv_layers'] = n_conv_layers
hyp['conv_channels'] = []
hyp['kernel_sizes'] = []
hyp['batch_normalizations'] = []
hyp['conv_dropouts'] = []
hyp['conv_l2coef'] = []
hyp['strides'] = []
hyp['padding'] = ['same']*(n_conv_layers - 1) + ['valid']
for i in range(n_conv_layers):
if unique_layers or i == 0:
conv_channels = trial.suggest_int(f'conv_channels_{i+1}', 8, 256)
kernel_sizes = trial.suggest_int(f'kernel_sizes_{i+1}', 2, 10)
batch_normalizations_trial = trial.suggest_categorical(f'batch_normalizations_{i+1}', [True, False])
conv_dropouts_trial = literal_eval(f"{trial.suggest_float(f'conv_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}")
conv_l2coef_trial = literal_eval(f"{trial.suggest_float(f'conv_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
strides_trial = trial.suggest_int(f'strides_{i+1}', 1, 3)
hyp['conv_channels'].append(conv_channels)
hyp['kernel_sizes'].append(kernel_sizes)
hyp['batch_normalizations'].append(batch_normalizations_trial)
hyp['conv_dropouts'].append(conv_dropouts_trial)
hyp['conv_l2coef'].append(conv_l2coef_trial)
hyp['strides'].append(strides_trial)
n_dense_layers = trial.suggest_int('n_dense_layers', 1, 4)
hyp['n_dense_layers'] = n_dense_layers
hyp['dense_units'] = []
hyp['dense_dropouts'] = []
hyp['dense_l2coef'] = []
for i in range(n_dense_layers-1):
if unique_layers or i == 0:
dense_units = trial.suggest_int(f'dense_units_{i+1}', 8, 256)
dense_dropouts_trial = literal_eval(f"{trial.suggest_float(f'dense_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}")
dense_l2coef_trial = literal_eval(f"{trial.suggest_float(f'dense_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
hyp['dense_units'].append(dense_units)
hyp['dense_dropouts'].append(dense_dropouts_trial)
hyp['dense_l2coef'].append(dense_l2coef_trial)
hyp['dense_units'].append(2)
hyp['dense_dropouts'].append(None)
hyp['dense_l2coef'].append(None)"""
"""# Optimizing layerwise batchnormalization dropouts and weight decay
hyp['batch_normalizations'] = []
hyp['conv_dropouts'] = []
hyp['conv_l2coef'] = []
unique_layers = False # controls whether to reuse the same value for each layer
conv_channels = ut.extract_nested(self.trainer.config_dict,'conv_channels')
for i in range(len(conv_channels)):
if unique_layers or i == 0:
batch_normalizations_trial = trial.suggest_categorical(f'batch_normalizations_{i+1}', [True, False])
conv_dropouts_trial = literal_eval(f"{trial.suggest_float(f'conv_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}")
conv_l2coef_trial = literal_eval(f"{trial.suggest_float(f'conv_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
hyp['batch_normalizations'].append(batch_normalizations_trial)
hyp['conv_dropouts'].append(conv_dropouts_trial)
hyp['conv_l2coef'].append(conv_l2coef_trial)
hyp['dense_dropouts'] = []
hyp['dense_l2coef'] = []
dense_units = ut.extract_nested(self.trainer.config_dict,'dense_units')
for i in range(len(dense_units)-1):
if unique_layers or i == 0:
dense_dropouts_trial = literal_eval(f"{trial.suggest_float(f'dense_dropouts_{i+1}', 0, 0.8, step=0.01):.2f}")
dense_l2coef_trial = literal_eval(f"{trial.suggest_float(f'dense_l2coef_{i+1}', 1e-6, 1e6, log=True):.7f}")
hyp['dense_dropouts'].append(dense_dropouts_trial)
hyp['dense_l2coef'].append(dense_l2coef_trial)
hyp['dense_dropouts'].append(None)
hyp['dense_l2coef'].append(None)"""
        # remove arguments that remained empty lists (this makes it easy to comment out lines above to exclude kwargs from the optimization)
        kw_to_remove = []
        for k, v in hyp.items():
            if isinstance(v, list) and len(v) == 0:
                kw_to_remove.append(k)
            if k in self.common_kwargs:
                raise KeyError(f'{k} appears both in common_kwargs and hyp')
        for k in kw_to_remove:
            logger.warning(f'removing hyperparameter {k} from the optimization list')
            hyp.pop(k)
        #### run ####
        run_args = {**self.common_kwargs, **hyp}
        logger.info(f'{run_args = }')
        scores = []
        for rep in range(self.repetitions):
            try:
                score, info = self.trainer._run(**run_args)
                scores.append(score)
                if info['status'] == 'FAILED': # most likely due to an invalid network architecture
                    self._pruned_trials += 1
                    raise optuna.TrialPruned('Run failed: pruning.') # prune the trial
                # we could also prune PRUNED runs, but since we have access to a partial score on the first few folds we keep them to instruct optuna
            except KeyboardInterrupt:
                raise
            except optuna.TrialPruned:
                raise
            except Exception as e:
                # we got an exception that was not handled by Trainer._run
                raise RuntimeError("If upon_failed_run was set to 'continue', something very bad happened to reach this block") from e
        return np.mean(scores)
    def optimize(self, n_trials=20, count_pruned=True, **kwargs):
        """
        Runs the optimization process for `n_trials` trials, forwarding any additional keyword
        arguments to `optuna.Study.optimize`. If `count_pruned` is False, trials that were pruned
        because of an invalid network architecture are rerun in additional rounds.
        The results are logged and stored in the Optuna study.
        """
        # add telegram logger
        th = self.trainer.telegram(**self.trainer.telegram_kwargs)
        logger.log(45, f'Starting {n_trials} runs')
        _n_trials = n_trials
        try:
            while _n_trials:
                self._pruned_trials = 0
                self.study.optimize(self.objective, n_trials=_n_trials, **kwargs)
                logger.log(45, f'Completed {_n_trials} runs, {self._pruned_trials} of which failed due to an invalid network architecture')
                if count_pruned:
                    _n_trials = 0 # if we count the pruned runs as well, there is no second round
                else:
                    _n_trials = self._pruned_trials # number of trials for the next round
                    if _n_trials:
                        logger.log(45, f'Starting another {_n_trials} runs')
            logger.log(45, '\n\nAll runs completed!')
        finally:
            # remove telegram logger
            if th is not None:
                logger.handlers.remove(th)
                logger.log(45, 'Removed telegram logger')
def main():
    if ln.deal_with_lock(additional_files=[ln.Path(__file__).resolve()]):
        return

    arg_dict = ln.parse_command_line()

    trainer_kwargs = ln.get_default_params(ln.Trainer) # extract the default parameters of the Trainer class
    trainer_kwargs.pop('config')
    trainer_kwargs.pop('root_folder') # these two parameters cannot be changed
    trainer_kwargs['upon_failed_run'] = 'continue'
    for k in list(arg_dict): # iterate over a copy of the keys since we pop from arg_dict
        if k in trainer_kwargs:
            trainer_kwargs[k] = arg_dict.pop(k) # add kwargs parsed from the command line

    # create trainer
    trainer = ln.Trainer(config='./config.json', **trainer_kwargs) # create the Trainer from `config.json` and the trainer_kwargs assembled above

    # deal with telegram kwargs
    for k in trainer.telegram_kwargs:
        if k in arg_dict:
            trainer.telegram_kwargs[k] = arg_dict.pop(k)

    # check conditions
    if trainer.config_dict_flat['load_from'] is not None:
        raise ValueError('load_from is not None!')

    study_name = arg_dict.pop('study_name', 'study') # optuna stores its experiments in the file `{name}.db`
    n_trials = arg_dict.pop('n_trials', None)
    count_pruned = arg_dict.pop('count_pruned', True)
    repetitions = arg_dict.pop('repetitions', 1)
    if not n_trials:
        raise ValueError('You must provide a valid number of trials with n_trials=<number of trials>')

    # create a ScoreOptimizer; all remaining command line arguments are passed to every run as common kwargs
    so = ScoreOptimizer(trainer=trainer, study_name=study_name, common_kwargs=arg_dict, repetitions=repetitions)

    # run
    so.optimize(n_trials=n_trials, count_pruned=count_pruned)
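# Example invocations (from inside an initialized folder; 'study' is the default study name):
#   python hyperparameter_optimization.py n_trials=10
#   python hyperparameter_optimization.py n_trials=50 study_name=my_study count_pruned=False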
if __name__ == '__main__':
    main()