"""
Interface to the benchmarks of NAS-Bench-1Shot1
https://github.com/automl/nasbench-1shot1/tree/master/nasbench_analysis/
How to use this benchmark:
--------------------------
We recommend using the containerized version of this benchmark.
If you want to use this benchmark locally (without running it via the corresponding container),
you need to perform the following steps.
1. Download data
================
The data will be downloaded automatically.
Note: However, if you use the benchmark locally, you can also specify the data directory by hand (the path to the
folder containing the nasbench_full.tfrecord).
In this case, you can download the data with the following command:
```
wget https://storage.googleapis.com/nasbench/nasbench_full.tfrecord
```
Remark: It is important to select the full tfrecord and not the 'only_108' record in order to perform multi-fidelity
optimization.
Recommended: ``Python >= 3.6.0``
2. Clone and install
====================
```
cd /path/to/HPOBench
pip install .[nasbench_1shot1]
pip install git+https://github.com/google-research/nasbench.git@master
git clone https://github.com/automl/nasbench-1shot1.git
```
3. Environment setup
====================
To use the nasbench_analysis package, add the path to the cloned nasbench-1shot1 folder to your PYTHONPATH variable,
so that Python can import it.
```
export PYTHONPATH=/Path/to/nasbench-1shot1:$PYTHONPATH
```
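4. Usage example
================
A minimal sketch of how to query the benchmark locally (the import path is an assumption and may differ depending
on your HPOBench installation):
```
from hpobench.benchmarks.nas.nasbench_1shot1 import NASBench1shot1SearchSpace1Benchmark

benchmark = NASBench1shot1SearchSpace1Benchmark()  # downloads the data automatically if no data_path is given
config = benchmark.get_configuration_space(seed=1).sample_configuration()

# Query the sampled architecture at budget 4, averaged over all three evaluation runs.
result = benchmark.objective_function(configuration=config, fidelity={'budget': 4}, run_index=(0, 1, 2))
print(result['function_value'], result['cost'])
```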
Changelog:
==========
0.0.4
* New container release due to a general change in the communication between container and HPOBench.
Works with HPOBench >= v0.0.8
0.0.3:
* Standardize the structure of the meta information
0.0.2:
* The objective function now takes the parameter run_index, which specifies which of the three evaluation runs
(seeds) are used. Allowed values are a single index (0, 1, 2), a tuple of such indices, or None.
0.0.1:
* First implementation
"""
import logging
from pathlib import Path
from typing import Union, Dict, Any, Tuple, List
from ast import literal_eval
import ConfigSpace as CS
import numpy as np
from nasbench import api
from nasbench.api import OutOfDomainError
from hpobench.abstract_benchmark import AbstractBenchmark
from hpobench.util.data_manager import NASBench_101DataManager
from hpobench.util import rng_helper
from nasbench_analysis.search_spaces.search_space_1 import SearchSpace1 # noqa
from nasbench_analysis.search_spaces.search_space_2 import SearchSpace2 # noqa
from nasbench_analysis.search_spaces.search_space_3 import SearchSpace3 # noqa
from nasbench_analysis.utils import INPUT, OUTPUT, CONV1X1, CONV3X3, MAXPOOL3X3 # noqa
__version__ = '0.0.4'
logger = logging.getLogger('NasBench1shot1')
class NASBench1shot1BaseBenchmark(AbstractBenchmark):
def __init__(self, data_path: Union[Path, str, None] = None,
rng: Union[np.random.RandomState, int, None] = None):
"""
Base class for the NAS-Bench-1Shot1 benchmarks.
Please install the benchmark first and place the data under ``data_path``.
Parameters
----------
data_path : str, Path, None
Path to the nasbench record. It is recommended to use the full record!
rng : np.random.RandomState, int, None
Random seed for the benchmarks
"""
super(NASBench1shot1BaseBenchmark, self).__init__(rng=rng)
data_manager = NASBench_101DataManager(data_path)
self.api = data_manager.load()
self.search_space = None
# pylint: disable=arguments-differ
@AbstractBenchmark.check_parameters
def objective_function(self, configuration: Union[CS.Configuration, Dict],
fidelity: Union[CS.Configuration, Dict, None] = None,
run_index: Union[int, Tuple, List, None] = (0, 1, 2),
rng: Union[np.random.RandomState, int, None] = None,
**kwargs) -> Dict:
"""
Query the NAS-Bench-1Shot1 benchmark using a given configuration and an epoch (= budget).
Only data for the budgets 4, 12, 36, 108 are available.
Parameters
----------
configuration : Dict, CS.Configuration
fidelity: Dict, None
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None.
run_index : int, Tuple, None
The NAS benchmark contains results from 3 different runs for each configuration-budget pair.
- If multiple run indices are given as a Tuple/List, the benchmark returns the mean over the given runs.
- By default (no parameter is specified), all runs are used. A specific run can be chosen by setting
`run_index` to a value from [0, 2]. While the performance is averaged across the given run indices, the costs
are the sum of the runtimes per run index.
- When this value is explicitly set to `None`, a random run index is chosen.
rng : np.random.RandomState, int, None
Random seed to use in the benchmark.
To prevent overfitting on a single seed, it is possible to pass a
parameter ``rng`` as 'int' or 'np.random.RandomState' to this function.
If this parameter is not given, the default random state is used.
kwargs
Returns
-------
Dict -
function_value : validation error
cost : runtime
info : Dict
train_accuracies
test_accuracies
valid_accuracies
training_times
fidelity : used fidelities in this evaluation
data : additional data such as trainable parameters and used operations
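Example
-------
A minimal sketch (``benchmark`` and ``config`` are placeholders for a benchmark instance and a sampled
configuration):
    result = benchmark.objective_function(configuration=config,
                                          fidelity={'budget': 36},
                                          run_index=(0, 2))
    # function_value == 1 - mean validation accuracy over runs 0 and 2
    # cost == sum of the training times of runs 0 and 2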
"""
self.rng = rng_helper.get_rng(rng, self_rng=self.rng)
run_index = self._check_run_index(run_index)
configuration = self._parse_configuration(configuration)
train_accuracies = []
valid_accuracies = []
test_accuracies = []
training_times = []
additional = {}
failure = False
for run_id in run_index:
data = self._query_benchmark(config=configuration, fidelity=fidelity, run_index=run_id)
train_accuracies.append(data['train_accuracy'])
valid_accuracies.append(data['validation_accuracy'])
test_accuracies.append(data['test_accuracy'])
training_times.append(data['training_time'])
# Since this information is the same for all run ids, just store it once.
additional = {'trainable_parameters': data['trainable_parameters'],
'module_operations': data['module_operations']}
failure = failure or ('info' in data and data['info'] == 'failure')
return {'function_value': float(1 - np.mean(valid_accuracies)),
'cost': float(np.sum(training_times)),
'info': {'fidelity': fidelity,
'train_accuracies': train_accuracies,
'valid_accuracies': valid_accuracies,
'test_accuracies': test_accuracies,
'training_times': training_times,
'data': additional,
'failure': 'False' if not failure else 'True'
}
}
@AbstractBenchmark.check_parameters
def objective_function_test(self, configuration: Union[Dict, CS.Configuration],
fidelity: Union[CS.Configuration, Dict, None] = None,
rng: Union[np.random.RandomState, int, None] = None,
**kwargs) -> Dict:
"""
Validate a configuration on the maximum available budget (108) and on all three seeds.
Parameters
----------
configuration : Dict, CS.Configuration
fidelity: Dict, None
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None.
rng : np.random.RandomState, int, None
Random seed to use in the benchmark. To prevent overfitting on a single seed, it is
possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this
function. If this parameter is not given, the default random state is used.
kwargs
Returns
-------
Dict -
function_value : test error on largest fidelity.
cost : runtime
info : Dict
train_accuracies
test_accuracies
valid_accuracies
training_times
fidelity : used fidelities in this evaluation
data : additional data such as trainable parameters and used operations
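Example
-------
A minimal sketch (``benchmark`` and ``config`` are placeholders):
    result = benchmark.objective_function_test(configuration=config, fidelity={'budget': 108})
    # function_value == 1 - mean test accuracy over all three runs at the maximum budget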
"""
assert fidelity['budget'] == 108, 'Only test data for the 108th epoch is available.'
result = self.objective_function(configuration=configuration, fidelity=fidelity, run_index=(0, 1, 2), rng=rng)
result['function_value'] = float(1 - np.mean(result['info']['test_accuracies']))
return result
@staticmethod
def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
raise NotImplementedError
@staticmethod
def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the NASBench1shot1.
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
fidel_space = CS.ConfigurationSpace(seed=seed)
fidel_space.add_hyperparameters([
CS.OrdinalHyperparameter('budget', sequence=[4, 12, 36, 108], default_value=108)
])
return fidel_space
@staticmethod
def get_meta_information() -> Dict:
""" Returns the meta information for the benchmark """
return {'name': 'NAS-Bench-1Shot1: Benchmarking and Dissecting One-shot Neural Architecture Search',
'references': ['@inproceedings{Zela2020NAS-Bench-1Shot1:, '
'title = {NAS-Bench-1Shot1: '
' Benchmarking and Dissecting One-shot Neural Architecture Search},'
'author = {Arber Zela and Julien Siems and Frank Hutter},'
'booktitle = {International Conference on Learning Representations},'
'year = {2020},'
'url = {https://openreview.net/forum?id=SJx9ngStPH}}',
],
'code': 'https://github.com/automl/nasbench-1shot1',
}
def _check_run_index(self, run_index):
if isinstance(run_index, int):
assert 0 <= run_index <= 2, f'run_index must be in [0, 2], not {run_index}'
run_index = (run_index, )
elif isinstance(run_index, (tuple, list)):
assert 0 < len(run_index) <= 3, 'run_index must contain between 1 and 3 entries'
assert min(run_index) >= 0 and max(run_index) <= 2, \
f'all run_index values must be in [0, 2], but were {run_index}'
if len(set(run_index)) != len(run_index):
logger.debug('Some values appear more than once in run_index. Removing the redundant entries.')
run_index = tuple(set(run_index))
elif run_index is None:
logger.debug('The run index is explicitly set to None! A random run index will be selected.')
run_index = tuple(self.rng.choice((0, 1, 2), size=1))
else:
raise ValueError(f'run_index must be an int, a tuple/list of ints, or None, but was {type(run_index)}')
return run_index
def _query_benchmark(self, config: Dict, fidelity: Dict, run_index: int) -> Dict:
adjacency_matrix, node_list = self.search_space.convert_config_to_nasbench_format(config)
if isinstance(self, NASBench1shot1SearchSpace3Benchmark):
node_list = [INPUT, *node_list, OUTPUT]
else:
node_list = [INPUT, *node_list, CONV1X1, OUTPUT]
adjacency_list = adjacency_matrix.astype(int).tolist()
model_spec = api.ModelSpec(matrix=adjacency_list, ops=node_list)
try:
nasbench_data = self._query_api(model_spec=model_spec, run_index=run_index, epochs=int(fidelity['budget']))
except api.OutOfDomainError:
return {"trainable_parameters": 0,
"training_time": 0,
"train_accuracy": 0,
"validation_accuracy": 0,
"test_accuracy": 0,
"module_operations": 0,
"info": "failure"}
nasbench_data.pop('module_adjacency')
return nasbench_data
def _query_api(self, model_spec, run_index: int, epochs=108, stop_halfway=False):
"""
NOTE:
Copied from https://github.com/google-research/nasbench/blob/b94247037ee470418a3e56dcb83814e9be83f3a8/nasbench/api.py#L204-L263 # noqa
We changed the function so that we can now specify the run index (the index of the evaluation), which was
sampled randomly in the original code.
OLD DOCSTRING:
Fetch one of the evaluations for this model spec.
Each call will sample one of the config['num_repeats'] evaluations of the
model. This means that repeated queries of the same model (or isomorphic
models) may return identical metrics.
This function will increment the budget counters for benchmarking purposes.
See self.training_time_spent, and self.total_epochs_spent.
This function also allows querying the evaluation metrics at the halfway
point of training using stop_halfway. Using this option will increment the
budget counters only up to the halfway point.
Args:
model_spec: ModelSpec object.
epochs: number of epochs trained. Must be one of the evaluated number of
epochs, [4, 12, 36, 108] for the full dataset.
stop_halfway: if True, returned dict will only contain the training time
and accuracies at the halfway point of training (num_epochs/2).
Otherwise, returns the time and accuracies at the end of training
(num_epochs).
Returns:
dict containing the evaluated data for this object.
Raises:
OutOfDomainError: if model_spec or num_epochs is outside the search space.
"""
if epochs not in self.api.valid_epochs:
raise OutOfDomainError('invalid number of epochs, must be one of %s'
% self.api.valid_epochs)
fixed_stat, computed_stat = self.api.get_metrics_from_spec(model_spec)
# MODIFICATION: Use the run index instead of the sampled one.
# ORIGINAL CODE:
# sampled_index = random.randint(0, self.config['num_repeats'] - 1)
computed_stat = computed_stat[epochs][run_index]
data = {}
data['module_adjacency'] = fixed_stat['module_adjacency']
data['module_operations'] = fixed_stat['module_operations']
data['trainable_parameters'] = fixed_stat['trainable_parameters']
if stop_halfway:
data['training_time'] = computed_stat['halfway_training_time']
data['train_accuracy'] = computed_stat['halfway_train_accuracy']
data['validation_accuracy'] = computed_stat['halfway_validation_accuracy']
data['test_accuracy'] = computed_stat['halfway_test_accuracy']
else:
data['training_time'] = computed_stat['final_training_time']
data['train_accuracy'] = computed_stat['final_train_accuracy']
data['validation_accuracy'] = computed_stat['final_validation_accuracy']
data['test_accuracy'] = computed_stat['final_test_accuracy']
self.api.training_time_spent += data['training_time']
if stop_halfway:
self.api.total_epochs_spent += epochs // 2
else:
self.api.total_epochs_spent += epochs
return data
def _parse_configuration(self, configuration: Dict):
"""
Since the categorical hyperparameters are stored as strings (otherwise they would not be JSON serializable),
we need to cast them back to type tuple.
In the original configuration space, all hyperparameters are either of type string or tuple.
In the modified configuration space, the tuple hyperparameters are represented as strings as well.
Parameters
----------
configuration : Dict.
Returns
-------
Dict - configuration with the correct types
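Example
-------
A minimal sketch (the hyperparameter name is hypothetical):
    {'choice_block_1_op': '((0, 1), (1, 2))'} is cast back to {'choice_block_1_op': ((0, 1), (1, 2))},
    while plain string values such as 'maxpool3x3' are left unchanged.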
"""
# make sure that it is a dictionary and not a CS.Configuration.
if isinstance(configuration, CS.Configuration):
configuration = configuration.get_dictionary()
return {k: literal_eval(v) if isinstance(v, str) and v[0] == '(' else v
for k, v in configuration.items()}
@staticmethod
def _get_configuration_space(search_space: Any, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
""" Helper function to pass a seed to the configuration space """
seed = seed if seed is not None else np.random.randint(1, 100000)
original_cs = search_space.get_configuration_space()
# The categorical hyperparameters of this benchmark have choices of type tuple(tuple(int, int)). These are not
# JSON serializable with the ConfigSpace serializer. Therefore, we cast them to strings.
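# Illustrative example (hypothetical values): the choice ((0, 1), (1, 2)) is stored as the string
# '((0, 1), (1, 2))'; _parse_configuration() later casts it back via ast.literal_eval.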
hps = []
for hp in original_cs.get_hyperparameters():
# The configuration spaces of this benchmark contain only categorical hyperparameters,
# so they all have the attribute 'default_value'.
if isinstance(hp.default_value, tuple):
hp = CS.CategoricalHyperparameter(hp.name,
choices=[str(choice) for choice in hp.choices],
default_value=str(hp.default_value))
hps.append(hp)
cs = CS.ConfigurationSpace()
cs.add_hyperparameters(hps)
cs.seed(seed)
return cs
class NASBench1shot1SearchSpace1Benchmark(NASBench1shot1BaseBenchmark):
def __init__(self, data_path: Union[Path, str, None] = None,
rng: Union[np.random.RandomState, int, None] = None):
super(NASBench1shot1SearchSpace1Benchmark, self).__init__(data_path=data_path, rng=rng)
self.search_space = SearchSpace1()
@staticmethod
def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
return NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace1(), seed)
class NASBench1shot1SearchSpace2Benchmark(NASBench1shot1BaseBenchmark):
def __init__(self, data_path: Union[Path, str, None] = None,
rng: Union[np.random.RandomState, int, None] = None):
super(NASBench1shot1SearchSpace2Benchmark, self).__init__(data_path=data_path, rng=rng)
self.search_space = SearchSpace2()
@staticmethod
def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
return NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace2(), seed)
class NASBench1shot1SearchSpace3Benchmark(NASBench1shot1BaseBenchmark):
def __init__(self, data_path: Union[Path, str, None] = None,
rng: Union[np.random.RandomState, int, None] = None):
super(NASBench1shot1SearchSpace3Benchmark, self).__init__(data_path=data_path, rng=rng)
self.search_space = SearchSpace3()
@staticmethod
def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
return NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace3(), seed)