-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
base_classes.py
132 lines (115 loc) · 4.95 KB
/
base_classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import logging
from typing import Any, Callable, Dict, List, Optional, Tuple
from deepchem.data import Dataset
from deepchem.trans import Transformer
from deepchem.models import Model
from deepchem.metrics import Metric
logger = logging.getLogger(__name__)
def _convert_hyperparam_dict_to_filename(hyper_params: Dict[str, Any]) -> str:
"""Helper function that converts a dictionary of hyperparameters to a string that can be a filename.
Parameters
----------
hyper_params: Dict
Maps string of hyperparameter name to int/float/string/list etc.
Returns
-------
filename: str
A filename of form "_key1_value1_value2_..._key2..."
"""
filename = ""
keys = sorted(hyper_params.keys())
for key in keys:
filename += "_%s" % str(key)
value = hyper_params[key]
if isinstance(value, int):
filename += "_%s" % str(value)
elif isinstance(value, float):
filename += "_%f" % value
else:
filename += "%s" % str(value)
return filename
class HyperparamOpt(object):
    """Abstract superclass for hyperparameter search classes.

    Subclasses implement concrete strategies for exploring a
    hyperparameter space for `dc.models.Model` classes. A
    `HyperparamOpt` object is constructed with a model-builder
    callable; invoking `hyperparam_search` then builds many concrete
    models, trains each on the given training set, and scores it on a
    validation set.

    This class only defines the shared API. It must never be
    instantiated directly — always use a concrete subclass.
    """

    def __init__(self, model_builder: Callable[..., Model]):
        """Initialize Hyperparameter Optimizer.

        Note this is an abstract constructor which should only be used by
        subclasses.

        Parameters
        ----------
        model_builder: constructor function.
          This parameter must be constructor function which returns an
          object which is an instance of `dc.models.Model`. This function
          must accept two arguments, `model_params` of type `dict` and
          `model_dir`, a string specifying a path to a model directory.
        """
        # Name-based abstractness guard: only reject exact instances of
        # this class, never subclasses.
        if self.__class__.__name__ != "HyperparamOpt":
            self.model_builder = model_builder
            return
        raise ValueError(
            "HyperparamOpt is an abstract superclass and cannot be directly instantiated. \
You probably want to instantiate a concrete subclass instead.")

    def hyperparam_search(self,
                          params_dict: Dict,
                          train_dataset: Dataset,
                          valid_dataset: Dataset,
                          metric: Metric,
                          output_transformers: List[Transformer] = [],
                          nb_epoch: int = 10,
                          use_max: bool = True,
                          logdir: Optional[str] = None,
                          **kwargs) -> Tuple[Model, Dict, Dict]:
        """Conduct Hyperparameter search.

        Common API shared by every hyperparameter-optimization subclass;
        each subclass supplies its own search strategy behind this
        signature.

        Parameters
        ----------
        params_dict: Dict
          Dictionary mapping strings to values. The precise semantics
          depend on the concrete optimizer in use — values may be
          ints/floats/strings/lists/etc. Consult the subclass
          documentation for details.
        train_dataset: Dataset
          dataset used for training
        valid_dataset: Dataset
          dataset used for validation(optimization on valid scores)
        metric: Metric
          metric used for evaluation
        output_transformers: list[Transformer]
          Transformers for evaluation. Needed because `train_dataset`
          and `valid_dataset` may have been transformed for learning and
          the transform must be inverted before the metric is computed.
        nb_epoch: int, (default 10)
          Specifies the number of training epochs during each iteration of optimization.
        use_max: bool, optional
          If True, return the model with the highest score. Else return
          model with the minimum score.
        logdir: str, optional
          The directory in which to store created models. If not set, will
          use a temporary directory.

        Returns
        -------
        Tuple[`best_model`, `best_hyperparams`, `all_scores`]
          `(best_model, best_hyperparams, all_scores)` where `best_model` is
          an instance of `dc.models.Model`, `best_hyperparams` is a
          dictionary of parameters, and `all_scores` is a dictionary mapping
          string representations of hyperparameter sets to validation
          scores.
        """
        raise NotImplementedError