add infer_and_cast (#2324)
* add infer_and_cast

* remove print statement + add comment

* address PR feedback

* pylint
joelgrus committed Jan 10, 2019
1 parent 059b057 commit 71ebcd8
Showing 4 changed files with 109 additions and 6 deletions.
53 changes: 49 additions & 4 deletions allennlp/common/params.py
@@ -31,6 +31,46 @@ def evaluate_snippet(_filename: str, expr: str, **_kwargs) -> str:

logger = logging.getLogger(__name__) # pylint: disable=invalid-name

# pylint: disable=inconsistent-return-statements
def infer_and_cast(value: Any):
    """
    In some cases we'll be feeding params dicts to functions we don't own;
    for example, PyTorch optimizers. In that case we can't use ``pop_int``
    or similar to force casts (which means you can't specify ``int`` parameters
    using environment variables). This function takes something that looks JSON-like
    and recursively casts things that look like (bool, int, float) to (bool, int, float).
    """
    # pylint: disable=too-many-return-statements
    if isinstance(value, (int, float, bool)):
        # Already one of our desired types, so leave as is.
        return value
    elif isinstance(value, list):
        # Recursively call on each list element.
        return [infer_and_cast(item) for item in value]
    elif isinstance(value, dict):
        # Recursively call on each dict value.
        return {key: infer_and_cast(item) for key, item in value.items()}
    elif isinstance(value, str):
        # If it looks like a bool, make it a bool.
        if value.lower() == "true":
            return True
        elif value.lower() == "false":
            return False
        else:
            # See if it could be an int.
            try:
                return int(value)
            except ValueError:
                pass
            # See if it could be a float.
            try:
                return float(value)
            except ValueError:
                # Just return it as a string.
                return value
    else:
        raise ValueError(f"cannot infer type of {value}")
# pylint: enable=inconsistent-return-statements

def unflatten(flat_dict: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -259,18 +299,23 @@ def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool
            raise ConfigurationError(message)
        return value

-    def as_dict(self, quiet=False):
+    def as_dict(self, quiet: bool = False, infer_type_and_cast: bool = False):
        """
        Sometimes we need to just represent the parameters as a dict, for instance when we pass
-        them to a Keras layer (so that they can be serialised).
+        them to PyTorch code.

        Parameters
        ----------
        quiet: bool, optional (default = False)
            Whether to log the parameters before returning them as a dict.
        """
+        if infer_type_and_cast:
+            params_as_dict = infer_and_cast(self.params)
+        else:
+            params_as_dict = self.params

        if quiet:
-            return self.params
+            return params_as_dict

        def log_recursively(parameters, history):
            for key, value in parameters.items():
@@ -285,7 +330,7 @@ def log_recursively(parameters, history):
"used subsequently.")
logger.info("CURRENTLY DEFINED PARAMETERS: ")
log_recursively(self.params, self.history)
return self.params
return params_as_dict

    def as_flat_dict(self):
        """
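For illustration, a minimal sketch of the new behavior; the parameter names and values below are made up for the example:

from allennlp.common.params import Params, infer_and_cast

# Values that arrive as strings (e.g. via environment-variable substitution
# in a config) are cast back to their natural types; real strings pass through.
raw = {"lr": "0.1", "shuffle": "true", "batch_size": "32", "type": "sgd"}
assert infer_and_cast(raw) == {"lr": 0.1, "shuffle": True, "batch_size": 32, "type": "sgd"}

# The same casting is available through the new as_dict flag.
params = Params(raw)
assert params.as_dict(infer_type_and_cast=True)["lr"] == 0.1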
26 changes: 25 additions & 1 deletion allennlp/tests/common/params_test.py
@@ -7,7 +7,7 @@

import pytest

-from allennlp.common.params import Params, unflatten, with_fallback, parse_overrides
+from allennlp.common.params import Params, unflatten, with_fallback, parse_overrides, infer_and_cast
from allennlp.common.testing import AllenNlpTestCase


@@ -314,3 +314,27 @@ def test_to_file(self):
        assert json.dumps(expected_ordered_params_dict) == json.dumps(ordered_params_dict)
        # check without preference orders doesn't give error
        params.to_file(file_path)

    def test_infer_and_cast(self):
        lots_of_strings = {
                "a": ["10", "1.3", "true"],
                "b": {"x": 10, "y": "20.1", "z": "other things"},
                "c": "just a string"
        }

        casted = {
                "a": [10, 1.3, True],
                "b": {"x": 10, "y": 20.1, "z": "other things"},
                "c": "just a string"
        }

        assert infer_and_cast(lots_of_strings) == casted

        contains_bad_data = {"x": 10, "y": int}
        with pytest.raises(ValueError, match="cannot infer type"):
            infer_and_cast(contains_bad_data)

        params = Params(lots_of_strings)

        assert params.as_dict() == lots_of_strings
        assert params.as_dict(infer_type_and_cast=True) == casted
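To connect the test back to the docstring's motivation: for keys that AllenNLP's own code consumes, a typed pop such as ``pop_int`` forces the cast one key at a time; ``infer_type_and_cast`` is for whatever is left over and gets **-splatted into code we don't own. A sketch, with made-up values:

from allennlp.common.params import Params

params = Params({"batch_size": "32", "lr": "0.1"})

# A typed pop forces the cast (and removes the key from the Params object).
assert params.pop_int("batch_size") == 32

# The remaining keys, destined for third-party code, get fixed up wholesale.
assert params.as_dict(infer_type_and_cast=True) == {"lr": 0.1}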
27 changes: 27 additions & 0 deletions allennlp/tests/training/optimizer_test.py
@@ -1,4 +1,6 @@
# pylint: disable=invalid-name
import pytest

from allennlp.common.testing import AllenNlpTestCase
from allennlp.data import Vocabulary
from allennlp.common.params import Params
@@ -75,6 +77,31 @@ def test_optimizer_parameter_groups(self):
        assert len(param_groups[2]['params']) == 3


    def test_parameter_type_inference(self):
        # Should work ok even with lr as a string
        optimizer_params = Params({
                "type": "sgd",
                "lr": "0.1"
        })

        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, optimizer_params)

        assert optimizer.defaults["lr"] == 0.1

        # But should crash (in the PyTorch code) if we don't do the type inference
        optimizer_params = Params({
                "type": "sgd",
                "lr": "0.1",
                "infer_type_and_cast": False
        })

        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]

        with pytest.raises(TypeError):
            optimizer = Optimizer.from_params(parameters, optimizer_params)


class TestDenseSparseAdam(AllenNlpTestCase):

    def setUp(self):
9 changes: 8 additions & 1 deletion allennlp/training/optimizers.py
@@ -121,7 +121,14 @@ def from_params(cls, model_parameters: List, params: Params):  # type: ignore
        else:
            num_parameters += parameter_group.numel()
        logger.info("Number of trainable parameters: %s", num_parameters)
-        return Optimizer.by_name(optimizer)(parameter_groups, **params.as_dict())  # type: ignore

        # By default we cast things that e.g. look like floats to floats before handing them
        # to the Optimizer constructor, but if you want to disable that behavior you could add a
        #       "infer_type_and_cast": false
        # key to your "trainer.optimizer" config.
        infer_type_and_cast = params.pop_bool("infer_type_and_cast", True)
        params_as_dict = params.as_dict(infer_type_and_cast=infer_type_and_cast)
+        return Optimizer.by_name(optimizer)(parameter_groups, **params_as_dict)  # type: ignore

# We just use the PyTorch optimizers, so here we force them into
# Registry._registry so we can build them from params.
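A self-contained sketch of the resulting behavior, using a throwaway linear module (any torch module would do here):

import torch
from allennlp.common.params import Params
from allennlp.training.optimizers import Optimizer

model = torch.nn.Linear(2, 1)
parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]

# By default the string "0.1" is cast to the float 0.1 before it reaches
# torch.optim.SGD, so construction succeeds.
optimizer = Optimizer.from_params(parameters, Params({"type": "sgd", "lr": "0.1"}))
assert optimizer.defaults["lr"] == 0.1

# Adding "infer_type_and_cast": False to the config would pass "0.1" through
# as a string, and PyTorch would raise a TypeError, as the new test asserts.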
