diff --git a/dask/config.py b/dask/config.py index d2bee117855..c9eabf08f89 100644 --- a/dask/config.py +++ b/dask/config.py @@ -9,7 +9,7 @@ except ImportError: yaml = None -from .compatibility import makedirs, builtins, Mapping, string_types +from .compatibility import makedirs, builtins, Mapping no_default = '__no_default__' @@ -38,6 +38,29 @@ defaults = [] +def canonical_name(k, config): + """Return the canonical name for a key. + + Handles user choice of '-' or '_' conventions by standardizing on whichever + version was set first. If a key already exists in either hyphen or + underscore form, the existing version is the canonical name. If neither + version exists the original key is used as is. + """ + try: + if k in config: + return k + except TypeError: + # config is not a mapping, return the same name as provided + return k + + altk = k.replace('_', '-') if '_' in k else k.replace('-', '_') + + if altk in config: + return altk + + return k + + def update(old, new, priority='new'): """ Update a nested dictionary with values from another @@ -68,11 +91,10 @@ def update(old, new, priority='new'): dask.config.merge """ for k, v in new.items(): - if k not in old and isinstance(v, Mapping): - old[k] = {} + k = canonical_name(k, old) if isinstance(v, Mapping): - if old[k] is None: + if k not in old or old[k] is None: old[k] = {} update(old[k], v, priority=priority) else: @@ -104,38 +126,6 @@ def merge(*dicts): return result -def normalize_key(key): - """ Replaces underscores with hyphens in string keys - - Parameters - ---------- - key : string, int, or float - Key to assign. - """ - if isinstance(key, string_types): - key = key.replace('_', '-') - return key - - -def normalize_nested_keys(config): - """ Replaces underscores with hyphens for keys for a nested Mapping - - Examples - -------- - >>> a = {'x': 1, 'y_1': {'a_2': 2}} - >>> normalize_nested_keys(a) - {'x': 1, 'y-1': {'a-2': 2}} - """ - config_norm = {} - for key, value in config.items(): - if isinstance(value, Mapping): - value = normalize_nested_keys(value) - key_norm = normalize_key(key) - config_norm[key_norm] = value - - return config_norm - - def collect_yaml(paths=paths): """ Collect configuration from yaml files @@ -166,7 +156,6 @@ def collect_yaml(paths=paths): try: with open(path) as f: data = yaml.safe_load(f.read()) or {} - data = normalize_nested_keys(data) configs.append(data) except (OSError, IOError): # Ignore permission errors @@ -184,7 +173,6 @@ def collect_env(env=None): - Lower-cases the key text - Treats ``__`` (double-underscore) as nested access - - Replaces ``_`` (underscore) with a hyphen. - Calls ``ast.literal_eval`` on the value """ if env is None: @@ -193,7 +181,6 @@ def collect_env(env=None): for name, value in env.items(): if name.startswith('DASK_'): varname = name[5:].lower().replace('__', '.') - varname = normalize_key(varname) try: d[varname] = ast.literal_eval(value) except (SyntaxError, ValueError): @@ -329,7 +316,7 @@ def _assign(cls, keys, value, d, old=None, path=[]): path: List[str] Used internally to hold the path of old values """ - key = normalize_key(keys[0]) + key = canonical_name(keys[0], d) if len(keys) == 1: if old is not None: path_key = tuple(path + [key]) @@ -436,7 +423,7 @@ def get(key, default=no_default, config=config): keys = key.split('.') result = config for k in keys: - k = normalize_key(k) + k = canonical_name(k, result) try: result = result[k] except (TypeError, IndexError, KeyError): @@ -452,8 +439,8 @@ def rename(aliases, config=config): This helps migrate older configuration versions over time """ - old = list() - new = dict() + old = [] + new = {} for o, n in aliases.items(): value = get(o, None, config=config) if value is not None: diff --git a/dask/context.py b/dask/context.py index ee49847c803..633de262eb0 100644 --- a/dask/context.py +++ b/dask/context.py @@ -65,7 +65,7 @@ def globalmethod(default=None, key=None, falsey=None): class GlobalMethod(object): def __init__(self, default, key, falsey=None): self._default = default - self._key = config.normalize_key(key) + self._key = key self._falsey = falsey def __get__(self, instance, owner=None): diff --git a/dask/tests/test_config.py b/dask/tests/test_config.py index 18d946bda28..4c4ecf5f27d 100644 --- a/dask/tests/test_config.py +++ b/dask/tests/test_config.py @@ -11,10 +11,22 @@ from dask.config import (update, merge, collect, collect_yaml, collect_env, get, ensure_file, set, config, rename, update_defaults, refresh, expand_environment_variables, - normalize_key, normalize_nested_keys) + canonical_name) + from dask.utils import tmpfile +def test_canonical_name(): + c = {'foo-bar': 1, + 'fizz_buzz': 2} + assert canonical_name('foo-bar', c) == 'foo-bar' + assert canonical_name('foo_bar', c) == 'foo-bar' + assert canonical_name('fizz-buzz', c) == 'fizz_buzz' + assert canonical_name('fizz_buzz', c) == 'fizz_buzz' + assert canonical_name('new-key', c) == 'new-key' + assert canonical_name('new_key', c) == 'new_key' + + def test_update(): a = {'x': 1, 'y': {'a': 1}} b = {'x': 2, 'z': 3, 'y': OrderedDict({'b': 2})} @@ -134,7 +146,7 @@ def test_env(): } expected = { - 'a-b': 123, + 'a_b': 123, 'c': True, 'd': 'hello', 'e': {'x': 123, 'y': 456}, @@ -142,7 +154,8 @@ def test_env(): 'g': '/not/parsable/as/literal', } - assert collect_env(env) == expected + res = collect_env(env) + assert res == expected def test_collect(): @@ -201,7 +214,7 @@ def test_ensure_file(tmpdir): ensure_file(source=source, destination=dest, comment=False) with open(destination) as f: - result = yaml.load(f) + result = yaml.safe_load(f) assert result == a # don't overwrite old config files @@ -211,7 +224,7 @@ def test_ensure_file(tmpdir): ensure_file(source=source, destination=dest, comment=False) with open(destination) as f: - result = yaml.load(f) + result = yaml.safe_load(f) assert result == a os.remove(destination) @@ -224,7 +237,7 @@ def test_ensure_file(tmpdir): assert '123' in text with open(destination) as f: - result = yaml.load(f) + result = yaml.safe_load(f) assert not result @@ -343,37 +356,32 @@ def test_expand_environment_variables(inp, out): del os.environ['FOO'] -@pytest.mark.parametrize('inp,out', [ - ('custom_key', 'custom-key'), - ('custom-key', 'custom-key'), - (1, 1), - (2.3, 2.3) -]) -def test_normalize_key(inp, out): - assert normalize_key(inp) == out - - -def test_normalize_nested_keys(): - config = {'key_1': 1, - 'key_2': {'nested_key_1': 2}, - 'key_3': 3 - } - expected = {'key-1': 1, - 'key-2': {'nested-key-1': 2}, - 'key-3': 3 - } - assert normalize_nested_keys(config) == expected - - -def test_env_var_normalization(monkeypatch): +def test_env_var_canonical_name(monkeypatch): value = 3 - monkeypatch.setenv('DASK_A_B', value) + monkeypatch.setenv('DASK_A_B', str(value)) d = {} dask.config.refresh(config=d) assert get('a_b', config=d) == value assert get('a-b', config=d) == value +def test_get_set_canonical_name(): + c = {'x-y': {'a_b': 123}} + + keys = ['x_y.a_b', 'x-y.a-b', 'x_y.a-b'] + for k in keys: + assert dask.config.get(k, config=c) == 123 + + with dask.config.set({'x_y': {'a-b': 456}}, config=c): + for k in keys: + assert dask.config.get(k, config=c) == 456 + + # No change to new keys in sub dicts + with dask.config.set({'x_y': {'a-b': {'c_d': 1}, 'e-f': 2}}, config=c): + assert dask.config.get('x_y.a-b', config=c) == {'c_d': 1} + assert dask.config.get('x_y.e_f', config=c) == 2 + + @pytest.mark.parametrize('key', ['custom_key', 'custom-key']) def test_get_set_roundtrip(key): value = 123 diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index a975f0a5a72..dfec3774302 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -126,8 +126,8 @@ resulting in configuration values like the following: } Dask searches for all environment variables that start with ``DASK_``, then -transforms keys by converting to lower case, changing double-underscores to -nested structures, and changing single underscores to hyphens. +transforms keys by converting to lower case and changing double-underscores to +nested structures. Dask tries to parse all values with `ast.literal_eval `_, letting users @@ -367,7 +367,7 @@ Downstream projects typically follow the following convention: ...) This process keeps configuration in a central place, but also keeps it safe -within namespaces. It places config files in an easy to access location +within namespaces. It places config files in an easy to access location by default (``~/.config/dask/\*.yaml``), so that users can easily discover what they can change, but maintains the actual defaults within the source code, so that they more closely track changes in the library.