Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove config key normalization #4742

Merged
merged 2 commits into from Apr 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
73 changes: 30 additions & 43 deletions dask/config.py
Expand Up @@ -9,7 +9,7 @@
except ImportError:
yaml = None

from .compatibility import makedirs, builtins, Mapping, string_types
from .compatibility import makedirs, builtins, Mapping


no_default = '__no_default__'
Expand Down Expand Up @@ -38,6 +38,29 @@
defaults = []


def canonical_name(k, config):
"""Return the canonical name for a key.

Handles user choice of '-' or '_' conventions by standardizing on whichever
version was set first. If a key already exists in either hyphen or
underscore form, the existing version is the canonical name. If neither
version exists the original key is used as is.
"""
try:
if k in config:
return k
except TypeError:
# config is not a mapping, return the same name as provided
return k

altk = k.replace('_', '-') if '_' in k else k.replace('-', '_')

if altk in config:
return altk

return k


def update(old, new, priority='new'):
""" Update a nested dictionary with values from another

Expand Down Expand Up @@ -68,11 +91,10 @@ def update(old, new, priority='new'):
dask.config.merge
"""
for k, v in new.items():
if k not in old and isinstance(v, Mapping):
old[k] = {}
k = canonical_name(k, old)

if isinstance(v, Mapping):
if old[k] is None:
if k not in old or old[k] is None:
old[k] = {}
update(old[k], v, priority=priority)
else:
Expand Down Expand Up @@ -104,38 +126,6 @@ def merge(*dicts):
return result


def normalize_key(key):
""" Replaces underscores with hyphens in string keys

Parameters
----------
key : string, int, or float
Key to assign.
"""
if isinstance(key, string_types):
key = key.replace('_', '-')
return key


def normalize_nested_keys(config):
""" Replaces underscores with hyphens for keys for a nested Mapping

Examples
--------
>>> a = {'x': 1, 'y_1': {'a_2': 2}}
>>> normalize_nested_keys(a)
{'x': 1, 'y-1': {'a-2': 2}}
"""
config_norm = {}
for key, value in config.items():
if isinstance(value, Mapping):
value = normalize_nested_keys(value)
key_norm = normalize_key(key)
config_norm[key_norm] = value

return config_norm


def collect_yaml(paths=paths):
""" Collect configuration from yaml files

Expand Down Expand Up @@ -166,7 +156,6 @@ def collect_yaml(paths=paths):
try:
with open(path) as f:
data = yaml.safe_load(f.read()) or {}
data = normalize_nested_keys(data)
configs.append(data)
except (OSError, IOError):
# Ignore permission errors
Expand All @@ -184,7 +173,6 @@ def collect_env(env=None):

- Lower-cases the key text
- Treats ``__`` (double-underscore) as nested access
- Replaces ``_`` (underscore) with a hyphen.
- Calls ``ast.literal_eval`` on the value
"""
if env is None:
Expand All @@ -193,7 +181,6 @@ def collect_env(env=None):
for name, value in env.items():
if name.startswith('DASK_'):
varname = name[5:].lower().replace('__', '.')
varname = normalize_key(varname)
try:
d[varname] = ast.literal_eval(value)
except (SyntaxError, ValueError):
Expand Down Expand Up @@ -329,7 +316,7 @@ def _assign(cls, keys, value, d, old=None, path=[]):
path: List[str]
Used internally to hold the path of old values
"""
key = normalize_key(keys[0])
key = canonical_name(keys[0], d)
if len(keys) == 1:
if old is not None:
path_key = tuple(path + [key])
Expand Down Expand Up @@ -436,7 +423,7 @@ def get(key, default=no_default, config=config):
keys = key.split('.')
result = config
for k in keys:
k = normalize_key(k)
k = canonical_name(k, result)
try:
result = result[k]
except (TypeError, IndexError, KeyError):
Expand All @@ -452,8 +439,8 @@ def rename(aliases, config=config):

This helps migrate older configuration versions over time
"""
old = list()
new = dict()
old = []
new = {}
for o, n in aliases.items():
value = get(o, None, config=config)
if value is not None:
Expand Down
2 changes: 1 addition & 1 deletion dask/context.py
Expand Up @@ -65,7 +65,7 @@ def globalmethod(default=None, key=None, falsey=None):
class GlobalMethod(object):
def __init__(self, default, key, falsey=None):
self._default = default
self._key = config.normalize_key(key)
self._key = key
self._falsey = falsey

def __get__(self, instance, owner=None):
Expand Down
68 changes: 38 additions & 30 deletions dask/tests/test_config.py
Expand Up @@ -11,10 +11,22 @@
from dask.config import (update, merge, collect, collect_yaml, collect_env,
get, ensure_file, set, config, rename,
update_defaults, refresh, expand_environment_variables,
normalize_key, normalize_nested_keys)
canonical_name)

from dask.utils import tmpfile


def test_canonical_name():
c = {'foo-bar': 1,
'fizz_buzz': 2}
assert canonical_name('foo-bar', c) == 'foo-bar'
assert canonical_name('foo_bar', c) == 'foo-bar'
assert canonical_name('fizz-buzz', c) == 'fizz_buzz'
assert canonical_name('fizz_buzz', c) == 'fizz_buzz'
assert canonical_name('new-key', c) == 'new-key'
assert canonical_name('new_key', c) == 'new_key'


def test_update():
a = {'x': 1, 'y': {'a': 1}}
b = {'x': 2, 'z': 3, 'y': OrderedDict({'b': 2})}
Expand Down Expand Up @@ -134,15 +146,16 @@ def test_env():
}

expected = {
'a-b': 123,
'a_b': 123,
'c': True,
'd': 'hello',
'e': {'x': 123, 'y': 456},
'f': [1, 2, "3"],
'g': '/not/parsable/as/literal',
}

assert collect_env(env) == expected
res = collect_env(env)
assert res == expected


def test_collect():
Expand Down Expand Up @@ -201,7 +214,7 @@ def test_ensure_file(tmpdir):
ensure_file(source=source, destination=dest, comment=False)

with open(destination) as f:
result = yaml.load(f)
result = yaml.safe_load(f)
assert result == a

# don't overwrite old config files
Expand All @@ -211,7 +224,7 @@ def test_ensure_file(tmpdir):
ensure_file(source=source, destination=dest, comment=False)

with open(destination) as f:
result = yaml.load(f)
result = yaml.safe_load(f)
assert result == a

os.remove(destination)
Expand All @@ -224,7 +237,7 @@ def test_ensure_file(tmpdir):
assert '123' in text

with open(destination) as f:
result = yaml.load(f)
result = yaml.safe_load(f)
assert not result


Expand Down Expand Up @@ -343,37 +356,32 @@ def test_expand_environment_variables(inp, out):
del os.environ['FOO']


@pytest.mark.parametrize('inp,out', [
('custom_key', 'custom-key'),
('custom-key', 'custom-key'),
(1, 1),
(2.3, 2.3)
])
def test_normalize_key(inp, out):
assert normalize_key(inp) == out


def test_normalize_nested_keys():
config = {'key_1': 1,
'key_2': {'nested_key_1': 2},
'key_3': 3
}
expected = {'key-1': 1,
'key-2': {'nested-key-1': 2},
'key-3': 3
}
assert normalize_nested_keys(config) == expected


def test_env_var_normalization(monkeypatch):
def test_env_var_canonical_name(monkeypatch):
value = 3
monkeypatch.setenv('DASK_A_B', value)
monkeypatch.setenv('DASK_A_B', str(value))
d = {}
dask.config.refresh(config=d)
assert get('a_b', config=d) == value
assert get('a-b', config=d) == value


def test_get_set_canonical_name():
c = {'x-y': {'a_b': 123}}

keys = ['x_y.a_b', 'x-y.a-b', 'x_y.a-b']
for k in keys:
assert dask.config.get(k, config=c) == 123

with dask.config.set({'x_y': {'a-b': 456}}, config=c):
for k in keys:
assert dask.config.get(k, config=c) == 456

# No change to new keys in sub dicts
with dask.config.set({'x_y': {'a-b': {'c_d': 1}, 'e-f': 2}}, config=c):
assert dask.config.get('x_y.a-b', config=c) == {'c_d': 1}
assert dask.config.get('x_y.e_f', config=c) == 2


@pytest.mark.parametrize('key', ['custom_key', 'custom-key'])
def test_get_set_roundtrip(key):
value = 123
Expand Down
6 changes: 3 additions & 3 deletions docs/source/configuration.rst
Expand Up @@ -126,8 +126,8 @@ resulting in configuration values like the following:
}

Dask searches for all environment variables that start with ``DASK_``, then
transforms keys by converting to lower case, changing double-underscores to
nested structures, and changing single underscores to hyphens.
transforms keys by converting to lower case and changing double-underscores to
nested structures.

Dask tries to parse all values with `ast.literal_eval
<https://docs.python.org/3/library/ast.html#ast.literal_eval>`_, letting users
Expand Down Expand Up @@ -367,7 +367,7 @@ Downstream projects typically follow the following convention:
...)

This process keeps configuration in a central place, but also keeps it safe
within namespaces. It places config files in an easy to access location
within namespaces. It places config files in an easy to access location
by default (``~/.config/dask/\*.yaml``), so that users can easily discover what
they can change, but maintains the actual defaults within the source code, so
that they more closely track changes in the library.
Expand Down