Skip to content

Commit

Permalink
Fix merging of params when empty (#6378)
Browse files Browse the repository at this point in the history
* Fix merging of params when empty

Recent release of python-benedict==0.24.1 started ignoring empty dictionaries
during merge. The way we expected to work was questionable and also how
benedict itself tries to handle the references to empty dictionary is
also questionable.

I consider this to be a proper way to handle this even if this gets fixed
on the benedict side.

* Revert "setup: limit python-benedict version (#6377)"

This reverts commit 15de826.

* add comment for why passing through benedict
  • Loading branch information
skshetry committed Aug 2, 2021
1 parent 76be53b commit 7444696
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 5 deletions.
5 changes: 2 additions & 3 deletions dvc/repo/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,7 @@ def _pack_args(self, *args, **kwargs):

def _update_params(self, params: dict):
"""Update experiment params files with the specified values."""
from benedict import benedict

from dvc.utils.collections import merge_params
from dvc.utils.serialize import MODIFIERS

logger.debug("Using experiment params '%s'", params)
Expand All @@ -363,7 +362,7 @@ def _update_params(self, params: dict):
suffix = path.suffix.lower()
modify_data = MODIFIERS[suffix]
with modify_data(path, fs=self.repo.fs) as data:
benedict(data).merge(params[params_fname], overwrite=True)
merge_params(data, params[params_fname])

# Force params file changes to be staged in git
# Otherwise in certain situations the changes to params file may be
Expand Down
14 changes: 14 additions & 0 deletions dvc/utils/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,20 @@ def chunk_dict(d: Dict[_KT, _VT], size: int = 1) -> List[Dict[_KT, _VT]]:
return [{key: d[key] for key in chunk} for chunk in chunks(size, d)]


def merge_params(src: Dict, to_update: Dict) -> Dict:
"""Recursively merges params with benedict's syntax support in-place."""
from benedict import benedict

if src:
benedict(src).merge(to_update, overwrite=True)
else:
# benedict has issues keeping references to an empty dictionary
# see: https://github.com/iterative/dvc/issues/6374.
# Also, passing to_update through benedict to expand the syntax.
src.update(benedict(to_update))
return src


class _NamespacedDict(dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def run(self):
"shtab>=1.3.4,<2",
"rich>=10.0.0",
"dictdiffer>=0.8.1",
"python-benedict>=0.21.1,<0.24.1",
"python-benedict>=0.21.1",
"pyparsing==2.4.7",
"typing_extensions>=3.7.4",
"fsspec>=2021.7.0",
Expand Down
76 changes: 75 additions & 1 deletion tests/unit/utils/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# pylint: disable=unidiomatic-typecheck
import pytest
from mock import create_autospec

from dvc.utils.collections import apply_diff, chunk_dict, validate
from dvc.utils.collections import (
apply_diff,
chunk_dict,
merge_params,
validate,
)


class MyDict(dict):
Expand Down Expand Up @@ -135,3 +141,71 @@ def none_filter(result):
result = func()
test_func.assert_called_once()
assert result == [1, 2]


@pytest.mark.parametrize(
"changes, expected",
[
[{"foo": "baz"}, {"foo": "baz", "goo": {"bag": 3}, "lorem": False}],
[
{"foo": "baz", "goo": "bar"},
{"foo": "baz", "goo": "bar", "lorem": False},
],
[
{"goo.bag": 4},
{"foo": {"bar": 1, "baz": 2}, "goo": {"bag": 4}, "lorem": False},
],
[
{"foo[0]": "bar"},
{
"foo": {"bar": 1, "baz": 2, 0: "bar"},
"goo": {"bag": 3},
"lorem": False,
},
],
[
{"foo[1].baz": 3},
{
"foo": {"bar": 1, "baz": 2, 1: {"baz": 3}},
"goo": {"bag": 3},
"lorem": False,
},
],
[
{"foo[1]": ["baz", "goo"]},
{
"foo": {"bar": 1, "baz": 2, 1: ["baz", "goo"]},
"goo": {"bag": 3},
"lorem": False,
},
],
[
{"lorem.ipsum": 3},
{
"foo": {"bar": 1, "baz": 2},
"goo": {"bag": 3},
"lorem": {"ipsum": 3},
},
],
[{}, {"foo": {"bar": 1, "baz": 2}, "goo": {"bag": 3}, "lorem": False}],
],
)
def test_merge_params(changes, expected):
params = {"foo": {"bar": 1, "baz": 2}, "goo": {"bag": 3}, "lorem": False}
merged = merge_params(params, changes)
assert merged == expected == params
assert params is merged # references should be preserved


@pytest.mark.parametrize(
"changes, expected",
[
[{"foo": "baz"}, {"foo": "baz"}],
[{"foo": "baz", "goo": "bar"}, {"foo": "baz", "goo": "bar"}],
],
)
def test_merge_params_on_empty_src(changes, expected):
params = {}
merged = merge_params(params, changes)
assert merged == expected == params
assert params is merged # references should be preserved

0 comments on commit 7444696

Please sign in to comment.