Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix merging of params when empty #6378

Merged
merged 3 commits into from
Aug 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 2 additions & 3 deletions dvc/repo/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,7 @@ def _pack_args(self, *args, **kwargs):

def _update_params(self, params: dict):
"""Update experiment params files with the specified values."""
from benedict import benedict

from dvc.utils.collections import merge_params
from dvc.utils.serialize import MODIFIERS

logger.debug("Using experiment params '%s'", params)
Expand All @@ -363,7 +362,7 @@ def _update_params(self, params: dict):
suffix = path.suffix.lower()
modify_data = MODIFIERS[suffix]
with modify_data(path, fs=self.repo.fs) as data:
benedict(data).merge(params[params_fname], overwrite=True)
merge_params(data, params[params_fname])

# Force params file changes to be staged in git
# Otherwise in certain situations the changes to params file may be
Expand Down
14 changes: 14 additions & 0 deletions dvc/utils/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,20 @@ def chunk_dict(d: Dict[_KT, _VT], size: int = 1) -> List[Dict[_KT, _VT]]:
return [{key: d[key] for key in chunk} for chunk in chunks(size, d)]


def merge_params(src: Dict, to_update: Dict) -> Dict:
    """Merge ``to_update`` into ``src`` in-place and return ``src``.

    The merge is recursive and is delegated to benedict, so benedict's key
    syntax is supported in ``to_update``. The very same ``src`` object that
    was passed in is returned.
    """
    from benedict import benedict

    if not src:
        # benedict has issues keeping references to an empty dictionary
        # (see: https://github.com/iterative/dvc/issues/6374), so the empty
        # ``src`` is updated directly instead of through a benedict wrapper.
        # ``to_update`` is still passed through benedict to expand the
        # syntax.
        expanded = benedict(to_update)
        src.update(expanded)
        return src

    wrapped = benedict(src)
    wrapped.merge(to_update, overwrite=True)
    return src


class _NamespacedDict(dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def run(self):
"shtab>=1.3.4,<2",
"rich>=10.0.0",
"dictdiffer>=0.8.1",
"python-benedict>=0.21.1,<0.24.1",
"python-benedict>=0.21.1",
"pyparsing==2.4.7",
"typing_extensions>=3.7.4",
"fsspec>=2021.7.0",
Expand Down
76 changes: 75 additions & 1 deletion tests/unit/utils/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# pylint: disable=unidiomatic-typecheck
import pytest
from mock import create_autospec

from dvc.utils.collections import apply_diff, chunk_dict, validate
from dvc.utils.collections import (
apply_diff,
chunk_dict,
merge_params,
validate,
)


class MyDict(dict):
Expand Down Expand Up @@ -135,3 +141,71 @@ def none_filter(result):
result = func()
test_func.assert_called_once()
assert result == [1, 2]


@pytest.mark.parametrize(
    "changes, expected",
    [
        (
            {"foo": "baz"},
            {"foo": "baz", "goo": {"bag": 3}, "lorem": False},
        ),
        (
            {"foo": "baz", "goo": "bar"},
            {"foo": "baz", "goo": "bar", "lorem": False},
        ),
        (
            {"goo.bag": 4},
            {"foo": {"bar": 1, "baz": 2}, "goo": {"bag": 4}, "lorem": False},
        ),
        (
            {"foo[0]": "bar"},
            {
                "foo": {"bar": 1, "baz": 2, 0: "bar"},
                "goo": {"bag": 3},
                "lorem": False,
            },
        ),
        (
            {"foo[1].baz": 3},
            {
                "foo": {"bar": 1, "baz": 2, 1: {"baz": 3}},
                "goo": {"bag": 3},
                "lorem": False,
            },
        ),
        (
            {"foo[1]": ["baz", "goo"]},
            {
                "foo": {"bar": 1, "baz": 2, 1: ["baz", "goo"]},
                "goo": {"bag": 3},
                "lorem": False,
            },
        ),
        (
            {"lorem.ipsum": 3},
            {
                "foo": {"bar": 1, "baz": 2},
                "goo": {"bag": 3},
                "lorem": {"ipsum": 3},
            },
        ),
        (
            {},
            {"foo": {"bar": 1, "baz": 2}, "goo": {"bag": 3}, "lorem": False},
        ),
    ],
)
def test_merge_params(changes, expected):
    """Check merging ``changes`` into a non-empty params dict.

    A fresh base dict is built for every case because ``merge_params``
    works in-place.
    """
    base = {"foo": {"bar": 1, "baz": 2}, "goo": {"bag": 3}, "lorem": False}
    result = merge_params(base, changes)

    # The returned value and the mutated input must both match ``expected``.
    assert result == expected
    assert expected == base
    # references should be preserved
    assert base is result


@pytest.mark.parametrize(
    "changes, expected",
    [
        ({"foo": "baz"}, {"foo": "baz"}),
        ({"foo": "baz", "goo": "bar"}, {"foo": "baz", "goo": "bar"}),
    ],
)
def test_merge_params_on_empty_src(changes, expected):
    """Check merging ``changes`` into an empty params dict."""
    empty = {}
    result = merge_params(empty, changes)

    # The returned value and the mutated input must both match ``expected``.
    assert result == expected
    assert expected == empty
    # references should be preserved
    assert empty is result