From 3d133186d4ec0095f103dc2a9fdcf0ad7881800b Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 00:48:10 -0600 Subject: [PATCH 01/31] summon: ugly hack --- dvc/api.py | 37 +++++++++++++++++++++++++++++++++++++ tests/func/test_api.py | 23 +++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/dvc/api.py b/dvc/api.py index bf976dcf01..ac64b3dcaa 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -1,4 +1,5 @@ import os +import importlib.util from contextlib import contextmanager try: @@ -6,6 +7,8 @@ except ImportError: from contextlib import GeneratorContextManager as GCM +import ruamel.yaml + from dvc.utils.compat import urlparse from dvc.repo import Repo from dvc.external_repo import external_repo @@ -69,3 +72,37 @@ def _make_repo(repo_url, rev=None): else: with external_repo(url=repo_url, rev=rev) as repo: yield repo + + +def summon(name, repo=None, rev=None): + # 1. Read artifacts.yaml + # 2. Pull dependencies + # 3. Get the call and parameters + # 4. Invoke the call with the given parameters + # 5. Return the result + + with _make_repo(repo, rev=rev) as _repo: + artifacts_path = os.path.join(_repo.root_dir, "artifacts.yaml") + + with _repo.open(artifacts_path, mode="r") as fd: + artifacts = ruamel.yaml.load(fd.read()).get("artifacts") + + artifact = next(x for x in artifacts if x.get("name") == name) + + spec = importlib.util.spec_from_file_location( + artifact.get("call"), + os.path.join(_repo.root_dir, artifact.get("file")), + ) + + module = importlib.util.module_from_spec(spec) + + # ugly af, don't use exec / global, pls + call = 'global result; result = {method}({params})'.format( + method=artifact.get("call"), + params=", ".join(k + "=" + v for k, v in artifact.get("params").items()) + ) + + spec.loader.exec_module(module) + exec(call) + global result + return result diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 54623c080d..7be749e02c 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -3,6 +3,7 @@ import os import shutil +import ruamel.yaml import pytest from dvc import api @@ -126,3 +127,25 @@ def test_open_not_cached(dvc): os.remove(metric_file) with pytest.raises(FileMissingError): api.read(metric_file) + + +def test_summon(tmp_dir, erepo_dir, dvc): + artifacts_yaml = ruamel.yaml.dump({ + "artifacts": [{ + "name": "sum", + "description": "The sum of 1 + 2", + "call": "module.sum", + "file": "module.py", + "params": {"x": "1", "y": "2"}, + "deps": ["foo"], + }] + }) + + erepo_dir.gen("module.py", "def sum(x, y): return x + y") + erepo_dir.gen("artifacts.yaml", artifacts_yaml) + erepo_dir.scm.add(["module.py", "artifacts.yaml"]) + erepo_dir.scm.commit("Add module.py and artifacts.yaml") + + repo_url = "file://{}".format(erepo_dir) + + assert api.summon("sum", repo=repo_url) == 3 From b921730f37497ce08628867489724220dfa3272e Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 13:31:52 -0600 Subject: [PATCH 02/31] summon: safely get a module; change naming --- dvc/api.py | 33 ++++++++++++--------------------- tests/func/test_api.py | 36 ++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index ac64b3dcaa..ffb26cd90e 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -75,34 +75,25 @@ def _make_repo(repo_url, rev=None): def summon(name, repo=None, rev=None): - # 1. Read artifacts.yaml - # 2. Pull dependencies + # 1. Read grimoire.yaml + # 2. Pull dependencies (TODO) # 3. Get the call and parameters # 4. Invoke the call with the given parameters # 5. Return the result with _make_repo(repo, rev=rev) as _repo: - artifacts_path = os.path.join(_repo.root_dir, "artifacts.yaml") + grimoire_path = os.path.join(_repo.root_dir, "grimoire.yaml") - with _repo.open(artifacts_path, mode="r") as fd: - artifacts = ruamel.yaml.load(fd.read()).get("artifacts") - - artifact = next(x for x in artifacts if x.get("name") == name) - - spec = importlib.util.spec_from_file_location( - artifact.get("call"), - os.path.join(_repo.root_dir, artifact.get("file")), - ) + with open(grimoire_path, "r") as fobj: + spells = ruamel.yaml.load(fobj.read()).get("spells") + spell = next(spell for spell in spells if spell.get("name") == name) + spell_path = os.path.join(_repo.root_dir, spell.get("file")) + spec = importlib.util.spec_from_file_location(name, spell_path) module = importlib.util.module_from_spec(spec) - # ugly af, don't use exec / global, pls - call = 'global result; result = {method}({params})'.format( - method=artifact.get("call"), - params=", ".join(k + "=" + v for k, v in artifact.get("params").items()) - ) - spec.loader.exec_module(module) - exec(call) - global result - return result + + method = getattr(module, spell.get("method")) + + return method(**spell.get("params")) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 7be749e02c..b4cdcb2646 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -130,22 +130,26 @@ def test_open_not_cached(dvc): def test_summon(tmp_dir, erepo_dir, dvc): - artifacts_yaml = ruamel.yaml.dump({ - "artifacts": [{ - "name": "sum", - "description": "The sum of 1 + 2", - "call": "module.sum", - "file": "module.py", - "params": {"x": "1", "y": "2"}, - "deps": ["foo"], - }] - }) - - erepo_dir.gen("module.py", "def sum(x, y): return x + y") - erepo_dir.gen("artifacts.yaml", artifacts_yaml) - erepo_dir.scm.add(["module.py", "artifacts.yaml"]) - erepo_dir.scm.commit("Add module.py and artifacts.yaml") + grimoire_yaml = ruamel.yaml.dump( + { + "spells": [ + { + "name": "three", + "description": "The sum of 1 + 2", + "file": "calculator.py", + "method": "sum", + "params": {"x": 1, "y": 2}, + "deps": ["foo"], + } + ] + } + ) + + erepo_dir.gen("calculator.py", "def sum(x, y): return x + y") + erepo_dir.gen("grimoire.yaml", grimoire_yaml) + erepo_dir.scm.add(["calculator.py", "grimoire.yaml"]) + erepo_dir.scm.commit("Add calculator.py and grimoire.yaml") repo_url = "file://{}".format(erepo_dir) - assert api.summon("sum", repo=repo_url) == 3 + assert api.summon("three", repo=repo_url) == 3 From 789db9169ac697fad73df75d3ae0569e2e63050d Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 16:40:21 -0600 Subject: [PATCH 03/31] summon: support checking out dependencies --- dvc/api.py | 27 +++++++++++++++++++++------ tests/func/test_api.py | 32 +++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index ffb26cd90e..390ea029d4 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -76,7 +76,7 @@ def _make_repo(repo_url, rev=None): def summon(name, repo=None, rev=None): # 1. Read grimoire.yaml - # 2. Pull dependencies (TODO) + # 2. Pull dependencies # 3. Get the call and parameters # 4. Invoke the call with the given parameters # 5. Return the result @@ -85,15 +85,30 @@ def summon(name, repo=None, rev=None): grimoire_path = os.path.join(_repo.root_dir, "grimoire.yaml") with open(grimoire_path, "r") as fobj: - spells = ruamel.yaml.load(fobj.read()).get("spells") + spells = ruamel.yaml.safe_load(fobj.read()).get("spells") + spell = next(x for x in spells if x.get("name") == name) + + outs = [ + _repo.find_outs_by_path(os.path.join(_repo.root_dir, dep))[0] + for dep in spell.get("deps") + ] + + with _repo.state: + for out in outs: + _repo.cloud.pull(out.get_used_cache()) + out.checkout() + + previous_dir = os.path.abspath(os.curdir) + + os.chdir(_repo.root_dir) - spell = next(spell for spell in spells if spell.get("name") == name) spell_path = os.path.join(_repo.root_dir, spell.get("file")) spec = importlib.util.spec_from_file_location(name, spell_path) module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - method = getattr(module, spell.get("method")) + result = method(**spell.get("params")) + + os.chdir(previous_dir) - return method(**spell.get("params")) + return result diff --git a/tests/func/test_api.py b/tests/func/test_api.py index b4cdcb2646..f640dfd218 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -11,6 +11,7 @@ from dvc.main import main from dvc.path_info import URLInfo from dvc.remote.config import RemoteConfig +from dvc.utils.compat import fspath from tests.remotes import Azure, GCP, HDFS, Local, OSS, S3, SSH @@ -129,27 +130,36 @@ def test_open_not_cached(dvc): api.read(metric_file) -def test_summon(tmp_dir, erepo_dir, dvc): +def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): grimoire_yaml = ruamel.yaml.dump( { "spells": [ { - "name": "three", - "description": "The sum of 1 + 2", + "name": "sum", + "description": "Add to ", "file": "calculator.py", - "method": "sum", - "params": {"x": 1, "y": 2}, - "deps": ["foo"], + "method": "add_to_num", + "params": {"x": 1}, + "deps": ["number"], } ] } ) - erepo_dir.gen("calculator.py", "def sum(x, y): return x + y") - erepo_dir.gen("grimoire.yaml", grimoire_yaml) - erepo_dir.scm.add(["calculator.py", "grimoire.yaml"]) - erepo_dir.scm.commit("Add calculator.py and grimoire.yaml") + # XXX: Chdir because `stage.create` defaults to `wdir = os.curdir` + with monkeypatch.context() as m: + m.chdir(fspath(erepo_dir)) + + erepo_dir.dvc_gen("number", "100") + + erepo_dir.scm_gen("grimoire.yaml", grimoire_yaml) + erepo_dir.scm_gen( + "calculator.py", + "def add_to_num(x): return x + int(open('number').read())", + ) + erepo_dir.scm.add(["number.dvc", "calculator.py", "grimoire.yaml"]) + erepo_dir.scm.commit("Add files") repo_url = "file://{}".format(erepo_dir) - assert api.summon("three", repo=repo_url) == 3 + assert api.summon("sum", repo=repo_url) == 101 From d0344ce975fe97732b859605c873ec4c68924b86 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 17:00:39 -0600 Subject: [PATCH 04/31] summon: grimoire/spells -> artifacts --- dvc/api.py | 20 ++++++++++---------- tests/func/test_api.py | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 390ea029d4..957fdf9a09 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -75,22 +75,22 @@ def _make_repo(repo_url, rev=None): def summon(name, repo=None, rev=None): - # 1. Read grimoire.yaml + # 1. Read artifacts.yaml # 2. Pull dependencies # 3. Get the call and parameters # 4. Invoke the call with the given parameters # 5. Return the result with _make_repo(repo, rev=rev) as _repo: - grimoire_path = os.path.join(_repo.root_dir, "grimoire.yaml") + artifacts_path = os.path.join(_repo.root_dir, "artifacts.yaml") - with open(grimoire_path, "r") as fobj: - spells = ruamel.yaml.safe_load(fobj.read()).get("spells") - spell = next(x for x in spells if x.get("name") == name) + with open(artifacts_path, "r") as fobj: + artifacts = ruamel.yaml.safe_load(fobj.read()).get("artifacts") + artifact = next(x for x in artifacts if x.get("name") == name) outs = [ _repo.find_outs_by_path(os.path.join(_repo.root_dir, dep))[0] - for dep in spell.get("deps") + for dep in artifact.get("deps") ] with _repo.state: @@ -102,12 +102,12 @@ def summon(name, repo=None, rev=None): os.chdir(_repo.root_dir) - spell_path = os.path.join(_repo.root_dir, spell.get("file")) - spec = importlib.util.spec_from_file_location(name, spell_path) + artifact_path = os.path.join(_repo.root_dir, artifact.get("file")) + spec = importlib.util.spec_from_file_location(name, artifact_path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - method = getattr(module, spell.get("method")) - result = method(**spell.get("params")) + method = getattr(module, artifact.get("method")) + result = method(**artifact.get("params")) os.chdir(previous_dir) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index f640dfd218..e424db02c0 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -131,9 +131,9 @@ def test_open_not_cached(dvc): def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): - grimoire_yaml = ruamel.yaml.dump( + artifacts_yaml = ruamel.yaml.dump( { - "spells": [ + "artifacts": [ { "name": "sum", "description": "Add to ", @@ -152,12 +152,12 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): erepo_dir.dvc_gen("number", "100") - erepo_dir.scm_gen("grimoire.yaml", grimoire_yaml) + erepo_dir.scm_gen("artifacts.yaml", artifacts_yaml) erepo_dir.scm_gen( "calculator.py", "def add_to_num(x): return x + int(open('number').read())", ) - erepo_dir.scm.add(["number.dvc", "calculator.py", "grimoire.yaml"]) + erepo_dir.scm.add(["number.dvc", "calculator.py", "artifacts.yaml"]) erepo_dir.scm.commit("Add files") repo_url = "file://{}".format(erepo_dir) From 5d66a572d4a1f2f87a5008b3616c63455f277bcd Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 17:20:07 -0600 Subject: [PATCH 05/31] summon: support overriding parameters --- dvc/api.py | 9 +++++++-- tests/func/test_api.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 957fdf9a09..3628e9fdf0 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -74,7 +74,7 @@ def _make_repo(repo_url, rev=None): yield repo -def summon(name, repo=None, rev=None): +def summon(name, params=None, repo=None, rev=None): # 1. Read artifacts.yaml # 2. Pull dependencies # 3. Get the call and parameters @@ -107,7 +107,12 @@ def summon(name, repo=None, rev=None): module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) method = getattr(module, artifact.get("method")) - result = method(**artifact.get("params")) + _params = artifact.get("params") + + if params: + _params.update(params) + + result = method(**_params) os.chdir(previous_dir) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index e424db02c0..67eedcf236 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -163,3 +163,4 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): repo_url = "file://{}".format(erepo_dir) assert api.summon("sum", repo=repo_url) == 101 + assert api.summon("sum", repo=repo_url, params={"x": 2}) == 102 From fea3d1400aa33abba177a906bd0d4bbdefd362ec Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 22:15:54 -0600 Subject: [PATCH 06/31] summon: return empty generator for object.deps --- dvc/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/api.py b/dvc/api.py index 3628e9fdf0..f12f55dc41 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -90,7 +90,7 @@ def summon(name, params=None, repo=None, rev=None): outs = [ _repo.find_outs_by_path(os.path.join(_repo.root_dir, dep))[0] - for dep in artifact.get("deps") + for dep in artifact.get("deps", ()) ] with _repo.state: From 4ce1113ccfdcabf27418becec0610c6c66371c84 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 22:18:37 -0600 Subject: [PATCH 07/31] summon: use find_out_by_relpath --- dvc/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/api.py b/dvc/api.py index f12f55dc41..7b4f144e67 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -89,7 +89,7 @@ def summon(name, params=None, repo=None, rev=None): artifact = next(x for x in artifacts if x.get("name") == name) outs = [ - _repo.find_outs_by_path(os.path.join(_repo.root_dir, dep))[0] + _repo.find_out_by_relpath(dep) for dep in artifact.get("deps", ()) ] From 32f35ea686a90ab2d69dc3422d8010d3abcf8a5a Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 22:29:44 -0600 Subject: [PATCH 08/31] summon: remove scm.add, `*_gen` already handles it --- tests/func/test_api.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 67eedcf236..2dc7a1ad1b 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -150,15 +150,13 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): with monkeypatch.context() as m: m.chdir(fspath(erepo_dir)) - erepo_dir.dvc_gen("number", "100") - - erepo_dir.scm_gen("artifacts.yaml", artifacts_yaml) - erepo_dir.scm_gen( - "calculator.py", - "def add_to_num(x): return x + int(open('number').read())", - ) - erepo_dir.scm.add(["number.dvc", "calculator.py", "artifacts.yaml"]) - erepo_dir.scm.commit("Add files") + erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") + erepo_dir.scm_gen("artifacts.yaml", artifacts_yaml) + erepo_dir.scm_gen( + "calculator.py", + "def add_to_num(x): return x + int(open('number').read())", + ) + erepo_dir.scm.commit("Add files") repo_url = "file://{}".format(erepo_dir) From 0a54683f9121b92cd7d8df853c5e6056a0cca06f Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 22:30:22 -0600 Subject: [PATCH 09/31] :nail_care: black --- dvc/api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 7b4f144e67..492373f959 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -89,8 +89,7 @@ def summon(name, params=None, repo=None, rev=None): artifact = next(x for x in artifacts if x.get("name") == name) outs = [ - _repo.find_out_by_relpath(dep) - for dep in artifact.get("deps", ()) + _repo.find_out_by_relpath(dep) for dep in artifact.get("deps", ()) ] with _repo.state: From 1b3c771df524077606bd8e006d0a9a022c1800d8 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 22:42:42 -0600 Subject: [PATCH 10/31] summon: artifacs.yaml -> dvcsummon.yaml; artifact -> object --- dvc/api.py | 20 +++++++++----------- tests/func/test_api.py | 6 +++--- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 492373f959..bd9488409f 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -75,22 +75,20 @@ def _make_repo(repo_url, rev=None): def summon(name, params=None, repo=None, rev=None): - # 1. Read artifacts.yaml + # 1. Read dvcsummon.yaml # 2. Pull dependencies # 3. Get the call and parameters # 4. Invoke the call with the given parameters # 5. Return the result with _make_repo(repo, rev=rev) as _repo: - artifacts_path = os.path.join(_repo.root_dir, "artifacts.yaml") + dvcsummon_path = os.path.join(_repo.root_dir, "dvcsummon.yaml") - with open(artifacts_path, "r") as fobj: - artifacts = ruamel.yaml.safe_load(fobj.read()).get("artifacts") - artifact = next(x for x in artifacts if x.get("name") == name) + with open(dvcsummon_path, "r") as fobj: + objects = ruamel.yaml.safe_load(fobj.read()).get("objects") + obj = next(x for x in objects if x.get("name") == name) - outs = [ - _repo.find_out_by_relpath(dep) for dep in artifact.get("deps", ()) - ] + outs = [_repo.find_out_by_relpath(dep) for dep in obj.get("deps", ())] with _repo.state: for out in outs: @@ -101,12 +99,12 @@ def summon(name, params=None, repo=None, rev=None): os.chdir(_repo.root_dir) - artifact_path = os.path.join(_repo.root_dir, artifact.get("file")) + artifact_path = os.path.join(_repo.root_dir, obj.get("file")) spec = importlib.util.spec_from_file_location(name, artifact_path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - method = getattr(module, artifact.get("method")) - _params = artifact.get("params") + method = getattr(module, obj.get("method")) + _params = obj.get("params") if params: _params.update(params) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 2dc7a1ad1b..98a7040ee9 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -131,9 +131,9 @@ def test_open_not_cached(dvc): def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): - artifacts_yaml = ruamel.yaml.dump( + dvcsummon_yaml = ruamel.yaml.dump( { - "artifacts": [ + "objects": [ { "name": "sum", "description": "Add to ", @@ -151,7 +151,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): m.chdir(fspath(erepo_dir)) erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") - erepo_dir.scm_gen("artifacts.yaml", artifacts_yaml) + erepo_dir.scm_gen("dvcsummon.yaml", dvcsummon_yaml) erepo_dir.scm_gen( "calculator.py", "def add_to_num(x): return x + int(open('number').read())", From 29339d29a248852ceb39af506da9253ebc62ceb2 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 23:25:45 -0600 Subject: [PATCH 11/31] summon: file & method -> call --- dvc/api.py | 57 ++++++++++++++++++++++++++++++------------ tests/func/test_api.py | 3 +-- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index bd9488409f..972082acb7 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -1,7 +1,9 @@ +import importlib import os -import importlib.util +import sys from contextlib import contextmanager + try: from contextlib import _GeneratorContextManager as GCM except ImportError: @@ -9,7 +11,7 @@ import ruamel.yaml -from dvc.utils.compat import urlparse +from dvc.utils.compat import urlparse, builtin_str from dvc.repo import Repo from dvc.external_repo import external_repo @@ -95,22 +97,45 @@ def summon(name, params=None, repo=None, rev=None): _repo.cloud.pull(out.get_used_cache()) out.checkout() - previous_dir = os.path.abspath(os.curdir) - - os.chdir(_repo.root_dir) + with _chdir_and_syspath(_repo.root_dir): + call = _import_string(obj.get("call")) + _params = obj.get("params") - artifact_path = os.path.join(_repo.root_dir, obj.get("file")) - spec = importlib.util.spec_from_file_location(name, artifact_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - method = getattr(module, obj.get("method")) - _params = obj.get("params") + if params: + _params.update(params) - if params: - _params.update(params) + result = call(**_params) - result = method(**_params) + return result - os.chdir(previous_dir) - return result +@contextmanager +def _chdir_and_syspath(path): + old_dir = os.path.abspath(os.curdir) + os.chdir(path) + sys.path.insert(0, path) + yield + sys.path.pop(0) + os.chdir(old_dir) + + +def _import_string(import_name, silent=False): + """Imports an object based on a string. + Useful to delay import to not load everything on startup. + Use dotted notaion in `import_name`, e.g. 'dvc.remote.gs.RemoteGS'. + If the `silent` is True the return value will be `None` if the import + fails. + + :return: imported object + """ + import_name = builtin_str(import_name) + + try: + if "." in import_name: + module, obj = import_name.rsplit(".", 1) + else: + return importlib.import_module(import_name) + return getattr(importlib.import_module(module), obj) + except (ImportError, AttributeError): + if not silent: + raise diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 98a7040ee9..a1c6f9c56a 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -137,8 +137,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): { "name": "sum", "description": "Add to ", - "file": "calculator.py", - "method": "add_to_num", + "call": "calculator.add_to_num", "params": {"x": 1}, "deps": ["number"], } From 4273ff49b38ef2bf86a40689eafa403306c76e16 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 23:32:28 -0600 Subject: [PATCH 12/31] summon: params -> args; use `summon` key in yaml --- dvc/api.py | 16 +++++++++------- tests/func/test_api.py | 11 +++++++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 972082acb7..b25001c15c 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -76,7 +76,7 @@ def _make_repo(repo_url, rev=None): yield repo -def summon(name, params=None, repo=None, rev=None): +def summon(name, args=None, repo=None, rev=None): # 1. Read dvcsummon.yaml # 2. Pull dependencies # 3. Get the call and parameters @@ -89,8 +89,11 @@ def summon(name, params=None, repo=None, rev=None): with open(dvcsummon_path, "r") as fobj: objects = ruamel.yaml.safe_load(fobj.read()).get("objects") obj = next(x for x in objects if x.get("name") == name) + _summon = obj.get("summon") - outs = [_repo.find_out_by_relpath(dep) for dep in obj.get("deps", ())] + outs = [ + _repo.find_out_by_relpath(dep) for dep in _summon.get("deps", ()) + ] with _repo.state: for out in outs: @@ -98,13 +101,12 @@ def summon(name, params=None, repo=None, rev=None): out.checkout() with _chdir_and_syspath(_repo.root_dir): - call = _import_string(obj.get("call")) - _params = obj.get("params") + call = _import_string(_summon.get("call")) - if params: - _params.update(params) + if args: + _summon.get("args").update(args) - result = call(**_params) + result = call(**_summon.get("args")) return result diff --git a/tests/func/test_api.py b/tests/func/test_api.py index a1c6f9c56a..680888e486 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -137,9 +137,12 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): { "name": "sum", "description": "Add to ", - "call": "calculator.add_to_num", - "params": {"x": 1}, - "deps": ["number"], + "summon": { + "type": "python", + "call": "calculator.add_to_num", + "args": {"x": 1}, + "deps": ["number"], + } } ] } @@ -160,4 +163,4 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): repo_url = "file://{}".format(erepo_dir) assert api.summon("sum", repo=repo_url) == 101 - assert api.summon("sum", repo=repo_url, params={"x": 2}) == 102 + assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 From ee72280e603eefa1de74e1137082134a2d661097 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 23:34:00 -0600 Subject: [PATCH 13/31] summon: left a comment about chdir and syspath --- dvc/api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dvc/api.py b/dvc/api.py index b25001c15c..38b324a65f 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -113,6 +113,10 @@ def summon(name, args=None, repo=None, rev=None): @contextmanager def _chdir_and_syspath(path): + # XXX: Some issues with this approach: + # * Not thread safe + # * Import will pollute sys.modules + # * Weird errors if there is a name clash within sys.modules old_dir = os.path.abspath(os.curdir) os.chdir(path) sys.path.insert(0, path) From c852d3316d817d9ecce6ff911868e3d1d824f344 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 23:34:43 -0600 Subject: [PATCH 14/31] :nail_care: black --- tests/func/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 680888e486..0bc1247658 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -142,7 +142,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): "call": "calculator.add_to_num", "args": {"x": 1}, "deps": ["number"], - } + }, } ] } From addc56c2325ca2bb31753427562d969e5a51dc41 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Dec 2019 23:38:52 -0600 Subject: [PATCH 15/31] summon: allow different YAML file names --- dvc/api.py | 7 ++++--- tests/func/test_api.py | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 38b324a65f..7f963a4b17 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -76,7 +76,7 @@ def _make_repo(repo_url, rev=None): yield repo -def summon(name, args=None, repo=None, rev=None): +def summon(name, fname=None, args=None, repo=None, rev=None): # 1. Read dvcsummon.yaml # 2. Pull dependencies # 3. Get the call and parameters @@ -84,9 +84,10 @@ def summon(name, args=None, repo=None, rev=None): # 5. Return the result with _make_repo(repo, rev=rev) as _repo: - dvcsummon_path = os.path.join(_repo.root_dir, "dvcsummon.yaml") + fname = fname or "dvcsummon.yaml" + summon_path = os.path.join(_repo.root_dir, fname) - with open(dvcsummon_path, "r") as fobj: + with open(summon_path, "r") as fobj: objects = ruamel.yaml.safe_load(fobj.read()).get("objects") obj = next(x for x in objects if x.get("name") == name) _summon = obj.get("summon") diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 0bc1247658..0b019d42f6 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -154,6 +154,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") erepo_dir.scm_gen("dvcsummon.yaml", dvcsummon_yaml) + erepo_dir.scm_gen("non-canon.yaml", dvcsummon_yaml) erepo_dir.scm_gen( "calculator.py", "def add_to_num(x): return x + int(open('number').read())", @@ -164,3 +165,4 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): assert api.summon("sum", repo=repo_url) == 101 assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 + assert api.summon("sum", repo=repo_url, fname="non-canon.yaml") == 101 From 4ad7cb3961c72c6d26c3c4a044228a4ba4673076 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 23 Dec 2019 23:45:13 -0600 Subject: [PATCH 16/31] summon: define yaml file schema --- dvc/api.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dvc/api.py b/dvc/api.py index 7f963a4b17..42728229b9 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -2,6 +2,7 @@ import os import sys from contextlib import contextmanager +from voluptuous import Schema try: @@ -16,6 +17,26 @@ from dvc.external_repo import external_repo +summon_schema = Schema( + { + "objects": [ + { + "name": str, + "description": str, + "paper": str, + "metrics": dict, + "summon": { + "type": "python", + "call": str, + "args": dict, + "deps": [str] + }, + } + ], + } +) + + def get_url(path, repo=None, rev=None, remote=None): """Returns an url of a resource specified by path in repo""" with _make_repo(repo, rev=rev) as _repo: From 7ece84b8405190fe8bcc03b7f34134ce0d7ea4ba Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 00:39:24 -0600 Subject: [PATCH 17/31] summon: prevent modifying the yaml args --- dvc/api.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 42728229b9..291c87ce6b 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -1,6 +1,7 @@ import importlib import os import sys +import copy from contextlib import contextmanager from voluptuous import Schema @@ -124,11 +125,12 @@ def summon(name, fname=None, args=None, repo=None, rev=None): with _chdir_and_syspath(_repo.root_dir): call = _import_string(_summon.get("call")) + _args = copy.deepcopy(_summon["args"]) if args: - _summon.get("args").update(args) + _args.update(args) - result = call(**_summon.get("args")) + result = call(**_args) return result From 6fd5eb41853cf5e6a39c29484f8338f9de479c32 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 00:40:07 -0600 Subject: [PATCH 18/31] summon: use slice instead of `get` --- dvc/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 291c87ce6b..23d017427f 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -110,7 +110,7 @@ def summon(name, fname=None, args=None, repo=None, rev=None): summon_path = os.path.join(_repo.root_dir, fname) with open(summon_path, "r") as fobj: - objects = ruamel.yaml.safe_load(fobj.read()).get("objects") + objects = ruamel.yaml.safe_load(fobj.read())["objects"] obj = next(x for x in objects if x.get("name") == name) _summon = obj.get("summon") @@ -124,7 +124,7 @@ def summon(name, fname=None, args=None, repo=None, rev=None): out.checkout() with _chdir_and_syspath(_repo.root_dir): - call = _import_string(_summon.get("call")) + call = _import_string(_summon["call"]) _args = copy.deepcopy(_summon["args"]) if args: From 542f4c3c18f6199d2236bbb491ecf93f30851333 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 00:40:24 -0600 Subject: [PATCH 19/31] summon: wrap chdir_and_syspath in a try/finally --- dvc/api.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 23d017427f..2cdafc0a24 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -111,8 +111,8 @@ def summon(name, fname=None, args=None, repo=None, rev=None): with open(summon_path, "r") as fobj: objects = ruamel.yaml.safe_load(fobj.read())["objects"] - obj = next(x for x in objects if x.get("name") == name) - _summon = obj.get("summon") + obj = next(x for x in objects if x["name"] == name) + _summon = obj["summon"] outs = [ _repo.find_out_by_relpath(dep) for dep in _summon.get("deps", ()) @@ -142,11 +142,14 @@ def _chdir_and_syspath(path): # * Import will pollute sys.modules # * Weird errors if there is a name clash within sys.modules old_dir = os.path.abspath(os.curdir) - os.chdir(path) - sys.path.insert(0, path) - yield - sys.path.pop(0) - os.chdir(old_dir) + + try: + os.chdir(path) + sys.path.insert(0, path) + yield + finally: + sys.path.pop(0) + os.chdir(old_dir) def _import_string(import_name, silent=False): From 7dd340aeeb4382fd4c7542448080ef27047e1421 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 02:13:11 -0600 Subject: [PATCH 20/31] summon: :nail_care: refactoring/reordering --- dvc/api.py | 119 ++++++++++++++++++++++++++++------------------------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 2cdafc0a24..44db6aac67 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -3,8 +3,6 @@ import sys import copy from contextlib import contextmanager -from voluptuous import Schema - try: from contextlib import _GeneratorContextManager as GCM @@ -12,32 +10,13 @@ from contextlib import GeneratorContextManager as GCM import ruamel.yaml +from voluptuous import Schema, Invalid -from dvc.utils.compat import urlparse, builtin_str +from dvc.utils.compat import urlparse, builtin_str, FileNotFoundError from dvc.repo import Repo from dvc.external_repo import external_repo -summon_schema = Schema( - { - "objects": [ - { - "name": str, - "description": str, - "paper": str, - "metrics": dict, - "summon": { - "type": "python", - "call": str, - "args": dict, - "deps": [str] - }, - } - ], - } -) - - def get_url(path, repo=None, rev=None, remote=None): """Returns an url of a resource specified by path in repo""" with _make_repo(repo, rev=rev) as _repo: @@ -98,58 +77,86 @@ def _make_repo(repo_url, rev=None): yield repo -def summon(name, fname=None, args=None, repo=None, rev=None): - # 1. Read dvcsummon.yaml - # 2. Pull dependencies - # 3. Get the call and parameters - # 4. Invoke the call with the given parameters - # 5. Return the result - +def summon(name, fname="dvcsummon.yaml", args=None, repo=None, rev=None): with _make_repo(repo, rev=rev) as _repo: - fname = fname or "dvcsummon.yaml" - summon_path = os.path.join(_repo.root_dir, fname) - with open(summon_path, "r") as fobj: - objects = ruamel.yaml.safe_load(fobj.read())["objects"] - obj = next(x for x in objects if x["name"] == name) - _summon = obj["summon"] + def pull_dependencies(deps): + if not deps: + return - outs = [ - _repo.find_out_by_relpath(dep) for dep in _summon.get("deps", ()) - ] + outs = [_repo.find_out_by_relpath(dep) for dep in deps] - with _repo.state: - for out in outs: - _repo.cloud.pull(out.get_used_cache()) - out.checkout() + with _repo.state: + for out in outs: + _repo.cloud.pull(out.get_used_cache()) + out.checkout() - with _chdir_and_syspath(_repo.root_dir): - call = _import_string(_summon["call"]) - _args = copy.deepcopy(_summon["args"]) + path = os.path.join(_repo.root_dir, fname) + obj = _get_object_from_summoners_file(name, path) + info = obj["summon"] - if args: - _args.update(args) + pull_dependencies(info.get("deps")) - result = call(**_args) + _args = copy.deepcopy(info.get("args", {})) + _args.update(args or {}) - return result + return _invoke_method(info["call"], _args, path=_repo.root_dir) -@contextmanager -def _chdir_and_syspath(path): +def _get_object_from_summoners_file(name, path): + """ + Given a summonable object's name, search for it on the given file + and bring it to life. + """ + schema = Schema( + [ + { + "name": str, + "description": str, + "paper": str, + "metrics": dict, + "summon": { + "type": "python", + "call": str, + "args": dict, # XXX: Optional + "deps": [str], # XXX: Optional + }, + } + ] + ) + + with open(path, "r") as fobj: + try: + objects = ruamel.yaml.safe_load(fobj.read())["objects"] + objects = schema(objects) + return next(x for x in objects if x["name"] == name) + except FileNotFoundError: + pass # XXX: No such YAML file with path: '' + except ruamel.yaml.ScannerError: + pass # XXX: Failed to parse YAML correctly + except KeyError: + pass # XXX: YAML file doesn't include the "objects" keyword + except Invalid: + pass # XXX: YAML file dosen't match with the schema + except StopIteration: + pass # XXX: No such object with name: '' + + +def _invoke_method(call, args, path): # XXX: Some issues with this approach: # * Not thread safe # * Import will pollute sys.modules # * Weird errors if there is a name clash within sys.modules - old_dir = os.path.abspath(os.curdir) + cwd = os.path.abspath(os.curdir) try: os.chdir(path) sys.path.insert(0, path) - yield + method = _import_string(call) + return method(**args) finally: - sys.path.pop(0) - os.chdir(old_dir) + os.chdir(cwd) + sys.path.pop() def _import_string(import_name, silent=False): From 1c22cd5af4a307058fe138920bda08ed883f34fa Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 02:20:08 -0600 Subject: [PATCH 21/31] summon: adjust comments --- dvc/api.py | 1 + tests/func/test_api.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/api.py b/dvc/api.py index 44db6aac67..d5909dea57 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -78,6 +78,7 @@ def _make_repo(repo_url, rev=None): def summon(name, fname="dvcsummon.yaml", args=None, repo=None, rev=None): + # TODO: Write a meaningful docstring about `summon` with _make_repo(repo, rev=rev) as _repo: def pull_dependencies(deps): diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 0b019d42f6..39e7837b49 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -148,7 +148,6 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): } ) - # XXX: Chdir because `stage.create` defaults to `wdir = os.curdir` with monkeypatch.context() as m: m.chdir(fspath(erepo_dir)) From d45f07cbecf357575e8a0eedb89afab203f8e684 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 13:16:27 -0600 Subject: [PATCH 22/31] summon: make schema more consistent * Mark optional keywords * Allow extra entries * Include objects into the schema --- dvc/api.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index d5909dea57..f85141470a 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -10,13 +10,32 @@ from contextlib import GeneratorContextManager as GCM import ruamel.yaml -from voluptuous import Schema, Invalid +from voluptuous import Schema, Optional, Invalid, ALLOW_EXTRA from dvc.utils.compat import urlparse, builtin_str, FileNotFoundError from dvc.repo import Repo from dvc.external_repo import external_repo +summon_schema = Schema( + { + "objects": [ + { + "name": str, + "summon": { + "type": "python", + "call": str, + Optional("args"): dict, + Optional("deps"): [str], + }, + } + ] + }, + required=True, + extra=ALLOW_EXTRA +) + + def get_url(path, repo=None, rev=None, remote=None): """Returns an url of a resource specified by path in repo""" with _make_repo(repo, rev=rev) as _repo: @@ -109,34 +128,15 @@ def _get_object_from_summoners_file(name, path): Given a summonable object's name, search for it on the given file and bring it to life. """ - schema = Schema( - [ - { - "name": str, - "description": str, - "paper": str, - "metrics": dict, - "summon": { - "type": "python", - "call": str, - "args": dict, # XXX: Optional - "deps": [str], # XXX: Optional - }, - } - ] - ) - with open(path, "r") as fobj: try: - objects = ruamel.yaml.safe_load(fobj.read())["objects"] - objects = schema(objects) + content = summon_schema(ruamel.yaml.safe_load(fobj.read())) + objects = content["objects"] return next(x for x in objects if x["name"] == name) except FileNotFoundError: pass # XXX: No such YAML file with path: '' except ruamel.yaml.ScannerError: pass # XXX: Failed to parse YAML correctly - except KeyError: - pass # XXX: YAML file doesn't include the "objects" keyword except Invalid: pass # XXX: YAML file dosen't match with the schema except StopIteration: From fbb7aebd310f606817e5d0179be8675ec848cc14 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 19:50:43 -0600 Subject: [PATCH 23/31] tests/summon: use different args for non-canon.yml --- tests/func/test_api.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 39e7837b49..49c09ed6e2 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -2,6 +2,7 @@ import os import shutil +import copy import ruamel.yaml import pytest @@ -131,29 +132,33 @@ def test_open_not_cached(dvc): def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): - dvcsummon_yaml = ruamel.yaml.dump( - { - "objects": [ - { - "name": "sum", - "description": "Add to ", - "summon": { - "type": "python", - "call": "calculator.add_to_num", - "args": {"x": 1}, - "deps": ["number"], - }, - } - ] - } - ) + objects = { + "objects": [ + { + "name": "sum", + "description": "Add to ", + "summon": { + "type": "python", + "call": "calculator.add_to_num", + "args": {"x": 1}, + "deps": ["number"], + }, + } + ] + } + + different_objects = copy.deepcopy(objects) + different_objects["objects"][0]["summon"]["args"]["x"] = 100 + + dvcsummon_yaml = ruamel.yaml.dump(objects) + non_canon_yaml = ruamel.yaml.dump(different_objects) with monkeypatch.context() as m: m.chdir(fspath(erepo_dir)) erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") erepo_dir.scm_gen("dvcsummon.yaml", dvcsummon_yaml) - erepo_dir.scm_gen("non-canon.yaml", dvcsummon_yaml) + erepo_dir.scm_gen("non-canon.yaml", non_canon_yaml) erepo_dir.scm_gen( "calculator.py", "def add_to_num(x): return x + int(open('number').read())", @@ -164,4 +169,4 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): assert api.summon("sum", repo=repo_url) == 101 assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 - assert api.summon("sum", repo=repo_url, fname="non-canon.yaml") == 101 + assert api.summon("sum", repo=repo_url, fname="non-canon.yaml") == 200 From 1e65c76ef1d454fd88891bf3f1ad90424fcc0049 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 20:20:03 -0600 Subject: [PATCH 24/31] summon: raise SummonError appropiately --- dvc/api.py | 25 ++++++++++++++----------- dvc/exceptions.py | 4 ++++ tests/func/test_api.py | 23 ++++++++++++++++++++--- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index f85141470a..e8cc7bd22c 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -12,8 +12,9 @@ import ruamel.yaml from voluptuous import Schema, Optional, Invalid, ALLOW_EXTRA -from dvc.utils.compat import urlparse, builtin_str, FileNotFoundError +from dvc.utils.compat import urlparse, builtin_str from dvc.repo import Repo +from dvc.exceptions import SummonError, FileMissingError from dvc.external_repo import external_repo @@ -128,19 +129,21 @@ def _get_object_from_summoners_file(name, path): Given a summonable object's name, search for it on the given file and bring it to life. """ - with open(path, "r") as fobj: - try: + try: + with open(path, "r") as fobj: content = summon_schema(ruamel.yaml.safe_load(fobj.read())) objects = content["objects"] return next(x for x in objects if x["name"] == name) - except FileNotFoundError: - pass # XXX: No such YAML file with path: '' - except ruamel.yaml.ScannerError: - pass # XXX: Failed to parse YAML correctly - except Invalid: - pass # XXX: YAML file dosen't match with the schema - except StopIteration: - pass # XXX: No such object with name: '' + except FileMissingError as exc: + # XXX: Prints ugly absolute path. + raise SummonError("No such YAML file with path: '{}'".format(path)) + except ruamel.yaml.YAMLError as exc: + raise SummonError("Failed to parse YAML correctly", exc) + except Invalid as exc: + # XXX: Doesn't point out the error. + raise SummonError("YAML file dosen't match with the schema", exc) + except StopIteration: + raise SummonError("No such object with name '{}'".format(name)) def _invoke_method(call, args, path): diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 7151e2e2fa..18ec671f21 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -353,3 +353,7 @@ def __init__(self, path, repo): " neighther as an output nor a git-handled file." ) super(PathMissingError, self).__init__(msg.format(path, repo)) + + +class SummonError(DvcException): + pass diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 49c09ed6e2..0d90b26ad2 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -13,6 +13,7 @@ from dvc.path_info import URLInfo from dvc.remote.config import RemoteConfig from dvc.utils.compat import fspath +from dvc.exceptions import SummonError from tests.remotes import Azure, GCP, HDFS, Local, OSS, S3, SSH @@ -151,14 +152,18 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): different_objects["objects"][0]["summon"]["args"]["x"] = 100 dvcsummon_yaml = ruamel.yaml.dump(objects) - non_canon_yaml = ruamel.yaml.dump(different_objects) + different_yaml = ruamel.yaml.dump(different_objects) + invalid_yaml = ruamel.yaml.dump({"name": "sum"}) + not_yaml = "a: - this is not a valid YAML file" with monkeypatch.context() as m: m.chdir(fspath(erepo_dir)) erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") erepo_dir.scm_gen("dvcsummon.yaml", dvcsummon_yaml) - erepo_dir.scm_gen("non-canon.yaml", non_canon_yaml) + erepo_dir.scm_gen("different.yaml", different_yaml) + erepo_dir.scm_gen("invalid.yaml", invalid_yaml) + erepo_dir.scm_gen("not_yaml.yaml", not_yaml) erepo_dir.scm_gen( "calculator.py", "def add_to_num(x): return x + int(open('number').read())", @@ -169,4 +174,16 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): assert api.summon("sum", repo=repo_url) == 101 assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 - assert api.summon("sum", repo=repo_url, fname="non-canon.yaml") == 200 + assert api.summon("sum", repo=repo_url, fname="different.yaml") == 200 + + with pytest.raises(SummonError): + api.summon("sum", repo=repo_url, fname="not-existent-file.yaml") + + with pytest.raises(SummonError): + api.summon("non-existent", repo=repo_url) + + with pytest.raises(SummonError): + api.summon("sum", repo=repo_url, fname="invalid.yaml") + + with pytest.raises(SummonError): + api.summon("sum", repo=repo_url, fname="not_yaml.yaml") From 7a58901ec6589f37cb0fd04616ed925431355861 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 24 Dec 2019 20:20:31 -0600 Subject: [PATCH 25/31] :nail_care: black & flake --- dvc/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index e8cc7bd22c..a9bb95c759 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -33,7 +33,7 @@ ] }, required=True, - extra=ALLOW_EXTRA + extra=ALLOW_EXTRA, ) @@ -134,7 +134,7 @@ def _get_object_from_summoners_file(name, path): content = summon_schema(ruamel.yaml.safe_load(fobj.read())) objects = content["objects"] return next(x for x in objects if x["name"] == name) - except FileMissingError as exc: + except FileMissingError: # XXX: Prints ugly absolute path. raise SummonError("No such YAML file with path: '{}'".format(path)) except ruamel.yaml.YAMLError as exc: From 0658622ca2b211d2b816b45aa82c975e4d5bd7e6 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 25 Dec 2019 00:42:55 -0600 Subject: [PATCH 26/31] summon: pair-programming adjustments --- dvc/api.py | 115 ++++++++++++++++++++++------------------- tests/func/test_api.py | 34 ++++++++---- 2 files changed, 85 insertions(+), 64 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index a9bb95c759..17b1231d49 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -10,7 +10,7 @@ from contextlib import GeneratorContextManager as GCM import ruamel.yaml -from voluptuous import Schema, Optional, Invalid, ALLOW_EXTRA +from voluptuous import Schema, Required, Invalid from dvc.utils.compat import urlparse, builtin_str from dvc.repo import Repo @@ -18,22 +18,21 @@ from dvc.external_repo import external_repo -summon_schema = Schema( +SUMMON_SCHEMA = Schema( { - "objects": [ + Required("objects"): [ { - "name": str, - "summon": { - "type": "python", - "call": str, - Optional("args"): dict, - Optional("deps"): [str], + Required("name"): str, + "meta": dict, + Required("summon"): { + Required("type"): "python", + Required("call"): str, + "args": dict, + "deps": [str], }, } ] }, - required=True, - extra=ALLOW_EXTRA, ) @@ -97,26 +96,20 @@ def _make_repo(repo_url, rev=None): yield repo -def summon(name, fname="dvcsummon.yaml", args=None, repo=None, rev=None): - # TODO: Write a meaningful docstring about `summon` +def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None): + """Instantiate an object described in the summon file.""" with _make_repo(repo, rev=rev) as _repo: - - def pull_dependencies(deps): - if not deps: - return - - outs = [_repo.find_out_by_relpath(dep) for dep in deps] - - with _repo.state: - for out in outs: - _repo.cloud.pull(out.get_used_cache()) - out.checkout() - - path = os.path.join(_repo.root_dir, fname) - obj = _get_object_from_summoners_file(name, path) - info = obj["summon"] - - pull_dependencies(info.get("deps")) + try: + path = os.path.join(_repo.root_dir, summon_file) + obj = _get_object_from_summon_file(name, path) + info = obj["summon"] + except SummonError as exc: + raise SummonError( + str(exc) + " at '{}' in '{}'".format(summon_file, repo), + cause=exc.cause + ) + + _pull_dependencies(_repo, info.get("deps")) _args = copy.deepcopy(info.get("args", {})) _args.update(args or {}) @@ -124,26 +117,43 @@ def pull_dependencies(deps): return _invoke_method(info["call"], _args, path=_repo.root_dir) -def _get_object_from_summoners_file(name, path): +def _get_object_from_summon_file(name, path): """ Given a summonable object's name, search for it on the given file - and bring it to life. + and return its description. """ try: with open(path, "r") as fobj: - content = summon_schema(ruamel.yaml.safe_load(fobj.read())) - objects = content["objects"] - return next(x for x in objects if x["name"] == name) + content = SUMMON_SCHEMA(ruamel.yaml.safe_load(fobj.read())) + objects = [x for x in content["objects"] if x["name"] == name] + + if not objects: + raise SummonError("No object with name '{}'".format(name)) + elif len(objects) >= 2: + raise SummonError( + "More than one object with name '{}'".format(name) + ) + + return objects[0] + except FileMissingError: - # XXX: Prints ugly absolute path. - raise SummonError("No such YAML file with path: '{}'".format(path)) + raise SummonError("Summon file not found") except ruamel.yaml.YAMLError as exc: - raise SummonError("Failed to parse YAML correctly", exc) + raise SummonError("Failed to parse summon file", exc) except Invalid as exc: - # XXX: Doesn't point out the error. - raise SummonError("YAML file dosen't match with the schema", exc) - except StopIteration: - raise SummonError("No such object with name '{}'".format(name)) + raise SummonError(str(exc)) + + +def _pull_dependencies(repo, deps): + if not deps: + return + + outs = [repo.find_out_by_relpath(dep) for dep in deps] + + with repo.state: + for out in outs: + repo.cloud.pull(out.get_used_cache()) + out.checkout() def _invoke_method(call, args, path): @@ -151,6 +161,9 @@ def _invoke_method(call, args, path): # * Not thread safe # * Import will pollute sys.modules # * Weird errors if there is a name clash within sys.modules + + # XXX: sys.path manipulation is "theoretically" not needed + # but tests are failing for an unknown reason. cwd = os.path.abspath(os.curdir) try: @@ -163,23 +176,17 @@ def _invoke_method(call, args, path): sys.path.pop() -def _import_string(import_name, silent=False): +def _import_string(import_name): """Imports an object based on a string. Useful to delay import to not load everything on startup. Use dotted notaion in `import_name`, e.g. 'dvc.remote.gs.RemoteGS'. - If the `silent` is True the return value will be `None` if the import - fails. :return: imported object """ import_name = builtin_str(import_name) - try: - if "." in import_name: - module, obj = import_name.rsplit(".", 1) - else: - return importlib.import_module(import_name) - return getattr(importlib.import_module(module), obj) - except (ImportError, AttributeError): - if not silent: - raise + if "." in import_name: + module, obj = import_name.rsplit(".", 1) + else: + return importlib.import_module(import_name) + return getattr(importlib.import_module(module), obj) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 0d90b26ad2..1d22be44a1 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -137,7 +137,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): "objects": [ { "name": "sum", - "description": "Add to ", + "meta": { "description": "Add to " }, "summon": { "type": "python", "call": "calculator.add_to_num", @@ -151,8 +151,12 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): different_objects = copy.deepcopy(objects) different_objects["objects"][0]["summon"]["args"]["x"] = 100 + dup_objects= copy.deepcopy(objects) + dup_objects["objects"] *= 2 + dvcsummon_yaml = ruamel.yaml.dump(objects) different_yaml = ruamel.yaml.dump(different_objects) + dup_yaml = ruamel.yaml.dump(dup_objects) invalid_yaml = ruamel.yaml.dump({"name": "sum"}) not_yaml = "a: - this is not a valid YAML file" @@ -162,6 +166,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") erepo_dir.scm_gen("dvcsummon.yaml", dvcsummon_yaml) erepo_dir.scm_gen("different.yaml", different_yaml) + erepo_dir.scm_gen("dup.yaml", dup_yaml) erepo_dir.scm_gen("invalid.yaml", invalid_yaml) erepo_dir.scm_gen("not_yaml.yaml", not_yaml) erepo_dir.scm_gen( @@ -174,16 +179,25 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): assert api.summon("sum", repo=repo_url) == 101 assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 - assert api.summon("sum", repo=repo_url, fname="different.yaml") == 200 + assert api.summon("sum", repo=repo_url, summon_file="different.yaml") == 200 + + try: + api.summon("sum", repo=repo_url, summon_file="missing.yaml") + except SummonError as exc: + assert "Summon file not found" in str(exc) + assert "missing.yaml" in str(exc) + assert repo_url in str(exc) + else: + pytest.fail("Did not raise on missing summon file") - with pytest.raises(SummonError): - api.summon("sum", repo=repo_url, fname="not-existent-file.yaml") + with pytest.raises(SummonError, match=r"No object with name 'missing'"): + api.summon("missing", repo=repo_url) - with pytest.raises(SummonError): - api.summon("non-existent", repo=repo_url) + with pytest.raises(SummonError, match=r"More than one object with name 'sum'"): + api.summon("sum", repo=repo_url, summon_file="dup.yaml") - with pytest.raises(SummonError): - api.summon("sum", repo=repo_url, fname="invalid.yaml") + with pytest.raises(SummonError, match=r"extra keys not allowed"): + api.summon("sum", repo=repo_url, summon_file="invalid.yaml") - with pytest.raises(SummonError): - api.summon("sum", repo=repo_url, fname="not_yaml.yaml") + with pytest.raises(SummonError, match=r"Failed to parse summon file"): + api.summon("sum", repo=repo_url, summon_file="not_yaml.yaml") From 75ad14aaa234be8ef34ed1ca38253229053fdc8e Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 25 Dec 2019 00:43:33 -0600 Subject: [PATCH 27/31] :nail_care: black --- dvc/api.py | 6 +++--- tests/func/test_api.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 17b1231d49..7bde1b0bef 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -32,7 +32,7 @@ }, } ] - }, + } ) @@ -106,7 +106,7 @@ def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None): except SummonError as exc: raise SummonError( str(exc) + " at '{}' in '{}'".format(summon_file, repo), - cause=exc.cause + cause=exc.cause, ) _pull_dependencies(_repo, info.get("deps")) @@ -132,7 +132,7 @@ def _get_object_from_summon_file(name, path): elif len(objects) >= 2: raise SummonError( "More than one object with name '{}'".format(name) - ) + ) return objects[0] diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 1d22be44a1..bff3abe51e 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -137,7 +137,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): "objects": [ { "name": "sum", - "meta": { "description": "Add to " }, + "meta": {"description": "Add to "}, "summon": { "type": "python", "call": "calculator.add_to_num", @@ -151,7 +151,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): different_objects = copy.deepcopy(objects) different_objects["objects"][0]["summon"]["args"]["x"] = 100 - dup_objects= copy.deepcopy(objects) + dup_objects = copy.deepcopy(objects) dup_objects["objects"] *= 2 dvcsummon_yaml = ruamel.yaml.dump(objects) @@ -179,7 +179,9 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): assert api.summon("sum", repo=repo_url) == 101 assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 - assert api.summon("sum", repo=repo_url, summon_file="different.yaml") == 200 + assert ( + api.summon("sum", repo=repo_url, summon_file="different.yaml") == 200 + ) try: api.summon("sum", repo=repo_url, summon_file="missing.yaml") @@ -193,7 +195,9 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): with pytest.raises(SummonError, match=r"No object with name 'missing'"): api.summon("missing", repo=repo_url) - with pytest.raises(SummonError, match=r"More than one object with name 'sum'"): + with pytest.raises( + SummonError, match=r"More than one object with name 'sum'" + ): api.summon("sum", repo=repo_url, summon_file="dup.yaml") with pytest.raises(SummonError, match=r"extra keys not allowed"): From 4a25612282ba4c5259d263cf3c8441b9682cf531 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 25 Dec 2019 01:03:21 -0600 Subject: [PATCH 28/31] :nail_care: refactor --- dvc/api.py | 11 ++++++++--- dvc/exceptions.py | 4 ---- tests/func/test_api.py | 26 +++++++++----------------- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 7bde1b0bef..013eb0a511 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -9,12 +9,13 @@ except ImportError: from contextlib import GeneratorContextManager as GCM +from dvc.utils.compat import urlparse, builtin_str + import ruamel.yaml from voluptuous import Schema, Required, Invalid -from dvc.utils.compat import urlparse, builtin_str from dvc.repo import Repo -from dvc.exceptions import SummonError, FileMissingError +from dvc.exceptions import DvcException, FileMissingError from dvc.external_repo import external_repo @@ -36,6 +37,10 @@ ) +class SummonError(DvcException): + pass + + def get_url(path, repo=None, rev=None, remote=None): """Returns an url of a resource specified by path in repo""" with _make_repo(repo, rev=rev) as _repo: @@ -109,7 +114,7 @@ def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None): cause=exc.cause, ) - _pull_dependencies(_repo, info.get("deps")) + _pull_dependencies(_repo, info.get("deps", [])) _args = copy.deepcopy(info.get("args", {})) _args.update(args or {}) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 18ec671f21..7151e2e2fa 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -353,7 +353,3 @@ def __init__(self, path, repo): " neighther as an output nor a git-handled file." ) super(PathMissingError, self).__init__(msg.format(path, repo)) - - -class SummonError(DvcException): - pass diff --git a/tests/func/test_api.py b/tests/func/test_api.py index bff3abe51e..ba05e9a9cc 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -8,12 +8,12 @@ import pytest from dvc import api +from dvc.api import SummonError from dvc.exceptions import FileMissingError from dvc.main import main from dvc.path_info import URLInfo from dvc.remote.config import RemoteConfig from dvc.utils.compat import fspath -from dvc.exceptions import SummonError from tests.remotes import Azure, GCP, HDFS, Local, OSS, S3, SSH @@ -148,27 +148,21 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): ] } - different_objects = copy.deepcopy(objects) - different_objects["objects"][0]["summon"]["args"]["x"] = 100 + other_objects = copy.deepcopy(objects) + other_objects["objects"][0]["summon"]["args"]["x"] = 100 dup_objects = copy.deepcopy(objects) dup_objects["objects"] *= 2 - dvcsummon_yaml = ruamel.yaml.dump(objects) - different_yaml = ruamel.yaml.dump(different_objects) - dup_yaml = ruamel.yaml.dump(dup_objects) - invalid_yaml = ruamel.yaml.dump({"name": "sum"}) - not_yaml = "a: - this is not a valid YAML file" - with monkeypatch.context() as m: m.chdir(fspath(erepo_dir)) erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") - erepo_dir.scm_gen("dvcsummon.yaml", dvcsummon_yaml) - erepo_dir.scm_gen("different.yaml", different_yaml) - erepo_dir.scm_gen("dup.yaml", dup_yaml) - erepo_dir.scm_gen("invalid.yaml", invalid_yaml) - erepo_dir.scm_gen("not_yaml.yaml", not_yaml) + erepo_dir.scm_gen("dvcsummon.yaml", ruamel.yaml.dump(objects)) + erepo_dir.scm_gen("other.yaml", ruamel.yaml.dump(other_objects)) + erepo_dir.scm_gen("dup.yaml", ruamel.yaml.dump(dup_objects)) + erepo_dir.scm_gen("invalid.yaml", ruamel.yaml.dump({"name": "sum"})) + erepo_dir.scm_gen("not_yaml.yaml", "a: - this is not a YAML file") erepo_dir.scm_gen( "calculator.py", "def add_to_num(x): return x + int(open('number').read())", @@ -179,9 +173,7 @@ def test_summon(tmp_dir, erepo_dir, dvc, monkeypatch): assert api.summon("sum", repo=repo_url) == 101 assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 - assert ( - api.summon("sum", repo=repo_url, summon_file="different.yaml") == 200 - ) + assert api.summon("sum", repo=repo_url, summon_file="other.yaml") == 200 try: api.summon("sum", repo=repo_url, summon_file="missing.yaml") From 49264b55acb1f121f587c3b121d4e078e71dff36 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 25 Dec 2019 13:07:40 -0600 Subject: [PATCH 29/31] py2: fix problem with ruamel.yaml and unicode --- tests/func/test_api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index ba05e9a9cc..393e94edf8 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import os import shutil import copy From e8dd2358f5e0fe9f31dff2db7430f4dc04381407 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 27 Dec 2019 10:05:41 -0600 Subject: [PATCH 30/31] summon: fix path popping operation --- dvc/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/api.py b/dvc/api.py index 013eb0a511..dd2f9f444b 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -178,7 +178,7 @@ def _invoke_method(call, args, path): return method(**args) finally: os.chdir(cwd) - sys.path.pop() + sys.path.pop(0) def _import_string(import_name): From dad224ef42cc677fe968b61c3174e6f5214a7a54 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 27 Dec 2019 10:09:13 -0600 Subject: [PATCH 31/31] summon: os.curdir -> os.getcwd --- dvc/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/api.py b/dvc/api.py index dd2f9f444b..45c20c0c15 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -169,7 +169,7 @@ def _invoke_method(call, args, path): # XXX: sys.path manipulation is "theoretically" not needed # but tests are failing for an unknown reason. - cwd = os.path.abspath(os.curdir) + cwd = os.getcwd() try: os.chdir(path)