From 3f56e930ae903080a91dc6fb0574018a04738825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 27 Jul 2021 11:25:42 +0200 Subject: [PATCH 01/16] plots: support images initial --- dvc/command/plots.py | 56 +- dvc/repo/live.py | 8 +- dvc/repo/plots/__init__.py | 129 +---- dvc/repo/plots/data.py | 13 +- dvc/repo/plots/render.py | 183 +++++++ dvc/utils/html.py | 25 +- tests/func/plots/test_diff.py | 4 + tests/func/plots/test_modify.py | 8 +- tests/func/plots/test_show.py | 863 +++++++++++++------------------ tests/func/test_live.py | 17 +- tests/unit/command/test_plots.py | 79 +-- tests/unit/test_plots.py | 6 +- 12 files changed, 687 insertions(+), 704 deletions(-) create mode 100644 dvc/repo/plots/render.py diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 7f752223a8..9cff8056cb 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -4,6 +4,7 @@ from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link, fix_subparsers from dvc.exceptions import DvcException +from dvc.repo.plots.render import find_vega, render from dvc.ui import ui from dvc.utils import format_link @@ -35,43 +36,56 @@ def run(self): return 1 try: - plots = self._func(targets=self.args.targets, props=self._props()) + # TODO collect raw data and apply props later? + plots_data = self._func( + targets=self.args.targets, props=self._props() + ) + + if not plots_data: + ui.error_write( + "No plots were loaded, " + "visualization file will not be created." + ) + rel: str = self.args.out or "dvc_plots" + path: Path = (Path.cwd() / rel).resolve() if self.args.show_vega: target = self.args.targets[0] - ui.write(plots[target]) + plot_json = find_vega(self.repo, plots_data, target) + ui.write(plot_json) return 0 - except DvcException: - logger.exception("") - return 1 - - if plots: - rel: str = self.args.out or "plots.html" - path: Path = (Path.cwd() / rel).resolve() - self.repo.plots.write_html( - path, plots=plots, html_template_path=self.args.html_template + index_path = render( + self.repo, + plots_data, + path=path, + html_template_path=self.args.html_template, ) - assert ( - path.is_absolute() - ) # as_uri throws ValueError if not absolute - url = path.as_uri() + assert index_path.is_absolute() + url = index_path.as_uri() ui.write(url) + + # TODO show first vega plot + # if self.args.show_vega: + # target = self.args.targets[0] + # ui.write(plots[target]) + # return 0 + if self.args.open: import webbrowser - opened = webbrowser.open(rel) + opened = webbrowser.open(index_path) if not opened: ui.error_write( "Failed to open. Please try opening it manually." ) return 1 - else: - ui.error_write( - "No plots were loaded, visualization file will not be created." - ) - return 0 + return 0 + + except DvcException: + logger.exception("") + return 1 class CmdPlotsShow(CmdPlots): diff --git a/dvc/repo/live.py b/dvc/repo/live.py index 5dcfee2fc8..f94f31dbac 100644 --- a/dvc/repo/live.py +++ b/dvc/repo/live.py @@ -2,6 +2,8 @@ import os from typing import TYPE_CHECKING, List, Optional +from dvc.repo.plots.render import render + logger = logging.getLogger(__name__) if TYPE_CHECKING: @@ -15,10 +17,10 @@ def create_summary(out): metrics, plots = out.repo.live.show(str(out.path_info)) - html_path = out.path_info.with_suffix(".html") + html_path = out.path_info.fspath + "_dvc_plots" - out.repo.plots.write_html(html_path, plots, metrics) - logger.info(f"\nfile://{os.path.abspath(html_path)}") + index_path = render(out.repo, plots, metrics=metrics, path=html_path) + logger.info(f"\nfile://{os.path.abspath(index_path)}") def summary_path_info(out: "Output") -> Optional["PathInfo"]: diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 64d30068cc..bf494243ea 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -3,7 +3,7 @@ import logging import os from collections import OrderedDict -from typing import TYPE_CHECKING, Callable, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional from funcy import cached_property, first, project @@ -113,45 +113,23 @@ def _collect_from_revision( props = props or {} for path, repo_path in plot_files: - res[repo_path] = {"props": rev_props} + joined_props = {**rev_props, **props} + res[repo_path] = {"props": joined_props} res[repo_path].update( parse( fs, path, - rev_props=rev_props, - props=props, + props=joined_props, onerror=onerror, ) ) return res - @staticmethod - def render(data, revs=None, props=None, templates=None): - """Renders plots""" - props = props or {} - - # Merge data by plot file and apply overriding props - plots = _prepare_plots(data, revs, props) - - result = {} - for datafile, desc in plots.items(): - rendered = _render( - datafile, - desc["data"], - desc["props"], - templates, - ) - if rendered: - result[datafile] = rendered - - return result - def show( self, targets: List[str] = None, revs=None, props=None, - templates=None, recursive=False, onerror=None, ): @@ -161,9 +139,7 @@ def show( targets, revs, recursive, onerror=onerror, props=props ) - if templates is None: - templates = self.templates - return self.render(data, revs, props, templates) + return data def diff(self, *args, **kwargs): from .diff import diff @@ -220,7 +196,7 @@ def templates(self): def write_html( self, path: StrPath, - plots: Dict[str, Dict], + plots: List[Any], metrics: Optional[Dict[str, Dict]] = None, html_template_path: Optional[StrPath] = None, ): @@ -235,7 +211,7 @@ def write_html( from dvc.utils.html import write - write(path, plots, metrics, html_template_path) + return write(path, plots, metrics, html_template_path) def _is_plot(out: "Output") -> bool: @@ -264,17 +240,19 @@ def _collect_plots( @error_handler -def parse(fs, path, props=None, rev_props=None, **kwargs): +def parse(fs, path, props=None, **kwargs): props = props or {} - rev_props = rev_props or {} _, extension = os.path.splitext(path) if extension in (".tsv", ".csv"): - header = {**rev_props, **props}.get("header", True) + header = props.get("header", True) if extension == ".csv": return _load_sv(path=path, fs=fs, delimiter=",", header=header) return _load_sv(path=path, fs=fs, delimiter="\t", header=header) if extension in LOADERS or extension in (".yml", ".yaml"): return LOADERS[extension](path=path, fs=fs) + if extension in (".jpeg", ".jpg", ".gif", ".png"): + with fs.open(path, "rb") as fd: + return fd.read() raise PlotMetricTypeError(path) @@ -291,89 +269,6 @@ def _plot_props(out: "Output") -> Dict: return project(out.plot, PLOT_PROPS) -def _prepare_plots(data, revs, props): - """Groups data by plot file. - - Also resolves props conflicts between revs and applies global props. - """ - # we go in order revs are supplied on props conflict first ones win. - revs = iter(data) if revs is None else revs - - plots, props_revs = {}, {} - for rev in revs: - # Asked for revision without data - if rev not in data: - continue - - for datafile, desc in data[rev].get("data", {}).items(): - # We silently skip on an absent data file, - # see also try/except/pass in .collect() - if "data" not in desc: - continue - - # props from command line overwrite plot props from out definition - full_props = {**desc["props"], **props} - - if datafile in plots: - saved = plots[datafile] - if saved["props"] != full_props: - logger.warning( - f"Inconsistent plot props for '{datafile}' in " - f"'{props_revs[datafile]}' and '{rev}'. " - f"Going to use ones from '{props_revs[datafile]}'" - ) - - saved["data"][rev] = desc["data"] - else: - plots[datafile] = { - "props": full_props, - "data": {rev: desc["data"]}, - } - # Save rev we got props from - props_revs[datafile] = rev - - return plots - - -def _render(datafile, datas, props, templates): - from .data import PlotData, plot_data - - # Copy it to not modify a passed value - props = props.copy() - - # Add x and y to fields if set - fields = props.get("fields") - if fields is not None: - fields = {*fields, props.get("x"), props.get("y")} - {None} - - template = templates.load(props.get("template") or "default") - - # If x is not set add index field - if not props.get("x") and template.has_anchor("x"): - props["append_index"] = True - props["x"] = PlotData.INDEX_FIELD - - # Parse all data, preprocess it and collect as a list of dicts - data = [] - for rev, unstructured_data in datas.items(): - rev_data = plot_data(datafile, rev, unstructured_data).to_datapoints( - fields=fields, - path=props.get("path"), - append_index=props.get("append_index", False), - ) - data.extend(rev_data) - - # If y is not set then use last field not used yet - if data: - if not props.get("y") and template.has_anchor("y"): - fields = list(first(data)) - skip = (PlotData.REVISION_FIELD, props.get("x")) - props["y"] = first(f for f in reversed(fields) if f not in skip) - - return template.render(data, props=props) - return None - - def _load_sv(path, fs, delimiter=",", header=True): with fs.open(path, "r") as fd: content = fd.read() diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index 9ce7518ab2..fc19ad6208 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -34,9 +34,12 @@ def __init__(self, path, revision): def plot_data(filename, revision, content): _, extension = os.path.splitext(filename.lower()) if extension in (".json", ".yaml"): + # TODO DO SOMETHING ABOUT THAT return DictData(filename, revision, content) if extension in (".csv", ".tsv"): return ListData(filename, revision, content) + if extension in (".jpeg", ".jpg", ".gif", ".png"): + return BinaryData(filename, revision, content) raise PlotMetricTypeError(filename) @@ -113,6 +116,8 @@ def __init__(self, filename, revision, content, **kwargs): self.revision = revision self.content = content + +class StructuredData(PlotData): def _processors(self): return [_filter_fields, _append_index, _append_revision] @@ -126,13 +131,17 @@ def to_datapoints(self, **kwargs): return data -class DictData(PlotData): +class DictData(StructuredData): # For files usually parsed as dicts: eg JSON, Yaml def _processors(self): parent_processors = super()._processors() return [_find_data] + parent_processors -class ListData(PlotData): +class ListData(StructuredData): # For files parsed as list: CSV, TSV pass + + +class BinaryData(PlotData): + pass diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py new file mode 100644 index 0000000000..3361259b76 --- /dev/null +++ b/dvc/repo/plots/render.py @@ -0,0 +1,183 @@ +import logging +import os.path + +import dpath +from funcy import first + +from dvc.repo.plots.data import PlotData, plot_data +from dvc.utils import relpath +from dvc.utils.html import VEGA_DIV_HTML + +logger = logging.getLogger(__name__) + + +def get_files(data): + files = set() + for rev in data.keys(): + for file in data[rev].get("data", {}).keys(): + files.add(file) + return files + + +def group(data): + files = get_files(data) + grouped = [] + for file in files: + found = dpath.util.search(data, ["*", "*", file]) + if found: + grouped.append(found) + return grouped + + +def find_vega(repo, plots_data, target): + found = dpath.util.search(plots_data, ["*", "*", target]) + if found and VegaRenderer.matches(found): + return VegaRenderer(found, repo.plots.templates).get_vega() + return "" + + +def prepare_renderers(plots_data, templates, path): + renderers = [] + for g in group(plots_data): + if VegaRenderer.matches(g): + renderers.append(VegaRenderer(g, templates)) + if ImageRenderer.matches(g): + renderers.append(ImageRenderer(g, document_path=path)) + return renderers + + +def render(repo, plots_data, metrics=None, path=None, html_template_path=None): + renderers = prepare_renderers(plots_data, repo.plots.templates, path) + if not html_template_path: + html_template_path = repo.config.get("plots", {}).get( + "html_template", None + ) + if html_template_path and not os.path.isabs(html_template_path): + html_template_path = os.path.join(repo.dvc_dir, html_template_path) + + from dvc.utils.html import write + + return write( + path, renderers, metrics=metrics, template_path=html_template_path + ) + + +def _resolve_props(data): + # TODO resolving props from latest + resolved = None + for _, rev_data in data.items(): + for _, file_data in rev_data.get("data", {}).items(): + props = file_data.get("props") + if resolved is None: + resolved = props + else: + resolved = {**resolved, **props} + return resolved + + +class VegaRenderer: + def __init__(self, data, templates=None): + self.data = data + self.templates = templates + + files = get_files(self.data) + assert len(files) == 1 + self.filename = files.pop() + + # TODO RETURN dict? + def get_vega(self): + # TODO + props = _resolve_props(self.data) + template = self.templates.load(props.get("template") or "default") + fields = props.get("fields") + if fields is not None: + fields = {*fields, props.get("x"), props.get("y")} - {None} + + if not props.get("x") and template.has_anchor("x"): + props["append_index"] = True + props["x"] = PlotData.INDEX_FIELD + + datapoints = [] + for rev, rev_data in self.data.items(): + for file, file_data in rev_data.get("data", {}).items(): + if "data" in file_data: + datapoints.extend( + plot_data( + file, rev, file_data.get("data", []) + ).to_datapoints( + fields=fields, + path=props.get("path"), + append_index=props.get("append_index", False), + ) + ) + + if datapoints: + if not props.get("y") and template.has_anchor("y"): + fields = list(first(datapoints)) + skip = (PlotData.REVISION_FIELD, props.get("x")) + props["y"] = first( + f for f in reversed(fields) if f not in skip + ) + return template.render(datapoints, props=props) + return None + + # TODO this name + def get_html(self): + plot_string = self.get_vega() + if plot_string: + html = VEGA_DIV_HTML.format( + id=f"plot_{self.filename.replace('.', '_').replace('/', '_')}", + vega_json=plot_string, + ) + return html + return None + + @staticmethod + def matches(data): + files = get_files(data) + extensions = set(map(lambda f: os.path.splitext(f)[1], files)) + return extensions.issubset({".yml", ".yaml", ".json", ".csv", ".tsv"}) + + +class ImageRenderer: + def __init__(self, data, document_path=None): + self.document_path = document_path + self.data = data + + def get_html(self): + static = os.path.join(self.document_path, "static") + os.makedirs(static, exist_ok=True) + div = "" + for rev, rev_data in self.data.items(): + if "data" in rev_data: + for file, file_data in rev_data.get("data", {}).items(): + if "data" in file_data: + if not div: + div += f"

{file}

" + img_path = os.path.join( + static, f"{rev}_{file.replace('/', '_')}" + ) + rel_img_path = relpath(img_path, self.document_path) + with open(img_path, "wb") as fd: + fd.write(file_data["data"]) + div += ( + f"

{rev}

" + f'
' + ) + if div: + div = ( + '
' + f"{div}" + "
" + ) + return div + + @staticmethod + def matches(data): + files = get_files(data) + extensions = set(map(lambda f: os.path.splitext(f)[1], files)) + return extensions.issubset({".jpg", ".jpeg", ".gif", ".png"}) diff --git a/dvc/utils/html.py b/dvc/utils/html.py index 12c58924c0..1a85155f5f 100644 --- a/dvc/utils/html.py +++ b/dvc/utils/html.py @@ -1,3 +1,5 @@ +import os +from pathlib import Path from typing import Dict, List, Optional from dvc.exceptions import DvcException @@ -56,15 +58,6 @@ def with_metrics(self, metrics: Dict[str, Dict]) -> "HTML": self.elements.append(tabulate.tabulate(rows, header, tablefmt="html")) return self - def with_plots(self, plots: Dict[str, Dict]) -> "HTML": - self.elements.extend( - [ - VEGA_DIV_HTML.format(id=f"plot{i}", vega_json=plot) - for i, plot in enumerate(plots.values()) - ] - ) - return self - def with_element(self, html: str) -> "HTML": self.elements.append(html) return self @@ -76,10 +69,13 @@ def embed(self) -> str: def write( path: StrPath, - plots: Dict[str, Dict], + plots: list, metrics: Optional[Dict[str, Dict]] = None, template_path: Optional[StrPath] = None, ): + + os.makedirs(path, exist_ok=True) + page_html = None if template_path: with open(template_path) as fobj: @@ -90,7 +86,12 @@ def write( document.with_metrics(metrics) document.with_element("
") - document.with_plots(plots) + for renderer in plots: + document.with_element(renderer.get_html()) + + index = Path(os.path.join(path, "index.html")) - with open(path, "w") as fd: + # TODO remember to try/catch remove on exception + with open(index, "w") as fd: fd.write(document.embed()) + return index diff --git a/tests/func/plots/test_diff.py b/tests/func/plots/test_diff.py index 7162b88ac6..efce7d4aa3 100644 --- a/tests/func/plots/test_diff.py +++ b/tests/func/plots/test_diff.py @@ -1,10 +1,14 @@ import json +import pytest + from dvc.repo.plots.data import PlotData from tests.func.plots.utils import _write_json +@pytest.mark.skip def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): + metric_1 = [{"y": 2}, {"y": 3}] _write_json(tmp_dir, metric_1, "metric_t.json") run_copy_metrics( diff --git a/tests/func/plots/test_modify.py b/tests/func/plots/test_modify.py index cf818dc11c..7c1aa07604 100644 --- a/tests/func/plots/test_modify.py +++ b/tests/func/plots/test_modify.py @@ -1,4 +1,3 @@ -import json import os import pytest @@ -94,8 +93,5 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): dvc.plots.modify("subdir", {"title": "TITLE"}) result = dvc.plots.show() - p1_content = json.loads(result[p1]) - p2_content = json.loads(result[p2]) - - assert p1_content["title"] == p2_content["title"] == "TITLE" - assert p1_content == p2_content + assert result["workspace"]["data"][p1]["props"]["title"] == "TITLE" + assert result["workspace"]["data"][p2]["props"]["title"] == "TITLE" diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index bdc32e4e8f..72808827ac 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -1,7 +1,5 @@ import json -import logging import os -import shutil from collections import OrderedDict import pytest @@ -11,14 +9,14 @@ from dvc.exceptions import OverlappingOutputPathsError from dvc.main import main from dvc.path_info import PathInfo -from dvc.repo import Repo from dvc.repo.plots.data import PlotData, PlotMetricTypeError +from dvc.repo.plots.render import VegaRenderer from dvc.repo.plots.template import ( BadTemplateError, NoFieldInDataError, TemplateNotFoundError, ) -from dvc.utils import onerror_collect +from dvc.utils import onerror_collect, relpath from dvc.utils.fs import remove from dvc.utils.serialize import ( EncodingError, @@ -26,33 +24,36 @@ dump_yaml, modify_yaml, ) -from tests.func.plots.utils import _write_csv, _write_json +from tests.func.plots.utils import _write_csv +# RENDER def test_plot_csv_one_column(tmp_dir, scm, dvc, run_copy_metrics): # no header - metric = [{"val": 2}, {"val": 3}] - _write_csv(metric, "metric_t.csv", header=False) - run_copy_metrics( - "metric_t.csv", "metric.csv", plots_no_cache=["metric.csv"] - ) - props = { "header": False, "x_label": "x_title", "y_label": "y_title", "title": "mytitle", } - plot_string = dvc.plots.show(props=props)["metric.csv"] + data = { + "workspace": { + "data": { + "file.json": {"data": [{"val": 2}, {"val": 3}], "props": props} + } + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["title"] == "mytitle" assert plot_content["data"]["values"] == [ - {"0": "2", PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"0": "3", PlotData.INDEX_FIELD: 1, "rev": "workspace"}, + {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, + {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, ] assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "0" + assert plot_content["encoding"]["y"]["field"] == "val" assert plot_content["encoding"]["x"]["title"] == "x_title" assert plot_content["encoding"]["y"]["title"] == "y_title" @@ -62,28 +63,28 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), ] - _write_csv(metric, "metric_t.csv") - run_copy_metrics( - "metric_t.csv", "metric.csv", plots_no_cache=["metric.csv"] - ) - plot_string = dvc.plots.show()["metric.csv"] + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": {}}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ { - "val": "2", + "val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace", - "first_val": "100", - "second_val": "100", + "first_val": 100, + "second_val": 100, }, { - "val": "3", + "val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace", - "first_val": "200", - "second_val": "300", + "first_val": 200, + "second_val": 300, }, ] assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD @@ -95,83 +96,31 @@ def test_plot_csv_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), ] - _write_csv(metric, "metric_t.csv") - run_copy_metrics( - "metric_t.csv", "metric.csv", plots_no_cache=["metric.csv"] - ) props = {"x": "first_val", "y": "second_val"} - plot_string = dvc.plots.show(props=props)["metric.csv"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": "2", - "rev": "workspace", - "first_val": "100", - "second_val": "100", - }, - { - "val": "3", - "rev": "workspace", - "first_val": "200", - "second_val": "300", - }, - ] - assert plot_content["encoding"]["x"]["field"] == "first_val" - assert plot_content["encoding"]["y"]["field"] == "second_val" - -def test_plot_json_single_val(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - plot_string = dvc.plots.show()["metric.json"] - - plot_json = json.loads(plot_string) - assert plot_json["data"]["values"] == [ - {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] - assert plot_json["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_json["encoding"]["y"]["field"] == "val" - - -def test_plot_json_multiple_val(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - plot_string = dvc.plots.show()["metric.json"] + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ { "val": 2, - PlotData.INDEX_FIELD: 0, - "first_val": 100, "rev": "workspace", + "first_val": 100, + "second_val": 100, }, { "val": 3, - PlotData.INDEX_FIELD: 1, - "first_val": 200, "rev": "workspace", + "first_val": 200, + "second_val": 300, }, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" + assert plot_content["encoding"]["x"]["field"] == "first_val" + assert plot_content["encoding"]["y"]["field"] == "second_val" def test_plot_confusion(tmp_dir, dvc, run_copy_metrics): @@ -179,17 +128,15 @@ def test_plot_confusion(tmp_dir, dvc, run_copy_metrics): {"predicted": "B", "actual": "A"}, {"predicted": "A", "actual": "A"}, ] - _write_json(tmp_dir, confusion_matrix, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - props = {"template": "confusion", "x": "predicted", "y": "actual"} - show = dvc.plots.show(props=props) - plot_string = show["metric.json"] + + data = { + "workspace": { + "data": {"file.json": {"data": confusion_matrix, "props": props}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ @@ -209,20 +156,20 @@ def test_plot_confusion_normalized(tmp_dir, dvc, run_copy_metrics): {"predicted": "B", "actual": "A"}, {"predicted": "A", "actual": "A"}, ] - _write_json(tmp_dir, confusion_matrix, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) props = { "template": "confusion_normalized", "x": "predicted", "y": "actual", } - plot_string = dvc.plots.show(props=props)["metric.json"] + + data = { + "workspace": { + "data": {"file.json": {"data": confusion_matrix, "props": props}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ @@ -240,84 +187,28 @@ def test_plot_confusion_normalized(tmp_dir, dvc, run_copy_metrics): def test_plot_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): metric_1 = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric_1, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - metric_2 = [{"y": 3}, {"y": 5}] - _write_json(tmp_dir, metric_2, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="second", - tag="v2", - ) - metric_3 = [{"y": 5}, {"y": 6}] - _write_json(tmp_dir, metric_3, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="third", - ) - plot_string = dvc.plots.show( - revs=["HEAD", "v2", "v1"], props={"fields": {"y"}} - )["metric.json"] - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "v2"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "v2"}, - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - -def test_plot_multiple_revs(tmp_dir, scm, dvc, run_copy_metrics): - templates_dir = dvc.plots.templates.templates_dir - shutil.copy( - os.path.join(templates_dir, "default.json"), - os.path.join(templates_dir, "template.json"), - ) - - metric_1 = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric_1, "metric_t.json") - stage = run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - metric_2 = [{"y": 3}, {"y": 5}] - _write_json(tmp_dir, metric_2, "metric_t.json") - assert dvc.reproduce(stage.addressing) == [stage] - scm.add(["metric.json", stage.path]) - scm.commit("second") - scm.tag("v2") - - metric_3 = [{"y": 5}, {"y": 6}] - _write_json(tmp_dir, metric_3, "metric_t.json") - assert dvc.reproduce(stage.addressing) == [stage] - scm.add(["metric.json", stage.path]) - scm.commit("third") + data = { + "HEAD": { + "data": { + "file.json": {"data": metric_3, "props": {"fields": {"y"}}} + } + }, + "v2": { + "data": { + "file.json": {"data": metric_2, "props": {"fields": {"y"}}} + } + }, + "v1": { + "data": { + "file.json": {"data": metric_1, "props": {"fields": {"y"}}} + } + }, + } - props = {"template": "template.json"} - plot_string = dvc.plots.show(revs=["HEAD", "v2", "v1"], props=props)[ - "metric.json" - ] + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ @@ -332,28 +223,21 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "y" +# TODO add tests for grouping def test_plot_even_if_metric_missing( tmp_dir, scm, dvc, caplog, run_copy_metrics ): - tmp_dir.scm_gen("some_file", "content", commit="there is no metric") - scm.tag("v1") metric = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="there is metric", - tag="v2", - ) - - caplog.clear() - with caplog.at_level(logging.WARNING, "dvc"): - plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) - assert "'metric.json' was not found at: 'v1'." in caplog.text + data = { + "v2": {"data": {"file.json": {"data": metric, "props": {}}}}, + "workspace": { + "data": {"file.json": {"error": FileNotFoundError(), "props": {}}} + }, + } + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - plot_content = json.loads(plots["metric.json"]) + plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v2"}, {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v2"}, @@ -362,51 +246,47 @@ def test_plot_even_if_metric_missing( assert plot_content["encoding"]["y"]["field"] == "y" -def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): - metric = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - stage = run_copy_metrics( - "metric_t.json", - "metric.json", - plots=["metric.json"], - commit="there is metric", - ) - scm.tag("v1") - - # Make a different plot and then remove its datafile - metric = [{"y": 3}, {"y": 4}] - _write_json(tmp_dir, metric, "metric_t.json") - stage = run_copy_metrics( - "metric_t.json", - "metric.json", - plots=["metric.json"], - commit="there is an another metric", - ) - scm.tag("v2") - remove(stage.outs[0].fspath) - remove(stage.outs[0].cache_path) - - plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) - plot_content = json.loads(plots["metric.json"]) - assert plot_content["data"]["values"] == [ - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, - ] - - -def test_custom_template(tmp_dir, scm, dvc, custom_template, run_copy_metrics): +# TODO to collect? +# def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): +# metric = [{"y": 2}, {"y": 3}] +# _write_json(tmp_dir, metric, "metric_t.json") +# stage = run_copy_metrics( +# "metric_t.json", +# "metric.json", +# plots=["metric.json"], +# commit="there is metric", +# ) +# scm.tag("v1") +# +# Make a different plot and then remove its datafile +# metric = [{"y": 3}, {"y": 4}] +# _write_json(tmp_dir, metric, "metric_t.json") +# stage = run_copy_metrics( +# "metric_t.json", +# "metric.json", +# plots=["metric.json"], +# commit="there is an another metric", +# ) +# scm.tag("v2") +# remove(stage.outs[0].fspath) +# remove(stage.outs[0].cache_path) +# +# plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) +# plot_content = json.loads(plots["metric.json"]) +# assert plot_content["data"]["values"] == [ +# {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, +# {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, +# ] + + +def test_custom_template(tmp_dir, scm, dvc, custom_template): metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - props = {"template": os.fspath(custom_template), "x": "a", "y": "b"} - plot_string = dvc.plots.show(props=props)["metric.json"] + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ @@ -417,40 +297,27 @@ def test_custom_template(tmp_dir, scm, dvc, custom_template, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "b" -def _replace(path, src, dst): - path.write_text(path.read_text().replace(src, dst)) - - def test_should_raise_on_no_template(tmp_dir, dvc, run_copy_metrics): metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) + props = {"template": "non_existing_template.json"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } with pytest.raises(TemplateNotFoundError): - props = {"template": "non_existing_template.json"} - dvc.plots.show("metric.json", props=props) + VegaRenderer(data, dvc.plots.templates).get_vega() -def test_bad_template(tmp_dir, dvc, run_copy_metrics): +def test_bad_template(tmp_dir, dvc): metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - tmp_dir.gen("template.json", json.dumps({"a": "b", "c": "d"})) + props = {"template": "template.json"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } with pytest.raises(BadTemplateError): - props = {"template": "template.json"} - dvc.plots.show("metric.json", props=props) + VegaRenderer(data, dvc.plots.templates).get_vega() def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): @@ -474,22 +341,17 @@ def test_plot_choose_columns( tmp_dir, scm, dvc, custom_template, run_copy_metrics ): metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - props = { "template": os.fspath(custom_template), "fields": {"b", "c"}, "x": "b", "y": "c", } - plot_string = dvc.plots.show(props=props)["metric.json"] + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ @@ -500,180 +362,129 @@ def test_plot_choose_columns( assert plot_content["encoding"]["y"]["field"] == "c" -def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - plot_string = dvc.plots.show(props={"fields": {"b"}})["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {PlotData.INDEX_FIELD: 0, "b": 2, "rev": "workspace"}, - {PlotData.INDEX_FIELD: 1, "b": 3, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "b" - - -def test_plot_yaml(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - dump_yaml("metric_t.yaml", metric) - run_copy_metrics( - "metric_t.yaml", "metric.yaml", plots_no_cache=["metric.yaml"] - ) - - plot_string = dvc.plots.show()["metric.yaml"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] +# TODO ?? +# def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): +# metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] +# _write_json(tmp_dir, metric, "metric_t.json") +# run_copy_metrics( +# "metric_t.json", +# "metric.json", +# plots_no_cache=["metric.json"], +# commit="init", +# tag="v1", +# ) +# +# plot_string = dvc.plots.show(props={"fields": {"b"}})["metric.json"] +# +# plot_content = json.loads(plot_string) +# assert plot_content["data"]["values"] == [ +# {PlotData.INDEX_FIELD: 0, "b": 2, "rev": "workspace"}, +# {PlotData.INDEX_FIELD: 1, "b": 3, "rev": "workspace"}, +# ] +# assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD +# assert plot_content["encoding"]["y"]["field"] == "b" +# def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - with pytest.raises(NoFieldInDataError): - dvc.plots.show("metric.json", props={"x": "no_val"}) + data = { + "workspace": { + "data": {"file.json": {"data": metric, "props": {"x": "no_val"}}} + } + } with pytest.raises(NoFieldInDataError): - dvc.plots.show("metric.json", props={"y": "no_val"}) - - -def test_multiple_plots(tmp_dir, scm, dvc, run_copy_metrics): - metric1 = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - metric2 = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - _write_csv(metric1, "metric_t1.csv") - _write_json(tmp_dir, metric2, "metric_t2.json") - run_copy_metrics( - "metric_t1.csv", "metric1.csv", plots_no_cache=["metric1.csv"] - ) - run_copy_metrics( - "metric_t2.json", "metric2.json", plots_no_cache=["metric2.json"] - ) - - assert len(dvc.plots.show().keys()) == 2 - - -@pytest.mark.parametrize("use_dvc", [True, False]) -def test_show_non_plot(tmp_dir, scm, use_dvc): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(tmp_dir, metric, "metric.json") - - if use_dvc: - dvc = Repo.init() - else: - dvc = Repo(uninitialized=True) - - plot_string = dvc.plots.show(targets=["metric.json"])["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": 2, - PlotData.INDEX_FIELD: 0, - "first_val": 100, - "rev": "workspace", - }, - { - "val": 3, - PlotData.INDEX_FIELD: 1, - "first_val": 200, - "rev": "workspace", - }, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" - - if not use_dvc: - assert not (tmp_dir / ".dvc").exists() - - -def test_show_non_plot_and_plot_with_params( - tmp_dir, scm, dvc, run_copy_metrics -): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(tmp_dir, metric, "metric.json") - run_copy_metrics( - "metric.json", "metric2.json", plots_no_cache=["metric2.json"] - ) - - dvc.plots.modify("metric2.json", props={"title": "TITLE"}) - result = dvc.plots.show(targets=["metric.json", "metric2.json"]) - - plot_content = json.loads(result["metric.json"]) - plot2_content = json.loads(result["metric2.json"]) - - assert plot2_content["title"] == "TITLE" - - assert plot_content != plot2_content - plot_content.pop("title") - plot2_content.pop("title") - assert plot_content == plot2_content - - -def test_show_no_repo(tmp_dir): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(tmp_dir, metric, "metric.json") - - dvc = Repo(uninitialized=True) - - dvc.plots.show(["metric.json"]) - - -def test_show_from_subdir(tmp_dir, dvc, capsys): - subdir = tmp_dir / "subdir" - - subdir.mkdir() - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(subdir, metric, "metric.json") - - with subdir.chdir(): - assert main(["plots", "show", "metric.json"]) == 0 - - out, _ = capsys.readouterr() - assert subdir.as_uri() in out - assert (subdir / "plots.html").exists() - - -def test_show_malformed_plots(tmp_dir, scm, dvc, caplog): - tmp_dir.gen("plot.json", '[{"m":1}]') - scm.add(["plot.json"]) - scm.commit("initial") - - tmp_dir.gen("plot.json", '[{"m":1]') - - result = dvc.plots.show(targets=["plot.json"], revs=["workspace", "HEAD"]) - plot_content = json.loads(result["plot.json"]) - - assert plot_content["data"]["values"] == [ - {"m": 1, "rev": "HEAD", "step": 0} - ] - - -def test_plots_show_non_existing(tmp_dir, dvc): - assert dvc.plots.show(targets=["plot.json"]) == {} + VegaRenderer(data, dvc.plots.templates).get_vega() + + +# TODO move to collect? +# @pytest.mark.parametrize("use_dvc", [True, False]) +# def test_show_non_plot(tmp_dir, scm, use_dvc): +# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] +# _write_json(tmp_dir, metric, "metric.json") +# +# if use_dvc: +# dvc = Repo.init() +# else: +# dvc = Repo(uninitialized=True) +# +# plot_string = dvc.plots.show(targets=["metric.json"])["metric.json"] +# +# plot_content = json.loads(plot_string) +# assert plot_content["data"]["values"] == [ +# { +# "val": 2, +# PlotData.INDEX_FIELD: 0, +# "first_val": 100, +# "rev": "workspace", +# }, +# { +# "val": 3, +# PlotData.INDEX_FIELD: 1, +# "first_val": 200, +# "rev": "workspace", +# }, +# ] +# assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD +# assert plot_content["encoding"]["y"]["field"] == "val" +# +# if not use_dvc: +# assert not (tmp_dir / ".dvc").exists() +# + +# TODO? +# def test_show_non_plot_and_plot_with_params( +# tmp_dir, scm, dvc, run_copy_metrics +# ): +# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] +# _write_json(tmp_dir, metric, "metric.json") +# run_copy_metrics( +# "metric.json", "metric2.json", plots_no_cache=["metric2.json"] +# ) + +# dvc.plots.modify("metric2.json", props={"title": "TITLE"}) +# result = dvc.plots.show(targets=["metric.json", "metric2.json"]) + +# plot_content = json.loads(result["metric.json"]) +# plot2_content = json.loads(result["metric2.json"]) + +# assert plot2_content["title"] == "TITLE" + +# assert plot_content != plot2_content +# plot_content.pop("title") +# plot2_content.pop("title") +# assert plot_content == plot2_content + +# TODO? +# def test_show_no_repo(tmp_dir): +# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] +# _write_json(tmp_dir, metric, "metric.json") + +# dvc = Repo(uninitialized=True) + +# dvc.plots.show(["metric.json"]) + + +# TODO? +# def test_show_from_subdir(tmp_dir, dvc, capsys): +# subdir = tmp_dir / "subdir" + +# subdir.mkdir() +# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] +# _write_json(subdir, metric, "metric.json") + +# with subdir.chdir(): +# assert main(["plots", "show", "metric.json"]) == 0 + +# out, _ = capsys.readouterr() +# assert subdir.as_uri() in out +# assert (subdir / "dvc_plots").is_dir() +# assert (subdir / "dvc_plots" / "index.html").is_file() + +# TODO collect format +# def test_plots_show_non_existing(tmp_dir, dvc): +# assert dvc.plots.show(targets=["plot.json"]) == {} @pytest.mark.parametrize("clear_before_run", [True, False]) @@ -711,70 +522,72 @@ def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): ) -def test_dir_plots(tmp_dir, dvc, run_copy_metrics): - subdir = tmp_dir / "subdir" - subdir.mkdir() - - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - - fname = "file.json" - _write_json(tmp_dir, metric, fname) - - p1 = os.path.join("subdir", "p1.json") - p2 = os.path.join("subdir", "p2.json") - tmp_dir.dvc.run( - cmd=( - f"mkdir subdir && python copy.py {fname} {p1} && " - f"python copy.py {fname} {p2}" - ), - deps=[fname], - single_stage=False, - plots=["subdir"], - name="copy_double", - ) - dvc.plots.modify("subdir", {"title": "TITLE"}) - - result = dvc.plots.show() - p1_content = json.loads(result[p1]) - p2_content = json.loads(result[p2]) - - assert p1_content["title"] == p2_content["title"] == "TITLE" - - -def test_show_dir_plots(tmp_dir, dvc, run_copy_metrics): - subdir = tmp_dir / "subdir" - subdir.mkdir() - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - - fname = "file.json" - _write_json(tmp_dir, metric, fname) - - p1 = os.path.join("subdir", "p1.json") - p2 = os.path.join("subdir", "p2.json") - tmp_dir.dvc.run( - cmd=( - f"mkdir subdir && python copy.py {fname} {p1} && " - f"python copy.py {fname} {p2}" - ), - deps=[fname], - single_stage=False, - plots=["subdir"], - name="copy_double", - ) - - result = dvc.plots.show(targets=["subdir"]) - p1_content = json.loads(result[p1]) - p2_content = json.loads(result[p2]) - - assert p1_content == p2_content - - result = dvc.plots.show(targets=[p1]) - assert set(result.keys()) == {p1} - - remove(dvc.odb.local.cache_dir) - remove(subdir) - - assert dvc.plots.show() == {} +# TODO +# def test_dir_plots(tmp_dir, dvc, run_copy_metrics): +# subdir = tmp_dir / "subdir" +# subdir.mkdir() +# +# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] +# +# fname = "file.json" +# _write_json(tmp_dir, metric, fname) +# +# p1 = os.path.join("subdir", "p1.json") +# p2 = os.path.join("subdir", "p2.json") +# tmp_dir.dvc.run( +# cmd=( +# f"mkdir subdir && python copy.py {fname} {p1} && " +# f"python copy.py {fname} {p2}" +# ), +# deps=[fname], +# single_stage=False, +# plots=["subdir"], +# name="copy_double", +# ) +# dvc.plots.modify("subdir", {"title": "TITLE"}) +# +# result = dvc.plots.show() +# p1_content = json.loads(result[p1]) +# p2_content = json.loads(result[p2]) +# +# assert p1_content["title"] == p2_content["title"] == "TITLE" + + +# TODO? +# def test_show_dir_plots(tmp_dir, dvc, run_copy_metrics): +# subdir = tmp_dir / "subdir" +# subdir.mkdir() +# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] +# +# fname = "file.json" +# _write_json(tmp_dir, metric, fname) +# +# p1 = os.path.join("subdir", "p1.json") +# p2 = os.path.join("subdir", "p2.json") +# tmp_dir.dvc.run( +# cmd=( +# f"mkdir subdir && python copy.py {fname} {p1} && " +# f"python copy.py {fname} {p2}" +# ), +# deps=[fname], +# single_stage=False, +# plots=["subdir"], +# name="copy_double", +# ) +# +# result = dvc.plots.show(targets=["subdir"]) +# p1_content = json.loads(result[p1]) +# p2_content = json.loads(result[p2]) +# +# assert p1_content == p2_content +# +# result = dvc.plots.show(targets=[p1]) +# assert set(result.keys()) == {p1} +# +# remove(dvc.odb.local.cache_dir) +# remove(subdir) +# +# assert dvc.plots.show() == {} def test_ignore_binary_file(tmp_dir, dvc, run_copy_metrics): @@ -821,3 +634,43 @@ def test_log_errors( "DVC failed to load some plots for following revisions: 'workspace'." in error ) + + +def test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template): + with open("image.jpg", "wb") as fd: + fd.write(b"content") + + metric = [{"val": 2}, {"val": 3}] + _write_csv(metric, "metric_t.csv") + + dvc.add(["image.jpg", "metric_t.csv"]) + run_copy_metrics( + "metric_t.csv", + "metric.csv", + plots=["metric.csv"], + name="s1", + single_stage=False, + ) + run_copy_metrics( + "image.jpg", + "plot.jpg", + commit="run training", + plots=["plot.jpg"], + name="s2", + single_stage=False, + ) + dvc.plots.modify( + "metric.csv", props={"template": relpath(custom_template)} + ) + scm.add(["dvc.yaml", "dvc.lock"]) + scm.commit("initial") + + scm.tag("v1") + + with open("plot.jpg", "wb") as fd: + fd.write(b"content2") + + _write_csv([{"val": 3}, {"val": 4}], "metric.csv") + + # dvc.plots.show(revs=["v1", "workspace"]) + main(["plots", "diff", "v1"]) diff --git a/tests/func/test_live.py b/tests/func/test_live.py index 102a9adbe9..cbd04e29c2 100644 --- a/tests/func/test_live.py +++ b/tests/func/test_live.py @@ -6,6 +6,7 @@ from funcy import first from dvc import stage as stage_module +from dvc.repo.plots.render import get_files LIVE_SCRIPT = dedent( """ @@ -122,9 +123,10 @@ def test_live_provides_metrics(tmp_dir, dvc, live_stage): } assert (tmp_dir / "logs").is_dir() - plots = dvc.plots.show() - assert os.path.join("logs", "accuracy.tsv") in plots - assert os.path.join("logs", "loss.tsv") in plots + plots_data = dvc.plots.show() + files = get_files(plots_data) + assert os.path.join("logs", "accuracy.tsv") in files + assert os.path.join("logs", "loss.tsv") in files def test_live_provides_no_metrics(tmp_dir, dvc, live_stage): @@ -134,9 +136,10 @@ def test_live_provides_no_metrics(tmp_dir, dvc, live_stage): assert dvc.metrics.show() == {"": {}} assert (tmp_dir / "logs").is_dir() - plots = dvc.plots.show() - assert os.path.join("logs", "accuracy.tsv") in plots - assert os.path.join("logs", "loss.tsv") in plots + plots_data = dvc.plots.show() + files = get_files(plots_data) + assert os.path.join("logs", "accuracy.tsv") in files + assert os.path.join("logs", "loss.tsv") in files @pytest.mark.parametrize("typ", ("live", "live_no_cache")) @@ -156,7 +159,7 @@ def test_experiments_track_summary(tmp_dir, scm, dvc, live_stage, typ): def test_live_html(tmp_dir, dvc, live_stage, html): live_stage(html=html, live="logs") - assert (tmp_dir / "logs.html").is_file() == html + assert (tmp_dir / "logs_dvc_plots" / "index.html").is_file() == html @pytest.fixture diff --git a/tests/unit/command/test_plots.py b/tests/unit/command/test_plots.py index cff6b3fc50..de9e5708a3 100644 --- a/tests/unit/command/test_plots.py +++ b/tests/unit/command/test_plots.py @@ -5,9 +5,21 @@ from dvc.cli import parse_args from dvc.command.plots import CmdPlotsDiff, CmdPlotsShow +from dvc.path_info import PathInfo -def test_plots_diff(dvc, mocker): +@pytest.fixture +def plots_data(): + yield { + "revision": { + "data": { + "plot.csv": {"data": [{"val": 1}, {"val": 2}], "props": {}} + } + } + } + + +def test_plots_diff(dvc, mocker, plots_data): cli_args = parse_args( [ "plots", @@ -38,9 +50,7 @@ def test_plots_diff(dvc, mocker): assert cli_args.func == CmdPlotsDiff cmd = cli_args.func(cli_args) - m = mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} - ) + m = mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) assert cmd.run() == 0 @@ -60,7 +70,7 @@ def test_plots_diff(dvc, mocker): ) -def test_plots_show_vega(dvc, mocker): +def test_plots_show_vega(dvc, mocker, plots_data): cli_args = parse_args( [ "plots", @@ -80,7 +90,7 @@ def test_plots_show_vega(dvc, mocker): m = mocker.patch( "dvc.repo.plots.Plots.show", - return_value={"datafile": "filledtemplate"}, + return_value=plots_data, ) assert cmd.run() == 0 @@ -91,7 +101,7 @@ def test_plots_show_vega(dvc, mocker): ) -def test_plots_diff_vega(dvc, mocker, capsys): +def test_plots_diff_vega(dvc, mocker, capsys, plots_data): cli_args = parse_args( [ "plots", @@ -100,72 +110,83 @@ def test_plots_diff_vega(dvc, mocker, capsys): "HEAD~1", "--show-vega", "--targets", - "plots.csv", + "plot.csv", ] ) cmd = cli_args.func(cli_args) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"plots.csv": "plothtml"} + "dvc.command.plots.find_vega", return_value="vega_json_content" ) assert cmd.run() == 0 out, _ = capsys.readouterr() - assert "plothtml" in out + assert "vega_json_content" in out -def test_plots_diff_open(tmp_dir, dvc, mocker, capsys): + +def test_plots_diff_open(tmp_dir, dvc, mocker, capsys, plots_data): mocked_open = mocker.patch("webbrowser.open", return_value=True) - cli_args = parse_args(["plots", "diff", "--targets", "datafile", "--open"]) - cmd = cli_args.func(cli_args) - mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + cli_args = parse_args( + ["plots", "diff", "--targets", "plots.csv", "--open"] ) + cmd = cli_args.func(cli_args) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) - assert cmd.run() == 0 - mocked_open.assert_called_once_with("plots.html") + rel = PathInfo("dvc_plots") / "index.html" + absolute_path = tmp_dir / rel - expected_url = posixpath.join(tmp_dir.as_uri(), "plots.html") + assert cmd.run() == 0 + mocked_open.assert_called_once_with(absolute_path) out, _ = capsys.readouterr() - assert expected_url in out + assert absolute_path.as_uri() in out -def test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys): +def test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys, plots_data): mocked_open = mocker.patch("webbrowser.open", return_value=False) - cli_args = parse_args(["plots", "diff", "--targets", "datafile", "--open"]) + cli_args = parse_args( + ["plots", "diff", "--targets", "plots.csv", "--open"] + ) cmd = cli_args.func(cli_args) mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + "dvc.repo.plots.diff.diff", return_value={"datafile": plots_data} ) assert cmd.run() == 1 - mocked_open.assert_called_once_with("plots.html") + expected_url = tmp_dir / "dvc_plots" / "index.html" + mocked_open.assert_called_once_with(expected_url) error_message = "Failed to open. Please try opening it manually." - expected_url = posixpath.join(tmp_dir.as_uri(), "plots.html") out, err = capsys.readouterr() - assert expected_url in out + assert expected_url.as_uri() in out assert error_message in err @pytest.mark.parametrize( "output, expected_url_path", [ - ("plots file with spaces.html", "plots%20file%20with%20spaces.html"), - (os.path.join("dir", "..", "plots.html"), "plots.html"), + ( + "plots file with spaces", + posixpath.join("plots%20file%20with%20spaces", "index.html"), + ), + ( + os.path.join("dir", "..", "plots"), + posixpath.join("plots", "index.html"), + ), ], ids=["quote", "resolve"], ) def test_plots_path_is_quoted_and_resolved_properly( - tmp_dir, dvc, mocker, capsys, output, expected_url_path + tmp_dir, dvc, mocker, capsys, output, expected_url_path, plots_data ): cli_args = parse_args( ["plots", "diff", "--targets", "datafile", "--out", output] ) cmd = cli_args.func(cli_args) mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + "dvc.repo.plots.diff.diff", return_value={"datafile": plots_data} ) assert cmd.run() == 0 diff --git a/tests/unit/test_plots.py b/tests/unit/test_plots.py index b0e8f2049f..39ccc6d617 100644 --- a/tests/unit/test_plots.py +++ b/tests/unit/test_plots.py @@ -1,6 +1,8 @@ import json import os +from dvc.repo.plots.render import get_files + def test_plots_order(tmp_dir, dvc): tmp_dir.gen( @@ -26,9 +28,9 @@ def test_plots_order(tmp_dir, dvc): name="stage2", ) - assert list(dvc.plots.show()) == [ + assert get_files(dvc.plots.show()) == { "p.json", os.path.join("sub", "p4.json"), "p1.json", os.path.join("sub", "p3.json"), - ] + } From 629f16205f0363a47e1ba6d78a5685743df1ad56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Mon, 16 Aug 2021 17:10:01 +0200 Subject: [PATCH 02/16] plots: data: unify PlotsData --- dvc/command/plots.py | 6 ------ dvc/repo/plots/data.py | 31 ++---------------------------- tests/unit/repo/plots/test_data.py | 6 ++++-- 3 files changed, 6 insertions(+), 37 deletions(-) diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 9cff8056cb..2c7b1040fe 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -66,12 +66,6 @@ def run(self): url = index_path.as_uri() ui.write(url) - # TODO show first vega plot - # if self.args.show_vega: - # target = self.args.targets[0] - # ui.write(plots[target]) - # return 0 - if self.args.open: import webbrowser diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index fc19ad6208..968377be38 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -1,4 +1,3 @@ -import os from copy import copy from funcy import first @@ -32,15 +31,7 @@ def __init__(self, path, revision): def plot_data(filename, revision, content): - _, extension = os.path.splitext(filename.lower()) - if extension in (".json", ".yaml"): - # TODO DO SOMETHING ABOUT THAT - return DictData(filename, revision, content) - if extension in (".csv", ".tsv"): - return ListData(filename, revision, content) - if extension in (".jpeg", ".jpg", ".gif", ".png"): - return BinaryData(filename, revision, content) - raise PlotMetricTypeError(filename) + return PlotData(filename, revision, content) def _filter_fields(data_points, filename, revision, fields=None, **kwargs): @@ -116,10 +107,8 @@ def __init__(self, filename, revision, content, **kwargs): self.revision = revision self.content = content - -class StructuredData(PlotData): def _processors(self): - return [_filter_fields, _append_index, _append_revision] + return [_find_data, _filter_fields, _append_index, _append_revision] def to_datapoints(self, **kwargs): data = self.content @@ -129,19 +118,3 @@ def to_datapoints(self, **kwargs): data, filename=self.filename, revision=self.revision, **kwargs ) return data - - -class DictData(StructuredData): - # For files usually parsed as dicts: eg JSON, Yaml - def _processors(self): - parent_processors = super()._processors() - return [_find_data] + parent_processors - - -class ListData(StructuredData): - # For files parsed as list: CSV, TSV - pass - - -class BinaryData(PlotData): - pass diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py index 93f2975234..0e64146d28 100644 --- a/tests/unit/repo/plots/test_data.py +++ b/tests/unit/repo/plots/test_data.py @@ -3,7 +3,9 @@ import pytest -from dvc.repo.plots.data import DictData, _lists +from dvc.repo.plots.data import PlotData, _lists + +# TODO do we need those tests? @pytest.mark.parametrize( @@ -28,7 +30,7 @@ def test_find_data_in_dict(tmp_dir): m2 = [{"x": 1}, {"x": 2}] dmetric = OrderedDict([("t1", m1), ("t2", m2)]) - plot_data = DictData("-", "revision", dmetric) + plot_data = PlotData("-", "revision", dmetric) def points_with(datapoints: List, additional_info: Dict): for datapoint in datapoints: From c6be64ef763dc1fdb6d1dc7de73f9dbe4a9a3412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 17 Aug 2021 10:22:54 +0200 Subject: [PATCH 03/16] plots: data conversion refactor --- dvc/command/plots.py | 1 - dvc/repo/plots/data.py | 55 ++++++++-------------- dvc/repo/plots/render.py | 39 +++++++--------- tests/func/plots/test_diff.py | 22 ++++----- tests/func/plots/test_show.py | 87 +++++++++++++++++++---------------- 5 files changed, 94 insertions(+), 110 deletions(-) diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 2c7b1040fe..5774739a12 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -36,7 +36,6 @@ def run(self): return 1 try: - # TODO collect raw data and apply props later? plots_data = self._func( targets=self.args.targets, props=self._props() ) diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index 968377be38..6c71944979 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -1,9 +1,11 @@ -from copy import copy +from copy import copy, deepcopy from funcy import first from dvc.exceptions import DvcException -from dvc.utils.serialize import ParseError + +REVISION_FIELD = "rev" +INDEX_FIELD = "step" class PlotMetricTypeError(DvcException): @@ -22,18 +24,6 @@ def __init__(self): ) -class PlotParsingError(ParseError): - def __init__(self, path, revision): - self.path = path - self.revision = revision - - super().__init__(path, f"revision: '{revision}'") - - -def plot_data(filename, revision, content): - return PlotData(filename, revision, content) - - def _filter_fields(data_points, filename, revision, fields=None, **kwargs): if not fields: return data_points @@ -84,37 +74,30 @@ def _find_data(data, fields=None, **kwargs): def _append_index(data_points, append_index=False, **kwargs): - if not append_index or PlotData.INDEX_FIELD in first(data_points).keys(): + if not append_index or INDEX_FIELD in first(data_points).keys(): return data_points for index, data_point in enumerate(data_points): - data_point[PlotData.INDEX_FIELD] = index + data_point[INDEX_FIELD] = index return data_points def _append_revision(data_points, revision, **kwargs): for data_point in data_points: - data_point[PlotData.REVISION_FIELD] = revision + data_point[REVISION_FIELD] = revision return data_points -class PlotData: - REVISION_FIELD = "rev" - INDEX_FIELD = "step" - - def __init__(self, filename, revision, content, **kwargs): - self.filename = filename - self.revision = revision - self.content = content - - def _processors(self): - return [_find_data, _filter_fields, _append_index, _append_revision] - - def to_datapoints(self, **kwargs): - data = self.content +def to_datapoints(data, revision, filename, **kwargs): + result = deepcopy(data) + for processor in [ + _find_data, + _filter_fields, + _append_index, + _append_revision, + ]: + result = processor( + result, revision=revision, filename=filename, **kwargs + ) - for data_proc in self._processors(): - data = data_proc( - data, filename=self.filename, revision=self.revision, **kwargs - ) - return data + return result diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index 3361259b76..e7418b6ee3 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -4,7 +4,7 @@ import dpath from funcy import first -from dvc.repo.plots.data import PlotData, plot_data +from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD, to_datapoints from dvc.utils import relpath from dvc.utils.html import VEGA_DIV_HTML @@ -62,19 +62,6 @@ def render(repo, plots_data, metrics=None, path=None, html_template_path=None): ) -def _resolve_props(data): - # TODO resolving props from latest - resolved = None - for _, rev_data in data.items(): - for _, file_data in rev_data.get("data", {}).items(): - props = file_data.get("props") - if resolved is None: - resolved = props - else: - resolved = {**resolved, **props} - return resolved - - class VegaRenderer: def __init__(self, data, templates=None): self.data = data @@ -84,10 +71,17 @@ def __init__(self, data, templates=None): assert len(files) == 1 self.filename = files.pop() - # TODO RETURN dict? + def _squash_props(self): + resolved = {} + for rev_data in self.data.values(): + for file_data in rev_data.get("data", {}).values(): + props = file_data.get("props", {}) + resolved = {**resolved, **props} + return resolved + def get_vega(self): - # TODO - props = _resolve_props(self.data) + props = self._squash_props() + template = self.templates.load(props.get("template") or "default") fields = props.get("fields") if fields is not None: @@ -95,16 +89,17 @@ def get_vega(self): if not props.get("x") and template.has_anchor("x"): props["append_index"] = True - props["x"] = PlotData.INDEX_FIELD + props["x"] = INDEX_FIELD datapoints = [] for rev, rev_data in self.data.items(): for file, file_data in rev_data.get("data", {}).items(): if "data" in file_data: datapoints.extend( - plot_data( - file, rev, file_data.get("data", []) - ).to_datapoints( + to_datapoints( + file_data.get("data", []), + revision=rev, + filename=file, fields=fields, path=props.get("path"), append_index=props.get("append_index", False), @@ -114,7 +109,7 @@ def get_vega(self): if datapoints: if not props.get("y") and template.has_anchor("y"): fields = list(first(datapoints)) - skip = (PlotData.REVISION_FIELD, props.get("x")) + skip = (REVISION_FIELD, props.get("x")) props["y"] = first( f for f in reversed(fields) if f not in skip ) diff --git a/tests/func/plots/test_diff.py b/tests/func/plots/test_diff.py index efce7d4aa3..c8b62b70cb 100644 --- a/tests/func/plots/test_diff.py +++ b/tests/func/plots/test_diff.py @@ -2,7 +2,7 @@ import pytest -from dvc.repo.plots.data import PlotData +from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD from tests.func.plots.utils import _write_json @@ -37,12 +37,12 @@ def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, + {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "y" _write_json(tmp_dir, [{"y": 7}, {"y": 8}], "metric.json") @@ -51,10 +51,10 @@ def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 7, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"y": 8, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 7, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, + {"y": 8, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "y" diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index 72808827ac..fc646ed5f8 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -9,7 +9,11 @@ from dvc.exceptions import OverlappingOutputPathsError from dvc.main import main from dvc.path_info import PathInfo -from dvc.repo.plots.data import PlotData, PlotMetricTypeError +from dvc.repo.plots.data import ( + INDEX_FIELD, + REVISION_FIELD, + PlotMetricTypeError, +) from dvc.repo.plots.render import VegaRenderer from dvc.repo.plots.template import ( BadTemplateError, @@ -49,10 +53,10 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc, run_copy_metrics): plot_content = json.loads(plot_string) assert plot_content["title"] == "mytitle" assert plot_content["data"]["values"] == [ - {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, + {"val": 2, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, + {"val": 3, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "val" assert plot_content["encoding"]["x"]["title"] == "x_title" assert plot_content["encoding"]["y"]["title"] == "y_title" @@ -74,20 +78,20 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["data"]["values"] == [ { "val": 2, - PlotData.INDEX_FIELD: 0, - "rev": "workspace", + INDEX_FIELD: 0, + REVISION_FIELD: "workspace", "first_val": 100, "second_val": 100, }, { "val": 3, - PlotData.INDEX_FIELD: 1, - "rev": "workspace", + INDEX_FIELD: 1, + REVISION_FIELD: "workspace", "first_val": 200, "second_val": 300, }, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "val" @@ -108,13 +112,13 @@ def test_plot_csv_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["data"]["values"] == [ { "val": 2, - "rev": "workspace", + REVISION_FIELD: "workspace", "first_val": 100, "second_val": 100, }, { "val": 3, - "rev": "workspace", + REVISION_FIELD: "workspace", "first_val": 200, "second_val": 300, }, @@ -140,8 +144,8 @@ def test_plot_confusion(tmp_dir, dvc, run_copy_metrics): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"predicted": "B", "actual": "A", "rev": "workspace"}, - {"predicted": "A", "actual": "A", "rev": "workspace"}, + {"predicted": "B", "actual": "A", REVISION_FIELD: "workspace"}, + {"predicted": "A", "actual": "A", REVISION_FIELD: "workspace"}, ] assert plot_content["spec"]["transform"][0]["groupby"] == [ "actual", @@ -173,14 +177,17 @@ def test_plot_confusion_normalized(tmp_dir, dvc, run_copy_metrics): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"predicted": "B", "actual": "A", "rev": "workspace"}, - {"predicted": "A", "actual": "A", "rev": "workspace"}, + {"predicted": "B", "actual": "A", REVISION_FIELD: "workspace"}, + {"predicted": "A", "actual": "A", REVISION_FIELD: "workspace"}, ] assert plot_content["spec"]["transform"][0]["groupby"] == [ "actual", "predicted", ] - assert plot_content["spec"]["transform"][1]["groupby"] == ["rev", "actual"] + assert plot_content["spec"]["transform"][1]["groupby"] == [ + REVISION_FIELD, + "actual", + ] assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" @@ -212,14 +219,14 @@ def test_plot_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "v2"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "v2"}, - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, + {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "y" @@ -239,10 +246,10 @@ def test_plot_even_if_metric_missing( plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v2"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v2"}, + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "y" @@ -274,8 +281,8 @@ def test_plot_even_if_metric_missing( # plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) # plot_content = json.loads(plots["metric.json"]) # assert plot_content["data"]["values"] == [ -# {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, -# {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, +# {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, +# {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, # ] @@ -290,8 +297,8 @@ def test_custom_template(tmp_dir, scm, dvc, custom_template): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"a": 1, "b": 2, "rev": "workspace"}, - {"a": 2, "b": 3, "rev": "workspace"}, + {"a": 1, "b": 2, REVISION_FIELD: "workspace"}, + {"a": 2, "b": 3, REVISION_FIELD: "workspace"}, ] assert plot_content["encoding"]["x"]["field"] == "a" assert plot_content["encoding"]["y"]["field"] == "b" @@ -355,8 +362,8 @@ def test_plot_choose_columns( plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"b": 2, "c": 3, "rev": "workspace"}, - {"b": 3, "c": 4, "rev": "workspace"}, + {"b": 2, "c": 3, REVISION_FIELD: "workspace"}, + {"b": 3, "c": 4, REVISION_FIELD: "workspace"}, ] assert plot_content["encoding"]["x"]["field"] == "b" assert plot_content["encoding"]["y"]["field"] == "c" @@ -378,10 +385,10 @@ def test_plot_choose_columns( # # plot_content = json.loads(plot_string) # assert plot_content["data"]["values"] == [ -# {PlotData.INDEX_FIELD: 0, "b": 2, "rev": "workspace"}, -# {PlotData.INDEX_FIELD: 1, "b": 3, "rev": "workspace"}, +# {INDEX_FIELD: 0, "b": 2, REVISION_FIELD: "workspace"}, +# {INDEX_FIELD: 1, "b": 3, REVISION_FIELD: "workspace"}, # ] -# assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD +# assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD # assert plot_content["encoding"]["y"]["field"] == "b" # @@ -415,18 +422,18 @@ def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): # assert plot_content["data"]["values"] == [ # { # "val": 2, -# PlotData.INDEX_FIELD: 0, +# INDEX_FIELD: 0, # "first_val": 100, -# "rev": "workspace", +# REVISION_FIELD: "workspace", # }, # { # "val": 3, -# PlotData.INDEX_FIELD: 1, +# INDEX_FIELD: 1, # "first_val": 200, -# "rev": "workspace", +# REVISION_FIELD: "workspace", # }, # ] -# assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD +# assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD # assert plot_content["encoding"]["y"]["field"] == "val" # # if not use_dvc: From ed6c43e8a076711ea4c2854bbee7ec536acafc84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 17 Aug 2021 17:44:58 +0200 Subject: [PATCH 04/16] plots: renderers: abstract functionality --- dvc/repo/plots/render.py | 125 ++++++++++++++++++----------- dvc/utils/html.py | 23 +++--- tests/unit/repo/plots/test_data.py | 12 ++- 3 files changed, 91 insertions(+), 69 deletions(-) diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index e7418b6ee3..04af3e6478 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -1,17 +1,21 @@ import logging import os.path +from typing import TYPE_CHECKING, Dict, List, Optional, Set import dpath from funcy import first from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD, to_datapoints from dvc.utils import relpath -from dvc.utils.html import VEGA_DIV_HTML + +if TYPE_CHECKING: + from dvc.repo.plots.template import PlotTemplates + from dvc.types import StrPath logger = logging.getLogger(__name__) -def get_files(data): +def get_files(data: Dict) -> Set: files = set() for rev in data.keys(): for file in data[rev].get("data", {}).keys(): @@ -19,7 +23,7 @@ def get_files(data): return files -def group(data): +def group(data: Dict) -> List[Dict]: files = get_files(data) grouped = [] for file in files: @@ -36,18 +40,18 @@ def find_vega(repo, plots_data, target): return "" -def prepare_renderers(plots_data, templates, path): +def match_renderers(plots_data, templates): renderers = [] for g in group(plots_data): if VegaRenderer.matches(g): renderers.append(VegaRenderer(g, templates)) if ImageRenderer.matches(g): - renderers.append(ImageRenderer(g, document_path=path)) + renderers.append(ImageRenderer(g)) return renderers def render(repo, plots_data, metrics=None, path=None, html_template_path=None): - renderers = prepare_renderers(plots_data, repo.plots.templates, path) + renderers = match_renderers(plots_data, repo.plots.templates) if not html_template_path: html_template_path = repo.config.get("plots", {}).get( "html_template", None @@ -62,24 +66,52 @@ def render(repo, plots_data, metrics=None, path=None, html_template_path=None): ) -class VegaRenderer: - def __init__(self, data, templates=None): +class Renderer: + def __init__(self, data: Dict): self.data = data - self.templates = templates + # we assume comparison of same file between revisions files = get_files(self.data) assert len(files) == 1 self.filename = files.pop() - def _squash_props(self): - resolved = {} + def _convert(self, path): + raise NotImplementedError + + @property + def DIV(self): + raise NotImplementedError + + def generate_html(self, path): + """this method might edit content of path""" + partial = self._convert(path) + div_id = f"plot_{self.filename.replace('.', '_').replace('/', '_')}" + return self.DIV.format(id=div_id, partial=partial) + + +class VegaRenderer(Renderer): + DIV = """ +
+ +
+ """ + + def __init__(self, data: Dict, templates: "PlotTemplates"): + super().__init__(data) + self.templates = templates + + def _squash_props(self) -> Dict: + resolved: Dict[str, str] = {} for rev_data in self.data.values(): for file_data in rev_data.get("data", {}).values(): props = file_data.get("props", {}) resolved = {**resolved, **props} return resolved - def get_vega(self): + def get_vega(self) -> Optional[str]: props = self._squash_props() template = self.templates.load(props.get("template") or "default") @@ -116,16 +148,8 @@ def get_vega(self): return template.render(datapoints, props=props) return None - # TODO this name - def get_html(self): - plot_string = self.get_vega() - if plot_string: - html = VEGA_DIV_HTML.format( - id=f"plot_{self.filename.replace('.', '_').replace('/', '_')}", - vega_json=plot_string, - ) - return html - return None + def _convert(self, path): + return self.get_vega() @staticmethod def matches(data): @@ -134,42 +158,45 @@ def matches(data): return extensions.issubset({".yml", ".yaml", ".json", ".csv", ".tsv"}) -class ImageRenderer: - def __init__(self, data, document_path=None): - self.document_path = document_path - self.data = data +class ImageRenderer(Renderer): + def _image(self, revision, image_path): + return """ +
+

{title}

+ +
""".format( + title=revision, src=image_path + ) - def get_html(self): - static = os.path.join(self.document_path, "static") + DIV = """ +
+ {partial} +
""" + + def _convert(self, path: "StrPath"): + static = os.path.join(path, "static") os.makedirs(static, exist_ok=True) - div = "" + div_content = [] for rev, rev_data in self.data.items(): if "data" in rev_data: for file, file_data in rev_data.get("data", {}).items(): if "data" in file_data: - if not div: - div += f"

{file}

" - img_path = os.path.join( + img_save_path = os.path.join( static, f"{rev}_{file.replace('/', '_')}" ) - rel_img_path = relpath(img_path, self.document_path) - with open(img_path, "wb") as fd: + rel_img_path = relpath(img_save_path, path) + with open(img_save_path, "wb") as fd: fd.write(file_data["data"]) - div += ( - f"

{rev}

" - f'
' - ) - if div: - div = ( - '
' - f"{div}" - "
" - ) - return div + div_content.append(self._image(rev, rel_img_path)) + if div_content: + div_content.insert(0, f"

{self.filename}

") + return "\n".join(div_content) + return "" @staticmethod def matches(data): diff --git a/dvc/utils/html.py b/dvc/utils/html.py index 1a85155f5f..2fab347276 100644 --- a/dvc/utils/html.py +++ b/dvc/utils/html.py @@ -1,9 +1,12 @@ import os from pathlib import Path -from typing import Dict, List, Optional +from typing import TYPE_CHECKING, Dict, List, Optional from dvc.exceptions import DvcException -from dvc.types import StrPath + +if TYPE_CHECKING: + from dvc.repo.plots.render import Renderer + from dvc.types import StrPath PAGE_HTML = """ @@ -18,12 +21,6 @@ """ -VEGA_DIV_HTML = """
-""" - class MissingPlaceholderError(DvcException): def __init__(self, placeholder): @@ -68,10 +65,10 @@ def embed(self) -> str: def write( - path: StrPath, - plots: list, + path: "StrPath", + renderers: List["Renderer"], metrics: Optional[Dict[str, Dict]] = None, - template_path: Optional[StrPath] = None, + template_path: Optional["StrPath"] = None, ): os.makedirs(path, exist_ok=True) @@ -86,8 +83,8 @@ def write( document.with_metrics(metrics) document.with_element("
") - for renderer in plots: - document.with_element(renderer.get_html()) + for renderer in renderers: + document.with_element(renderer.generate_html(path)) index = Path(os.path.join(path, "index.html")) diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py index 0e64146d28..806e0083b6 100644 --- a/tests/unit/repo/plots/test_data.py +++ b/tests/unit/repo/plots/test_data.py @@ -3,7 +3,7 @@ import pytest -from dvc.repo.plots.data import PlotData, _lists +from dvc.repo.plots.data import _lists, to_datapoints # TODO do we need those tests? @@ -30,17 +30,15 @@ def test_find_data_in_dict(tmp_dir): m2 = [{"x": 1}, {"x": 2}] dmetric = OrderedDict([("t1", m1), ("t2", m2)]) - plot_data = PlotData("-", "revision", dmetric) - def points_with(datapoints: List, additional_info: Dict): for datapoint in datapoints: datapoint.update(additional_info) return datapoints - assert list(map(dict, plot_data.to_datapoints())) == points_with( - m1, {"rev": "revision"} - ) assert list( - map(dict, plot_data.to_datapoints(fields={"x"})) + map(dict, to_datapoints(dmetric, "revision", "file")) + ) == points_with(m1, {"rev": "revision"}) + assert list( + map(dict, to_datapoints(dmetric, "revision", "file", fields={"x"})) ) == points_with(m2, {"rev": "revision"}) From 381b05c25c5b34c6476a4910713511a388396e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 18 Aug 2021 14:33:53 +0200 Subject: [PATCH 05/16] plots: uncommenting tests --- dvc/repo/collect.py | 2 +- dvc/repo/plots/__init__.py | 5 +- dvc/utils/html.py | 1 - tests/func/plots/test_show.py | 331 +++++++++++------------------ tests/unit/repo/plots/test_data.py | 2 - 5 files changed, 126 insertions(+), 215 deletions(-) diff --git a/dvc/repo/collect.py b/dvc/repo/collect.py index dac019972f..f1a99b9528 100644 --- a/dvc/repo/collect.py +++ b/dvc/repo/collect.py @@ -42,6 +42,7 @@ def _collect_paths( if recursive and fs.isdir(path_info): target_infos.extend(repo.dvcignore.walk_files(fs, path_info)) + # TODO should we check it at all? we will get FileNotFound in results if not fs.exists(path_info): if rev == "workspace" or rev == "": logger.warning( @@ -49,7 +50,6 @@ def _collect_paths( ) else: logger.warning("'%s' was not found at: '%s'.", path_info, rev) - continue target_infos.append(path_info) return target_infos diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index bf494243ea..9fa5e22924 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -135,12 +135,11 @@ def show( ): if onerror is None: onerror = onerror_collect - data = self.collect( + + return self.collect( targets, revs, recursive, onerror=onerror, props=props ) - return data - def diff(self, *args, **kwargs): from .diff import diff diff --git a/dvc/utils/html.py b/dvc/utils/html.py index 2fab347276..0c7f915e46 100644 --- a/dvc/utils/html.py +++ b/dvc/utils/html.py @@ -88,7 +88,6 @@ def write( index = Path(os.path.join(path, "index.html")) - # TODO remember to try/catch remove on exception with open(index, "w") as fd: fd.write(document.embed()) return index diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index fc646ed5f8..230342b597 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -9,6 +9,7 @@ from dvc.exceptions import OverlappingOutputPathsError from dvc.main import main from dvc.path_info import PathInfo +from dvc.repo import Repo from dvc.repo.plots.data import ( INDEX_FIELD, REVISION_FIELD, @@ -20,7 +21,7 @@ NoFieldInDataError, TemplateNotFoundError, ) -from dvc.utils import onerror_collect, relpath +from dvc.utils import onerror_collect from dvc.utils.fs import remove from dvc.utils.serialize import ( EncodingError, @@ -28,7 +29,7 @@ dump_yaml, modify_yaml, ) -from tests.func.plots.utils import _write_csv +from tests.func.plots.utils import _write_json # RENDER @@ -253,37 +254,35 @@ def test_plot_even_if_metric_missing( assert plot_content["encoding"]["y"]["field"] == "y" -# TODO to collect? -# def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): -# metric = [{"y": 2}, {"y": 3}] -# _write_json(tmp_dir, metric, "metric_t.json") -# stage = run_copy_metrics( -# "metric_t.json", -# "metric.json", -# plots=["metric.json"], -# commit="there is metric", -# ) -# scm.tag("v1") -# -# Make a different plot and then remove its datafile -# metric = [{"y": 3}, {"y": 4}] -# _write_json(tmp_dir, metric, "metric_t.json") -# stage = run_copy_metrics( -# "metric_t.json", -# "metric.json", -# plots=["metric.json"], -# commit="there is an another metric", -# ) -# scm.tag("v2") -# remove(stage.outs[0].fspath) -# remove(stage.outs[0].cache_path) -# -# plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) -# plot_content = json.loads(plots["metric.json"]) -# assert plot_content["data"]["values"] == [ -# {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, -# {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, -# ] +def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): + metric1 = [{"y": 2}, {"y": 3}] + _write_json(tmp_dir, metric1, "metric_t.json") + run_copy_metrics( + "metric_t.json", + "metric.json", + plots=["metric.json"], + commit="there is metric", + ) + scm.tag("v1") + + # Make a different plot and then remove its datafile + metric2 = [{"y": 3}, {"y": 4}] + _write_json(tmp_dir, metric2, "metric_t.json") + stage = run_copy_metrics( + "metric_t.json", + "metric.json", + plots=["metric.json"], + commit="there is an another metric", + ) + scm.tag("v2") + remove(stage.outs[0].fspath) + remove(stage.outs[0].cache_path) + + plots_data = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) + assert plots_data["v1"]["data"]["metric.json"]["data"] == metric1 + assert isinstance( + plots_data["v2"]["data"]["metric.json"]["error"], FileNotFoundError + ) def test_custom_template(tmp_dir, scm, dvc, custom_template): @@ -370,26 +369,25 @@ def test_plot_choose_columns( # TODO ?? -# def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): -# metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] -# _write_json(tmp_dir, metric, "metric_t.json") -# run_copy_metrics( -# "metric_t.json", -# "metric.json", -# plots_no_cache=["metric.json"], -# commit="init", -# tag="v1", -# ) -# -# plot_string = dvc.plots.show(props={"fields": {"b"}})["metric.json"] -# -# plot_content = json.loads(plot_string) -# assert plot_content["data"]["values"] == [ -# {INDEX_FIELD: 0, "b": 2, REVISION_FIELD: "workspace"}, -# {INDEX_FIELD: 1, "b": 3, REVISION_FIELD: "workspace"}, -# ] -# assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD -# assert plot_content["encoding"]["y"]["field"] == "b" +def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + data = { + "workspace": { + "data": {"file.json": {"data": metric, "props": {"fields": {"b"}}}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + plot_content = json.loads(plot_string) + + assert plot_content["data"]["values"] == [ + {INDEX_FIELD: 0, "b": 2, REVISION_FIELD: "workspace"}, + {INDEX_FIELD: 1, "b": 3, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "b" + + # @@ -405,93 +403,61 @@ def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): VegaRenderer(data, dvc.plots.templates).get_vega() -# TODO move to collect? -# @pytest.mark.parametrize("use_dvc", [True, False]) -# def test_show_non_plot(tmp_dir, scm, use_dvc): -# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# _write_json(tmp_dir, metric, "metric.json") -# -# if use_dvc: -# dvc = Repo.init() -# else: -# dvc = Repo(uninitialized=True) -# -# plot_string = dvc.plots.show(targets=["metric.json"])["metric.json"] -# -# plot_content = json.loads(plot_string) -# assert plot_content["data"]["values"] == [ -# { -# "val": 2, -# INDEX_FIELD: 0, -# "first_val": 100, -# REVISION_FIELD: "workspace", -# }, -# { -# "val": 3, -# INDEX_FIELD: 1, -# "first_val": 200, -# REVISION_FIELD: "workspace", -# }, -# ] -# assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD -# assert plot_content["encoding"]["y"]["field"] == "val" -# -# if not use_dvc: -# assert not (tmp_dir / ".dvc").exists() -# +@pytest.mark.parametrize("use_dvc", [True, False]) +def test_show_non_plot(tmp_dir, scm, use_dvc): + metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] + _write_json(tmp_dir, metric, "metric.json") -# TODO? -# def test_show_non_plot_and_plot_with_params( -# tmp_dir, scm, dvc, run_copy_metrics -# ): -# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# _write_json(tmp_dir, metric, "metric.json") -# run_copy_metrics( -# "metric.json", "metric2.json", plots_no_cache=["metric2.json"] -# ) + if use_dvc: + dvc = Repo.init() + else: + dvc = Repo(uninitialized=True) -# dvc.plots.modify("metric2.json", props={"title": "TITLE"}) -# result = dvc.plots.show(targets=["metric.json", "metric2.json"]) + plots = dvc.plots.show(targets=["metric.json"]) -# plot_content = json.loads(result["metric.json"]) -# plot2_content = json.loads(result["metric2.json"]) + assert plots["workspace"]["data"]["metric.json"]["data"] == metric -# assert plot2_content["title"] == "TITLE" -# assert plot_content != plot2_content -# plot_content.pop("title") -# plot2_content.pop("title") -# assert plot_content == plot2_content +def test_show_non_plot_and_plot_with_params( + tmp_dir, scm, dvc, run_copy_metrics +): + metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] + _write_json(tmp_dir, metric, "metric.json") + run_copy_metrics( + "metric.json", "metric2.json", plots_no_cache=["metric2.json"] + ) + props = {"title": "TITLE"} + dvc.plots.modify("metric2.json", props=props) -# TODO? -# def test_show_no_repo(tmp_dir): -# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# _write_json(tmp_dir, metric, "metric.json") + result = dvc.plots.show(targets=["metric.json", "metric2.json"]) + assert "metric.json" in result["workspace"]["data"] + assert "metric2.json" in result["workspace"]["data"] + assert result["workspace"]["data"]["metric2.json"]["props"] == props -# dvc = Repo(uninitialized=True) -# dvc.plots.show(["metric.json"]) +def test_show_from_subdir(tmp_dir, dvc, capsys): + subdir = tmp_dir / "subdir" + subdir.mkdir() + metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] + _write_json(subdir, metric, "metric.json") -# TODO? -# def test_show_from_subdir(tmp_dir, dvc, capsys): -# subdir = tmp_dir / "subdir" + with subdir.chdir(): + assert main(["plots", "show", "metric.json"]) == 0 -# subdir.mkdir() -# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# _write_json(subdir, metric, "metric.json") + out, _ = capsys.readouterr() + assert subdir.as_uri() in out + assert (subdir / "dvc_plots").is_dir() + assert (subdir / "dvc_plots" / "index.html").is_file() -# with subdir.chdir(): -# assert main(["plots", "show", "metric.json"]) == 0 -# out, _ = capsys.readouterr() -# assert subdir.as_uri() in out -# assert (subdir / "dvc_plots").is_dir() -# assert (subdir / "dvc_plots" / "index.html").is_file() +def test_plots_show_non_existing(tmp_dir, dvc, caplog): + result = dvc.plots.show(targets=["plot.json"]) + assert isinstance( + result["workspace"]["data"]["plot.json"]["error"], FileNotFoundError + ) -# TODO collect format -# def test_plots_show_non_existing(tmp_dir, dvc): -# assert dvc.plots.show(targets=["plot.json"]) == {} + assert "'plot.json' was not found in current workspace." in caplog.text @pytest.mark.parametrize("clear_before_run", [True, False]) @@ -529,72 +495,34 @@ def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): ) -# TODO -# def test_dir_plots(tmp_dir, dvc, run_copy_metrics): -# subdir = tmp_dir / "subdir" -# subdir.mkdir() -# -# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# -# fname = "file.json" -# _write_json(tmp_dir, metric, fname) -# -# p1 = os.path.join("subdir", "p1.json") -# p2 = os.path.join("subdir", "p2.json") -# tmp_dir.dvc.run( -# cmd=( -# f"mkdir subdir && python copy.py {fname} {p1} && " -# f"python copy.py {fname} {p2}" -# ), -# deps=[fname], -# single_stage=False, -# plots=["subdir"], -# name="copy_double", -# ) -# dvc.plots.modify("subdir", {"title": "TITLE"}) -# -# result = dvc.plots.show() -# p1_content = json.loads(result[p1]) -# p2_content = json.loads(result[p2]) -# -# assert p1_content["title"] == p2_content["title"] == "TITLE" +def test_dir_plots(tmp_dir, dvc, run_copy_metrics): + subdir = tmp_dir / "subdir" + subdir.mkdir() + metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# TODO? -# def test_show_dir_plots(tmp_dir, dvc, run_copy_metrics): -# subdir = tmp_dir / "subdir" -# subdir.mkdir() -# metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] -# -# fname = "file.json" -# _write_json(tmp_dir, metric, fname) -# -# p1 = os.path.join("subdir", "p1.json") -# p2 = os.path.join("subdir", "p2.json") -# tmp_dir.dvc.run( -# cmd=( -# f"mkdir subdir && python copy.py {fname} {p1} && " -# f"python copy.py {fname} {p2}" -# ), -# deps=[fname], -# single_stage=False, -# plots=["subdir"], -# name="copy_double", -# ) -# -# result = dvc.plots.show(targets=["subdir"]) -# p1_content = json.loads(result[p1]) -# p2_content = json.loads(result[p2]) -# -# assert p1_content == p2_content -# -# result = dvc.plots.show(targets=[p1]) -# assert set(result.keys()) == {p1} -# -# remove(dvc.odb.local.cache_dir) -# remove(subdir) -# -# assert dvc.plots.show() == {} + fname = "file.json" + _write_json(tmp_dir, metric, fname) + + p1 = os.path.join("subdir", "p1.json") + p2 = os.path.join("subdir", "p2.json") + tmp_dir.dvc.run( + cmd=( + f"mkdir subdir && python copy.py {fname} {p1} && " + f"python copy.py {fname} {p2}" + ), + deps=[fname], + single_stage=False, + plots=["subdir"], + name="copy_double", + ) + props = {"title": "TITLE"} + dvc.plots.modify("subdir", {"title": "TITLE"}) + + result = dvc.plots.show() + assert set(result["workspace"]["data"]) == {p1, p2} + assert result["workspace"]["data"][p1]["props"] == props + assert result["workspace"]["data"][p2]["props"] == props def test_ignore_binary_file(tmp_dir, dvc, run_copy_metrics): @@ -647,17 +575,7 @@ def test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template): with open("image.jpg", "wb") as fd: fd.write(b"content") - metric = [{"val": 2}, {"val": 3}] - _write_csv(metric, "metric_t.csv") - - dvc.add(["image.jpg", "metric_t.csv"]) - run_copy_metrics( - "metric_t.csv", - "metric.csv", - plots=["metric.csv"], - name="s1", - single_stage=False, - ) + dvc.add(["image.jpg"]) run_copy_metrics( "image.jpg", "plot.jpg", @@ -666,9 +584,7 @@ def test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template): name="s2", single_stage=False, ) - dvc.plots.modify( - "metric.csv", props={"template": relpath(custom_template)} - ) + scm.add(["dvc.yaml", "dvc.lock"]) scm.commit("initial") @@ -677,7 +593,6 @@ def test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template): with open("plot.jpg", "wb") as fd: fd.write(b"content2") - _write_csv([{"val": 3}, {"val": 4}], "metric.csv") - - # dvc.plots.show(revs=["v1", "workspace"]) - main(["plots", "diff", "v1"]) + result = dvc.plots.show(revs=["v1", "workspace"]) + assert result["v1"]["data"]["plot.jpg"]["data"] == b"content" + assert result["workspace"]["data"]["plot.jpg"]["data"] == b"content2" diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py index 806e0083b6..a22e046913 100644 --- a/tests/unit/repo/plots/test_data.py +++ b/tests/unit/repo/plots/test_data.py @@ -5,8 +5,6 @@ from dvc.repo.plots.data import _lists, to_datapoints -# TODO do we need those tests? - @pytest.mark.parametrize( "dictionary, expected_result", From f7dde5abd3ad4ff13c7c9358136c41037a01c774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 18 Aug 2021 23:14:50 +0200 Subject: [PATCH 06/16] plots: self implement grouping --- dvc/repo/plots/render.py | 29 ++++++++++++++--- tests/func/plots/test_show.py | 3 -- tests/unit/repo/plots/test_data.py | 51 ++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 7 deletions(-) diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index 04af3e6478..433a256d55 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -2,7 +2,7 @@ import os.path from typing import TYPE_CHECKING, Dict, List, Optional, Set -import dpath +import dpath.util from funcy import first from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD, to_datapoints @@ -23,13 +23,34 @@ def get_files(data: Dict) -> Set: return files +# def group(data: Dict) -> List[Dict]: +# files = get_files(data) +# grouped = [] +# for file in files: +# found = dpath.util.search(data, ["*", "*", file]) +# if found: +# grouped.append(found) +# return grouped + + def group(data: Dict) -> List[Dict]: + # TODO use dpath.util.search once + # https://github.com/dpath-maintainers/dpath-python/issues/147 is released + # now we cannot search when errors are present in data files = get_files(data) grouped = [] + for file in files: - found = dpath.util.search(data, ["*", "*", file]) - if found: - grouped.append(found) + tmp: Dict = {} + for revision, revision_data in data.items(): + if file in revision_data.get("data", {}): + if "data" not in tmp: + tmp[revision] = {"data": {}} + tmp[revision]["data"].update( + {file: revision_data["data"][file]} + ) + grouped.append(tmp) + return grouped diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index 230342b597..b508859e01 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -34,9 +34,7 @@ # RENDER def test_plot_csv_one_column(tmp_dir, scm, dvc, run_copy_metrics): - # no header props = { - "header": False, "x_label": "x_title", "y_label": "y_title", "title": "mytitle", @@ -231,7 +229,6 @@ def test_plot_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "y" -# TODO add tests for grouping def test_plot_even_if_metric_missing( tmp_dir, scm, dvc, caplog, run_copy_metrics ): diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py index a22e046913..277e8c42f8 100644 --- a/tests/unit/repo/plots/test_data.py +++ b/tests/unit/repo/plots/test_data.py @@ -4,6 +4,7 @@ import pytest from dvc.repo.plots.data import _lists, to_datapoints +from dvc.repo.plots.render import group @pytest.mark.parametrize( @@ -40,3 +41,53 @@ def points_with(datapoints: List, additional_info: Dict): assert list( map(dict, to_datapoints(dmetric, "revision", "file", fields={"x"})) ) == points_with(m2, {"rev": "revision"}) + + +def test_group_plots_data(): + error = FileNotFoundError() + data = { + "v2": { + "data": { + "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, + "other_file.jpg": {"data": "content"}, + } + }, + "v1": { + "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} + }, + "workspace": { + "data": { + "file.json": {"error": error, "props": {}}, + "other_file.jpg": {"data": "content2"}, + } + }, + } + + results = group(data) + assert { + "v2": { + "data": { + "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, + } + }, + "v1": { + "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} + }, + "workspace": { + "data": { + "file.json": {"error": error, "props": {}}, + } + }, + } in results + assert { + "v2": { + "data": { + "other_file.jpg": {"data": "content"}, + } + }, + "workspace": { + "data": { + "other_file.jpg": {"data": "content2"}, + } + }, + } in results From 7827fce67dc0265c9678e6f0132f7626d7f5d1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 18 Aug 2021 23:36:30 +0200 Subject: [PATCH 07/16] plots: rename group --- dvc/repo/plots/render.py | 16 +++------------- tests/unit/repo/plots/test_data.py | 4 ++-- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index 433a256d55..578a3a3cb5 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -23,20 +23,10 @@ def get_files(data: Dict) -> Set: return files -# def group(data: Dict) -> List[Dict]: -# files = get_files(data) -# grouped = [] -# for file in files: -# found = dpath.util.search(data, ["*", "*", file]) -# if found: -# grouped.append(found) -# return grouped - - -def group(data: Dict) -> List[Dict]: +def group_by_filename(data: Dict) -> List[Dict]: # TODO use dpath.util.search once # https://github.com/dpath-maintainers/dpath-python/issues/147 is released - # now we cannot search when errors are present in data + # now cannot search when errors are present in data files = get_files(data) grouped = [] @@ -63,7 +53,7 @@ def find_vega(repo, plots_data, target): def match_renderers(plots_data, templates): renderers = [] - for g in group(plots_data): + for g in group_by_filename(plots_data): if VegaRenderer.matches(g): renderers.append(VegaRenderer(g, templates)) if ImageRenderer.matches(g): diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py index 277e8c42f8..e7ba000e37 100644 --- a/tests/unit/repo/plots/test_data.py +++ b/tests/unit/repo/plots/test_data.py @@ -4,7 +4,7 @@ import pytest from dvc.repo.plots.data import _lists, to_datapoints -from dvc.repo.plots.render import group +from dvc.repo.plots.render import group_by_filename @pytest.mark.parametrize( @@ -63,7 +63,7 @@ def test_group_plots_data(): }, } - results = group(data) + results = group_by_filename(data) assert { "v2": { "data": { From a58ebf53c8f52b92b385ba8f96bc1de9938ddd2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 18 Aug 2021 23:50:28 +0200 Subject: [PATCH 08/16] plots: extract rendering tests --- tests/func/plots/render/__init__.py | 0 tests/func/plots/render/test_vega.py | 289 +++++++++++++++++++++++ tests/func/plots/test_show.py | 334 +-------------------------- 3 files changed, 290 insertions(+), 333 deletions(-) create mode 100644 tests/func/plots/render/__init__.py create mode 100644 tests/func/plots/render/test_vega.py diff --git a/tests/func/plots/render/__init__.py b/tests/func/plots/render/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/func/plots/render/test_vega.py b/tests/func/plots/render/test_vega.py new file mode 100644 index 0000000000..68ba53084a --- /dev/null +++ b/tests/func/plots/render/test_vega.py @@ -0,0 +1,289 @@ +import json +import os +from collections import OrderedDict + +import pytest + +from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD +from dvc.repo.plots.render import VegaRenderer +from dvc.repo.plots.template import ( + BadTemplateError, + NoFieldInDataError, + TemplateNotFoundError, +) + + +def test_one_column(tmp_dir, scm, dvc, run_copy_metrics): + props = { + "x_label": "x_title", + "y_label": "y_title", + "title": "mytitle", + } + data = { + "workspace": { + "data": { + "file.json": {"data": [{"val": 2}, {"val": 3}], "props": props} + } + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["title"] == "mytitle" + assert plot_content["data"]["values"] == [ + {"val": 2, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, + {"val": 3, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "val" + assert plot_content["encoding"]["x"]["title"] == "x_title" + assert plot_content["encoding"]["y"]["title"] == "y_title" + + +def test_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): + metric = [ + OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), + ] + + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": {}}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + { + "val": 2, + INDEX_FIELD: 0, + REVISION_FIELD: "workspace", + "first_val": 100, + "second_val": 100, + }, + { + "val": 3, + INDEX_FIELD: 1, + REVISION_FIELD: "workspace", + "first_val": 200, + "second_val": 300, + }, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "val" + + +def test_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): + metric = [ + OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), + ] + + props = {"x": "first_val", "y": "second_val"} + + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + { + "val": 2, + REVISION_FIELD: "workspace", + "first_val": 100, + "second_val": 100, + }, + { + "val": 3, + REVISION_FIELD: "workspace", + "first_val": 200, + "second_val": 300, + }, + ] + assert plot_content["encoding"]["x"]["field"] == "first_val" + assert plot_content["encoding"]["y"]["field"] == "second_val" + + +def test_confusion(tmp_dir, dvc, run_copy_metrics): + confusion_matrix = [ + {"predicted": "B", "actual": "A"}, + {"predicted": "A", "actual": "A"}, + ] + props = {"template": "confusion", "x": "predicted", "y": "actual"} + + data = { + "workspace": { + "data": {"file.json": {"data": confusion_matrix, "props": props}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"predicted": "B", "actual": "A", REVISION_FIELD: "workspace"}, + {"predicted": "A", "actual": "A", REVISION_FIELD: "workspace"}, + ] + assert plot_content["spec"]["transform"][0]["groupby"] == [ + "actual", + "predicted", + ] + assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" + assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" + + +def test_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): + metric_1 = [{"y": 2}, {"y": 3}] + metric_2 = [{"y": 3}, {"y": 5}] + metric_3 = [{"y": 5}, {"y": 6}] + + data = { + "HEAD": { + "data": { + "file.json": {"data": metric_3, "props": {"fields": {"y"}}} + } + }, + "v2": { + "data": { + "file.json": {"data": metric_2, "props": {"fields": {"y"}}} + } + }, + "v1": { + "data": { + "file.json": {"data": metric_1, "props": {"fields": {"y"}}} + } + }, + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" + + +def test_metric_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): + + metric = [{"y": 2}, {"y": 3}] + data = { + "v2": {"data": {"file.json": {"data": metric, "props": {}}}}, + "workspace": { + "data": {"file.json": {"error": FileNotFoundError(), "props": {}}} + }, + } + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" + + +def test_custom_template(tmp_dir, scm, dvc, custom_template): + metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] + props = {"template": os.fspath(custom_template), "x": "a", "y": "b"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"a": 1, "b": 2, REVISION_FIELD: "workspace"}, + {"a": 2, "b": 3, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == "a" + assert plot_content["encoding"]["y"]["field"] == "b" + + +def test_raise_on_no_template(tmp_dir, dvc, run_copy_metrics): + metric = [{"val": 2}, {"val": 3}] + props = {"template": "non_existing_template.json"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + with pytest.raises(TemplateNotFoundError): + VegaRenderer(data, dvc.plots.templates).get_vega() + + +def test_bad_template(tmp_dir, dvc): + metric = [{"val": 2}, {"val": 3}] + tmp_dir.gen("template.json", json.dumps({"a": "b", "c": "d"})) + props = {"template": "template.json"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + with pytest.raises(BadTemplateError): + VegaRenderer(data, dvc.plots.templates).get_vega() + + +def test_plot_choose_columns( + tmp_dir, scm, dvc, custom_template, run_copy_metrics +): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + props = { + "template": os.fspath(custom_template), + "fields": {"b", "c"}, + "x": "b", + "y": "c", + } + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"b": 2, "c": 3, REVISION_FIELD: "workspace"}, + {"b": 3, "c": 4, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == "b" + assert plot_content["encoding"]["y"]["field"] == "c" + + +def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + data = { + "workspace": { + "data": {"file.json": {"data": metric, "props": {"fields": {"b"}}}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + plot_content = json.loads(plot_string) + + assert plot_content["data"]["values"] == [ + {INDEX_FIELD: 0, "b": 2, REVISION_FIELD: "workspace"}, + {INDEX_FIELD: 1, "b": 3, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "b" + + +def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): + metric = [{"val": 2}, {"val": 3}] + data = { + "workspace": { + "data": {"file.json": {"data": metric, "props": {"x": "no_val"}}} + } + } + + with pytest.raises(NoFieldInDataError): + VegaRenderer(data, dvc.plots.templates).get_vega() diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index b508859e01..461687ab10 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -1,6 +1,4 @@ -import json import os -from collections import OrderedDict import pytest from funcy import get_in @@ -10,17 +8,7 @@ from dvc.main import main from dvc.path_info import PathInfo from dvc.repo import Repo -from dvc.repo.plots.data import ( - INDEX_FIELD, - REVISION_FIELD, - PlotMetricTypeError, -) -from dvc.repo.plots.render import VegaRenderer -from dvc.repo.plots.template import ( - BadTemplateError, - NoFieldInDataError, - TemplateNotFoundError, -) +from dvc.repo.plots.data import PlotMetricTypeError from dvc.utils import onerror_collect from dvc.utils.fs import remove from dvc.utils.serialize import ( @@ -32,225 +20,6 @@ from tests.func.plots.utils import _write_json -# RENDER -def test_plot_csv_one_column(tmp_dir, scm, dvc, run_copy_metrics): - props = { - "x_label": "x_title", - "y_label": "y_title", - "title": "mytitle", - } - data = { - "workspace": { - "data": { - "file.json": {"data": [{"val": 2}, {"val": 3}], "props": props} - } - } - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["title"] == "mytitle" - assert plot_content["data"]["values"] == [ - {"val": 2, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, - {"val": 3, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" - assert plot_content["encoding"]["x"]["title"] == "x_title" - assert plot_content["encoding"]["y"]["title"] == "y_title" - - -def test_plot_csv_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): - metric = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - - data = { - "workspace": {"data": {"file.json": {"data": metric, "props": {}}}} - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": 2, - INDEX_FIELD: 0, - REVISION_FIELD: "workspace", - "first_val": 100, - "second_val": 100, - }, - { - "val": 3, - INDEX_FIELD: 1, - REVISION_FIELD: "workspace", - "first_val": 200, - "second_val": 300, - }, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" - - -def test_plot_csv_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): - metric = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - - props = {"x": "first_val", "y": "second_val"} - - data = { - "workspace": {"data": {"file.json": {"data": metric, "props": props}}} - } - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": 2, - REVISION_FIELD: "workspace", - "first_val": 100, - "second_val": 100, - }, - { - "val": 3, - REVISION_FIELD: "workspace", - "first_val": 200, - "second_val": 300, - }, - ] - assert plot_content["encoding"]["x"]["field"] == "first_val" - assert plot_content["encoding"]["y"]["field"] == "second_val" - - -def test_plot_confusion(tmp_dir, dvc, run_copy_metrics): - confusion_matrix = [ - {"predicted": "B", "actual": "A"}, - {"predicted": "A", "actual": "A"}, - ] - props = {"template": "confusion", "x": "predicted", "y": "actual"} - - data = { - "workspace": { - "data": {"file.json": {"data": confusion_matrix, "props": props}} - } - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"predicted": "B", "actual": "A", REVISION_FIELD: "workspace"}, - {"predicted": "A", "actual": "A", REVISION_FIELD: "workspace"}, - ] - assert plot_content["spec"]["transform"][0]["groupby"] == [ - "actual", - "predicted", - ] - assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" - assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" - - -def test_plot_confusion_normalized(tmp_dir, dvc, run_copy_metrics): - confusion_matrix = [ - {"predicted": "B", "actual": "A"}, - {"predicted": "A", "actual": "A"}, - ] - - props = { - "template": "confusion_normalized", - "x": "predicted", - "y": "actual", - } - - data = { - "workspace": { - "data": {"file.json": {"data": confusion_matrix, "props": props}} - } - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"predicted": "B", "actual": "A", REVISION_FIELD: "workspace"}, - {"predicted": "A", "actual": "A", REVISION_FIELD: "workspace"}, - ] - assert plot_content["spec"]["transform"][0]["groupby"] == [ - "actual", - "predicted", - ] - assert plot_content["spec"]["transform"][1]["groupby"] == [ - REVISION_FIELD, - "actual", - ] - assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" - assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" - - -def test_plot_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): - metric_1 = [{"y": 2}, {"y": 3}] - metric_2 = [{"y": 3}, {"y": 5}] - metric_3 = [{"y": 5}, {"y": 6}] - - data = { - "HEAD": { - "data": { - "file.json": {"data": metric_3, "props": {"fields": {"y"}}} - } - }, - "v2": { - "data": { - "file.json": {"data": metric_2, "props": {"fields": {"y"}}} - } - }, - "v1": { - "data": { - "file.json": {"data": metric_1, "props": {"fields": {"y"}}} - } - }, - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, - {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, - {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, - {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, - {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, - {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - -def test_plot_even_if_metric_missing( - tmp_dir, scm, dvc, caplog, run_copy_metrics -): - - metric = [{"y": 2}, {"y": 3}] - data = { - "v2": {"data": {"file.json": {"data": metric, "props": {}}}}, - "workspace": { - "data": {"file.json": {"error": FileNotFoundError(), "props": {}}} - }, - } - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, - {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): metric1 = [{"y": 2}, {"y": 3}] _write_json(tmp_dir, metric1, "metric_t.json") @@ -282,47 +51,6 @@ def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): ) -def test_custom_template(tmp_dir, scm, dvc, custom_template): - metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] - props = {"template": os.fspath(custom_template), "x": "a", "y": "b"} - data = { - "workspace": {"data": {"file.json": {"data": metric, "props": props}}} - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"a": 1, "b": 2, REVISION_FIELD: "workspace"}, - {"a": 2, "b": 3, REVISION_FIELD: "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == "a" - assert plot_content["encoding"]["y"]["field"] == "b" - - -def test_should_raise_on_no_template(tmp_dir, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - props = {"template": "non_existing_template.json"} - data = { - "workspace": {"data": {"file.json": {"data": metric, "props": props}}} - } - - with pytest.raises(TemplateNotFoundError): - VegaRenderer(data, dvc.plots.templates).get_vega() - - -def test_bad_template(tmp_dir, dvc): - metric = [{"val": 2}, {"val": 3}] - tmp_dir.gen("template.json", json.dumps({"a": "b", "c": "d"})) - props = {"template": "template.json"} - data = { - "workspace": {"data": {"file.json": {"data": metric, "props": props}}} - } - - with pytest.raises(BadTemplateError): - VegaRenderer(data, dvc.plots.templates).get_vega() - - def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): tmp_dir.gen("metric_t.txt", "some text") run_copy_metrics( @@ -340,66 +68,6 @@ def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): ) -def test_plot_choose_columns( - tmp_dir, scm, dvc, custom_template, run_copy_metrics -): - metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] - props = { - "template": os.fspath(custom_template), - "fields": {"b", "c"}, - "x": "b", - "y": "c", - } - data = { - "workspace": {"data": {"file.json": {"data": metric, "props": props}}} - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"b": 2, "c": 3, REVISION_FIELD: "workspace"}, - {"b": 3, "c": 4, REVISION_FIELD: "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == "b" - assert plot_content["encoding"]["y"]["field"] == "c" - - -# TODO ?? -def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] - data = { - "workspace": { - "data": {"file.json": {"data": metric, "props": {"fields": {"b"}}}} - } - } - - plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() - plot_content = json.loads(plot_string) - - assert plot_content["data"]["values"] == [ - {INDEX_FIELD: 0, "b": 2, REVISION_FIELD: "workspace"}, - {INDEX_FIELD: 1, "b": 3, REVISION_FIELD: "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "b" - - -# - - -def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - data = { - "workspace": { - "data": {"file.json": {"data": metric, "props": {"x": "no_val"}}} - } - } - - with pytest.raises(NoFieldInDataError): - VegaRenderer(data, dvc.plots.templates).get_vega() - - @pytest.mark.parametrize("use_dvc", [True, False]) def test_show_non_plot(tmp_dir, scm, use_dvc): metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] From 3db09b98c186f274e02379e92de7e1dd300b802a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 19 Aug 2021 12:25:50 +0200 Subject: [PATCH 09/16] plots: move rendering tests to unit --- tests/conftest.py | 9 +++ tests/func/plots/conftest.py | 10 --- tests/func/plots/test_diff.py | 66 ++++++++----------- tests/unit/command/test_plots.py | 50 ++++++++++++-- .../repo}/plots/render/__init__.py | 0 tests/unit/repo/plots/render/test_image.py | 25 +++++++ tests/unit/repo/plots/render/test_render.py | 0 .../repo}/plots/render/test_vega.py | 44 +++++++++---- 8 files changed, 137 insertions(+), 67 deletions(-) delete mode 100644 tests/func/plots/conftest.py rename tests/{func => unit/repo}/plots/render/__init__.py (100%) create mode 100644 tests/unit/repo/plots/render/test_image.py create mode 100644 tests/unit/repo/plots/render/test_render.py rename tests/{func => unit/repo}/plots/render/test_vega.py (89%) diff --git a/tests/conftest.py b/tests/conftest.py index cc160688f6..32fce78614 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -155,3 +155,12 @@ def pytest_configure(config): enabled_remotes.discard(remote_name) if enabled: enabled_remotes.add(remote_name) + + +@pytest.fixture() +def custom_template(tmp_dir, dvc): + import shutil + + template = tmp_dir / "custom_template.json" + shutil.copy(tmp_dir / ".dvc" / "plots" / "default.json", template) + return template diff --git a/tests/func/plots/conftest.py b/tests/func/plots/conftest.py deleted file mode 100644 index f9d7d96a34..0000000000 --- a/tests/func/plots/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import shutil - -import pytest - - -@pytest.fixture() -def custom_template(tmp_dir, dvc): - template = tmp_dir / "custom_template.json" - shutil.copy(tmp_dir / ".dvc" / "plots" / "default.json", template) - return template diff --git a/tests/func/plots/test_diff.py b/tests/func/plots/test_diff.py index c8b62b70cb..3d20410e64 100644 --- a/tests/func/plots/test_diff.py +++ b/tests/func/plots/test_diff.py @@ -1,16 +1,8 @@ -import json - -import pytest - -from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD from tests.func.plots.utils import _write_json -@pytest.mark.skip def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): - - metric_1 = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric_1, "metric_t.json") + _write_json(tmp_dir, [{"y": 2}, {"y": 3}], "metric_t.json") run_copy_metrics( "metric_t.json", "metric.json", @@ -18,8 +10,8 @@ def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): commit="init", ) - metric_2 = [{"y": 3}, {"y": 5}] - _write_json(tmp_dir, metric_2, "metric_t.json") + metric_head = [{"y": 3}, {"y": 5}] + _write_json(tmp_dir, metric_head, "metric_t.json") run_copy_metrics( "metric_t.json", "metric.json", @@ -27,34 +19,30 @@ def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): commit="second", ) - metric_3 = [{"y": 5}, {"y": 6}] - _write_json(tmp_dir, metric_3, "metric_t.json") + metric_1 = [{"y": 5}, {"y": 6}] + _write_json(tmp_dir, metric_1, "metric_t.json") run_copy_metrics( "metric_t.json", "metric.json", plots_no_cache=["metric.json"] ) - - plot_string = dvc.plots.diff(props={"fields": {"y"}})["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, - {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, - {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, - {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - _write_json(tmp_dir, [{"y": 7}, {"y": 8}], "metric.json") - - plot_string = dvc.plots.diff(props={"fields": {"y"}})["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, - {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, - {"y": 7, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, - {"y": 8, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" + props = {"fields": {"y"}} + diff_result = dvc.plots.diff(props=props) + assert diff_result == { + "workspace": { + "data": {"metric.json": {"data": metric_1, "props": props}} + }, + "HEAD": { + "data": {"metric.json": {"data": metric_head, "props": props}} + }, + } + metric_2 = [{"y": 7}, {"y": 8}] + _write_json(tmp_dir, metric_2, "metric.json") + + diff_result = dvc.plots.diff(props=props) + assert diff_result == { + "workspace": { + "data": {"metric.json": {"data": metric_2, "props": props}} + }, + "HEAD": { + "data": {"metric.json": {"data": metric_head, "props": props}} + }, + } diff --git a/tests/unit/command/test_plots.py b/tests/unit/command/test_plots.py index de9e5708a3..0387f90f71 100644 --- a/tests/unit/command/test_plots.py +++ b/tests/unit/command/test_plots.py @@ -5,7 +5,6 @@ from dvc.cli import parse_args from dvc.command.plots import CmdPlotsDiff, CmdPlotsShow -from dvc.path_info import PathInfo @pytest.fixture @@ -13,7 +12,8 @@ def plots_data(): yield { "revision": { "data": { - "plot.csv": {"data": [{"val": 1}, {"val": 2}], "props": {}} + "plot.csv": {"data": [{"val": 1}, {"val": 2}], "props": {}}, + "other.jpg": {"data": b"content"}, } } } @@ -51,6 +51,9 @@ def test_plots_diff(dvc, mocker, plots_data): cmd = cli_args.func(cli_args) m = mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) + render_mock = mocker.patch( + "dvc.command.plots.render", return_value="html_path" + ) assert cmd.run() == 0 @@ -68,6 +71,7 @@ def test_plots_diff(dvc, mocker, plots_data): }, experiment=True, ) + render_mock.assert_not_called() def test_plots_show_vega(dvc, mocker, plots_data): @@ -92,6 +96,9 @@ def test_plots_show_vega(dvc, mocker, plots_data): "dvc.repo.plots.Plots.show", return_value=plots_data, ) + render_mock = mocker.patch( + "dvc.command.plots.render", return_value="html_path" + ) assert cmd.run() == 0 @@ -99,6 +106,7 @@ def test_plots_show_vega(dvc, mocker, plots_data): targets=["datafile"], props={"template": "template", "header": False}, ) + render_mock.assert_not_called() def test_plots_diff_vega(dvc, mocker, capsys, plots_data): @@ -118,11 +126,15 @@ def test_plots_diff_vega(dvc, mocker, capsys, plots_data): mocker.patch( "dvc.command.plots.find_vega", return_value="vega_json_content" ) + render_mock = mocker.patch( + "dvc.command.plots.render", return_value="html_path" + ) assert cmd.run() == 0 out, _ = capsys.readouterr() assert "vega_json_content" in out + render_mock.assert_not_called() def test_plots_diff_open(tmp_dir, dvc, mocker, capsys, plots_data): @@ -133,14 +145,14 @@ def test_plots_diff_open(tmp_dir, dvc, mocker, capsys, plots_data): cmd = cli_args.func(cli_args) mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) - rel = PathInfo("dvc_plots") / "index.html" - absolute_path = tmp_dir / rel + index_path = tmp_dir / "dvc_plots" / "index.html" + mocker.patch("dvc.command.plots.render", return_value=index_path) assert cmd.run() == 0 - mocked_open.assert_called_once_with(absolute_path) + mocked_open.assert_called_once_with(index_path) out, _ = capsys.readouterr() - assert absolute_path.as_uri() in out + assert index_path.as_uri() in out def test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys, plots_data): @@ -194,3 +206,29 @@ def test_plots_path_is_quoted_and_resolved_properly( out, _ = capsys.readouterr() assert expected_url in out + + +@pytest.mark.parametrize( + "output", ("some_out", os.path.join("to", "subdir"), None) +) +def test_should_call_render(tmp_dir, mocker, capsys, plots_data, output): + cli_args = parse_args( + ["plots", "diff", "--targets", "plots.csv", "--out", output] + ) + cmd = cli_args.func(cli_args) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) + + output = output or "dvc_plots" + index_path = tmp_dir / output / "index.html" + render_mock = mocker.patch( + "dvc.command.plots.render", return_value=index_path + ) + + assert cmd.run() == 0 + + out, _ = capsys.readouterr() + assert index_path.as_uri() in out + + render_mock.assert_called_once_with( + cmd.repo, plots_data, path=tmp_dir / output, html_template_path=None + ) diff --git a/tests/func/plots/render/__init__.py b/tests/unit/repo/plots/render/__init__.py similarity index 100% rename from tests/func/plots/render/__init__.py rename to tests/unit/repo/plots/render/__init__.py diff --git a/tests/unit/repo/plots/render/test_image.py b/tests/unit/repo/plots/render/test_image.py new file mode 100644 index 0000000000..6c93db2b97 --- /dev/null +++ b/tests/unit/repo/plots/render/test_image.py @@ -0,0 +1,25 @@ +import pytest + +from dvc.repo.plots.render import ImageRenderer + + +@pytest.mark.parametrize( + "extension, matches", + ( + (".csv", False), + (".json", False), + (".tsv", False), + (".yaml", False), + (".jpg", True), + (".gif", True), + (".jpeg", True), + (".png", True), + ), +) +def test_matches(extension, matches): + filename = "file" + extension + data = { + "HEAD": {"data": {filename: {}}}, + "v1": {"data": {filename: {}}}, + } + assert ImageRenderer.matches(data) == matches diff --git a/tests/unit/repo/plots/render/test_render.py b/tests/unit/repo/plots/render/test_render.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/func/plots/render/test_vega.py b/tests/unit/repo/plots/render/test_vega.py similarity index 89% rename from tests/func/plots/render/test_vega.py rename to tests/unit/repo/plots/render/test_vega.py index 68ba53084a..c1e10f4041 100644 --- a/tests/func/plots/render/test_vega.py +++ b/tests/unit/repo/plots/render/test_vega.py @@ -13,7 +13,7 @@ ) -def test_one_column(tmp_dir, scm, dvc, run_copy_metrics): +def test_one_column(tmp_dir, scm, dvc): props = { "x_label": "x_title", "y_label": "y_title", @@ -41,7 +41,7 @@ def test_one_column(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["title"] == "y_title" -def test_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): +def test_multiple_columns(tmp_dir, scm, dvc): metric = [ OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), @@ -74,7 +74,7 @@ def test_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "val" -def test_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): +def test_choose_axes(tmp_dir, scm, dvc): metric = [ OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), @@ -106,7 +106,7 @@ def test_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "second_val" -def test_confusion(tmp_dir, dvc, run_copy_metrics): +def test_confusion(tmp_dir, dvc): confusion_matrix = [ {"predicted": "B", "actual": "A"}, {"predicted": "A", "actual": "A"}, @@ -134,7 +134,7 @@ def test_confusion(tmp_dir, dvc, run_copy_metrics): assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" -def test_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): +def test_multiple_revs_default(tmp_dir, scm, dvc): metric_1 = [{"y": 2}, {"y": 3}] metric_2 = [{"y": 3}, {"y": 5}] metric_3 = [{"y": 5}, {"y": 6}] @@ -172,7 +172,7 @@ def test_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "y" -def test_metric_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): +def test_metric_missing(tmp_dir, scm, dvc, caplog): metric = [{"y": 2}, {"y": 3}] data = { @@ -210,7 +210,7 @@ def test_custom_template(tmp_dir, scm, dvc, custom_template): assert plot_content["encoding"]["y"]["field"] == "b" -def test_raise_on_no_template(tmp_dir, dvc, run_copy_metrics): +def test_raise_on_no_template(tmp_dir, dvc): metric = [{"val": 2}, {"val": 3}] props = {"template": "non_existing_template.json"} data = { @@ -233,9 +233,7 @@ def test_bad_template(tmp_dir, dvc): VegaRenderer(data, dvc.plots.templates).get_vega() -def test_plot_choose_columns( - tmp_dir, scm, dvc, custom_template, run_copy_metrics -): +def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] props = { "template": os.fspath(custom_template), @@ -258,7 +256,7 @@ def test_plot_choose_columns( assert plot_content["encoding"]["y"]["field"] == "c" -def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): +def test_plot_default_choose_column(tmp_dir, scm, dvc): metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] data = { "workspace": { @@ -277,7 +275,7 @@ def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): assert plot_content["encoding"]["y"]["field"] == "b" -def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): +def test_raise_on_wrong_field(tmp_dir, scm, dvc): metric = [{"val": 2}, {"val": 3}] data = { "workspace": { @@ -287,3 +285,25 @@ def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): with pytest.raises(NoFieldInDataError): VegaRenderer(data, dvc.plots.templates).get_vega() + + +@pytest.mark.parametrize( + "extension, matches", + ( + (".csv", True), + (".json", True), + (".tsv", True), + (".yaml", True), + (".jpg", False), + (".gif", False), + (".jpeg", False), + (".png", False), + ), +) +def test_matches(extension, matches): + filename = "file" + extension + data = { + "HEAD": {"data": {filename: {}}}, + "v1": {"data": {filename: {}}}, + } + assert VegaRenderer.matches(data) == matches From ad828e3dc6caa35d6c82e41a68450c393ead673a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 19 Aug 2021 13:14:03 +0200 Subject: [PATCH 10/16] plots: add image rendering tests --- dvc/repo/collect.py | 1 - dvc/repo/plots/data.py | 1 + dvc/repo/plots/render.py | 62 +++++++++++++--------- tests/unit/repo/plots/render/test_image.py | 19 +++++++ 4 files changed, 57 insertions(+), 26 deletions(-) diff --git a/dvc/repo/collect.py b/dvc/repo/collect.py index f1a99b9528..3cba2a3caa 100644 --- a/dvc/repo/collect.py +++ b/dvc/repo/collect.py @@ -42,7 +42,6 @@ def _collect_paths( if recursive and fs.isdir(path_info): target_infos.extend(repo.dvcignore.walk_files(fs, path_info)) - # TODO should we check it at all? we will get FileNotFound in results if not fs.exists(path_info): if rev == "workspace" or rev == "": logger.warning( diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index 6c71944979..301cb3605b 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -89,6 +89,7 @@ def _append_revision(data_points, revision, **kwargs): def to_datapoints(data, revision, filename, **kwargs): + # TODO data assumes single file, but it does not have to be, assert? result = deepcopy(data) for processor in [ _find_data, diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index 578a3a3cb5..d960c53e68 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -86,16 +86,16 @@ def __init__(self, data: Dict): assert len(files) == 1 self.filename = files.pop() - def _convert(self, path): + def _convert(self, page_dir_path: "StrPath"): raise NotImplementedError @property def DIV(self): raise NotImplementedError - def generate_html(self, path): + def generate_html(self, page_dir_path: "StrPath"): """this method might edit content of path""" - partial = self._convert(path) + partial = self._convert(page_dir_path) div_id = f"plot_{self.filename.replace('.', '_').replace('/', '_')}" return self.DIV.format(id=div_id, partial=partial) @@ -159,7 +159,8 @@ def get_vega(self) -> Optional[str]: return template.render(datapoints, props=props) return None - def _convert(self, path): + # TODO naming? + def _convert(self, page_dir_path: "StrPath"): return self.get_vega() @staticmethod @@ -170,40 +171,51 @@ def matches(data): class ImageRenderer(Renderer): - def _image(self, revision, image_path): + DIV = """ +
+ {partial} +
""" + + def _write_image( + self, + page_dir_path: "StrPath", + revision: str, + filename: str, + image_data: bytes, + ): + static = os.path.join(page_dir_path, "static") + os.makedirs(static, exist_ok=True) + + img_path = os.path.join( + static, f"{revision}_{filename.replace('/', '_')}" + ) + rel_img_path = relpath(img_path, page_dir_path) + with open(img_path, "wb") as fd: + fd.write(image_data) return """

{title}

""".format( - title=revision, src=image_path + title=revision, src=rel_img_path ) - DIV = """ -
- {partial} -
""" - - def _convert(self, path: "StrPath"): - static = os.path.join(path, "static") - os.makedirs(static, exist_ok=True) + def _convert(self, page_dir_path: "StrPath"): div_content = [] for rev, rev_data in self.data.items(): if "data" in rev_data: for file, file_data in rev_data.get("data", {}).items(): if "data" in file_data: - img_save_path = os.path.join( - static, f"{rev}_{file.replace('/', '_')}" + div_content.append( + self._write_image( + page_dir_path, rev, file, file_data["data"] + ) ) - rel_img_path = relpath(img_save_path, path) - with open(img_save_path, "wb") as fd: - fd.write(file_data["data"]) - div_content.append(self._image(rev, rel_img_path)) if div_content: div_content.insert(0, f"

{self.filename}

") return "\n".join(div_content) diff --git a/tests/unit/repo/plots/render/test_image.py b/tests/unit/repo/plots/render/test_image.py index 6c93db2b97..e4a2a22c68 100644 --- a/tests/unit/repo/plots/render/test_image.py +++ b/tests/unit/repo/plots/render/test_image.py @@ -1,3 +1,5 @@ +import os + import pytest from dvc.repo.plots.render import ImageRenderer @@ -23,3 +25,20 @@ def test_matches(extension, matches): "v1": {"data": {filename: {}}}, } assert ImageRenderer.matches(data) == matches + + +def test_render(tmp_dir): + data = {"workspace": {"data": {"file.jpg": {"data": b"content"}}}} + + page_dir = os.path.join("some", "path") + html = ImageRenderer(data).generate_html(page_dir) + + assert (tmp_dir / page_dir).is_dir() + image_file = tmp_dir / page_dir / "static" / "workspace_file.jpg" + assert image_file.is_file() + + with open(image_file, "rb") as fobj: + assert fobj.read() == b"content" + + assert "

file.jpg

" in html + assert '' in html From c54f0440d7f41be71bb37301f644176ed4ea3bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 19 Aug 2021 15:06:32 +0200 Subject: [PATCH 11/16] plots: dont use dpath to find vega plot --- dvc/repo/plots/render.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index d960c53e68..ab0924d7c4 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -2,7 +2,6 @@ import os.path from typing import TYPE_CHECKING, Dict, List, Optional, Set -import dpath.util from funcy import first from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD, to_datapoints @@ -45,7 +44,17 @@ def group_by_filename(data: Dict) -> List[Dict]: def find_vega(repo, plots_data, target): - found = dpath.util.search(plots_data, ["*", "*", target]) + # TODO same as group_by_filename + grouped = group_by_filename(plots_data) + found = None + for plot_group in grouped: + files = get_files(plot_group) + assert len(files) == 1 + file = files.pop() + if file == target: + found = plot_group + break + if found and VegaRenderer.matches(found): return VegaRenderer(found, repo.plots.templates).get_vega() return "" From b845456e524f36906165ac2337371e791dcb4b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 19 Aug 2021 15:25:00 +0200 Subject: [PATCH 12/16] plots: test find_vega --- dvc/command/plots.py | 3 +- tests/unit/repo/plots/render/test_vega.py | 47 ++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 5774739a12..c74e44325d 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -51,7 +51,8 @@ def run(self): if self.args.show_vega: target = self.args.targets[0] plot_json = find_vega(self.repo, plots_data, target) - ui.write(plot_json) + if plot_json: + ui.write(plot_json) return 0 index_path = render( diff --git a/tests/unit/repo/plots/render/test_vega.py b/tests/unit/repo/plots/render/test_vega.py index c1e10f4041..4738ea696a 100644 --- a/tests/unit/repo/plots/render/test_vega.py +++ b/tests/unit/repo/plots/render/test_vega.py @@ -5,7 +5,7 @@ import pytest from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD -from dvc.repo.plots.render import VegaRenderer +from dvc.repo.plots.render import VegaRenderer, find_vega from dvc.repo.plots.template import ( BadTemplateError, NoFieldInDataError, @@ -307,3 +307,48 @@ def test_matches(extension, matches): "v1": {"data": {filename: {}}}, } assert VegaRenderer.matches(data) == matches + + +def test_find_vega(tmp_dir, dvc): + data = { + "HEAD": { + "data": { + "file.json": { + "data": [{"y": 5}, {"y": 6}], + "props": {"fields": {"y"}}, + }, + "other_file.jpg": {"data": b"content"}, + } + }, + "v2": { + "data": { + "file.json": { + "data": [{"y": 3}, {"y": 5}], + "props": {"fields": {"y"}}, + }, + "other_file.jpg": {"data": b"content2"}, + } + }, + "v1": { + "data": { + "file.json": { + "data": [{"y": 2}, {"y": 3}], + "props": {"fields": {"y"}}, + }, + "another.gif": {"data": b"content2"}, + } + }, + } + + plot_content = json.loads(find_vega(dvc, data, "file.json")) + + assert plot_content["data"]["values"] == [ + {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" From a834397eeb31f8c46ab754fc15b10727f35419bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 19 Aug 2021 15:51:52 +0200 Subject: [PATCH 13/16] plots: test render function --- dvc/repo/plots/render.py | 2 +- tests/unit/repo/plots/render/test_render.py | 67 +++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py index ab0924d7c4..7cd0ebb5d7 100644 --- a/dvc/repo/plots/render.py +++ b/dvc/repo/plots/render.py @@ -201,7 +201,7 @@ def _write_image( os.makedirs(static, exist_ok=True) img_path = os.path.join( - static, f"{revision}_{filename.replace('/', '_')}" + static, f"{revision}_{filename.replace(os.sep, '_')}" ) rel_img_path = relpath(img_path, page_dir_path) with open(img_path, "wb") as fd: diff --git a/tests/unit/repo/plots/render/test_render.py b/tests/unit/repo/plots/render/test_render.py index e69de29bb2..443417fb4b 100644 --- a/tests/unit/repo/plots/render/test_render.py +++ b/tests/unit/repo/plots/render/test_render.py @@ -0,0 +1,67 @@ +import os + +from dvc.repo.plots.render import find_vega, render + + +def assert_website_has_image(page_path, revision, filename, image_content): + index_path = page_path / "index.html" + assert index_path.is_file() + index_content = index_path.read_text() + + resources_filename = f"{revision}_{filename.replace(os.sep, '_')}" + image_path = page_path / "static" / resources_filename + assert image_path.is_file() + + img_html = f'' + assert img_html in index_content + with open(image_path, "rb") as fobj: + assert fobj.read() == image_content + + +def test_render(tmp_dir, dvc): + data = { + "HEAD": { + "data": { + "file.json": { + "data": [{"y": 5}, {"y": 6}], + "props": {"fields": {"y"}}, + }, + os.path.join("sub", "other_file.jpg"): {"data": b"content"}, + } + }, + "v2": { + "data": { + "file.json": { + "data": [{"y": 3}, {"y": 5}], + "props": {"fields": {"y"}}, + }, + "other_file.jpg": {"data": b"content2"}, + } + }, + "v1": { + "data": { + "some.csv": { + "data": [{"y": 2}, {"y": 3}], + "props": {"fields": {"y"}}, + }, + "another.gif": {"data": b"content3"}, + } + }, + } + + render(dvc, data, path=os.path.join("results", "dir")) + page_path = tmp_dir / "results" / "dir" + index_path = page_path / "index.html" + + assert index_path.is_file() + assert_website_has_image( + page_path, "HEAD", os.path.join("sub", "other_file.jpg"), b"content" + ) + assert_website_has_image(page_path, "v2", "other_file.jpg", b"content2") + assert_website_has_image(page_path, "v1", "another.gif", b"content3") + + index_content = index_path.read_text() + file_vega = find_vega(dvc, data, "file.json") + some_vega = find_vega(dvc, data, "some.csv") + assert file_vega in index_content.strip() + assert some_vega in index_content.strip() From 6950f66413fda70d39fe3b724698dda71e5a2ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 20 Aug 2021 15:53:10 +0200 Subject: [PATCH 14/16] plots: move rendering out of repo package --- dvc/command/live.py | 9 +- dvc/command/plots.py | 2 +- dvc/render/__init__.py | 32 +++ dvc/{utils => render}/html.py | 2 +- dvc/render/image.py | 67 +++++ dvc/render/utils.py | 80 ++++++ dvc/render/vega.py | 176 +++++++++++++ dvc/repo/live.py | 2 +- dvc/repo/plots/__init__.py | 25 +- dvc/repo/plots/data.py | 104 -------- dvc/repo/plots/render.py | 237 ------------------ tests/func/plots/test_show.py | 2 +- tests/func/test_live.py | 2 +- .../unit/{repo/plots => }/render/__init__.py | 0 .../{func/utils => unit/render}/test_html.py | 2 +- .../{repo/plots => }/render/test_image.py | 2 +- .../{repo/plots => }/render/test_render.py | 2 +- .../unit/{repo/plots => }/render/test_vega.py | 86 ++++++- tests/unit/repo/plots/test_data.py | 93 ------- tests/unit/test_plots.py | 2 +- 20 files changed, 455 insertions(+), 472 deletions(-) create mode 100644 dvc/render/__init__.py rename dvc/{utils => render}/html.py (98%) create mode 100644 dvc/render/image.py create mode 100644 dvc/render/utils.py create mode 100644 dvc/render/vega.py delete mode 100644 dvc/repo/plots/data.py delete mode 100644 dvc/repo/plots/render.py rename tests/unit/{repo/plots => }/render/__init__.py (100%) rename tests/{func/utils => unit/render}/test_html.py (93%) rename tests/unit/{repo/plots => }/render/test_image.py (95%) rename tests/unit/{repo/plots => }/render/test_render.py (97%) rename tests/unit/{repo/plots => }/render/test_vega.py (83%) delete mode 100644 tests/unit/repo/plots/test_data.py diff --git a/dvc/command/live.py b/dvc/command/live.py index 5995e1d934..f0488a93d2 100644 --- a/dvc/command/live.py +++ b/dvc/command/live.py @@ -1,5 +1,5 @@ import argparse -import os +from pathlib import Path from dvc.command import completion from dvc.command.base import CmdBase, fix_subparsers @@ -13,10 +13,11 @@ def _run(self, target, revs=None): metrics, plots = self.repo.live.show(target=target, revs=revs) if plots: - html_path = self.args.target + ".html" - self.repo.plots.write_html(html_path, plots, metrics) + html_path = Path.cwd() / (self.args.target + "_html") + from dvc.render.utils import render - ui.write("\nfile://", os.path.abspath(html_path), sep="") + index_path = render(self.repo, plots, metrics, html_path) + ui.write(index_path.as_uri()) return 0 return 1 diff --git a/dvc/command/plots.py b/dvc/command/plots.py index c74e44325d..8ce334ba53 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -4,7 +4,7 @@ from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link, fix_subparsers from dvc.exceptions import DvcException -from dvc.repo.plots.render import find_vega, render +from dvc.render.utils import find_vega, render from dvc.ui import ui from dvc.utils import format_link diff --git a/dvc/render/__init__.py b/dvc/render/__init__.py new file mode 100644 index 0000000000..d9dcc5b531 --- /dev/null +++ b/dvc/render/__init__.py @@ -0,0 +1,32 @@ +import logging +from typing import TYPE_CHECKING, Dict + +if TYPE_CHECKING: + from dvc.types import StrPath + +logger = logging.getLogger(__name__) + + +class Renderer: + def __init__(self, data: Dict): + self.data = data + + # we assume comparison of same file between revisions + from dvc.render.utils import get_files + + files = get_files(self.data) + assert len(files) == 1 + self.filename = files.pop() + + def _convert(self, page_dir_path: "StrPath"): + raise NotImplementedError + + @property + def DIV(self): + raise NotImplementedError + + def generate_html(self, page_dir_path: "StrPath"): + """this method might edit content of path""" + partial = self._convert(page_dir_path) + div_id = f"plot_{self.filename.replace('.', '_').replace('/', '_')}" + return self.DIV.format(id=div_id, partial=partial) diff --git a/dvc/utils/html.py b/dvc/render/html.py similarity index 98% rename from dvc/utils/html.py rename to dvc/render/html.py index 0c7f915e46..ec3cc5f5bb 100644 --- a/dvc/utils/html.py +++ b/dvc/render/html.py @@ -5,7 +5,7 @@ from dvc.exceptions import DvcException if TYPE_CHECKING: - from dvc.repo.plots.render import Renderer + from dvc.render import Renderer from dvc.types import StrPath PAGE_HTML = """ diff --git a/dvc/render/image.py b/dvc/render/image.py new file mode 100644 index 0000000000..dfd3ff74b0 --- /dev/null +++ b/dvc/render/image.py @@ -0,0 +1,67 @@ +import os +from typing import TYPE_CHECKING + +from dvc.render import Renderer +from dvc.render.utils import get_files +from dvc.utils import relpath + +if TYPE_CHECKING: + from dvc.types import StrPath + + +class ImageRenderer(Renderer): + DIV = """ +
+ {partial} +
""" + + def _write_image( + self, + page_dir_path: "StrPath", + revision: str, + filename: str, + image_data: bytes, + ): + static = os.path.join(page_dir_path, "static") + os.makedirs(static, exist_ok=True) + + img_path = os.path.join( + static, f"{revision}_{filename.replace(os.sep, '_')}" + ) + rel_img_path = relpath(img_path, page_dir_path) + with open(img_path, "wb") as fd: + fd.write(image_data) + return """ +
+

{title}

+ +
""".format( + title=revision, src=rel_img_path + ) + + def _convert(self, page_dir_path: "StrPath"): + div_content = [] + for rev, rev_data in self.data.items(): + if "data" in rev_data: + for file, file_data in rev_data.get("data", {}).items(): + if "data" in file_data: + div_content.append( + self._write_image( + page_dir_path, rev, file, file_data["data"] + ) + ) + if div_content: + div_content.insert(0, f"

{self.filename}

") + return "\n".join(div_content) + return "" + + @staticmethod + def matches(data): + files = get_files(data) + extensions = set(map(lambda f: os.path.splitext(f)[1], files)) + return extensions.issubset({".jpg", ".jpeg", ".gif", ".png"}) diff --git a/dvc/render/utils.py b/dvc/render/utils.py new file mode 100644 index 0000000000..b0c21e106a --- /dev/null +++ b/dvc/render/utils.py @@ -0,0 +1,80 @@ +import os.path +from typing import Dict, List, Set + + +def get_files(data: Dict) -> Set: + files = set() + for rev in data.keys(): + for file in data[rev].get("data", {}).keys(): + files.add(file) + return files + + +def group_by_filename(plots_data: Dict) -> List[Dict]: + # TODO use dpath.util.search once + # https://github.com/dpath-maintainers/dpath-python/issues/147 is released + # now cannot search when errors are present in data + files = get_files(plots_data) + grouped = [] + + for file in files: + tmp: Dict = {} + for revision, revision_data in plots_data.items(): + if file in revision_data.get("data", {}): + if "data" not in tmp: + tmp[revision] = {"data": {}} + tmp[revision]["data"].update( + {file: revision_data["data"][file]} + ) + grouped.append(tmp) + + return grouped + + +def find_vega(repo, plots_data, target): + # TODO same as group_by_filename + grouped = group_by_filename(plots_data) + found = None + for plot_group in grouped: + files = get_files(plot_group) + assert len(files) == 1 + file = files.pop() + if file == target: + found = plot_group + break + + from dvc.render.vega import VegaRenderer + + if found and VegaRenderer.matches(found): + return VegaRenderer(found, repo.plots.templates).get_vega() + return "" + + +def match_renderers(plots_data, templates): + renderers = [] + for g in group_by_filename(plots_data): + from dvc.render.vega import VegaRenderer + + if VegaRenderer.matches(g): + renderers.append(VegaRenderer(g, templates)) + from dvc.render.image import ImageRenderer + + if ImageRenderer.matches(g): + renderers.append(ImageRenderer(g)) + return renderers + + +def render(repo, plots_data, metrics=None, path=None, html_template_path=None): + renderers = match_renderers(plots_data, repo.plots.templates) + if not html_template_path: + html_template_path = repo.config.get("plots", {}).get( + "html_template", None + ) + if html_template_path and not os.path.isabs(html_template_path): + html_template_path = os.path.join(repo.dvc_dir, html_template_path) + + from dvc.render.html import write + + return write( + path, renderers, metrics=metrics, template_path=html_template_path + ) diff --git a/dvc/render/vega.py b/dvc/render/vega.py new file mode 100644 index 0000000000..0248d583e3 --- /dev/null +++ b/dvc/render/vega.py @@ -0,0 +1,176 @@ +import os +from copy import copy, deepcopy +from typing import TYPE_CHECKING, Dict, List, Optional, Union + +from funcy import first + +from dvc.exceptions import DvcException +from dvc.render import Renderer +from dvc.render.utils import get_files + +if TYPE_CHECKING: + from dvc.repo.plots.template import PlotTemplates + from dvc.types import StrPath + +REVISION_FIELD = "rev" +INDEX_FIELD = "step" + + +class PlotMetricTypeError(DvcException): + def __init__(self, file): + super().__init__( + "'{}' - file type error\n" + "Only JSON, YAML, CSV and TSV formats are supported.".format(file) + ) + + +class PlotDataStructureError(DvcException): + def __init__(self): + super().__init__( + "Plot data extraction failed. Please see " + "https://man.dvc.org/plot for supported data formats." + ) + + +def _filter_fields( + datapoints: List[Dict], filename, revision, fields=None +) -> List[Dict]: + if not fields: + return datapoints + assert isinstance(fields, set) + + new_data = [] + for data_point in datapoints: + new_dp = copy(data_point) + + keys = set(data_point.keys()) + if keys & fields != fields: + raise DvcException( + "Could not find fields: '{}' for '{}' at '{}'.".format( + ", ".join(fields), filename, revision + ) + ) + + to_del = keys - fields + for key in to_del: + del new_dp[key] + new_data.append(new_dp) + return new_data + + +def _lists(dictionary): + for _, value in dictionary.items(): + if isinstance(value, dict): + yield from _lists(value) + elif isinstance(value, list): + yield value + + +def _find_data(data: Union[Dict, List], fields=None) -> List[Dict]: + if not isinstance(data, dict): + return data + + if not fields: + # just look for first list of dicts + fields = set() + + for lst in _lists(data): + if ( + all(isinstance(dp, dict) for dp in lst) + and set(first(lst).keys()) & fields == fields + ): + return lst + raise PlotDataStructureError() + + +def _append_index(datapoints: List[Dict], append_index=False) -> List[Dict]: + if not append_index or INDEX_FIELD in first(datapoints).keys(): + return datapoints + + for index, data_point in enumerate(datapoints): + data_point[INDEX_FIELD] = index + return datapoints + + +def _append_revision(datapoints: List[Dict], revision) -> List[Dict]: + for data_point in datapoints: + data_point[REVISION_FIELD] = revision + return datapoints + + +class VegaRenderer(Renderer): + DIV = """ +
+ +
+ """ + + def __init__(self, data: Dict, templates: "PlotTemplates"): + super().__init__(data) + self.templates = templates + + def _squash_props(self) -> Dict: + resolved: Dict[str, str] = {} + for rev_data in self.data.values(): + for file_data in rev_data.get("data", {}).values(): + props = file_data.get("props", {}) + resolved = {**resolved, **props} + return resolved + + def _datapoints(self, props: Dict): + fields = props.get("fields", set()) + if fields: + fields = {*fields, props.get("x"), props.get("y")} - {None} + + datapoints = [] + for revision, rev_data in self.data.items(): + for filename, file_data in rev_data.get("data", {}).items(): + if "data" in file_data: + tmp = deepcopy(file_data.get("data")) + tmp = _find_data(tmp, fields=fields - {INDEX_FIELD}) + tmp = _append_index( + tmp, append_index=props.get("append_index", False) + ) + tmp = _filter_fields( + tmp, + filename=filename, + revision=revision, + fields=fields, + ) + tmp = _append_revision(tmp, revision=revision) + datapoints.extend(tmp) + return datapoints + + def get_vega(self) -> Optional[str]: + props = self._squash_props() + + template = self.templates.load(props.get("template") or "default") + + if not props.get("x") and template.has_anchor("x"): + props["append_index"] = True + props["x"] = INDEX_FIELD + + datapoints = self._datapoints(props) + + if datapoints: + if not props.get("y") and template.has_anchor("y"): + fields = list(first(datapoints)) + skip = (REVISION_FIELD, props.get("x")) + props["y"] = first( + f for f in reversed(fields) if f not in skip + ) + return template.render(datapoints, props=props) + return None + + # TODO naming? + def _convert(self, page_dir_path: "StrPath"): + return self.get_vega() + + @staticmethod + def matches(data): + files = get_files(data) + extensions = set(map(lambda f: os.path.splitext(f)[1], files)) + return extensions.issubset({".yml", ".yaml", ".json", ".csv", ".tsv"}) diff --git a/dvc/repo/live.py b/dvc/repo/live.py index f94f31dbac..df52279fec 100644 --- a/dvc/repo/live.py +++ b/dvc/repo/live.py @@ -2,7 +2,7 @@ import os from typing import TYPE_CHECKING, List, Optional -from dvc.repo.plots.render import render +from dvc.render.utils import render logger = logging.getLogger(__name__) diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 9fa5e22924..cdb4dcac55 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -3,13 +3,12 @@ import logging import os from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from typing import TYPE_CHECKING, Callable, Dict, List, Optional from funcy import cached_property, first, project from dvc.exceptions import DvcException -from dvc.repo.plots.data import PlotMetricTypeError -from dvc.types import StrPath +from dvc.render.vega import PlotMetricTypeError from dvc.utils import ( error_handler, errored_revisions, @@ -192,26 +191,6 @@ def templates(self): return PlotTemplates(self.repo.dvc_dir) - def write_html( - self, - path: StrPath, - plots: List[Any], - metrics: Optional[Dict[str, Dict]] = None, - html_template_path: Optional[StrPath] = None, - ): - if not html_template_path: - html_template_path = self.repo.config.get("plots", {}).get( - "html_template", None - ) - if html_template_path and not os.path.isabs(html_template_path): - html_template_path = os.path.join( - self.repo.dvc_dir, html_template_path - ) - - from dvc.utils.html import write - - return write(path, plots, metrics, html_template_path) - def _is_plot(out: "Output") -> bool: return bool(out.plot) or bool(out.live) diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py deleted file mode 100644 index 301cb3605b..0000000000 --- a/dvc/repo/plots/data.py +++ /dev/null @@ -1,104 +0,0 @@ -from copy import copy, deepcopy - -from funcy import first - -from dvc.exceptions import DvcException - -REVISION_FIELD = "rev" -INDEX_FIELD = "step" - - -class PlotMetricTypeError(DvcException): - def __init__(self, file): - super().__init__( - "'{}' - file type error\n" - "Only JSON, YAML, CSV and TSV formats are supported.".format(file) - ) - - -class PlotDataStructureError(DvcException): - def __init__(self): - super().__init__( - "Plot data extraction failed. Please see " - "https://man.dvc.org/plot for supported data formats." - ) - - -def _filter_fields(data_points, filename, revision, fields=None, **kwargs): - if not fields: - return data_points - assert isinstance(fields, set) - - new_data = [] - for data_point in data_points: - new_dp = copy(data_point) - - keys = set(data_point.keys()) - if keys & fields != fields: - raise DvcException( - "Could not find fields: '{}' for '{}' at '{}'.".format( - ", ".join(fields), filename, revision - ) - ) - - to_del = keys - fields - for key in to_del: - del new_dp[key] - new_data.append(new_dp) - return new_data - - -def _lists(dictionary): - for _, value in dictionary.items(): - if isinstance(value, dict): - yield from _lists(value) - elif isinstance(value, list): - yield value - - -def _find_data(data, fields=None, **kwargs): - if not isinstance(data, dict): - return data - - if not fields: - # just look for first list of dicts - fields = set() - - for lst in _lists(data): - if ( - all(isinstance(dp, dict) for dp in lst) - and set(first(lst).keys()) & fields == fields - ): - return lst - raise PlotDataStructureError() - - -def _append_index(data_points, append_index=False, **kwargs): - if not append_index or INDEX_FIELD in first(data_points).keys(): - return data_points - - for index, data_point in enumerate(data_points): - data_point[INDEX_FIELD] = index - return data_points - - -def _append_revision(data_points, revision, **kwargs): - for data_point in data_points: - data_point[REVISION_FIELD] = revision - return data_points - - -def to_datapoints(data, revision, filename, **kwargs): - # TODO data assumes single file, but it does not have to be, assert? - result = deepcopy(data) - for processor in [ - _find_data, - _filter_fields, - _append_index, - _append_revision, - ]: - result = processor( - result, revision=revision, filename=filename, **kwargs - ) - - return result diff --git a/dvc/repo/plots/render.py b/dvc/repo/plots/render.py deleted file mode 100644 index 7cd0ebb5d7..0000000000 --- a/dvc/repo/plots/render.py +++ /dev/null @@ -1,237 +0,0 @@ -import logging -import os.path -from typing import TYPE_CHECKING, Dict, List, Optional, Set - -from funcy import first - -from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD, to_datapoints -from dvc.utils import relpath - -if TYPE_CHECKING: - from dvc.repo.plots.template import PlotTemplates - from dvc.types import StrPath - -logger = logging.getLogger(__name__) - - -def get_files(data: Dict) -> Set: - files = set() - for rev in data.keys(): - for file in data[rev].get("data", {}).keys(): - files.add(file) - return files - - -def group_by_filename(data: Dict) -> List[Dict]: - # TODO use dpath.util.search once - # https://github.com/dpath-maintainers/dpath-python/issues/147 is released - # now cannot search when errors are present in data - files = get_files(data) - grouped = [] - - for file in files: - tmp: Dict = {} - for revision, revision_data in data.items(): - if file in revision_data.get("data", {}): - if "data" not in tmp: - tmp[revision] = {"data": {}} - tmp[revision]["data"].update( - {file: revision_data["data"][file]} - ) - grouped.append(tmp) - - return grouped - - -def find_vega(repo, plots_data, target): - # TODO same as group_by_filename - grouped = group_by_filename(plots_data) - found = None - for plot_group in grouped: - files = get_files(plot_group) - assert len(files) == 1 - file = files.pop() - if file == target: - found = plot_group - break - - if found and VegaRenderer.matches(found): - return VegaRenderer(found, repo.plots.templates).get_vega() - return "" - - -def match_renderers(plots_data, templates): - renderers = [] - for g in group_by_filename(plots_data): - if VegaRenderer.matches(g): - renderers.append(VegaRenderer(g, templates)) - if ImageRenderer.matches(g): - renderers.append(ImageRenderer(g)) - return renderers - - -def render(repo, plots_data, metrics=None, path=None, html_template_path=None): - renderers = match_renderers(plots_data, repo.plots.templates) - if not html_template_path: - html_template_path = repo.config.get("plots", {}).get( - "html_template", None - ) - if html_template_path and not os.path.isabs(html_template_path): - html_template_path = os.path.join(repo.dvc_dir, html_template_path) - - from dvc.utils.html import write - - return write( - path, renderers, metrics=metrics, template_path=html_template_path - ) - - -class Renderer: - def __init__(self, data: Dict): - self.data = data - - # we assume comparison of same file between revisions - files = get_files(self.data) - assert len(files) == 1 - self.filename = files.pop() - - def _convert(self, page_dir_path: "StrPath"): - raise NotImplementedError - - @property - def DIV(self): - raise NotImplementedError - - def generate_html(self, page_dir_path: "StrPath"): - """this method might edit content of path""" - partial = self._convert(page_dir_path) - div_id = f"plot_{self.filename.replace('.', '_').replace('/', '_')}" - return self.DIV.format(id=div_id, partial=partial) - - -class VegaRenderer(Renderer): - DIV = """ -
- -
- """ - - def __init__(self, data: Dict, templates: "PlotTemplates"): - super().__init__(data) - self.templates = templates - - def _squash_props(self) -> Dict: - resolved: Dict[str, str] = {} - for rev_data in self.data.values(): - for file_data in rev_data.get("data", {}).values(): - props = file_data.get("props", {}) - resolved = {**resolved, **props} - return resolved - - def get_vega(self) -> Optional[str]: - props = self._squash_props() - - template = self.templates.load(props.get("template") or "default") - fields = props.get("fields") - if fields is not None: - fields = {*fields, props.get("x"), props.get("y")} - {None} - - if not props.get("x") and template.has_anchor("x"): - props["append_index"] = True - props["x"] = INDEX_FIELD - - datapoints = [] - for rev, rev_data in self.data.items(): - for file, file_data in rev_data.get("data", {}).items(): - if "data" in file_data: - datapoints.extend( - to_datapoints( - file_data.get("data", []), - revision=rev, - filename=file, - fields=fields, - path=props.get("path"), - append_index=props.get("append_index", False), - ) - ) - - if datapoints: - if not props.get("y") and template.has_anchor("y"): - fields = list(first(datapoints)) - skip = (REVISION_FIELD, props.get("x")) - props["y"] = first( - f for f in reversed(fields) if f not in skip - ) - return template.render(datapoints, props=props) - return None - - # TODO naming? - def _convert(self, page_dir_path: "StrPath"): - return self.get_vega() - - @staticmethod - def matches(data): - files = get_files(data) - extensions = set(map(lambda f: os.path.splitext(f)[1], files)) - return extensions.issubset({".yml", ".yaml", ".json", ".csv", ".tsv"}) - - -class ImageRenderer(Renderer): - DIV = """ -
- {partial} -
""" - - def _write_image( - self, - page_dir_path: "StrPath", - revision: str, - filename: str, - image_data: bytes, - ): - static = os.path.join(page_dir_path, "static") - os.makedirs(static, exist_ok=True) - - img_path = os.path.join( - static, f"{revision}_{filename.replace(os.sep, '_')}" - ) - rel_img_path = relpath(img_path, page_dir_path) - with open(img_path, "wb") as fd: - fd.write(image_data) - return """ -
-

{title}

- -
""".format( - title=revision, src=rel_img_path - ) - - def _convert(self, page_dir_path: "StrPath"): - div_content = [] - for rev, rev_data in self.data.items(): - if "data" in rev_data: - for file, file_data in rev_data.get("data", {}).items(): - if "data" in file_data: - div_content.append( - self._write_image( - page_dir_path, rev, file, file_data["data"] - ) - ) - if div_content: - div_content.insert(0, f"

{self.filename}

") - return "\n".join(div_content) - return "" - - @staticmethod - def matches(data): - files = get_files(data) - extensions = set(map(lambda f: os.path.splitext(f)[1], files)) - return extensions.issubset({".jpg", ".jpeg", ".gif", ".png"}) diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index 461687ab10..94e57049fe 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -7,8 +7,8 @@ from dvc.exceptions import OverlappingOutputPathsError from dvc.main import main from dvc.path_info import PathInfo +from dvc.render.vega import PlotMetricTypeError from dvc.repo import Repo -from dvc.repo.plots.data import PlotMetricTypeError from dvc.utils import onerror_collect from dvc.utils.fs import remove from dvc.utils.serialize import ( diff --git a/tests/func/test_live.py b/tests/func/test_live.py index cbd04e29c2..d259c11d42 100644 --- a/tests/func/test_live.py +++ b/tests/func/test_live.py @@ -6,7 +6,7 @@ from funcy import first from dvc import stage as stage_module -from dvc.repo.plots.render import get_files +from dvc.render.utils import get_files LIVE_SCRIPT = dedent( """ diff --git a/tests/unit/repo/plots/render/__init__.py b/tests/unit/render/__init__.py similarity index 100% rename from tests/unit/repo/plots/render/__init__.py rename to tests/unit/render/__init__.py diff --git a/tests/func/utils/test_html.py b/tests/unit/render/test_html.py similarity index 93% rename from tests/func/utils/test_html.py rename to tests/unit/render/test_html.py index 37d9266bc8..56ebe63232 100644 --- a/tests/func/utils/test_html.py +++ b/tests/unit/render/test_html.py @@ -1,6 +1,6 @@ import pytest -from dvc.utils.html import HTML, PAGE_HTML, MissingPlaceholderError +from dvc.render.html import HTML, PAGE_HTML, MissingPlaceholderError CUSTOM_PAGE_HTML = """ diff --git a/tests/unit/repo/plots/render/test_image.py b/tests/unit/render/test_image.py similarity index 95% rename from tests/unit/repo/plots/render/test_image.py rename to tests/unit/render/test_image.py index e4a2a22c68..ec2901af62 100644 --- a/tests/unit/repo/plots/render/test_image.py +++ b/tests/unit/render/test_image.py @@ -2,7 +2,7 @@ import pytest -from dvc.repo.plots.render import ImageRenderer +from dvc.render.image import ImageRenderer @pytest.mark.parametrize( diff --git a/tests/unit/repo/plots/render/test_render.py b/tests/unit/render/test_render.py similarity index 97% rename from tests/unit/repo/plots/render/test_render.py rename to tests/unit/render/test_render.py index 443417fb4b..55496b6f3e 100644 --- a/tests/unit/repo/plots/render/test_render.py +++ b/tests/unit/render/test_render.py @@ -1,6 +1,6 @@ import os -from dvc.repo.plots.render import find_vega, render +from dvc.render.utils import find_vega, render def assert_website_has_image(page_path, revision, filename, image_content): diff --git a/tests/unit/repo/plots/render/test_vega.py b/tests/unit/render/test_vega.py similarity index 83% rename from tests/unit/repo/plots/render/test_vega.py rename to tests/unit/render/test_vega.py index 4738ea696a..16700c5fd4 100644 --- a/tests/unit/repo/plots/render/test_vega.py +++ b/tests/unit/render/test_vega.py @@ -4,8 +4,14 @@ import pytest -from dvc.repo.plots.data import INDEX_FIELD, REVISION_FIELD -from dvc.repo.plots.render import VegaRenderer, find_vega +from dvc.render.utils import find_vega, group_by_filename +from dvc.render.vega import ( + INDEX_FIELD, + REVISION_FIELD, + VegaRenderer, + _find_data, + _lists, +) from dvc.repo.plots.template import ( BadTemplateError, NoFieldInDataError, @@ -13,6 +19,82 @@ ) +@pytest.mark.parametrize( + "dictionary, expected_result", + [ + ({}, []), + ({"x": ["a", "b", "c"]}, [["a", "b", "c"]]), + ( + OrderedDict([("x", {"y": ["a", "b"]}), ("z", {"w": ["c", "d"]})]), + [["a", "b"], ["c", "d"]], + ), + ], +) +def test_finding_lists(dictionary, expected_result): + result = _lists(dictionary) + + assert list(result) == expected_result + + +def test_find_data_in_dict(tmp_dir): + m1 = [{"accuracy": 1, "loss": 2}, {"accuracy": 3, "loss": 4}] + m2 = [{"x": 1}, {"x": 2}] + dmetric = OrderedDict([("t1", m1), ("t2", m2)]) + + assert _find_data(dmetric) == m1 + assert _find_data(dmetric, fields={"x"}) == m2 + + +def test_group_plots_data(): + error = FileNotFoundError() + data = { + "v2": { + "data": { + "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, + "other_file.jpg": {"data": "content"}, + } + }, + "v1": { + "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} + }, + "workspace": { + "data": { + "file.json": {"error": error, "props": {}}, + "other_file.jpg": {"data": "content2"}, + } + }, + } + + results = group_by_filename(data) + assert { + "v2": { + "data": { + "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, + } + }, + "v1": { + "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} + }, + "workspace": { + "data": { + "file.json": {"error": error, "props": {}}, + } + }, + } in results + assert { + "v2": { + "data": { + "other_file.jpg": {"data": "content"}, + } + }, + "workspace": { + "data": { + "other_file.jpg": {"data": "content2"}, + } + }, + } in results + + def test_one_column(tmp_dir, scm, dvc): props = { "x_label": "x_title", diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py deleted file mode 100644 index e7ba000e37..0000000000 --- a/tests/unit/repo/plots/test_data.py +++ /dev/null @@ -1,93 +0,0 @@ -from collections import OrderedDict -from typing import Dict, List - -import pytest - -from dvc.repo.plots.data import _lists, to_datapoints -from dvc.repo.plots.render import group_by_filename - - -@pytest.mark.parametrize( - "dictionary, expected_result", - [ - ({}, []), - ({"x": ["a", "b", "c"]}, [["a", "b", "c"]]), - ( - OrderedDict([("x", {"y": ["a", "b"]}), ("z", {"w": ["c", "d"]})]), - [["a", "b"], ["c", "d"]], - ), - ], -) -def test_finding_lists(dictionary, expected_result): - result = _lists(dictionary) - - assert list(result) == expected_result - - -def test_find_data_in_dict(tmp_dir): - m1 = [{"accuracy": 1, "loss": 2}, {"accuracy": 3, "loss": 4}] - m2 = [{"x": 1}, {"x": 2}] - dmetric = OrderedDict([("t1", m1), ("t2", m2)]) - - def points_with(datapoints: List, additional_info: Dict): - for datapoint in datapoints: - datapoint.update(additional_info) - - return datapoints - - assert list( - map(dict, to_datapoints(dmetric, "revision", "file")) - ) == points_with(m1, {"rev": "revision"}) - assert list( - map(dict, to_datapoints(dmetric, "revision", "file", fields={"x"})) - ) == points_with(m2, {"rev": "revision"}) - - -def test_group_plots_data(): - error = FileNotFoundError() - data = { - "v2": { - "data": { - "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, - "other_file.jpg": {"data": "content"}, - } - }, - "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - "other_file.jpg": {"data": "content2"}, - } - }, - } - - results = group_by_filename(data) - assert { - "v2": { - "data": { - "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, - } - }, - "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - } - }, - } in results - assert { - "v2": { - "data": { - "other_file.jpg": {"data": "content"}, - } - }, - "workspace": { - "data": { - "other_file.jpg": {"data": "content2"}, - } - }, - } in results diff --git a/tests/unit/test_plots.py b/tests/unit/test_plots.py index 39ccc6d617..49ec95f58e 100644 --- a/tests/unit/test_plots.py +++ b/tests/unit/test_plots.py @@ -1,7 +1,7 @@ import json import os -from dvc.repo.plots.render import get_files +from dvc.render.utils import get_files def test_plots_order(tmp_dir, dvc): From a04558c07fc97de133c67213a57d102615866b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 20 Aug 2021 16:14:36 +0200 Subject: [PATCH 15/16] plots: refactoring --- dvc/render/__init__.py | 9 +++++---- dvc/render/image.py | 4 ++-- dvc/render/utils.py | 1 + dvc/render/vega.py | 3 +-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dvc/render/__init__.py b/dvc/render/__init__.py index d9dcc5b531..30d26f3a70 100644 --- a/dvc/render/__init__.py +++ b/dvc/render/__init__.py @@ -11,22 +11,23 @@ class Renderer: def __init__(self, data: Dict): self.data = data - # we assume comparison of same file between revisions from dvc.render.utils import get_files files = get_files(self.data) + + # we assume comparison of same file between revisions for now assert len(files) == 1 self.filename = files.pop() - def _convert(self, page_dir_path: "StrPath"): + def _convert(self, path: "StrPath"): raise NotImplementedError @property def DIV(self): raise NotImplementedError - def generate_html(self, page_dir_path: "StrPath"): + def generate_html(self, path: "StrPath"): """this method might edit content of path""" - partial = self._convert(page_dir_path) + partial = self._convert(path) div_id = f"plot_{self.filename.replace('.', '_').replace('/', '_')}" return self.DIV.format(id=div_id, partial=partial) diff --git a/dvc/render/image.py b/dvc/render/image.py index dfd3ff74b0..88652bdadb 100644 --- a/dvc/render/image.py +++ b/dvc/render/image.py @@ -44,7 +44,7 @@ def _write_image( title=revision, src=rel_img_path ) - def _convert(self, page_dir_path: "StrPath"): + def _convert(self, path: "StrPath"): div_content = [] for rev, rev_data in self.data.items(): if "data" in rev_data: @@ -52,7 +52,7 @@ def _convert(self, page_dir_path: "StrPath"): if "data" in file_data: div_content.append( self._write_image( - page_dir_path, rev, file, file_data["data"] + path, rev, file, file_data["data"] ) ) if div_content: diff --git a/dvc/render/utils.py b/dvc/render/utils.py index b0c21e106a..62de8ed1f0 100644 --- a/dvc/render/utils.py +++ b/dvc/render/utils.py @@ -65,6 +65,7 @@ def match_renderers(plots_data, templates): def render(repo, plots_data, metrics=None, path=None, html_template_path=None): + # TODO we could probably remove repo usages (here and in VegaRenderer) renderers = match_renderers(plots_data, repo.plots.templates) if not html_template_path: html_template_path = repo.config.get("plots", {}).get( diff --git a/dvc/render/vega.py b/dvc/render/vega.py index 0248d583e3..d8522aae94 100644 --- a/dvc/render/vega.py +++ b/dvc/render/vega.py @@ -165,8 +165,7 @@ def get_vega(self) -> Optional[str]: return template.render(datapoints, props=props) return None - # TODO naming? - def _convert(self, page_dir_path: "StrPath"): + def _convert(self, path: "StrPath"): return self.get_vega() @staticmethod From 4ffef00d4bf17004720422f7de615805f97ddc1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Sat, 21 Aug 2021 16:14:12 +0200 Subject: [PATCH 16/16] plots: self review refactor --- dvc/command/plots.py | 4 ++-- dvc/render/image.py | 5 +---- dvc/render/utils.py | 7 +++---- tests/unit/render/test_image.py | 4 +++- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 8ce334ba53..4fe422e8d1 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -45,8 +45,6 @@ def run(self): "No plots were loaded, " "visualization file will not be created." ) - rel: str = self.args.out or "dvc_plots" - path: Path = (Path.cwd() / rel).resolve() if self.args.show_vega: target = self.args.targets[0] @@ -55,6 +53,8 @@ def run(self): ui.write(plot_json) return 0 + rel: str = self.args.out or "dvc_plots" + path: Path = (Path.cwd() / rel).resolve() index_path = render( self.repo, plots_data, diff --git a/dvc/render/image.py b/dvc/render/image.py index 88652bdadb..8acbd7d8d9 100644 --- a/dvc/render/image.py +++ b/dvc/render/image.py @@ -13,10 +13,7 @@ class ImageRenderer(Renderer): DIV = """
+ style="border: 1px solid;"> {partial}
""" diff --git a/dvc/render/utils.py b/dvc/render/utils.py index 62de8ed1f0..2b0d567525 100644 --- a/dvc/render/utils.py +++ b/dvc/render/utils.py @@ -51,14 +51,13 @@ def find_vega(repo, plots_data, target): def match_renderers(plots_data, templates): + from dvc.render.image import ImageRenderer + from dvc.render.vega import VegaRenderer + renderers = [] for g in group_by_filename(plots_data): - from dvc.render.vega import VegaRenderer - if VegaRenderer.matches(g): renderers.append(VegaRenderer(g, templates)) - from dvc.render.image import ImageRenderer - if ImageRenderer.matches(g): renderers.append(ImageRenderer(g)) return renderers diff --git a/tests/unit/render/test_image.py b/tests/unit/render/test_image.py index ec2901af62..dab190d7f6 100644 --- a/tests/unit/render/test_image.py +++ b/tests/unit/render/test_image.py @@ -41,4 +41,6 @@ def test_render(tmp_dir): assert fobj.read() == b"content" assert "

file.jpg

" in html - assert '' in html + assert ( + f'' in html + )