diff --git a/dvc/command/live.py b/dvc/command/live.py index 5995e1d934..f0488a93d2 100644 --- a/dvc/command/live.py +++ b/dvc/command/live.py @@ -1,5 +1,5 @@ import argparse -import os +from pathlib import Path from dvc.command import completion from dvc.command.base import CmdBase, fix_subparsers @@ -13,10 +13,11 @@ def _run(self, target, revs=None): metrics, plots = self.repo.live.show(target=target, revs=revs) if plots: - html_path = self.args.target + ".html" - self.repo.plots.write_html(html_path, plots, metrics) + html_path = Path.cwd() / (self.args.target + "_html") + from dvc.render.utils import render - ui.write("\nfile://", os.path.abspath(html_path), sep="") + index_path = render(self.repo, plots, metrics, html_path) + ui.write(index_path.as_uri()) return 0 return 1 diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 7f752223a8..4fe422e8d1 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -4,6 +4,7 @@ from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link, fix_subparsers from dvc.exceptions import DvcException +from dvc.render.utils import find_vega, render from dvc.ui import ui from dvc.utils import format_link @@ -35,43 +36,50 @@ def run(self): return 1 try: - plots = self._func(targets=self.args.targets, props=self._props()) + plots_data = self._func( + targets=self.args.targets, props=self._props() + ) + + if not plots_data: + ui.error_write( + "No plots were loaded, " + "visualization file will not be created." + ) if self.args.show_vega: target = self.args.targets[0] - ui.write(plots[target]) + plot_json = find_vega(self.repo, plots_data, target) + if plot_json: + ui.write(plot_json) return 0 - except DvcException: - logger.exception("") - return 1 - - if plots: - rel: str = self.args.out or "plots.html" + rel: str = self.args.out or "dvc_plots" path: Path = (Path.cwd() / rel).resolve() - self.repo.plots.write_html( - path, plots=plots, html_template_path=self.args.html_template + index_path = render( + self.repo, + plots_data, + path=path, + html_template_path=self.args.html_template, ) - assert ( - path.is_absolute() - ) # as_uri throws ValueError if not absolute - url = path.as_uri() + assert index_path.is_absolute() + url = index_path.as_uri() ui.write(url) + if self.args.open: import webbrowser - opened = webbrowser.open(rel) + opened = webbrowser.open(index_path) if not opened: ui.error_write( "Failed to open. Please try opening it manually." ) return 1 - else: - ui.error_write( - "No plots were loaded, visualization file will not be created." - ) - return 0 + return 0 + + except DvcException: + logger.exception("") + return 1 class CmdPlotsShow(CmdPlots): diff --git a/dvc/render/__init__.py b/dvc/render/__init__.py new file mode 100644 index 0000000000..30d26f3a70 --- /dev/null +++ b/dvc/render/__init__.py @@ -0,0 +1,33 @@ +import logging +from typing import TYPE_CHECKING, Dict + +if TYPE_CHECKING: + from dvc.types import StrPath + +logger = logging.getLogger(__name__) + + +class Renderer: + def __init__(self, data: Dict): + self.data = data + + from dvc.render.utils import get_files + + files = get_files(self.data) + + # we assume comparison of same file between revisions for now + assert len(files) == 1 + self.filename = files.pop() + + def _convert(self, path: "StrPath"): + raise NotImplementedError + + @property + def DIV(self): + raise NotImplementedError + + def generate_html(self, path: "StrPath"): + """this method might edit content of path""" + partial = self._convert(path) + div_id = f"plot_{self.filename.replace('.', '_').replace('/', '_')}" + return self.DIV.format(id=div_id, partial=partial) diff --git a/dvc/utils/html.py b/dvc/render/html.py similarity index 75% rename from dvc/utils/html.py rename to dvc/render/html.py index 12c58924c0..ec3cc5f5bb 100644 --- a/dvc/utils/html.py +++ b/dvc/render/html.py @@ -1,7 +1,12 @@ -from typing import Dict, List, Optional +import os +from pathlib import Path +from typing import TYPE_CHECKING, Dict, List, Optional from dvc.exceptions import DvcException -from dvc.types import StrPath + +if TYPE_CHECKING: + from dvc.render import Renderer + from dvc.types import StrPath PAGE_HTML = """ @@ -16,12 +21,6 @@ """ -VEGA_DIV_HTML = """
-""" - class MissingPlaceholderError(DvcException): def __init__(self, placeholder): @@ -56,15 +55,6 @@ def with_metrics(self, metrics: Dict[str, Dict]) -> "HTML": self.elements.append(tabulate.tabulate(rows, header, tablefmt="html")) return self - def with_plots(self, plots: Dict[str, Dict]) -> "HTML": - self.elements.extend( - [ - VEGA_DIV_HTML.format(id=f"plot{i}", vega_json=plot) - for i, plot in enumerate(plots.values()) - ] - ) - return self - def with_element(self, html: str) -> "HTML": self.elements.append(html) return self @@ -75,11 +65,14 @@ def embed(self) -> str: def write( - path: StrPath, - plots: Dict[str, Dict], + path: "StrPath", + renderers: List["Renderer"], metrics: Optional[Dict[str, Dict]] = None, - template_path: Optional[StrPath] = None, + template_path: Optional["StrPath"] = None, ): + + os.makedirs(path, exist_ok=True) + page_html = None if template_path: with open(template_path) as fobj: @@ -90,7 +83,11 @@ def write( document.with_metrics(metrics) document.with_element("
") - document.with_plots(plots) + for renderer in renderers: + document.with_element(renderer.generate_html(path)) + + index = Path(os.path.join(path, "index.html")) - with open(path, "w") as fd: + with open(index, "w") as fd: fd.write(document.embed()) + return index diff --git a/dvc/render/image.py b/dvc/render/image.py new file mode 100644 index 0000000000..8acbd7d8d9 --- /dev/null +++ b/dvc/render/image.py @@ -0,0 +1,64 @@ +import os +from typing import TYPE_CHECKING + +from dvc.render import Renderer +from dvc.render.utils import get_files +from dvc.utils import relpath + +if TYPE_CHECKING: + from dvc.types import StrPath + + +class ImageRenderer(Renderer): + DIV = """ +
+ {partial} +
""" + + def _write_image( + self, + page_dir_path: "StrPath", + revision: str, + filename: str, + image_data: bytes, + ): + static = os.path.join(page_dir_path, "static") + os.makedirs(static, exist_ok=True) + + img_path = os.path.join( + static, f"{revision}_{filename.replace(os.sep, '_')}" + ) + rel_img_path = relpath(img_path, page_dir_path) + with open(img_path, "wb") as fd: + fd.write(image_data) + return """ +
+

{title}

+ +
""".format( + title=revision, src=rel_img_path + ) + + def _convert(self, path: "StrPath"): + div_content = [] + for rev, rev_data in self.data.items(): + if "data" in rev_data: + for file, file_data in rev_data.get("data", {}).items(): + if "data" in file_data: + div_content.append( + self._write_image( + path, rev, file, file_data["data"] + ) + ) + if div_content: + div_content.insert(0, f"

{self.filename}

") + return "\n".join(div_content) + return "" + + @staticmethod + def matches(data): + files = get_files(data) + extensions = set(map(lambda f: os.path.splitext(f)[1], files)) + return extensions.issubset({".jpg", ".jpeg", ".gif", ".png"}) diff --git a/dvc/render/utils.py b/dvc/render/utils.py new file mode 100644 index 0000000000..2b0d567525 --- /dev/null +++ b/dvc/render/utils.py @@ -0,0 +1,80 @@ +import os.path +from typing import Dict, List, Set + + +def get_files(data: Dict) -> Set: + files = set() + for rev in data.keys(): + for file in data[rev].get("data", {}).keys(): + files.add(file) + return files + + +def group_by_filename(plots_data: Dict) -> List[Dict]: + # TODO use dpath.util.search once + # https://github.com/dpath-maintainers/dpath-python/issues/147 is released + # now cannot search when errors are present in data + files = get_files(plots_data) + grouped = [] + + for file in files: + tmp: Dict = {} + for revision, revision_data in plots_data.items(): + if file in revision_data.get("data", {}): + if "data" not in tmp: + tmp[revision] = {"data": {}} + tmp[revision]["data"].update( + {file: revision_data["data"][file]} + ) + grouped.append(tmp) + + return grouped + + +def find_vega(repo, plots_data, target): + # TODO same as group_by_filename + grouped = group_by_filename(plots_data) + found = None + for plot_group in grouped: + files = get_files(plot_group) + assert len(files) == 1 + file = files.pop() + if file == target: + found = plot_group + break + + from dvc.render.vega import VegaRenderer + + if found and VegaRenderer.matches(found): + return VegaRenderer(found, repo.plots.templates).get_vega() + return "" + + +def match_renderers(plots_data, templates): + from dvc.render.image import ImageRenderer + from dvc.render.vega import VegaRenderer + + renderers = [] + for g in group_by_filename(plots_data): + if VegaRenderer.matches(g): + renderers.append(VegaRenderer(g, templates)) + if ImageRenderer.matches(g): + renderers.append(ImageRenderer(g)) + return renderers + + +def render(repo, plots_data, metrics=None, path=None, html_template_path=None): + # TODO we could probably remove repo usages (here and in VegaRenderer) + renderers = match_renderers(plots_data, repo.plots.templates) + if not html_template_path: + html_template_path = repo.config.get("plots", {}).get( + "html_template", None + ) + if html_template_path and not os.path.isabs(html_template_path): + html_template_path = os.path.join(repo.dvc_dir, html_template_path) + + from dvc.render.html import write + + return write( + path, renderers, metrics=metrics, template_path=html_template_path + ) diff --git a/dvc/render/vega.py b/dvc/render/vega.py new file mode 100644 index 0000000000..d8522aae94 --- /dev/null +++ b/dvc/render/vega.py @@ -0,0 +1,175 @@ +import os +from copy import copy, deepcopy +from typing import TYPE_CHECKING, Dict, List, Optional, Union + +from funcy import first + +from dvc.exceptions import DvcException +from dvc.render import Renderer +from dvc.render.utils import get_files + +if TYPE_CHECKING: + from dvc.repo.plots.template import PlotTemplates + from dvc.types import StrPath + +REVISION_FIELD = "rev" +INDEX_FIELD = "step" + + +class PlotMetricTypeError(DvcException): + def __init__(self, file): + super().__init__( + "'{}' - file type error\n" + "Only JSON, YAML, CSV and TSV formats are supported.".format(file) + ) + + +class PlotDataStructureError(DvcException): + def __init__(self): + super().__init__( + "Plot data extraction failed. Please see " + "https://man.dvc.org/plot for supported data formats." + ) + + +def _filter_fields( + datapoints: List[Dict], filename, revision, fields=None +) -> List[Dict]: + if not fields: + return datapoints + assert isinstance(fields, set) + + new_data = [] + for data_point in datapoints: + new_dp = copy(data_point) + + keys = set(data_point.keys()) + if keys & fields != fields: + raise DvcException( + "Could not find fields: '{}' for '{}' at '{}'.".format( + ", ".join(fields), filename, revision + ) + ) + + to_del = keys - fields + for key in to_del: + del new_dp[key] + new_data.append(new_dp) + return new_data + + +def _lists(dictionary): + for _, value in dictionary.items(): + if isinstance(value, dict): + yield from _lists(value) + elif isinstance(value, list): + yield value + + +def _find_data(data: Union[Dict, List], fields=None) -> List[Dict]: + if not isinstance(data, dict): + return data + + if not fields: + # just look for first list of dicts + fields = set() + + for lst in _lists(data): + if ( + all(isinstance(dp, dict) for dp in lst) + and set(first(lst).keys()) & fields == fields + ): + return lst + raise PlotDataStructureError() + + +def _append_index(datapoints: List[Dict], append_index=False) -> List[Dict]: + if not append_index or INDEX_FIELD in first(datapoints).keys(): + return datapoints + + for index, data_point in enumerate(datapoints): + data_point[INDEX_FIELD] = index + return datapoints + + +def _append_revision(datapoints: List[Dict], revision) -> List[Dict]: + for data_point in datapoints: + data_point[REVISION_FIELD] = revision + return datapoints + + +class VegaRenderer(Renderer): + DIV = """ +
+ +
+ """ + + def __init__(self, data: Dict, templates: "PlotTemplates"): + super().__init__(data) + self.templates = templates + + def _squash_props(self) -> Dict: + resolved: Dict[str, str] = {} + for rev_data in self.data.values(): + for file_data in rev_data.get("data", {}).values(): + props = file_data.get("props", {}) + resolved = {**resolved, **props} + return resolved + + def _datapoints(self, props: Dict): + fields = props.get("fields", set()) + if fields: + fields = {*fields, props.get("x"), props.get("y")} - {None} + + datapoints = [] + for revision, rev_data in self.data.items(): + for filename, file_data in rev_data.get("data", {}).items(): + if "data" in file_data: + tmp = deepcopy(file_data.get("data")) + tmp = _find_data(tmp, fields=fields - {INDEX_FIELD}) + tmp = _append_index( + tmp, append_index=props.get("append_index", False) + ) + tmp = _filter_fields( + tmp, + filename=filename, + revision=revision, + fields=fields, + ) + tmp = _append_revision(tmp, revision=revision) + datapoints.extend(tmp) + return datapoints + + def get_vega(self) -> Optional[str]: + props = self._squash_props() + + template = self.templates.load(props.get("template") or "default") + + if not props.get("x") and template.has_anchor("x"): + props["append_index"] = True + props["x"] = INDEX_FIELD + + datapoints = self._datapoints(props) + + if datapoints: + if not props.get("y") and template.has_anchor("y"): + fields = list(first(datapoints)) + skip = (REVISION_FIELD, props.get("x")) + props["y"] = first( + f for f in reversed(fields) if f not in skip + ) + return template.render(datapoints, props=props) + return None + + def _convert(self, path: "StrPath"): + return self.get_vega() + + @staticmethod + def matches(data): + files = get_files(data) + extensions = set(map(lambda f: os.path.splitext(f)[1], files)) + return extensions.issubset({".yml", ".yaml", ".json", ".csv", ".tsv"}) diff --git a/dvc/repo/collect.py b/dvc/repo/collect.py index dac019972f..3cba2a3caa 100644 --- a/dvc/repo/collect.py +++ b/dvc/repo/collect.py @@ -49,7 +49,6 @@ def _collect_paths( ) else: logger.warning("'%s' was not found at: '%s'.", path_info, rev) - continue target_infos.append(path_info) return target_infos diff --git a/dvc/repo/live.py b/dvc/repo/live.py index 5dcfee2fc8..df52279fec 100644 --- a/dvc/repo/live.py +++ b/dvc/repo/live.py @@ -2,6 +2,8 @@ import os from typing import TYPE_CHECKING, List, Optional +from dvc.render.utils import render + logger = logging.getLogger(__name__) if TYPE_CHECKING: @@ -15,10 +17,10 @@ def create_summary(out): metrics, plots = out.repo.live.show(str(out.path_info)) - html_path = out.path_info.with_suffix(".html") + html_path = out.path_info.fspath + "_dvc_plots" - out.repo.plots.write_html(html_path, plots, metrics) - logger.info(f"\nfile://{os.path.abspath(html_path)}") + index_path = render(out.repo, plots, metrics=metrics, path=html_path) + logger.info(f"\nfile://{os.path.abspath(index_path)}") def summary_path_info(out: "Output") -> Optional["PathInfo"]: diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 64d30068cc..cdb4dcac55 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -8,8 +8,7 @@ from funcy import cached_property, first, project from dvc.exceptions import DvcException -from dvc.repo.plots.data import PlotMetricTypeError -from dvc.types import StrPath +from dvc.render.vega import PlotMetricTypeError from dvc.utils import ( error_handler, errored_revisions, @@ -113,58 +112,33 @@ def _collect_from_revision( props = props or {} for path, repo_path in plot_files: - res[repo_path] = {"props": rev_props} + joined_props = {**rev_props, **props} + res[repo_path] = {"props": joined_props} res[repo_path].update( parse( fs, path, - rev_props=rev_props, - props=props, + props=joined_props, onerror=onerror, ) ) return res - @staticmethod - def render(data, revs=None, props=None, templates=None): - """Renders plots""" - props = props or {} - - # Merge data by plot file and apply overriding props - plots = _prepare_plots(data, revs, props) - - result = {} - for datafile, desc in plots.items(): - rendered = _render( - datafile, - desc["data"], - desc["props"], - templates, - ) - if rendered: - result[datafile] = rendered - - return result - def show( self, targets: List[str] = None, revs=None, props=None, - templates=None, recursive=False, onerror=None, ): if onerror is None: onerror = onerror_collect - data = self.collect( + + return self.collect( targets, revs, recursive, onerror=onerror, props=props ) - if templates is None: - templates = self.templates - return self.render(data, revs, props, templates) - def diff(self, *args, **kwargs): from .diff import diff @@ -217,26 +191,6 @@ def templates(self): return PlotTemplates(self.repo.dvc_dir) - def write_html( - self, - path: StrPath, - plots: Dict[str, Dict], - metrics: Optional[Dict[str, Dict]] = None, - html_template_path: Optional[StrPath] = None, - ): - if not html_template_path: - html_template_path = self.repo.config.get("plots", {}).get( - "html_template", None - ) - if html_template_path and not os.path.isabs(html_template_path): - html_template_path = os.path.join( - self.repo.dvc_dir, html_template_path - ) - - from dvc.utils.html import write - - write(path, plots, metrics, html_template_path) - def _is_plot(out: "Output") -> bool: return bool(out.plot) or bool(out.live) @@ -264,17 +218,19 @@ def _collect_plots( @error_handler -def parse(fs, path, props=None, rev_props=None, **kwargs): +def parse(fs, path, props=None, **kwargs): props = props or {} - rev_props = rev_props or {} _, extension = os.path.splitext(path) if extension in (".tsv", ".csv"): - header = {**rev_props, **props}.get("header", True) + header = props.get("header", True) if extension == ".csv": return _load_sv(path=path, fs=fs, delimiter=",", header=header) return _load_sv(path=path, fs=fs, delimiter="\t", header=header) if extension in LOADERS or extension in (".yml", ".yaml"): return LOADERS[extension](path=path, fs=fs) + if extension in (".jpeg", ".jpg", ".gif", ".png"): + with fs.open(path, "rb") as fd: + return fd.read() raise PlotMetricTypeError(path) @@ -291,89 +247,6 @@ def _plot_props(out: "Output") -> Dict: return project(out.plot, PLOT_PROPS) -def _prepare_plots(data, revs, props): - """Groups data by plot file. - - Also resolves props conflicts between revs and applies global props. - """ - # we go in order revs are supplied on props conflict first ones win. - revs = iter(data) if revs is None else revs - - plots, props_revs = {}, {} - for rev in revs: - # Asked for revision without data - if rev not in data: - continue - - for datafile, desc in data[rev].get("data", {}).items(): - # We silently skip on an absent data file, - # see also try/except/pass in .collect() - if "data" not in desc: - continue - - # props from command line overwrite plot props from out definition - full_props = {**desc["props"], **props} - - if datafile in plots: - saved = plots[datafile] - if saved["props"] != full_props: - logger.warning( - f"Inconsistent plot props for '{datafile}' in " - f"'{props_revs[datafile]}' and '{rev}'. " - f"Going to use ones from '{props_revs[datafile]}'" - ) - - saved["data"][rev] = desc["data"] - else: - plots[datafile] = { - "props": full_props, - "data": {rev: desc["data"]}, - } - # Save rev we got props from - props_revs[datafile] = rev - - return plots - - -def _render(datafile, datas, props, templates): - from .data import PlotData, plot_data - - # Copy it to not modify a passed value - props = props.copy() - - # Add x and y to fields if set - fields = props.get("fields") - if fields is not None: - fields = {*fields, props.get("x"), props.get("y")} - {None} - - template = templates.load(props.get("template") or "default") - - # If x is not set add index field - if not props.get("x") and template.has_anchor("x"): - props["append_index"] = True - props["x"] = PlotData.INDEX_FIELD - - # Parse all data, preprocess it and collect as a list of dicts - data = [] - for rev, unstructured_data in datas.items(): - rev_data = plot_data(datafile, rev, unstructured_data).to_datapoints( - fields=fields, - path=props.get("path"), - append_index=props.get("append_index", False), - ) - data.extend(rev_data) - - # If y is not set then use last field not used yet - if data: - if not props.get("y") and template.has_anchor("y"): - fields = list(first(data)) - skip = (PlotData.REVISION_FIELD, props.get("x")) - props["y"] = first(f for f in reversed(fields) if f not in skip) - - return template.render(data, props=props) - return None - - def _load_sv(path, fs, delimiter=",", header=True): with fs.open(path, "r") as fd: content = fd.read() diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py deleted file mode 100644 index 9ce7518ab2..0000000000 --- a/dvc/repo/plots/data.py +++ /dev/null @@ -1,138 +0,0 @@ -import os -from copy import copy - -from funcy import first - -from dvc.exceptions import DvcException -from dvc.utils.serialize import ParseError - - -class PlotMetricTypeError(DvcException): - def __init__(self, file): - super().__init__( - "'{}' - file type error\n" - "Only JSON, YAML, CSV and TSV formats are supported.".format(file) - ) - - -class PlotDataStructureError(DvcException): - def __init__(self): - super().__init__( - "Plot data extraction failed. Please see " - "https://man.dvc.org/plot for supported data formats." - ) - - -class PlotParsingError(ParseError): - def __init__(self, path, revision): - self.path = path - self.revision = revision - - super().__init__(path, f"revision: '{revision}'") - - -def plot_data(filename, revision, content): - _, extension = os.path.splitext(filename.lower()) - if extension in (".json", ".yaml"): - return DictData(filename, revision, content) - if extension in (".csv", ".tsv"): - return ListData(filename, revision, content) - raise PlotMetricTypeError(filename) - - -def _filter_fields(data_points, filename, revision, fields=None, **kwargs): - if not fields: - return data_points - assert isinstance(fields, set) - - new_data = [] - for data_point in data_points: - new_dp = copy(data_point) - - keys = set(data_point.keys()) - if keys & fields != fields: - raise DvcException( - "Could not find fields: '{}' for '{}' at '{}'.".format( - ", ".join(fields), filename, revision - ) - ) - - to_del = keys - fields - for key in to_del: - del new_dp[key] - new_data.append(new_dp) - return new_data - - -def _lists(dictionary): - for _, value in dictionary.items(): - if isinstance(value, dict): - yield from _lists(value) - elif isinstance(value, list): - yield value - - -def _find_data(data, fields=None, **kwargs): - if not isinstance(data, dict): - return data - - if not fields: - # just look for first list of dicts - fields = set() - - for lst in _lists(data): - if ( - all(isinstance(dp, dict) for dp in lst) - and set(first(lst).keys()) & fields == fields - ): - return lst - raise PlotDataStructureError() - - -def _append_index(data_points, append_index=False, **kwargs): - if not append_index or PlotData.INDEX_FIELD in first(data_points).keys(): - return data_points - - for index, data_point in enumerate(data_points): - data_point[PlotData.INDEX_FIELD] = index - return data_points - - -def _append_revision(data_points, revision, **kwargs): - for data_point in data_points: - data_point[PlotData.REVISION_FIELD] = revision - return data_points - - -class PlotData: - REVISION_FIELD = "rev" - INDEX_FIELD = "step" - - def __init__(self, filename, revision, content, **kwargs): - self.filename = filename - self.revision = revision - self.content = content - - def _processors(self): - return [_filter_fields, _append_index, _append_revision] - - def to_datapoints(self, **kwargs): - data = self.content - - for data_proc in self._processors(): - data = data_proc( - data, filename=self.filename, revision=self.revision, **kwargs - ) - return data - - -class DictData(PlotData): - # For files usually parsed as dicts: eg JSON, Yaml - def _processors(self): - parent_processors = super()._processors() - return [_find_data] + parent_processors - - -class ListData(PlotData): - # For files parsed as list: CSV, TSV - pass diff --git a/tests/conftest.py b/tests/conftest.py index cc160688f6..32fce78614 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -155,3 +155,12 @@ def pytest_configure(config): enabled_remotes.discard(remote_name) if enabled: enabled_remotes.add(remote_name) + + +@pytest.fixture() +def custom_template(tmp_dir, dvc): + import shutil + + template = tmp_dir / "custom_template.json" + shutil.copy(tmp_dir / ".dvc" / "plots" / "default.json", template) + return template diff --git a/tests/func/plots/conftest.py b/tests/func/plots/conftest.py deleted file mode 100644 index f9d7d96a34..0000000000 --- a/tests/func/plots/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import shutil - -import pytest - - -@pytest.fixture() -def custom_template(tmp_dir, dvc): - template = tmp_dir / "custom_template.json" - shutil.copy(tmp_dir / ".dvc" / "plots" / "default.json", template) - return template diff --git a/tests/func/plots/test_diff.py b/tests/func/plots/test_diff.py index 7162b88ac6..3d20410e64 100644 --- a/tests/func/plots/test_diff.py +++ b/tests/func/plots/test_diff.py @@ -1,12 +1,8 @@ -import json - -from dvc.repo.plots.data import PlotData from tests.func.plots.utils import _write_json def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): - metric_1 = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric_1, "metric_t.json") + _write_json(tmp_dir, [{"y": 2}, {"y": 3}], "metric_t.json") run_copy_metrics( "metric_t.json", "metric.json", @@ -14,8 +10,8 @@ def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): commit="init", ) - metric_2 = [{"y": 3}, {"y": 5}] - _write_json(tmp_dir, metric_2, "metric_t.json") + metric_head = [{"y": 3}, {"y": 5}] + _write_json(tmp_dir, metric_head, "metric_t.json") run_copy_metrics( "metric_t.json", "metric.json", @@ -23,34 +19,30 @@ def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): commit="second", ) - metric_3 = [{"y": 5}, {"y": 6}] - _write_json(tmp_dir, metric_3, "metric_t.json") + metric_1 = [{"y": 5}, {"y": 6}] + _write_json(tmp_dir, metric_1, "metric_t.json") run_copy_metrics( "metric_t.json", "metric.json", plots_no_cache=["metric.json"] ) - - plot_string = dvc.plots.diff(props={"fields": {"y"}})["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - _write_json(tmp_dir, [{"y": 7}, {"y": 8}], "metric.json") - - plot_string = dvc.plots.diff(props={"fields": {"y"}})["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 7, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"y": 8, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" + props = {"fields": {"y"}} + diff_result = dvc.plots.diff(props=props) + assert diff_result == { + "workspace": { + "data": {"metric.json": {"data": metric_1, "props": props}} + }, + "HEAD": { + "data": {"metric.json": {"data": metric_head, "props": props}} + }, + } + metric_2 = [{"y": 7}, {"y": 8}] + _write_json(tmp_dir, metric_2, "metric.json") + + diff_result = dvc.plots.diff(props=props) + assert diff_result == { + "workspace": { + "data": {"metric.json": {"data": metric_2, "props": props}} + }, + "HEAD": { + "data": {"metric.json": {"data": metric_head, "props": props}} + }, + } diff --git a/tests/func/plots/test_modify.py b/tests/func/plots/test_modify.py index cf818dc11c..7c1aa07604 100644 --- a/tests/func/plots/test_modify.py +++ b/tests/func/plots/test_modify.py @@ -1,4 +1,3 @@ -import json import os import pytest @@ -94,8 +93,5 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): dvc.plots.modify("subdir", {"title": "TITLE"}) result = dvc.plots.show() - p1_content = json.loads(result[p1]) - p2_content = json.loads(result[p2]) - - assert p1_content["title"] == p2_content["title"] == "TITLE" - assert p1_content == p2_content + assert result["workspace"]["data"][p1]["props"]["title"] == "TITLE" + assert result["workspace"]["data"][p2]["props"]["title"] == "TITLE" diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index bdc32e4e8f..94e57049fe 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -1,8 +1,4 @@ -import json -import logging import os -import shutil -from collections import OrderedDict import pytest from funcy import get_in @@ -11,13 +7,8 @@ from dvc.exceptions import OverlappingOutputPathsError from dvc.main import main from dvc.path_info import PathInfo +from dvc.render.vega import PlotMetricTypeError from dvc.repo import Repo -from dvc.repo.plots.data import PlotData, PlotMetricTypeError -from dvc.repo.plots.template import ( - BadTemplateError, - NoFieldInDataError, - TemplateNotFoundError, -) from dvc.utils import onerror_collect from dvc.utils.fs import remove from dvc.utils.serialize import ( @@ -26,346 +17,13 @@ dump_yaml, modify_yaml, ) -from tests.func.plots.utils import _write_csv, _write_json - - -def test_plot_csv_one_column(tmp_dir, scm, dvc, run_copy_metrics): - # no header - metric = [{"val": 2}, {"val": 3}] - _write_csv(metric, "metric_t.csv", header=False) - run_copy_metrics( - "metric_t.csv", "metric.csv", plots_no_cache=["metric.csv"] - ) - - props = { - "header": False, - "x_label": "x_title", - "y_label": "y_title", - "title": "mytitle", - } - plot_string = dvc.plots.show(props=props)["metric.csv"] - - plot_content = json.loads(plot_string) - assert plot_content["title"] == "mytitle" - assert plot_content["data"]["values"] == [ - {"0": "2", PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"0": "3", PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "0" - assert plot_content["encoding"]["x"]["title"] == "x_title" - assert plot_content["encoding"]["y"]["title"] == "y_title" - - -def test_plot_csv_multiple_columns(tmp_dir, scm, dvc, run_copy_metrics): - metric = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - _write_csv(metric, "metric_t.csv") - run_copy_metrics( - "metric_t.csv", "metric.csv", plots_no_cache=["metric.csv"] - ) - - plot_string = dvc.plots.show()["metric.csv"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": "2", - PlotData.INDEX_FIELD: 0, - "rev": "workspace", - "first_val": "100", - "second_val": "100", - }, - { - "val": "3", - PlotData.INDEX_FIELD: 1, - "rev": "workspace", - "first_val": "200", - "second_val": "300", - }, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" - - -def test_plot_csv_choose_axes(tmp_dir, scm, dvc, run_copy_metrics): - metric = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - _write_csv(metric, "metric_t.csv") - run_copy_metrics( - "metric_t.csv", "metric.csv", plots_no_cache=["metric.csv"] - ) - - props = {"x": "first_val", "y": "second_val"} - plot_string = dvc.plots.show(props=props)["metric.csv"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": "2", - "rev": "workspace", - "first_val": "100", - "second_val": "100", - }, - { - "val": "3", - "rev": "workspace", - "first_val": "200", - "second_val": "300", - }, - ] - assert plot_content["encoding"]["x"]["field"] == "first_val" - assert plot_content["encoding"]["y"]["field"] == "second_val" - - -def test_plot_json_single_val(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - plot_string = dvc.plots.show()["metric.json"] - - plot_json = json.loads(plot_string) - assert plot_json["data"]["values"] == [ - {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] - assert plot_json["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_json["encoding"]["y"]["field"] == "val" - - -def test_plot_json_multiple_val(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - plot_string = dvc.plots.show()["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": 2, - PlotData.INDEX_FIELD: 0, - "first_val": 100, - "rev": "workspace", - }, - { - "val": 3, - PlotData.INDEX_FIELD: 1, - "first_val": 200, - "rev": "workspace", - }, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" - - -def test_plot_confusion(tmp_dir, dvc, run_copy_metrics): - confusion_matrix = [ - {"predicted": "B", "actual": "A"}, - {"predicted": "A", "actual": "A"}, - ] - _write_json(tmp_dir, confusion_matrix, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - props = {"template": "confusion", "x": "predicted", "y": "actual"} - show = dvc.plots.show(props=props) - plot_string = show["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"predicted": "B", "actual": "A", "rev": "workspace"}, - {"predicted": "A", "actual": "A", "rev": "workspace"}, - ] - assert plot_content["spec"]["transform"][0]["groupby"] == [ - "actual", - "predicted", - ] - assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" - assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" - - -def test_plot_confusion_normalized(tmp_dir, dvc, run_copy_metrics): - confusion_matrix = [ - {"predicted": "B", "actual": "A"}, - {"predicted": "A", "actual": "A"}, - ] - _write_json(tmp_dir, confusion_matrix, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - props = { - "template": "confusion_normalized", - "x": "predicted", - "y": "actual", - } - plot_string = dvc.plots.show(props=props)["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"predicted": "B", "actual": "A", "rev": "workspace"}, - {"predicted": "A", "actual": "A", "rev": "workspace"}, - ] - assert plot_content["spec"]["transform"][0]["groupby"] == [ - "actual", - "predicted", - ] - assert plot_content["spec"]["transform"][1]["groupby"] == ["rev", "actual"] - assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" - assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" - - -def test_plot_multiple_revs_default(tmp_dir, scm, dvc, run_copy_metrics): - metric_1 = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric_1, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - metric_2 = [{"y": 3}, {"y": 5}] - _write_json(tmp_dir, metric_2, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="second", - tag="v2", - ) - - metric_3 = [{"y": 5}, {"y": 6}] - _write_json(tmp_dir, metric_3, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="third", - ) - plot_string = dvc.plots.show( - revs=["HEAD", "v2", "v1"], props={"fields": {"y"}} - )["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "v2"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "v2"}, - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - -def test_plot_multiple_revs(tmp_dir, scm, dvc, run_copy_metrics): - templates_dir = dvc.plots.templates.templates_dir - shutil.copy( - os.path.join(templates_dir, "default.json"), - os.path.join(templates_dir, "template.json"), - ) - - metric_1 = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric_1, "metric_t.json") - stage = run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - metric_2 = [{"y": 3}, {"y": 5}] - _write_json(tmp_dir, metric_2, "metric_t.json") - assert dvc.reproduce(stage.addressing) == [stage] - scm.add(["metric.json", stage.path]) - scm.commit("second") - scm.tag("v2") - - metric_3 = [{"y": 5}, {"y": 6}] - _write_json(tmp_dir, metric_3, "metric_t.json") - assert dvc.reproduce(stage.addressing) == [stage] - scm.add(["metric.json", stage.path]) - scm.commit("third") - - props = {"template": "template.json"} - plot_string = dvc.plots.show(revs=["HEAD", "v2", "v1"], props=props)[ - "metric.json" - ] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, - {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, - {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "v2"}, - {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "v2"}, - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" - - -def test_plot_even_if_metric_missing( - tmp_dir, scm, dvc, caplog, run_copy_metrics -): - tmp_dir.scm_gen("some_file", "content", commit="there is no metric") - scm.tag("v1") - - metric = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="there is metric", - tag="v2", - ) - - caplog.clear() - with caplog.at_level(logging.WARNING, "dvc"): - plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) - assert "'metric.json' was not found at: 'v1'." in caplog.text - - plot_content = json.loads(plots["metric.json"]) - assert plot_content["data"]["values"] == [ - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v2"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v2"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "y" +from tests.func.plots.utils import _write_json def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): - metric = [{"y": 2}, {"y": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - stage = run_copy_metrics( + metric1 = [{"y": 2}, {"y": 3}] + _write_json(tmp_dir, metric1, "metric_t.json") + run_copy_metrics( "metric_t.json", "metric.json", plots=["metric.json"], @@ -374,8 +32,8 @@ def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): scm.tag("v1") # Make a different plot and then remove its datafile - metric = [{"y": 3}, {"y": 4}] - _write_json(tmp_dir, metric, "metric_t.json") + metric2 = [{"y": 3}, {"y": 4}] + _write_json(tmp_dir, metric2, "metric_t.json") stage = run_copy_metrics( "metric_t.json", "metric.json", @@ -386,72 +44,12 @@ def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): remove(stage.outs[0].fspath) remove(stage.outs[0].cache_path) - plots = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) - plot_content = json.loads(plots["metric.json"]) - assert plot_content["data"]["values"] == [ - {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, - {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, - ] - - -def test_custom_template(tmp_dir, scm, dvc, custom_template, run_copy_metrics): - metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - props = {"template": os.fspath(custom_template), "x": "a", "y": "b"} - plot_string = dvc.plots.show(props=props)["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"a": 1, "b": 2, "rev": "workspace"}, - {"a": 2, "b": 3, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == "a" - assert plot_content["encoding"]["y"]["field"] == "b" - - -def _replace(path, src, dst): - path.write_text(path.read_text().replace(src, dst)) - - -def test_should_raise_on_no_template(tmp_dir, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - with pytest.raises(TemplateNotFoundError): - props = {"template": "non_existing_template.json"} - dvc.plots.show("metric.json", props=props) - - -def test_bad_template(tmp_dir, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", + plots_data = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) + assert plots_data["v1"]["data"]["metric.json"]["data"] == metric1 + assert isinstance( + plots_data["v2"]["data"]["metric.json"]["error"], FileNotFoundError ) - tmp_dir.gen("template.json", json.dumps({"a": "b", "c": "d"})) - - with pytest.raises(BadTemplateError): - props = {"template": "template.json"} - dvc.plots.show("metric.json", props=props) - def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): tmp_dir.gen("metric_t.txt", "some text") @@ -470,112 +68,6 @@ def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): ) -def test_plot_choose_columns( - tmp_dir, scm, dvc, custom_template, run_copy_metrics -): - metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - props = { - "template": os.fspath(custom_template), - "fields": {"b", "c"}, - "x": "b", - "y": "c", - } - plot_string = dvc.plots.show(props=props)["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"b": 2, "c": 3, "rev": "workspace"}, - {"b": 3, "c": 4, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == "b" - assert plot_content["encoding"]["y"]["field"] == "c" - - -def test_plot_default_choose_column(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="init", - tag="v1", - ) - - plot_string = dvc.plots.show(props={"fields": {"b"}})["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {PlotData.INDEX_FIELD: 0, "b": 2, "rev": "workspace"}, - {PlotData.INDEX_FIELD: 1, "b": 3, "rev": "workspace"}, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "b" - - -def test_plot_yaml(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - dump_yaml("metric_t.yaml", metric) - run_copy_metrics( - "metric_t.yaml", "metric.yaml", plots_no_cache=["metric.yaml"] - ) - - plot_string = dvc.plots.show()["metric.yaml"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, - {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, - ] - - -def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): - metric = [{"val": 2}, {"val": 3}] - _write_json(tmp_dir, metric, "metric_t.json") - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="first run", - ) - - with pytest.raises(NoFieldInDataError): - dvc.plots.show("metric.json", props={"x": "no_val"}) - - with pytest.raises(NoFieldInDataError): - dvc.plots.show("metric.json", props={"y": "no_val"}) - - -def test_multiple_plots(tmp_dir, scm, dvc, run_copy_metrics): - metric1 = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - metric2 = [ - OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), - ] - _write_csv(metric1, "metric_t1.csv") - _write_json(tmp_dir, metric2, "metric_t2.json") - run_copy_metrics( - "metric_t1.csv", "metric1.csv", plots_no_cache=["metric1.csv"] - ) - run_copy_metrics( - "metric_t2.json", "metric2.json", plots_no_cache=["metric2.json"] - ) - - assert len(dvc.plots.show().keys()) == 2 - - @pytest.mark.parametrize("use_dvc", [True, False]) def test_show_non_plot(tmp_dir, scm, use_dvc): metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] @@ -586,28 +78,9 @@ def test_show_non_plot(tmp_dir, scm, use_dvc): else: dvc = Repo(uninitialized=True) - plot_string = dvc.plots.show(targets=["metric.json"])["metric.json"] - - plot_content = json.loads(plot_string) - assert plot_content["data"]["values"] == [ - { - "val": 2, - PlotData.INDEX_FIELD: 0, - "first_val": 100, - "rev": "workspace", - }, - { - "val": 3, - PlotData.INDEX_FIELD: 1, - "first_val": 200, - "rev": "workspace", - }, - ] - assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD - assert plot_content["encoding"]["y"]["field"] == "val" - - if not use_dvc: - assert not (tmp_dir / ".dvc").exists() + plots = dvc.plots.show(targets=["metric.json"]) + + assert plots["workspace"]["data"]["metric.json"]["data"] == metric def test_show_non_plot_and_plot_with_params( @@ -618,28 +91,13 @@ def test_show_non_plot_and_plot_with_params( run_copy_metrics( "metric.json", "metric2.json", plots_no_cache=["metric2.json"] ) + props = {"title": "TITLE"} + dvc.plots.modify("metric2.json", props=props) - dvc.plots.modify("metric2.json", props={"title": "TITLE"}) result = dvc.plots.show(targets=["metric.json", "metric2.json"]) - - plot_content = json.loads(result["metric.json"]) - plot2_content = json.loads(result["metric2.json"]) - - assert plot2_content["title"] == "TITLE" - - assert plot_content != plot2_content - plot_content.pop("title") - plot2_content.pop("title") - assert plot_content == plot2_content - - -def test_show_no_repo(tmp_dir): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - _write_json(tmp_dir, metric, "metric.json") - - dvc = Repo(uninitialized=True) - - dvc.plots.show(["metric.json"]) + assert "metric.json" in result["workspace"]["data"] + assert "metric2.json" in result["workspace"]["data"] + assert result["workspace"]["data"]["metric2.json"]["props"] == props def test_show_from_subdir(tmp_dir, dvc, capsys): @@ -654,26 +112,17 @@ def test_show_from_subdir(tmp_dir, dvc, capsys): out, _ = capsys.readouterr() assert subdir.as_uri() in out - assert (subdir / "plots.html").exists() - - -def test_show_malformed_plots(tmp_dir, scm, dvc, caplog): - tmp_dir.gen("plot.json", '[{"m":1}]') - scm.add(["plot.json"]) - scm.commit("initial") - - tmp_dir.gen("plot.json", '[{"m":1]') + assert (subdir / "dvc_plots").is_dir() + assert (subdir / "dvc_plots" / "index.html").is_file() - result = dvc.plots.show(targets=["plot.json"], revs=["workspace", "HEAD"]) - plot_content = json.loads(result["plot.json"]) - - assert plot_content["data"]["values"] == [ - {"m": 1, "rev": "HEAD", "step": 0} - ] +def test_plots_show_non_existing(tmp_dir, dvc, caplog): + result = dvc.plots.show(targets=["plot.json"]) + assert isinstance( + result["workspace"]["data"]["plot.json"]["error"], FileNotFoundError + ) -def test_plots_show_non_existing(tmp_dir, dvc): - assert dvc.plots.show(targets=["plot.json"]) == {} + assert "'plot.json' was not found in current workspace." in caplog.text @pytest.mark.parametrize("clear_before_run", [True, False]) @@ -732,49 +181,13 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): plots=["subdir"], name="copy_double", ) + props = {"title": "TITLE"} dvc.plots.modify("subdir", {"title": "TITLE"}) result = dvc.plots.show() - p1_content = json.loads(result[p1]) - p2_content = json.loads(result[p2]) - - assert p1_content["title"] == p2_content["title"] == "TITLE" - - -def test_show_dir_plots(tmp_dir, dvc, run_copy_metrics): - subdir = tmp_dir / "subdir" - subdir.mkdir() - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - - fname = "file.json" - _write_json(tmp_dir, metric, fname) - - p1 = os.path.join("subdir", "p1.json") - p2 = os.path.join("subdir", "p2.json") - tmp_dir.dvc.run( - cmd=( - f"mkdir subdir && python copy.py {fname} {p1} && " - f"python copy.py {fname} {p2}" - ), - deps=[fname], - single_stage=False, - plots=["subdir"], - name="copy_double", - ) - - result = dvc.plots.show(targets=["subdir"]) - p1_content = json.loads(result[p1]) - p2_content = json.loads(result[p2]) - - assert p1_content == p2_content - - result = dvc.plots.show(targets=[p1]) - assert set(result.keys()) == {p1} - - remove(dvc.odb.local.cache_dir) - remove(subdir) - - assert dvc.plots.show() == {} + assert set(result["workspace"]["data"]) == {p1, p2} + assert result["workspace"]["data"][p1]["props"] == props + assert result["workspace"]["data"][p2]["props"] == props def test_ignore_binary_file(tmp_dir, dvc, run_copy_metrics): @@ -821,3 +234,30 @@ def test_log_errors( "DVC failed to load some plots for following revisions: 'workspace'." in error ) + + +def test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template): + with open("image.jpg", "wb") as fd: + fd.write(b"content") + + dvc.add(["image.jpg"]) + run_copy_metrics( + "image.jpg", + "plot.jpg", + commit="run training", + plots=["plot.jpg"], + name="s2", + single_stage=False, + ) + + scm.add(["dvc.yaml", "dvc.lock"]) + scm.commit("initial") + + scm.tag("v1") + + with open("plot.jpg", "wb") as fd: + fd.write(b"content2") + + result = dvc.plots.show(revs=["v1", "workspace"]) + assert result["v1"]["data"]["plot.jpg"]["data"] == b"content" + assert result["workspace"]["data"]["plot.jpg"]["data"] == b"content2" diff --git a/tests/func/test_live.py b/tests/func/test_live.py index 102a9adbe9..d259c11d42 100644 --- a/tests/func/test_live.py +++ b/tests/func/test_live.py @@ -6,6 +6,7 @@ from funcy import first from dvc import stage as stage_module +from dvc.render.utils import get_files LIVE_SCRIPT = dedent( """ @@ -122,9 +123,10 @@ def test_live_provides_metrics(tmp_dir, dvc, live_stage): } assert (tmp_dir / "logs").is_dir() - plots = dvc.plots.show() - assert os.path.join("logs", "accuracy.tsv") in plots - assert os.path.join("logs", "loss.tsv") in plots + plots_data = dvc.plots.show() + files = get_files(plots_data) + assert os.path.join("logs", "accuracy.tsv") in files + assert os.path.join("logs", "loss.tsv") in files def test_live_provides_no_metrics(tmp_dir, dvc, live_stage): @@ -134,9 +136,10 @@ def test_live_provides_no_metrics(tmp_dir, dvc, live_stage): assert dvc.metrics.show() == {"": {}} assert (tmp_dir / "logs").is_dir() - plots = dvc.plots.show() - assert os.path.join("logs", "accuracy.tsv") in plots - assert os.path.join("logs", "loss.tsv") in plots + plots_data = dvc.plots.show() + files = get_files(plots_data) + assert os.path.join("logs", "accuracy.tsv") in files + assert os.path.join("logs", "loss.tsv") in files @pytest.mark.parametrize("typ", ("live", "live_no_cache")) @@ -156,7 +159,7 @@ def test_experiments_track_summary(tmp_dir, scm, dvc, live_stage, typ): def test_live_html(tmp_dir, dvc, live_stage, html): live_stage(html=html, live="logs") - assert (tmp_dir / "logs.html").is_file() == html + assert (tmp_dir / "logs_dvc_plots" / "index.html").is_file() == html @pytest.fixture diff --git a/tests/unit/command/test_plots.py b/tests/unit/command/test_plots.py index cff6b3fc50..0387f90f71 100644 --- a/tests/unit/command/test_plots.py +++ b/tests/unit/command/test_plots.py @@ -7,7 +7,19 @@ from dvc.command.plots import CmdPlotsDiff, CmdPlotsShow -def test_plots_diff(dvc, mocker): +@pytest.fixture +def plots_data(): + yield { + "revision": { + "data": { + "plot.csv": {"data": [{"val": 1}, {"val": 2}], "props": {}}, + "other.jpg": {"data": b"content"}, + } + } + } + + +def test_plots_diff(dvc, mocker, plots_data): cli_args = parse_args( [ "plots", @@ -38,8 +50,9 @@ def test_plots_diff(dvc, mocker): assert cli_args.func == CmdPlotsDiff cmd = cli_args.func(cli_args) - m = mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + m = mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) + render_mock = mocker.patch( + "dvc.command.plots.render", return_value="html_path" ) assert cmd.run() == 0 @@ -58,9 +71,10 @@ def test_plots_diff(dvc, mocker): }, experiment=True, ) + render_mock.assert_not_called() -def test_plots_show_vega(dvc, mocker): +def test_plots_show_vega(dvc, mocker, plots_data): cli_args = parse_args( [ "plots", @@ -80,7 +94,10 @@ def test_plots_show_vega(dvc, mocker): m = mocker.patch( "dvc.repo.plots.Plots.show", - return_value={"datafile": "filledtemplate"}, + return_value=plots_data, + ) + render_mock = mocker.patch( + "dvc.command.plots.render", return_value="html_path" ) assert cmd.run() == 0 @@ -89,9 +106,10 @@ def test_plots_show_vega(dvc, mocker): targets=["datafile"], props={"template": "template", "header": False}, ) + render_mock.assert_not_called() -def test_plots_diff_vega(dvc, mocker, capsys): +def test_plots_diff_vega(dvc, mocker, capsys, plots_data): cli_args = parse_args( [ "plots", @@ -100,72 +118,87 @@ def test_plots_diff_vega(dvc, mocker, capsys): "HEAD~1", "--show-vega", "--targets", - "plots.csv", + "plot.csv", ] ) cmd = cli_args.func(cli_args) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"plots.csv": "plothtml"} + "dvc.command.plots.find_vega", return_value="vega_json_content" + ) + render_mock = mocker.patch( + "dvc.command.plots.render", return_value="html_path" ) assert cmd.run() == 0 out, _ = capsys.readouterr() - assert "plothtml" in out + + assert "vega_json_content" in out + render_mock.assert_not_called() -def test_plots_diff_open(tmp_dir, dvc, mocker, capsys): +def test_plots_diff_open(tmp_dir, dvc, mocker, capsys, plots_data): mocked_open = mocker.patch("webbrowser.open", return_value=True) - cli_args = parse_args(["plots", "diff", "--targets", "datafile", "--open"]) - cmd = cli_args.func(cli_args) - mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + cli_args = parse_args( + ["plots", "diff", "--targets", "plots.csv", "--open"] ) + cmd = cli_args.func(cli_args) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) - assert cmd.run() == 0 - mocked_open.assert_called_once_with("plots.html") + index_path = tmp_dir / "dvc_plots" / "index.html" + mocker.patch("dvc.command.plots.render", return_value=index_path) - expected_url = posixpath.join(tmp_dir.as_uri(), "plots.html") + assert cmd.run() == 0 + mocked_open.assert_called_once_with(index_path) out, _ = capsys.readouterr() - assert expected_url in out + assert index_path.as_uri() in out -def test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys): +def test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys, plots_data): mocked_open = mocker.patch("webbrowser.open", return_value=False) - cli_args = parse_args(["plots", "diff", "--targets", "datafile", "--open"]) + cli_args = parse_args( + ["plots", "diff", "--targets", "plots.csv", "--open"] + ) cmd = cli_args.func(cli_args) mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + "dvc.repo.plots.diff.diff", return_value={"datafile": plots_data} ) assert cmd.run() == 1 - mocked_open.assert_called_once_with("plots.html") + expected_url = tmp_dir / "dvc_plots" / "index.html" + mocked_open.assert_called_once_with(expected_url) error_message = "Failed to open. Please try opening it manually." - expected_url = posixpath.join(tmp_dir.as_uri(), "plots.html") out, err = capsys.readouterr() - assert expected_url in out + assert expected_url.as_uri() in out assert error_message in err @pytest.mark.parametrize( "output, expected_url_path", [ - ("plots file with spaces.html", "plots%20file%20with%20spaces.html"), - (os.path.join("dir", "..", "plots.html"), "plots.html"), + ( + "plots file with spaces", + posixpath.join("plots%20file%20with%20spaces", "index.html"), + ), + ( + os.path.join("dir", "..", "plots"), + posixpath.join("plots", "index.html"), + ), ], ids=["quote", "resolve"], ) def test_plots_path_is_quoted_and_resolved_properly( - tmp_dir, dvc, mocker, capsys, output, expected_url_path + tmp_dir, dvc, mocker, capsys, output, expected_url_path, plots_data ): cli_args = parse_args( ["plots", "diff", "--targets", "datafile", "--out", output] ) cmd = cli_args.func(cli_args) mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": "filledtemplate"} + "dvc.repo.plots.diff.diff", return_value={"datafile": plots_data} ) assert cmd.run() == 0 @@ -173,3 +206,29 @@ def test_plots_path_is_quoted_and_resolved_properly( out, _ = capsys.readouterr() assert expected_url in out + + +@pytest.mark.parametrize( + "output", ("some_out", os.path.join("to", "subdir"), None) +) +def test_should_call_render(tmp_dir, mocker, capsys, plots_data, output): + cli_args = parse_args( + ["plots", "diff", "--targets", "plots.csv", "--out", output] + ) + cmd = cli_args.func(cli_args) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) + + output = output or "dvc_plots" + index_path = tmp_dir / output / "index.html" + render_mock = mocker.patch( + "dvc.command.plots.render", return_value=index_path + ) + + assert cmd.run() == 0 + + out, _ = capsys.readouterr() + assert index_path.as_uri() in out + + render_mock.assert_called_once_with( + cmd.repo, plots_data, path=tmp_dir / output, html_template_path=None + ) diff --git a/tests/unit/render/__init__.py b/tests/unit/render/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/func/utils/test_html.py b/tests/unit/render/test_html.py similarity index 93% rename from tests/func/utils/test_html.py rename to tests/unit/render/test_html.py index 37d9266bc8..56ebe63232 100644 --- a/tests/func/utils/test_html.py +++ b/tests/unit/render/test_html.py @@ -1,6 +1,6 @@ import pytest -from dvc.utils.html import HTML, PAGE_HTML, MissingPlaceholderError +from dvc.render.html import HTML, PAGE_HTML, MissingPlaceholderError CUSTOM_PAGE_HTML = """ diff --git a/tests/unit/render/test_image.py b/tests/unit/render/test_image.py new file mode 100644 index 0000000000..dab190d7f6 --- /dev/null +++ b/tests/unit/render/test_image.py @@ -0,0 +1,46 @@ +import os + +import pytest + +from dvc.render.image import ImageRenderer + + +@pytest.mark.parametrize( + "extension, matches", + ( + (".csv", False), + (".json", False), + (".tsv", False), + (".yaml", False), + (".jpg", True), + (".gif", True), + (".jpeg", True), + (".png", True), + ), +) +def test_matches(extension, matches): + filename = "file" + extension + data = { + "HEAD": {"data": {filename: {}}}, + "v1": {"data": {filename: {}}}, + } + assert ImageRenderer.matches(data) == matches + + +def test_render(tmp_dir): + data = {"workspace": {"data": {"file.jpg": {"data": b"content"}}}} + + page_dir = os.path.join("some", "path") + html = ImageRenderer(data).generate_html(page_dir) + + assert (tmp_dir / page_dir).is_dir() + image_file = tmp_dir / page_dir / "static" / "workspace_file.jpg" + assert image_file.is_file() + + with open(image_file, "rb") as fobj: + assert fobj.read() == b"content" + + assert "

file.jpg

" in html + assert ( + f'' in html + ) diff --git a/tests/unit/render/test_render.py b/tests/unit/render/test_render.py new file mode 100644 index 0000000000..55496b6f3e --- /dev/null +++ b/tests/unit/render/test_render.py @@ -0,0 +1,67 @@ +import os + +from dvc.render.utils import find_vega, render + + +def assert_website_has_image(page_path, revision, filename, image_content): + index_path = page_path / "index.html" + assert index_path.is_file() + index_content = index_path.read_text() + + resources_filename = f"{revision}_{filename.replace(os.sep, '_')}" + image_path = page_path / "static" / resources_filename + assert image_path.is_file() + + img_html = f'' + assert img_html in index_content + with open(image_path, "rb") as fobj: + assert fobj.read() == image_content + + +def test_render(tmp_dir, dvc): + data = { + "HEAD": { + "data": { + "file.json": { + "data": [{"y": 5}, {"y": 6}], + "props": {"fields": {"y"}}, + }, + os.path.join("sub", "other_file.jpg"): {"data": b"content"}, + } + }, + "v2": { + "data": { + "file.json": { + "data": [{"y": 3}, {"y": 5}], + "props": {"fields": {"y"}}, + }, + "other_file.jpg": {"data": b"content2"}, + } + }, + "v1": { + "data": { + "some.csv": { + "data": [{"y": 2}, {"y": 3}], + "props": {"fields": {"y"}}, + }, + "another.gif": {"data": b"content3"}, + } + }, + } + + render(dvc, data, path=os.path.join("results", "dir")) + page_path = tmp_dir / "results" / "dir" + index_path = page_path / "index.html" + + assert index_path.is_file() + assert_website_has_image( + page_path, "HEAD", os.path.join("sub", "other_file.jpg"), b"content" + ) + assert_website_has_image(page_path, "v2", "other_file.jpg", b"content2") + assert_website_has_image(page_path, "v1", "another.gif", b"content3") + + index_content = index_path.read_text() + file_vega = find_vega(dvc, data, "file.json") + some_vega = find_vega(dvc, data, "some.csv") + assert file_vega in index_content.strip() + assert some_vega in index_content.strip() diff --git a/tests/unit/render/test_vega.py b/tests/unit/render/test_vega.py new file mode 100644 index 0000000000..16700c5fd4 --- /dev/null +++ b/tests/unit/render/test_vega.py @@ -0,0 +1,436 @@ +import json +import os +from collections import OrderedDict + +import pytest + +from dvc.render.utils import find_vega, group_by_filename +from dvc.render.vega import ( + INDEX_FIELD, + REVISION_FIELD, + VegaRenderer, + _find_data, + _lists, +) +from dvc.repo.plots.template import ( + BadTemplateError, + NoFieldInDataError, + TemplateNotFoundError, +) + + +@pytest.mark.parametrize( + "dictionary, expected_result", + [ + ({}, []), + ({"x": ["a", "b", "c"]}, [["a", "b", "c"]]), + ( + OrderedDict([("x", {"y": ["a", "b"]}), ("z", {"w": ["c", "d"]})]), + [["a", "b"], ["c", "d"]], + ), + ], +) +def test_finding_lists(dictionary, expected_result): + result = _lists(dictionary) + + assert list(result) == expected_result + + +def test_find_data_in_dict(tmp_dir): + m1 = [{"accuracy": 1, "loss": 2}, {"accuracy": 3, "loss": 4}] + m2 = [{"x": 1}, {"x": 2}] + dmetric = OrderedDict([("t1", m1), ("t2", m2)]) + + assert _find_data(dmetric) == m1 + assert _find_data(dmetric, fields={"x"}) == m2 + + +def test_group_plots_data(): + error = FileNotFoundError() + data = { + "v2": { + "data": { + "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, + "other_file.jpg": {"data": "content"}, + } + }, + "v1": { + "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} + }, + "workspace": { + "data": { + "file.json": {"error": error, "props": {}}, + "other_file.jpg": {"data": "content2"}, + } + }, + } + + results = group_by_filename(data) + assert { + "v2": { + "data": { + "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, + } + }, + "v1": { + "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} + }, + "workspace": { + "data": { + "file.json": {"error": error, "props": {}}, + } + }, + } in results + assert { + "v2": { + "data": { + "other_file.jpg": {"data": "content"}, + } + }, + "workspace": { + "data": { + "other_file.jpg": {"data": "content2"}, + } + }, + } in results + + +def test_one_column(tmp_dir, scm, dvc): + props = { + "x_label": "x_title", + "y_label": "y_title", + "title": "mytitle", + } + data = { + "workspace": { + "data": { + "file.json": {"data": [{"val": 2}, {"val": 3}], "props": props} + } + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["title"] == "mytitle" + assert plot_content["data"]["values"] == [ + {"val": 2, INDEX_FIELD: 0, REVISION_FIELD: "workspace"}, + {"val": 3, INDEX_FIELD: 1, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "val" + assert plot_content["encoding"]["x"]["title"] == "x_title" + assert plot_content["encoding"]["y"]["title"] == "y_title" + + +def test_multiple_columns(tmp_dir, scm, dvc): + metric = [ + OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), + ] + + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": {}}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + { + "val": 2, + INDEX_FIELD: 0, + REVISION_FIELD: "workspace", + "first_val": 100, + "second_val": 100, + }, + { + "val": 3, + INDEX_FIELD: 1, + REVISION_FIELD: "workspace", + "first_val": 200, + "second_val": 300, + }, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "val" + + +def test_choose_axes(tmp_dir, scm, dvc): + metric = [ + OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), + ] + + props = {"x": "first_val", "y": "second_val"} + + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + { + "val": 2, + REVISION_FIELD: "workspace", + "first_val": 100, + "second_val": 100, + }, + { + "val": 3, + REVISION_FIELD: "workspace", + "first_val": 200, + "second_val": 300, + }, + ] + assert plot_content["encoding"]["x"]["field"] == "first_val" + assert plot_content["encoding"]["y"]["field"] == "second_val" + + +def test_confusion(tmp_dir, dvc): + confusion_matrix = [ + {"predicted": "B", "actual": "A"}, + {"predicted": "A", "actual": "A"}, + ] + props = {"template": "confusion", "x": "predicted", "y": "actual"} + + data = { + "workspace": { + "data": {"file.json": {"data": confusion_matrix, "props": props}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"predicted": "B", "actual": "A", REVISION_FIELD: "workspace"}, + {"predicted": "A", "actual": "A", REVISION_FIELD: "workspace"}, + ] + assert plot_content["spec"]["transform"][0]["groupby"] == [ + "actual", + "predicted", + ] + assert plot_content["spec"]["encoding"]["x"]["field"] == "predicted" + assert plot_content["spec"]["encoding"]["y"]["field"] == "actual" + + +def test_multiple_revs_default(tmp_dir, scm, dvc): + metric_1 = [{"y": 2}, {"y": 3}] + metric_2 = [{"y": 3}, {"y": 5}] + metric_3 = [{"y": 5}, {"y": 6}] + + data = { + "HEAD": { + "data": { + "file.json": {"data": metric_3, "props": {"fields": {"y"}}} + } + }, + "v2": { + "data": { + "file.json": {"data": metric_2, "props": {"fields": {"y"}}} + } + }, + "v1": { + "data": { + "file.json": {"data": metric_1, "props": {"fields": {"y"}}} + } + }, + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" + + +def test_metric_missing(tmp_dir, scm, dvc, caplog): + + metric = [{"y": 2}, {"y": 3}] + data = { + "v2": {"data": {"file.json": {"data": metric, "props": {}}}}, + "workspace": { + "data": {"file.json": {"error": FileNotFoundError(), "props": {}}} + }, + } + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" + + +def test_custom_template(tmp_dir, scm, dvc, custom_template): + metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] + props = {"template": os.fspath(custom_template), "x": "a", "y": "b"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"a": 1, "b": 2, REVISION_FIELD: "workspace"}, + {"a": 2, "b": 3, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == "a" + assert plot_content["encoding"]["y"]["field"] == "b" + + +def test_raise_on_no_template(tmp_dir, dvc): + metric = [{"val": 2}, {"val": 3}] + props = {"template": "non_existing_template.json"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + with pytest.raises(TemplateNotFoundError): + VegaRenderer(data, dvc.plots.templates).get_vega() + + +def test_bad_template(tmp_dir, dvc): + metric = [{"val": 2}, {"val": 3}] + tmp_dir.gen("template.json", json.dumps({"a": "b", "c": "d"})) + props = {"template": "template.json"} + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + with pytest.raises(BadTemplateError): + VegaRenderer(data, dvc.plots.templates).get_vega() + + +def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + props = { + "template": os.fspath(custom_template), + "fields": {"b", "c"}, + "x": "b", + "y": "c", + } + data = { + "workspace": {"data": {"file.json": {"data": metric, "props": props}}} + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"b": 2, "c": 3, REVISION_FIELD: "workspace"}, + {"b": 3, "c": 4, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == "b" + assert plot_content["encoding"]["y"]["field"] == "c" + + +def test_plot_default_choose_column(tmp_dir, scm, dvc): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + data = { + "workspace": { + "data": {"file.json": {"data": metric, "props": {"fields": {"b"}}}} + } + } + + plot_string = VegaRenderer(data, dvc.plots.templates).get_vega() + plot_content = json.loads(plot_string) + + assert plot_content["data"]["values"] == [ + {INDEX_FIELD: 0, "b": 2, REVISION_FIELD: "workspace"}, + {INDEX_FIELD: 1, "b": 3, REVISION_FIELD: "workspace"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "b" + + +def test_raise_on_wrong_field(tmp_dir, scm, dvc): + metric = [{"val": 2}, {"val": 3}] + data = { + "workspace": { + "data": {"file.json": {"data": metric, "props": {"x": "no_val"}}} + } + } + + with pytest.raises(NoFieldInDataError): + VegaRenderer(data, dvc.plots.templates).get_vega() + + +@pytest.mark.parametrize( + "extension, matches", + ( + (".csv", True), + (".json", True), + (".tsv", True), + (".yaml", True), + (".jpg", False), + (".gif", False), + (".jpeg", False), + (".png", False), + ), +) +def test_matches(extension, matches): + filename = "file" + extension + data = { + "HEAD": {"data": {filename: {}}}, + "v1": {"data": {filename: {}}}, + } + assert VegaRenderer.matches(data) == matches + + +def test_find_vega(tmp_dir, dvc): + data = { + "HEAD": { + "data": { + "file.json": { + "data": [{"y": 5}, {"y": 6}], + "props": {"fields": {"y"}}, + }, + "other_file.jpg": {"data": b"content"}, + } + }, + "v2": { + "data": { + "file.json": { + "data": [{"y": 3}, {"y": 5}], + "props": {"fields": {"y"}}, + }, + "other_file.jpg": {"data": b"content2"}, + } + }, + "v1": { + "data": { + "file.json": { + "data": [{"y": 2}, {"y": 3}], + "props": {"fields": {"y"}}, + }, + "another.gif": {"data": b"content2"}, + } + }, + } + + plot_content = json.loads(find_vega(dvc, data, "file.json")) + + assert plot_content["data"]["values"] == [ + {"y": 5, INDEX_FIELD: 0, REVISION_FIELD: "HEAD"}, + {"y": 6, INDEX_FIELD: 1, REVISION_FIELD: "HEAD"}, + {"y": 3, INDEX_FIELD: 0, REVISION_FIELD: "v2"}, + {"y": 5, INDEX_FIELD: 1, REVISION_FIELD: "v2"}, + {"y": 2, INDEX_FIELD: 0, REVISION_FIELD: "v1"}, + {"y": 3, INDEX_FIELD: 1, REVISION_FIELD: "v1"}, + ] + assert plot_content["encoding"]["x"]["field"] == INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" diff --git a/tests/unit/repo/plots/test_data.py b/tests/unit/repo/plots/test_data.py deleted file mode 100644 index 93f2975234..0000000000 --- a/tests/unit/repo/plots/test_data.py +++ /dev/null @@ -1,44 +0,0 @@ -from collections import OrderedDict -from typing import Dict, List - -import pytest - -from dvc.repo.plots.data import DictData, _lists - - -@pytest.mark.parametrize( - "dictionary, expected_result", - [ - ({}, []), - ({"x": ["a", "b", "c"]}, [["a", "b", "c"]]), - ( - OrderedDict([("x", {"y": ["a", "b"]}), ("z", {"w": ["c", "d"]})]), - [["a", "b"], ["c", "d"]], - ), - ], -) -def test_finding_lists(dictionary, expected_result): - result = _lists(dictionary) - - assert list(result) == expected_result - - -def test_find_data_in_dict(tmp_dir): - m1 = [{"accuracy": 1, "loss": 2}, {"accuracy": 3, "loss": 4}] - m2 = [{"x": 1}, {"x": 2}] - dmetric = OrderedDict([("t1", m1), ("t2", m2)]) - - plot_data = DictData("-", "revision", dmetric) - - def points_with(datapoints: List, additional_info: Dict): - for datapoint in datapoints: - datapoint.update(additional_info) - - return datapoints - - assert list(map(dict, plot_data.to_datapoints())) == points_with( - m1, {"rev": "revision"} - ) - assert list( - map(dict, plot_data.to_datapoints(fields={"x"})) - ) == points_with(m2, {"rev": "revision"}) diff --git a/tests/unit/test_plots.py b/tests/unit/test_plots.py index b0e8f2049f..49ec95f58e 100644 --- a/tests/unit/test_plots.py +++ b/tests/unit/test_plots.py @@ -1,6 +1,8 @@ import json import os +from dvc.render.utils import get_files + def test_plots_order(tmp_dir, dvc): tmp_dir.gen( @@ -26,9 +28,9 @@ def test_plots_order(tmp_dir, dvc): name="stage2", ) - assert list(dvc.plots.show()) == [ + assert get_files(dvc.plots.show()) == { "p.json", os.path.join("sub", "p4.json"), "p1.json", os.path.join("sub", "p3.json"), - ] + }