Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Visualize task graphs using ipycytoscape #9091

Merged
merged 14 commits into from May 24, 2022
1 change: 1 addition & 0 deletions continuous_integration/environment-3.10.yaml
Expand Up @@ -49,6 +49,7 @@ dependencies:
- cytoolz
- distributed
- ipython
- ipycytoscape
- lz4
- numba
- partd
Expand Down
1 change: 1 addition & 0 deletions continuous_integration/environment-3.8.yaml
Expand Up @@ -48,6 +48,7 @@ dependencies:
- cytoolz
- distributed
- ipython
- ipycytoscape
- lz4
- numba
- partd
Expand Down
1 change: 1 addition & 0 deletions continuous_integration/environment-3.9.yaml
Expand Up @@ -49,6 +49,7 @@ dependencies:
- cytoolz
- distributed
- ipython
- ipycytoscape
- lz4
- numba
- partd
Expand Down
48 changes: 42 additions & 6 deletions dask/base.py
Expand Up @@ -16,6 +16,7 @@
from functools import partial
from numbers import Integral, Number
from operator import getitem
from typing import Literal

from packaging.version import parse as parse_version
from tlz import curry, groupby, identity, merge
Expand Down Expand Up @@ -601,7 +602,13 @@ def compute(


def visualize(
*args, filename="mydask", traverse=True, optimize_graph=False, maxval=None, **kwargs
*args,
filename="mydask",
traverse=True,
optimize_graph=False,
maxval=None,
engine: Literal["cytoscape", "ipycytoscape", "graphviz"] | None = None,
**kwargs,
):
"""
Visualize several dask graphs simultaneously.
Expand Down Expand Up @@ -655,8 +662,12 @@ def visualize(
verbose : bool, optional
Whether to label output and input boxes even if the data aren't chunked.
Beware: these labels can get very long. Default is False.
engine : {"graphviz", "ipycytoscape", "cytoscape"}, optional
The visualization engine to use. If not provided, this checks the dask config
value "visualization.engine". If that is not set, it tries to import ``graphviz``
and ``ipycytoscape``, using the first one to succeed.
**kwargs
Additional keyword arguments to forward to ``to_graphviz``.
Additional keyword arguments to forward to the visualization engine.

Examples
--------
Expand All @@ -678,8 +689,6 @@ def visualize(

https://docs.dask.org/en/latest/optimize.html
"""
from dask.dot import dot_graph

args, _ = unpack_collections(*args, traverse=traverse)

dsk = dict(collections_to_dsk(args, optimize_graph=optimize_graph))
Expand Down Expand Up @@ -707,7 +716,7 @@ def visualize(
try:
cmap = kwargs.pop("cmap")
except KeyError:
cmap = plt.cm.RdBu
cmap = plt.cm.plasma
ian-r-rose marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(cmap, str):
import matplotlib.pyplot as plt

Expand Down Expand Up @@ -765,7 +774,34 @@ def label(x):
elif color:
raise NotImplementedError("Unknown value color=%s" % color)

return dot_graph(dsk, filename=filename, **kwargs)
# Determine which engine to dispatch to, first checking the kwarg, then config,
# then whichever of graphviz or ipycytoscape is installed, in that order.
engine = engine or config.get("visualization.engine", None)

if not engine:
try:
import graphviz # noqa: F401

engine = "graphviz"
except ImportError:
pass
try:
import ipycytoscape # noqa: F401

engine = "cytoscape"
except ImportError:
pass
ian-r-rose marked this conversation as resolved.
Show resolved Hide resolved

if engine == "graphviz":
from dask.dot import dot_graph

return dot_graph(dsk, filename=filename, **kwargs)
elif engine in ("cytoscape", "ipycytoscape"):
from dask.dot import cytoscape_graph

return cytoscape_graph(dsk, filename=filename, **kwargs)
else:
raise ValueError(f"Visualizer {engine} not recognized")
ian-r-rose marked this conversation as resolved.
Show resolved Hide resolved


def persist(*args, traverse=True, optimize_graph=True, scheduler=None, **kwargs):
Expand Down
12 changes: 12 additions & 0 deletions dask/dask-schema.yaml
Expand Up @@ -10,6 +10,18 @@ properties:
When the value is "null" (default), dask will create a directory in the
location where dask was launched: `cwd/dask-worker-space`

visualization:
type: object
properties:
engine:
type:
- string
- 'null'
description: |
Visualization engine to use when calling ``.visualize()`` on a Dask collection.
Currently supports ``'graphviz'``, ``'ipycytoscape'``, and ``'cytoscape'``
(alias for ``'ipycytoscape'``).

tokenize:
type: object
properties:
Expand Down
3 changes: 3 additions & 0 deletions dask/dask.yaml
@@ -1,5 +1,8 @@
temporary-directory: null # Directory for local disk like /tmp, /scratch, or /local

visualization:
engine: graphviz # Default visualization engine to use when calling `.visualize()` on a collection

tokenize:
ensure-deterministic: false # If true, tokenize will error instead of falling back to uuids

Expand Down