Spans: capture code snippets (#7930)

dask · Jun 21, 2023 · 3de722a · 3de722a
1 parent 49437c2
commit 3de722a
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 5 deletions.
diff --git a/distributed/scheduler.py b/distributed/scheduler.py
@@ -842,6 +842,8 @@ class ErredTask:
 class Computation:
     """Collection tracking a single compute or persist call
 
+    DEPRECATED: please use spans instead
+
     See also
     --------
     TaskPrefix
@@ -851,7 +853,7 @@ class Computation:
 
     start: float
     groups: set[TaskGroup]
-    code: SortedSet[SourceCode]
+    code: SortedSet[tuple[SourceCode, ...]]
     id: uuid.UUID
     annotations: dict
 
@@ -4320,7 +4322,7 @@ def update_graph(
         user_priority: int | dict[str, int] = 0,
         actors: bool | list[str] | None = None,
         fifo_timeout: float = 0.0,
-        code: tuple[str] | None = None,
+        code: tuple[SourceCode, ...] = (),
         annotations: dict | None = None,
         stimulus_id: str | None = None,
     ) -> None:
@@ -4454,7 +4456,7 @@ def update_graph(
             # _generate_taskstates is not the only thing that calls new_task(). A
             # TaskState may have also been created by client_desires_keys or scatter,
             # and only later gained a run_spec.
-            spans_ext.observe_tasks(runnable)
+            spans_ext.observe_tasks(runnable, code=code)
             # TaskGroup.span_id could be completely different from the one in the
             # original annotations, so it has been dropped. Drop it here as well in
             # order not to confuse SchedulerPlugin authors.

diff --git a/distributed/spans.py b/distributed/spans.py
@@ -14,6 +14,7 @@
 
 if TYPE_CHECKING:
     from distributed import Scheduler, Worker
+    from distributed.client import SourceCode
     from distributed.scheduler import TaskGroup, TaskState, TaskStateState, WorkerState
 
 
@@ -125,6 +126,10 @@ class Span:
     #: stop
     enqueued: float
 
+    #: Source code snippets, if they were sent by the client.
+    #: We're using a dict without values as an insertion-ordered set.
+    _code: dict[tuple[SourceCode, ...], None]
+
     _cumulative_worker_metrics: defaultdict[tuple[Hashable, ...], float]
 
     # Support for weakrefs to a class with __slots__
@@ -139,6 +144,7 @@ def __init__(self, name: tuple[str, ...], id_: str, parent: Span | None):
         self.enqueued = time()
         self.children = []
         self.groups = set()
+        self._code = {}
         self._cumulative_worker_metrics = defaultdict(float)
 
     def __repr__(self) -> str:
@@ -258,6 +264,17 @@ def nbytes_total(self) -> int:
         """
         return sum(tg.nbytes_total for tg in self.traverse_groups())
 
+    @property
+    def code(self) -> list[tuple[SourceCode, ...]]:
+        """Code snippets, sent by the client on compute(), persist(), and submit().
+
+        Only populated if ``distributed.diagnostics.computations.nframes`` is non-zero.
+        """
+        # Deduplicate, but preserve order
+        return list(
+            dict.fromkeys(sc for child in self.traverse_spans() for sc in child._code)
+        )
+
     @property
     def cumulative_worker_metrics(self) -> dict[tuple[Hashable, ...], float]:
         """Replica of Worker.digests_total and Scheduler.cumulative_worker_metrics, but
@@ -314,7 +331,9 @@ def __init__(self, scheduler: Scheduler):
         self.spans_search_by_name = defaultdict(list)
         self.spans_search_by_tag = defaultdict(list)
 
-    def observe_tasks(self, tss: Iterable[TaskState]) -> None:
+    def observe_tasks(
+        self, tss: Iterable[TaskState], code: tuple[SourceCode, ...]
+    ) -> None:
         """Acknowledge the existence of runnable tasks on the scheduler. These may
         either be new tasks, tasks that were previously unrunnable, or tasks that were
         already fed into this method already.
@@ -329,7 +348,9 @@ def observe_tasks(self, tss: Iterable[TaskState]) -> None:
             # different spans. If that happens, arbitrarily force everything onto the
             # span of the earliest encountered TaskGroup.
             tg = ts.group
-            if not tg.span_id:
+            if tg.span_id:
+                span = self.spans[tg.span_id]
+            else:
                 ann = ts.annotations.get("span")
                 if ann:
                     span = self._ensure_span(ann["name"], ann["ids"])
@@ -341,6 +362,9 @@ def observe_tasks(self, tss: Iterable[TaskState]) -> None:
                 tg.span_id = span.id
                 span.groups.add(tg)
 
+            if code:
+                span._code[code] = None
+
             # The span may be completely different from the one referenced by the
             # annotation, due to the TaskGroup collision issue explained above.
             # Remove the annotation to avoid confusion, and instead rely on

diff --git a/distributed/tests/test_spans.py b/distributed/tests/test_spans.py
@@ -582,3 +582,34 @@ async def test_merge_by_tags_metrics(c, s, a, b):
     assert ext.merge_by_tags("foo").enqueued == min(
         ext.spans[foo1].enqueued, ext.spans[foo2].enqueued
     )
+
+
+@gen_cluster(client=True, config={"distributed.diagnostics.computations.nframes": 10})
+async def test_code(c, s, a, b):
+    with span("foo") as foo:
+        for subspan in ("bar", "baz"):
+            with span(subspan):
+                for key in ("x", "y"):
+                    # Call update-graph four times in two different subspans,
+                    # all with the same code stack
+                    await c.submit(inc, 1, key=key)
+
+        await c.submit(inc, 2, key="z")
+        await c.submit(inc, 3, key="w")
+
+    code = s.extensions["spans"].spans[foo].code
+
+    # Identical code stacks have been deduplicated
+    assert len(code) == 3
+    assert "def _run(self)" in code[0][-2].code
+    assert "inc, 1" in code[0][-1].code
+    assert code[0][-1].lineno_relative == 10
+    assert code[1][-1].lineno_relative == 11
+    assert code[2][-1].lineno_relative == 8
+
+
+@gen_cluster(client=True)
+async def test_no_code_by_default(c, s, a, b):
+    with span("foo") as foo:
+        await c.submit(inc, 1, key="x")
+    assert s.extensions["spans"].spans[foo].code == []