From 2a5582ad162816f86307268e7e85be7827bf4491 Mon Sep 17 00:00:00 2001 From: Tom White Date: Thu, 23 Oct 2025 10:35:30 +0100 Subject: [PATCH 1/4] Live dashboard implemented using Dash and Cytoscape.js --- cubed/diagnostics/assets/cubed-dashboard.css | 53 ++ cubed/diagnostics/assets/cubed-logo.png | Bin 0 -> 7143 bytes cubed/diagnostics/dash.py | 635 +++++++++++++++++++ pyproject.toml | 2 + 4 files changed, 690 insertions(+) create mode 100644 cubed/diagnostics/assets/cubed-dashboard.css create mode 100644 cubed/diagnostics/assets/cubed-logo.png create mode 100644 cubed/diagnostics/dash.py diff --git a/cubed/diagnostics/assets/cubed-dashboard.css b/cubed/diagnostics/assets/cubed-dashboard.css new file mode 100644 index 00000000..49b5139e --- /dev/null +++ b/cubed/diagnostics/assets/cubed-dashboard.css @@ -0,0 +1,53 @@ +html { + font-family: "Helvetica", "Arial", sans-serif; + font-size: 12px; +} + +body { + margin: 0; + padding: 0; +} + +table { + table-layout: fixed; + width: 95%; + border-collapse: collapse; + white-space: nowrap; +} + +th { + text-align: left; + border-bottom: 1px solid #999999; +} + +td { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +svg { + /* Limit height of array image */ + max-height: 300px; +} + +#info-container { + width: 300px; + height: 100vh; + float: left; + background-color: #f7fafd; +} + +.info-panel { + margin-left: 5px; +} + +#graph-container { + margin-left: 300px; +} + +#cytoscape-controls { + position: absolute; + top: 10px; + right: 10px; +} diff --git a/cubed/diagnostics/assets/cubed-logo.png b/cubed/diagnostics/assets/cubed-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..71743ffb266331c2f79eaf13822184ed8d46b08c GIT binary patch literal 7143 zcmZu$bzB_TvYvrpGiY#kSrS}=ySoJjX95HWHb{WM-3byL5?lsPa0#w~;1U7}4DK2v z=p(y(_uhBkJN>J!I@RA-b*j$kf4ZZzG!$__6d(WqfTOG=r}LoSJO~3O`olen5&7;x z0o`;IWdK#9RND`eRBMRxOLcVs`-6=MKm$Gop#FwDTmT>i0PRm308jx^{$uL^c>pL6 zeu4)vim(Gb`Wxf$pnngG2YF!sr9C}8IqjV6-7KLnYffiZn+HE?F7V$ua#8-_zsW`Y z$7XoI0jLQ0Cm$3ROv%s<0Ko12Ex?o~xRd|@3WuE@#2uopCSvLA$O*M_eqqh&A1KpnxcYARLh`JV?th1{%oggPSCpUux zh>ng<%+>0ph>o1X-}Hx>ID@UbJ4}R&>vuVxe7_6f;t>`W=Hlk%;^pOdKybLho!p^b z98PYGe+Bth964(@OIJIXyPdNW-S4>27tXKU#TgiWC;Dgny-r8iKZ%^&{;ukwJ}xgP zjEjepo9q80x!b+`_wxQ1>-UqtXn$7qS2D4O)`-ZuT0`BPUGpT7UbL;k-6|LOdjAjb8( zq5sj~zjp6W@5A;3;L;;^$}6N$tQ6o zmdW{Z$KlPfB_9KUw z>6A_nxQ*!3~V6{n741zJX$wBzFZ$WQPk{cu5m`XX}IXhoRTwhifX zD}t2Pvg9{66n!V^?Io4qxcgj?hO@OfXZry1@A2gT3=)pXSRRxBD6i4DY@1BgGdcCB)i0AxUdK)XY@_6eWH8FB9Ucaxx-7IIQ&jB(%j4@DOX9t^K5L8wR{;>e+xFZ6a)&-5$GdDV*f z>@d&yb)I*;xV-ivLdg7qv!z0oVAGutj%ii86pwNDjATFrdoCudZ0H%iLb|ss8I+8# zPC)~;4@#M0Emna8=BJjdy8$THjYCXH;s{^qiH0~U(+0!JTfBYVIp~Tu04iQ#qOoAk zTqYG;TS`NY`L%GUS5_|ko~CpEyj2Mc{B|E$AQl3>XQ0CeeubI%R)3R`&~Kkr=T7C* zj!W~ThJ?(eui|8`u#038>8(p@o$@KM1TDSAuWUx`bBPCpAF=HlT~1t&bdU>A5SZ1w z%#prRBT?a73Bn#%UB{pFNGje;H642$P-WuD#k6V(&(;2>gZKq#1sLmtFSc$b7#Bsf zQ!$Nr=x606%a@w7-)r^=tB0CvQcaBxQ5P>U1D@0XmY*i^--_CdOB%JcszVp;j$s6i zXJ?c#o`>i+BA@dWSnaZ?Iq92B86$WMqPBQGxPHCBGExcss^`~=yJcTonX{>YrsOL zy}1kyRwc2>6gB0wrT}(;w!hcbOSXxw(&NXW8>;jbCO8cTVM){>5stY<^kR<;-H3K! za+tYaFE+oOY6nE|q#VDp54cK}ZH*z25W2FhxH8dukG1;Id73li&VxWs!%o8&>Sq2K z`*VMB{T%Vj5O7T_3NfkW-g4D^6XU7~>vgj+SBzhJO)c1k01W?@P*w`3=$kMd(z<-+ ze#OFd{~C#U9yUf>O8JT(NT$whuumn)IuJ&35amuIv0BLMwp?&Y*Nq~OXJ^P;(Nx|t zSGC0weo#LbWpbb_9)Ej%F^C4n)vDhr)iJz;2dnkywlUGDbZWO_RDaQ8p{+;+=qunm zEqV_`lVXuhsMNAdb2PVCOsqL~rltc)yHM?xw~bTl=}F~Z$%z)*_Ato97Un!YEnUnM z;dd5=SO0PD;dbQJ4mV`2q>)7~l zHyHP$=AeMiVe8VTo!XkiS`tg2HeP{hwDX)--@3S(A6>vgv1Ge2uvo1b)NUpO*>D!- z1BvCH$jBqOdg)ga$knEdWu}xpWZ)keGe(VoH*^OM_!KV&8PeusP*Jrb<5a4IkT$nW zVx?K{+!cBlbqLM+tobkKmd&eZwK`CtkxF>${9^CJEf2N^@tRRNrQayFg+tBO5G!e- zf@e<6&R=HiweWaL4-<1=6g4-OU!aCCEINsfHp_j+O!}E5;m{*slHz(itW{WbFk=v) zy;^$m&hLfEGsgoT@X9!bT2G6KQIxhxpfoKseHJan6@fZ9j41{whig#9fhcYC+H}=9 z*v7CW=cgyVsk0~7Z}{s+T4|gvi3yUJ&o1jGr2}?pmK++DWSMF`^Li-QTmZIPs=Io#ATLZ#xNodV)xD&K6 zyO6THa>(I&82F4c+6H%dk8Fd-ze@@ZGP|p`^zdc+pRsJ^;{T ze6_5?;V+^pL+hsD;dU=&h$mmNS9q*_W3PtmA3gP{H1<@sM5@an>N%^>Ip5Orf!6Y= zA%vgtl{wJM0@$s)WNf`ylHEJ5&e0F8+8kL6Ghp~iJU1xk$lu zm(Pq89qA?aV8GhbzKi_zsA3rT*@Tv0VGFc=omH&YNHG~Ae?|z&?0CyAq-oR@)=ry! zn5OFa2eCJQpAp(|urCm2#t}bV>;w|C!Q-IJn!N1Ti40Oswfh!&?b)&7H(oOWzUhQ> zHB@?HwEIxM4+-yTqioQJ&|hlWclogOPEnAiJXWcFw18hpMJycJKiFA*Lqu{@P!qp- zs(buN6*}QxfmNP0VAWRCul}4)j>CLF56F8hbHi(wIwqx3Qs@?{#ng5$@s{o5{+^@2 zAr%tS{-yc(*ir%UfEF*c%d<(YN?0C+(dA~?>TSk6-bvaHaJxt4^Ee*v=iclm4RH!j zjw3t5It)UW54wJYL@7hG)fCI*%Nu@nw03mddX_gx?64hf@v;L!UZqv`mSWQjHvhpdKH@tx>mQE&JX4cu#K6QTxdMFqiAzvqJ zVmfG#I&|;Gg}Dc9V68ndcc-%^YU<=vhNkwg_ zbL@))v_n!p_k$z#(r0TFj>>W=@%vfcoaNqtMqcb_WhZ)6eymBlobZqyw;3 zcq5whY_sv^*kw)hDC)@b4sqz;`-bl?==)LGoee9Iy6LDWmieRzV%F(XrNiP>Q2X)@ z<*Y5Dt_h-OyKiyB;?f6sq+i#^?QEmx{S5wEm)A9b{*tNymh&b&^(6Nt#fSiY_Y#J; z>eUem5!4(Syp+=S9b zal}N|6CqC~BR7vRx1tPA`cPy2yAVmUavL_0LYPZ0OD0h?c&t&Cx7!)P8_goZhQ?Qq z6rc+58bl-gcY@xnXN@T?@)7u@bD3ShRXik2;}Hs-%2nBf;#d|D0jYffMm|9sl`7H; zcDr8PRmaPs+9u`~zGO4{7HiY*=AChegc11-PZlp@!w5Q(r*o&KiiiGdzT+o4g{-su zE<{#4PG_>*eh&v`wB>>zF9;Xq={9$6XW8tK5p$f}b3+D)#1$gE_*lIU_VOa(SaKza zt0EMUMlvS-Fy5mgx@?g(rK=cFc5^gt$0|C0&>9_YOR#^dK8+X-T#3;iYe~efC_w!> zEK-V5TjMmQo3dD0Qk5Pp=FMjnrV*89U(_D~v3`d2^~e_n0jsSNu{yKW=9&~ZAN>8& zW$)ki4dt`d)5h;K;lJij>5Ey=lXUPf@dX>Giv7TyVCtLEm0(;+d1|H06VTtux0Z?~ zoYRgDg1R4Iq{@`sYV#iPs8+6ir@(SdwOU2+f?s(}wqIXf ziBo1k&y&h8IhK#3w%}jAzLhgBnVfXqQTNqOxxFJ`s&cfq8p8nx(7R%SNKtG(8(d-1{N1RA?j$Zhj}yue<2Pu< zK|i}$Tg5wUdCTZPUYC!%03XEYBSyWwT|7mdv*Td85xW|TOp_x}#v~o3nv>hm8CvxA z7$pG4^Z6ExyGHAD8CytO22n)1V%DYI#%bYpva-?J$k~u;CpdOz+^Jve8w3O^(ydQN ziVfA+yS<<#Vf-13e*xsG z+%Qdy=}KtJi+*e>8zU>|w>_ur_Wh!L>o))N`bX{H=SZEzstEjTHT|1cLAS%^QIrAf zW)&f0@`uffHp7_5Kjn6eN_?$}y2IjeuB97IwgMj)=DiVN9h-@)&37ypJbNmaw6|-^ z*t6TC5_kQ^=e`nQL8>vJ(gu9S^M2dRZ3b_u?;|_82MuEHEh9={yl>KTm*If0rT93o zW|&*u5tM9-rI()I61zRIy0GOXVai-)u6#(d5`12lQ5tqD?7(WsDaIWpEFZdN#*^H| zg;{9keY2x-cGuXUORkf zKHIW6909OiP2+i~Cd^albfM|f80M7-Ofv2b;M@PKG5O*2^KA{*G)s~H(u+3Wf!`d$ev>j?OPFHD%- z(cIB~e{O#t2{&$DtmI@Ur5;>y3Y0;cXfomJ?`d>D^Bqa*_e`9+l3kYfjExPo!CJDF zue+tm1_l^C|CzZ!a$LPHKeCILeu&nI))$tk@NEl*}TcqPss=UN%za8T$3Mo*Zn?5`#{UQ%Q3v6520LR6W4bAFqf z>^8t731xf>RdKvJ-?UJf?=GBAGhH4*?ac7)dvUA8Zx9>={mfU1pxerIojZyJWeyY^ z%uhtwN>HJp5dkGCvz`!LEx9MB1rg@(><+##6e=r264yDu)D$m# z0`~R(DZf|r@!Yde@D0dTGj6d~^f%@QyoD?@Z&CORA zTB?&7BG5>tJW{%-81MS}`Fi)!o6uE=&=FFl?`B&!9Ei&<&K40NDmsC2C{`0Pnec(8 z8=_*WNNy5V$yl;~^ts5Kt|(pvjVrkfReNiOqgJb|d8-5uB{4l@@3o$y{XmZn`$YQl z$U(GvqXBZBnlFkhb5^EX$Se9Jh?L9Y_ash?-6MH<-b&FS%G1qw9@S=d;-e{fBGmpy z3buNpEFZ^|ia7ZuAI8~np8?<3&jz~7J=)>u3qtsvz`>B9J4p+iT3JzY5!t=L05yT@R z9CFvmVTo8_8!&7$?)+uC;}dR#F7HEw5gRIRG!i_~g^__N=YleCa7!t(AMtTd`Pr8) zE#+b5BH)XcJjid+q2MUl&8H!cvV~c`=oLrlue0JG(XdV>ODRTTY>TjQ5Z>ru?02@QZ0z)1*Ps!o%`pP)%s0i>{qG*GOW(;BKNm_|A+d z+VTc_*Mj8CBtuDlf;VyY$#C?q0@3-j4C^B>dVe>PBd9YX;6IHle<2xxsLwD%^oWla2YiQz>K=5?d}as{gno}7`y($ct4cq zdMU|TwdL%acgvhDWxhuelOs#yL(J5mHP9=PwCqQ8PwurL{@p%JGKg)Jh5NQ(O9JJ- z3&n%BwVUqv1D)WNO=Xs=RbRAz4(Acmb_qFG^1gA~$Uvhas>cJJ zK0R;{+qWL`MmbC0)sakFB~0C$H74INpK31JH1?mj$9S(A$SC`wZX0v@N$$4x;>Y`N z8?Sa(<~v%>wR5sqJ$7;vR<9tT-FTle{u=L{?kJcHb(W_eXv8wT*M0qI8%gAIF5r;m z@lxHJblPG%9(KEN-*=h86RqrLur`SrNjTeTVFtWU0Ix)gf|h!w=ad@3-~bjenNt_5 z{cz*EF$l}M#Lw>}GvoW@EkIu-n#$_-EJH?B7ka4#_i?i3NYwixtNo5*G-uUKObn|4 zyc>-G^9C8h3Qtt$yYG{C7A%{tSm#Hx^~D!CF3zlrK4#DA-Vm6$Px1Kp<)9y0uJK#; zAr#$LQzJG-i{FLcF8>g*r249`oDmwnYlZ7z4o4#w3Qxb{DfY^l=m02=cz>pXxGOEy zz6-<~ao9b4_?pJ nLK%;WCZrPT3c3%Vx|B@Pw{3c>@EY>_- 0: + for n, d in self.dag.nodes(data=True): + if n == data["id"]: + node_type = d.get("type", None) + if node_type == "op": + return op_to_html(d) + elif node_type == "array": + return array_to_html(d, array_display_names, plan) + + return "Click on a node in the graph" + + return app + + def on_compute_start(self, event): + self.dag = event.dag + app = self.create_dash_app(event.plan) + run_dash_in_background_thread(app, *self.dash_args, **self.dash_kwargs) + + self.num_tasks = {} + self.completed_tasks = {} + self.progress = {} + self.running_operations = set() + self.completed_operations = set() + for name, node in visit_nodes(event.dag): + self.num_tasks[name] = node["primitive_op"].num_tasks + self.progress[name] = 0.0 + self.completed_tasks[name] = 0 + + def on_compute_end(self, event): + self.done = True + + def on_operation_start(self, event): + self.running_operations.add(event.name) + + def on_operation_end(self, event): + self.running_operations.remove(event.name) + self.completed_operations.add(event.name) + + def on_task_end(self, event): + self.completed_tasks[event.name] += event.num_tasks + self.progress[event.name] += event.num_tasks / self.num_tasks[event.name] + + +def run_dash(app, *args, **kwargs): + app.run(*args, **kwargs) + + +def run_dash_in_background_thread(app, *args, **kwargs): + threading.Thread( + target=run_dash, args=(app,) + args, kwargs=kwargs, daemon=False + ).start() + + +def plan_to_cytoscape( + plan, + rankdir="TB", + show_hidden=False, +): + dag = plan.dag.copy() # make a copy since we mutate the DAG below + + # remove edges from create-arrays output node to avoid cluttering the diagram + dag.remove_edges_from(list(dag.out_edges("arrays"))) + + if not show_hidden: + dag.remove_nodes_from( + list(n for n, d in dag.nodes(data=True) if d.get("hidden", False)) + ) + + # do an initial pass to extract array variable names from stack summaries + array_display_names = {} + for _, d in dag.nodes(data=True): + if "stack_summaries" in d: + stack_summaries = d["stack_summaries"] + first_cubed_i = min( + i for i, s in enumerate(stack_summaries) if s.is_cubed() + ) + caller_summary = stack_summaries[first_cubed_i - 1] + array_display_names.update(caller_summary.array_names_to_variable_names) + # add current stack info + frame = inspect.currentframe().f_back # go back one in the stack + stack_summaries = extract_stack_summaries(frame, limit=10) + first_cubed_i = min(i for i, s in enumerate(stack_summaries) if s.is_cubed()) + caller_summary = stack_summaries[first_cubed_i - 1] + array_display_names.update(caller_summary.array_names_to_variable_names) + + elements = [] + + # now set node attributes with visualization info + for n, d in dag.nodes(data=True): + label = n + node_type = d.get("type", None) + if node_type == "op": + func_name = d["func_name"] + label = f"{n}\n{func_name}".strip() + num_tasks = None + if "primitive_op" in d: + primitive_op = d["primitive_op"] + num_tasks = primitive_op.num_tasks + linecolor = LINE_COLOR + fillcolor = PRIMITIVE_OP_BACKGROUND_COLOR + else: + linecolor = VIRTUAL_LINE_COLOR + fillcolor = VIRTUAL_OP_BACKGROUND_COLOR + + if num_tasks is not None: + label += f"\ntasks: {int_repr(num_tasks)}" + + elements.append( + { + "data": { + "id": n, + "label": label, + "shape": "round-rectangle", + "fillcolor": fillcolor, + "linecolor": linecolor, + "borderstyle": "solid", + } + } + ) + + elif node_type == "array": + target = d["target"] + + if isinstance(target, LazyZarrArray) or is_storage_array(target): + linecolor = LINE_COLOR + fillcolor = INITIALIZED_ARRAY_BACKGROUND_COLOR + else: + linecolor = VIRTUAL_LINE_COLOR + fillcolor = VIRTUAL_ARRAY_BACKGROUND_COLOR + if n in array_display_names: + var_name = array_display_names[n] + label = f"{n}\n{var_name}" + + if plan.array_role(n) == ArrayRole.INTERMEDIATE: + borderstyle = "dashed" + else: + borderstyle = "solid" + + elements.append( + { + "data": { + "id": n, + "label": label, + "shape": "rectangle", + "fillcolor": fillcolor, + "linecolor": linecolor, + "borderstyle": borderstyle, + } + } + ) + + else: + elements.append( + { + "data": { + "id": n, + "label": label, + "shape": "rectangle", + "fillcolor": VIRTUAL_ARRAY_BACKGROUND_COLOR, + "linecolor": VIRTUAL_LINE_COLOR, + "borderstyle": "solid", + } + } + ) + + for source, target in dag.edges(): + elements.append({"data": {"source": source, "target": target}}) + + stylesheet = [ + { + "selector": "node", + "style": { + "font-family": "helvetica", + "font-size": "12", + "color": "black", + "background-color": "data(fillcolor)", + "border-color": "data(linecolor)", + "border-width": 2, + "border-style": "data(borderstyle)", + "opacity": "1.0", + "text-valign": "center", + "text-halign": "center", + "label": "data(label)", + "shape": "data(shape)", + "text-wrap": "wrap", + # note following is deprecated, see https://stackoverflow.com/a/78033670 + "width": "label", + "height": 36, + "line-height": 1.2, + "padding": 10, + }, + }, + { + "selector": "edge", + "style": { + "width": 2, + "line-color": "black", + "line-cap": "square", + "target-arrow-shape": "triangle", + "target-arrow-color": "black", + "curve-style": "bezier", + "source-endpoint": "outside-to-node", + }, + }, + { + "selector": "node:selected", + "style": {"underlay-color": HIGHLIGHT_COLOR, "underlay-opacity": "0.5"}, + }, + ] + + layout = { + "name": "dagre", + "rankDir": rankdir, + "rankSep": 36, + "nodeDimensionsIncludeLabels": True, + "fit": False, + } + cyto = dash_cytoscape.Cytoscape( + id="cytoscape-component", + layout=layout, + # specify width and height here as dash cytoscape will set defaults that override css + style={"width": "100%", "height": "100vh"}, + stylesheet=stylesheet, + elements=elements, + autoungrabify=True, + minZoom=0.1, + maxZoom=3, + ) + + return cyto, layout, array_display_names + + +def ops_to_arrays(dag): + """Return a map from op name to the names of the arrays it produces""" + op_name_to_array_names = {} + for n, d in dag.nodes(data=True): + node_type = d.get("type", None) + if node_type == "op" and "primitive_op" in d: + op_name_to_array_names[n] = list(successors_unordered(dag, n)) + + return op_name_to_array_names + + +def plan_to_html(plan): + return html.Div( + children=[ + html.H3("Plan"), + html.Table( + [tr("Stages", int_repr(plan.num_stages))] + + [tr("Operations", int_repr(plan.num_primitive_ops))] + + [tr("Tasks", int_repr(plan.num_tasks))] + + [tr("Allowed memory", memory_repr(plan.allowed_mem))] + + [tr("Max projected memory", memory_repr(plan.max_projected_mem))] + + [tr("Optimized", str(plan.optimized))] + ), + html.H3("Storage"), + html.Table( + [tr_header("", "Arrays", "Bytes", "Chunks")] + + [ + tr( + "Input", + int_repr(plan.total_input_narrays), + memory_repr(plan.total_input_nbytes), + int_repr(plan.total_input_nchunks), + ) + ] + + [ + tr( + "Intermediate", + int_repr(plan.total_intermediate_narrays), + memory_repr(plan.total_intermediate_nbytes), + int_repr(plan.total_intermediate_nchunks), + ) + ] + + [ + tr( + "Output", + int_repr(plan.total_output_narrays), + memory_repr(plan.total_output_nbytes), + int_repr(plan.total_output_nchunks), + ) + ] + + [ + tr( + "Total", + int_repr(plan.total_narrays), + memory_repr(plan.total_nbytes), + int_repr(plan.total_nchunks), + ) + ] + ), + html.H3("IO"), + html.Table( + [tr_header("", "Arrays", "Bytes", "Chunks")] + + [ + tr( + "Read", + int_repr(plan.total_narrays_read), + memory_repr(plan.total_nbytes_read), + int_repr(plan.total_nchunks_read), + ) + ] + + [ + tr( + "Write", + int_repr(plan.total_narrays_written), + memory_repr(plan.total_nbytes_written), + int_repr(plan.total_nchunks_written), + ) + ] + ), + ], + className="info-panel", + ) + + +def op_to_html(data): + children = [tr("Name", data["name"])] + children.append(tr("Operation", data["op_name"])) + + if "primitive_op" in data: + primitive_op = data["primitive_op"] + children.append(tr("Projected memory", memory_repr(primitive_op.projected_mem))) + children.append(tr("Tasks", int_repr(primitive_op.num_tasks))) + if primitive_op.write_chunks is not None: + children.append(tr("Write chunk shape", str(primitive_op.write_chunks))) + + if "pipeline" in data: + pipeline = data["pipeline"] + if isinstance(pipeline.config, BlockwiseSpec): + children.append( + tr("Num input blocks", str(pipeline.config.num_input_blocks)) + ) + children.append( + tr("Num output blocks", str(pipeline.config.num_output_blocks)) + ) + + if "stack_summaries" in data and data["stack_summaries"] is not None: + # add call stack information + stack_summaries = data["stack_summaries"] + + first_cubed_i = min(i for i, s in enumerate(stack_summaries) if s.is_cubed()) + caller_summary = stack_summaries[first_cubed_i - 1] + + calls = " -> ".join( + [s.name for s in stack_summaries if not s.is_on_python_lib_path()] + ) + + line = f"{caller_summary.lineno} in {caller_summary.name}" + + # use title to set tooltip for long line + children.append(html.Tr([html.Td("Calls"), html.Td(calls, title=calls)])) + children.append(tr("Line", line)) + + return html.Div(children=[html.Table(children=children)]) + + +def array_to_html(data, array_display_names, plan): + target = data["target"] + + name = data["name"] + children = [tr("Name", data["name"])] + if name in array_display_names: + children.append(tr("Variable name", array_display_names[name])) + + children.append(tr("Shape", str(target.shape))) + children.append(tr("Chunk shape", str(target.chunks))) + children.append(tr("Data type", str(target.dtype))) + children.append(tr("Chunk memory", memory_repr(chunk_memory(target)))) + if hasattr(target, "nbytes"): + children.append(tr("Bytes", memory_repr(target.nbytes))) + if hasattr(target, "nchunks"): + children.append(tr("Chunks", int_repr(target.nchunks))) + + children.append(tr("Role", str(plan.array_role(name).value))) + children.append(tr("Stored", str(hasattr(target, "store")))) + if hasattr(target, "store"): + # Use title to set tooltip for long line + children.append( + html.Tr( + [html.Td("Store"), html.Td(str(target.store), title=str(target.store))] + ) + ) + + svg = array_to_svg(target) + + return html.Div(children=[html.Table(children=children), svg]) + + +def array_to_svg(array): + from cubed.vendor.dask.array.svg import svg + + chunks = normalize_chunks(array.chunks, shape=array.shape, dtype=array.dtype) + s = svg(chunks, size=250) + + from xml.dom.minidom import Node, parseString + + document = parseString(s) + svg_element = document.getElementsByTagName("svg")[0] + + def _extract_style(el): + if not el.hasAttribute("style"): + return None + return { + k.strip(): v.strip() + for (k, v) in [x.split(":") for x in el.getAttribute("style").split(";")] + } + + def handle_svg(svg): + width = svg.getAttribute("width") + height = svg.getAttribute("height") + children = [] + for child in svg.childNodes: + if child.nodeType == Node.ELEMENT_NODE: + if child.tagName == "line": + children.append(handle_line(child)) + elif child.tagName == "polygon": + children.append(handle_polygon(child)) + elif child.tagName == "text": + children.append(handle_text(child)) + # convert width and height to a viewbox so that the image scales to the space available + return dsvg.Svg( + children, viewBox=f"0 0 {width} {height}", style=_extract_style(svg) + ) + + def handle_line(line): + x1 = line.getAttribute("x1") + y1 = line.getAttribute("y1") + x2 = line.getAttribute("x2") + y2 = line.getAttribute("y2") + return dsvg.Line(x1=x1, y1=y1, x2=x2, y2=y2, style=_extract_style(line)) + + def handle_polygon(polygon): + points = polygon.getAttribute("points") + return dsvg.Polygon(points=points, style=_extract_style(polygon)) + + def handle_text(text): + x = text.getAttribute("x") + y = text.getAttribute("y") + fontSize = text.getAttribute("font-size") + fontWeight = text.getAttribute("font-weight") + textAnchor = text.getAttribute("text-anchor") + transform = text.getAttribute("transform") + value = text.childNodes[0].data + return dsvg.Text( + [value], + x=x, + y=y, + fontSize=fontSize, + fontWeight=fontWeight, + textAnchor=textAnchor, + transform=transform, + style=_extract_style(text), + ) + + return handle_svg(svg_element) + + +def tr_header(*ths): + return html.Tr([html.Th(th) for th in ths]) + + +def tr(*tds): + return html.Tr([html.Td(td) for td in tds]) + + +def int_repr(value): + return f"{value:,}" diff --git a/pyproject.toml b/pyproject.toml index 3d0010a1..ca9d1bf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,8 @@ diagnostics = [ "matplotlib", "rich", "seaborn", + "dash", + "dash-cytoscape", ] beam = ["apache-beam", "gcsfs"] dask = ["dask < 2024.12.0"] From 711ccb67b31d3ccdb3b9ff5aa04fd835ab55f036 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 18 Nov 2025 10:05:55 +0000 Subject: [PATCH 2/4] Demo notebook --- add-random-dash.ipynb | 69 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 add-random-dash.ipynb diff --git a/add-random-dash.ipynb b/add-random-dash.ipynb new file mode 100644 index 00000000..491de227 --- /dev/null +++ b/add-random-dash.ipynb @@ -0,0 +1,69 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "746026fa-e903-43e6-af4c-847f54cbe96c", + "metadata": {}, + "outputs": [], + "source": [ + "import cubed\n", + "import cubed.array_api as xp\n", + "import cubed.random\n", + "from cubed.diagnostics.dash import Dashboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d43ee666-6c50-461c-8723-e2def4bc04fe", + "metadata": {}, + "outputs": [], + "source": [ + "a = cubed.random.random((25000, 25000), chunks=(5000, 5000))\n", + "b = cubed.random.random((25000, 25000), chunks=(5000, 5000))\n", + "c = xp.add(a, b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d130e71-d4fa-454e-b508-ccc6f7179ecb", + "metadata": {}, + "outputs": [], + "source": [ + "with Dashboard(debug=True):\n", + " cubed.to_zarr(c, store=None, optimize_graph=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d8de071-1d68-4a7f-9ea0-c0c748f5e274", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 70a7ab2923a07937cdabd961df9d962cc1adfa53 Mon Sep 17 00:00:00 2001 From: Tom White Date: Fri, 21 Nov 2025 13:22:32 +0000 Subject: [PATCH 3/4] Fix mypy and exclude from coverage (until dashboard is tested in unit tests) --- .coveragerc | 1 + pyproject.toml | 1 + setup.cfg | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/.coveragerc b/.coveragerc index 728e0e18..70598bd6 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,7 @@ omit = */tests/* cubed/array_api/* + cubed/diagnostics/dash.py cubed/diagnostics/memray.py cubed/icechunk.py cubed/runtime/executors/beam.py diff --git a/pyproject.toml b/pyproject.toml index ca9d1bf3..cb6bcdc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ diagnostics = [ "seaborn", "dash", "dash-cytoscape", + "dash-svg", ] beam = ["apache-beam", "gcsfs"] dask = ["dask < 2024.12.0"] diff --git a/setup.cfg b/setup.cfg index c244c1f4..719dba11 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,6 +23,10 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-dask.*] ignore_missing_imports = True +[mypy-dash_cytoscape.*] +ignore_missing_imports = True +[mypy-dash_svg.*] +ignore_missing_imports = True [mypy-donfig.*] ignore_missing_imports = True [mypy-distributed.*] From 8c0be70c104b4bb58cfbaa5d7b59a890e394a850 Mon Sep 17 00:00:00 2001 From: Tom White Date: Fri, 21 Nov 2025 13:28:56 +0000 Subject: [PATCH 4/4] Use new function for finding array variable names --- cubed/diagnostics/dash.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cubed/diagnostics/dash.py b/cubed/diagnostics/dash.py index 8b2db858..eac9384f 100644 --- a/cubed/diagnostics/dash.py +++ b/cubed/diagnostics/dash.py @@ -18,6 +18,7 @@ from cubed.storage.zarr import LazyZarrArray from cubed.utils import ( chunk_memory, + extract_array_names_from_stack_summaries, extract_stack_summaries, memory_repr, normalize_chunks, @@ -234,21 +235,18 @@ def plan_to_cytoscape( ) # do an initial pass to extract array variable names from stack summaries - array_display_names = {} + stacks = [] for _, d in dag.nodes(data=True): if "stack_summaries" in d: stack_summaries = d["stack_summaries"] - first_cubed_i = min( - i for i, s in enumerate(stack_summaries) if s.is_cubed() - ) - caller_summary = stack_summaries[first_cubed_i - 1] - array_display_names.update(caller_summary.array_names_to_variable_names) + stacks.append(stack_summaries) # add current stack info - frame = inspect.currentframe().f_back # go back one in the stack + # TODO: following isn't right yet + # go back one in the stack to the caller of 'compute' + frame = inspect.currentframe().f_back stack_summaries = extract_stack_summaries(frame, limit=10) - first_cubed_i = min(i for i, s in enumerate(stack_summaries) if s.is_cubed()) - caller_summary = stack_summaries[first_cubed_i - 1] - array_display_names.update(caller_summary.array_names_to_variable_names) + stacks.append(stack_summaries) + array_display_names = extract_array_names_from_stack_summaries(stacks) elements = []