Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SubhaloID cannot be computed for small chunksizes #57

Open
cbyrohl opened this issue Jul 13, 2023 · 1 comment
Open

SubhaloID cannot be computed for small chunksizes #57

cbyrohl opened this issue Jul 13, 2023 · 1 comment
Assignees
Labels
bug Something isn't working

Comments

@cbyrohl
Copy link
Owner

cbyrohl commented Jul 13, 2023

from scida import load
series = load("TNG50-1")
ds = series.get_dataset(redshift=3.0)
data = ds.data
print(ds)
data["PartType0"]["SubhaloID"].compute().magnitude

The snippet above fails on its last line with the following traceback:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/highlevelgraph.py:550, in HighLevelGraph.get_all_external_keys(self)
    549 try:
--> 550     return self._all_external_keys
    551 except AttributeError:

AttributeError: 'HighLevelGraph' object has no attribute '_all_external_keys'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[3], line 1
----> 1 data["PartType0"]["SubhaloID"].compute().magnitude

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/pint/facets/dask/__init__.py:32, in check_dask_array.<locals>.wrapper(self, *args, **kwargs)
     29 @functools.wraps(f)
     30 def wrapper(self, *args, **kwargs):
     31     if isinstance(self._magnitude, dask_array.Array):
---> 32         return f(self, *args, **kwargs)
     33     else:
     34         msg = "Method {} only implemented for objects of {}, not {}".format(
     35             f.__name__, dask_array.Array, self._magnitude.__class__
     36         )

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/pint/facets/dask/__init__.py:92, in DaskQuantity.compute(self, **kwargs)
     78 @check_dask_array
     79 def compute(self, **kwargs):
     80     """Compute the Dask array wrapped by pint.PlainQuantity.
     81 
     82     Parameters
   (...)
     90         A pint.PlainQuantity wrapped numpy array.
     91     """
---> 92     (result,) = compute(self, **kwargs)
     93     return result

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/base.py:589, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    581     return args
    583 schedule = get_scheduler(
    584     scheduler=scheduler,
    585     collections=collections,
    586     get=get,
    587 )
--> 589 dsk = collections_to_dsk(collections, optimize_graph, **kwargs)
    590 keys, postcomputes = [], []
    591 for x in collections:

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/base.py:362, in collections_to_dsk(collections, optimize_graph, optimizations, **kwargs)
    360 for opt, val in groups.items():
    361     dsk, keys = _extract_graph_and_keys(val)
--> 362     dsk = opt(dsk, keys, **kwargs)
    364     for opt_inner in optimizations:
    365         dsk = opt_inner(dsk, keys, **kwargs)

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/array/optimization.py:51, in optimize(dsk, keys, fuse_keys, fast_functions, inline_functions_fast_functions, rename_fused_keys, **kwargs)
     49 dsk = optimize_blockwise(dsk, keys=keys)
     50 dsk = fuse_roots(dsk, keys=keys)
---> 51 dsk = dsk.cull(set(keys))
     53 # Perform low-level fusion unless the user has
     54 # specified False explicitly.
     55 if config.get("optimization.fuse.active") is False:

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/highlevelgraph.py:707, in HighLevelGraph.cull(self, keys)
    703 from dask.layers import Blockwise
    705 keys_set = set(flatten(keys))
--> 707 all_ext_keys = self.get_all_external_keys()
    708 ret_layers: dict = {}
    709 ret_key_deps: dict = {}

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/highlevelgraph.py:557, in HighLevelGraph.get_all_external_keys(self)
    552 keys: set = set()
    553 for layer in self.layers.values():
    554     # Note: don't use `keys |= ...`, because the RHS is a
    555     # collections.abc.Set rather than a real set, and this will
    556     # cause a whole new set to be constructed.
--> 557     keys.update(layer.get_output_keys())
    558 self._all_external_keys = keys
    559 return keys

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/blockwise.py:486, in Blockwise.get_output_keys(self)
    480     return {(self.output, *p) for p in self.output_blocks}
    482 # Return all possible output keys (no culling)
    483 return {
    484     (self.output, *p)
    485     for p in itertools.product(
--> 486         *[range(self.dims[i]) for i in self.output_indices]
    487     )
    488 }

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/blockwise.py:486, in <listcomp>(.0)
    480     return {(self.output, *p) for p in self.output_blocks}
    482 # Return all possible output keys (no culling)
    483 return {
    484     (self.output, *p)
    485     for p in itertools.product(
--> 486         *[range(self.dims[i]) for i in self.output_indices]
    487     )
    488 }

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/blockwise.py:446, in Blockwise.dims(self)
    442 """Returns a dictionary mapping between each index specified in
    443 `self.indices` and the number of output blocks for that indice.
    444 """
    445 if not hasattr(self, "_dims"):
--> 446     self._dims = _make_dims(self.indices, self.numblocks, self.new_axes)
    447 return self._dims

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/blockwise.py:1484, in _make_dims(indices, numblocks, new_axes)
   1480 def _make_dims(indices, numblocks, new_axes):
   1481     """Returns a dictionary mapping between each index specified in
   1482     `indices` and the number of output blocks for that indice.
   1483     """
-> 1484     dims = broadcast_dimensions(indices, numblocks)
   1485     for k, v in new_axes.items():
   1486         dims[k] = len(v) if isinstance(v, tuple) else 1

File ~/.cache/pypoetry/virtualenvs/paper-labs-analysis-abIgZPoN-py3.9/lib/python3.9/site-packages/dask/blockwise.py:1475, in broadcast_dimensions(argpairs, numblocks, sentinels, consolidate)
   1472     return toolz.valmap(consolidate, g2)
   1474 if g2 and not set(map(len, g2.values())) == {1}:
-> 1475     raise ValueError("Shapes do not align %s" % g)
   1477 return toolz.valmap(toolz.first, g2)

ValueError: Shapes do not align {'.0': {1, 2, 571}}

However, when the chunksize is increased from 128 MiB to 256 MiB or more, the calculation succeeds.

@cbyrohl cbyrohl changed the title from "SubhaloID cannot be" to "SubhaloID cannot be computed" Aug 3, 2023
@cbyrohl cbyrohl added the bug Something isn't working label Aug 10, 2023
@dnelson86
Copy link
Collaborator

This seems like a dask issue; we can clarify on that end. No immediate rush.

@cbyrohl cbyrohl changed the title from "SubhaloID cannot be computed" to "SubhaloID cannot be computed for small chunksizes" Aug 12, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants