Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

regression since v0.5.0: RecursionError: maximum recursion depth exceeded #94

Closed
basnijholt opened this issue Sep 9, 2020 · 8 comments

Comments

@basnijholt
Copy link
Contributor

basnijholt commented Sep 9, 2020

What happened:

I am trying to load a xarray.Dataset from a Gen2 filesystem.

I tried it with adlfs latest release and with master.

The error does not occur with v0.4.0!!

What you expected to happen:
The file to load without an error.

Minimal Complete Verifiable Example:

This code used to work (in v0.4.0).

url = "abfs://majodata/some_folder/calibration/left.nc"
with fsspec.open(url, **STORAGE_OPTIONS) as f:
    ds = xr.open_dataset(f)

gives

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
    197             try:
--> 198                 file = self._cache[self._key]
    199             except KeyError:

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/lru_cache.py in __getitem__(self, key)
     52         with self._lock:
---> 53             value = self._cache[key]
     54             self._cache.move_to_end(key)

KeyError: [<class 'h5netcdf.core.File'>, (<adlfs.spec.AzureBlobFile object at 0x2b9c7aa6d790>,), 'r', (('invalid_netcdf', None),)]

During handling of the above exception, another exception occurred:

RecursionError                            Traceback (most recent call last)
<ipython-input-8-9024338e9664> in <module>
     11 with fsspec.open(url, **STORAGE_OPTIONS) as f:
     12 #     x = f.readlines()
---> 13     ds = xr.open_dataset(f)
     14 # with fsspec.open(url, **STORAGE_OPTIONS) as f:
     15 #     ds = xr.open_dataset(f)

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime, decode_timedelta)
    538             store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
    539         elif engine == "h5netcdf":
--> 540             store = backends.H5NetCDFStore.open(
    541                 filename_or_obj, group=group, lock=lock, **backend_kwargs
    542             )

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/h5netcdf_.py in open(cls, filename, mode, format, group, lock, autoclose, invalid_netcdf, phony_dims)
    143 
    144         manager = CachingFileManager(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
--> 145         return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
    146 
    147     def _acquire(self, needs_lock=True):

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/h5netcdf_.py in __init__(self, manager, group, mode, lock, autoclose)
    104         # todo: utilizing find_root_and_group seems a bit clunky
    105         #  making filename available on h5netcdf.Group seems better
--> 106         self._filename = find_root_and_group(self.ds)[0].filename
    107         self.is_remote = is_remote_uri(self._filename)
    108         self.lock = ensure_lock(lock)

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/h5netcdf_.py in ds(self)
    154     @property
    155     def ds(self):
--> 156         return self._acquire()
    157 
    158     def open_store_variable(self, name, var):

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/h5netcdf_.py in _acquire(self, needs_lock)
    146 
    147     def _acquire(self, needs_lock=True):
--> 148         with self._manager.acquire_context(needs_lock) as root:
    149             ds = _nc4_require_group(
    150                 root, self._group, self._mode, create_group=_h5netcdf_create_group

~/miniconda3/envs/py38/lib/python3.8/contextlib.py in __enter__(self)
    111         del self.args, self.kwds, self.func
    112         try:
--> 113             return next(self.gen)
    114         except StopIteration:
    115             raise RuntimeError("generator didn't yield") from None

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/file_manager.py in acquire_context(self, needs_lock)
    184     def acquire_context(self, needs_lock=True):
    185         """Context manager for acquiring a file."""
--> 186         file, cached = self._acquire_with_cache_info(needs_lock)
    187         try:
    188             yield file

~/miniconda3/envs/py38/lib/python3.8/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
    202                     kwargs = kwargs.copy()
    203                     kwargs["mode"] = self._mode
--> 204                 file = self._opener(*self._args, **kwargs)
    205                 if self._mode == "w":
    206                     # ensure file doesn't get overriden when opened again

~/miniconda3/envs/py38/lib/python3.8/site-packages/h5netcdf/core.py in __init__(self, path, mode, invalid_netcdf, phony_dims, **kwargs)
    678                 else:
    679                     self._preexisting_file = mode in {'r', 'r+', 'a'}
--> 680                     self._h5file = h5py.File(path, mode, **kwargs)
    681         except Exception:
    682             self._closed = True

~/miniconda3/envs/py38/lib/python3.8/site-packages/h5py/_hl/files.py in __init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, **kwds)
    404             with phil:
    405                 fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0, **kwds)
--> 406                 fid = make_fid(name, mode, userblock_size,
    407                                fapl, fcpl=make_fcpl(track_order=track_order),
    408                                swmr=swmr)

~/miniconda3/envs/py38/lib/python3.8/site-packages/h5py/_hl/files.py in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
    171         if swmr and swmr_support:
    172             flags |= h5f.ACC_SWMR_READ
--> 173         fid = h5f.open(name, flags, fapl=fapl)
    174     elif mode == 'r+':
    175         fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/h5f.pyx in h5py.h5f.open()

h5py/defs.pyx in h5py.defs.H5Fopen()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

h5py/h5fd.pyx in h5py.h5fd.H5FD_fileobj_read()

~/miniconda3/envs/py38/lib/python3.8/site-packages/adlfs/spec.py in readinto(self, b)
   1541 
   1542     def readinto(self, b):
-> 1543         return self.readinto(b)
   1544 
   1545     def read(self, length=-1):

... last 1 frames repeated, from the frame below ...

~/miniconda3/envs/py38/lib/python3.8/site-packages/adlfs/spec.py in readinto(self, b)
   1541 
   1542     def readinto(self, b):
-> 1543         return self.readinto(b)
   1544 
   1545     def read(self, length=-1):

RecursionError: maximum recursion depth exceeded

Anything else we need to know?:

Environment:

import dask, xarray, fsspec, adlfs
print(dask.__version__, xarray.__version__, fsspec.__version__, adlfs.__version__)

2.22.0 0.16.0 0.8.0 v0.5.0

  • Python version: 3.8.1
  • Operating System: CentOS
  • Install method (conda, pip, source): pip
@basnijholt
Copy link
Contributor Author

basnijholt commented Sep 9, 2020

When updating xarray from 0.15.0 to 0.16.0, the error changes to:

see error

---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-9024338e9664> in <module>
      9 url = "abfs://majodata/M040219.1H-01/calibration/left.nc"
     10 # url = '/gscratch/home/a-banijh/data/CPH/M040219.1H-01/phase_two/01/left.nc'
---> 11 with fsspec.open(url, **STORAGE_OPTIONS) as f:
     12 #     x = f.readlines()
     13     ds = xr.open_dataset(f)

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/core.py in open(urlpath, mode, compression, encoding, errors, protocol, newline, **kwargs)
    267     ``OpenFile`` object.
    268     """
--> 269     return open_files(
    270         [urlpath],
    271         mode,

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/core.py in open_files(urlpath, mode, compression, encoding, errors, name_function, num, protocol, newline, **kwargs)
    197     List of ``OpenFile`` objects.
    198     """
--> 199     fs, fs_token, paths = get_fs_token_paths(
    200         urlpath,
    201         mode,

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/core.py in get_fs_token_paths(urlpath, mode, num, name_function, storage_options, protocol)
    375                 "share the same protocol"
    376             )
--> 377         cls = get_filesystem_class(protocol)
    378         optionss = list(map(cls._get_kwargs_from_urls, urlpath))
    379         paths = [cls._strip_protocol(u) for u in urlpath]

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/registry.py in get_filesystem_class(protocol)
     99             raise ValueError("Protocol not known: %s" % protocol)
    100         bit = known_implementations[protocol]
--> 101         registry[protocol] = _import_class(bit["class"])
    102     cls = registry[protocol]
    103     if getattr(cls, "protocol", None) in ("abstract", None):

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/registry.py in _import_class(cls, minv)
    112     minversion = minv.get(mod, None)
    113 
--> 114     mod = importlib.import_module(mod)
    115     if minversion:
    116         version = getattr(mod, "__version__", None)

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/__init__.py in import_module(name, package)
    125                 break
    126             level += 1
--> 127     return _bootstrap._gcd_import(name[level:], package, level)
    128 
    129 

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/_bootstrap.py in _load_unlocked(spec)

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/_bootstrap_external.py in exec_module(self, module)

~/miniconda3/envs/majoanalysis/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/adlfs/__init__.py in <module>
----> 1 from .spec import AzureDatalakeFileSystem
      2 from .spec import AzureBlobFileSystem, AzureBlobFile
      3 from ._version import get_versions
      4 
      5 __all__ = ["AzureBlobFileSystem", "AzureBlobFile", "AzureDatalakeFileSystem"]

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/adlfs/spec.py in <module>
     18 from azure.storage.blob._models import BlobBlock, BlobProperties
     19 from fsspec import AbstractFileSystem
---> 20 from fsspec.asyn import (
     21     sync,
     22     AsyncFileSystem,

~/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/asyn.py in <module>
      6 import threading
      7 
----> 8 from .utils import other_paths
      9 from .spec import AbstractFileSystem
     10 

ImportError: cannot import name 'other_paths' from 'fsspec.utils' (/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/fsspec/utils.py)

edit: updating to master fsspec brings back the old error message.
edit2: The error does not occur with v0.4.0!

@basnijholt basnijholt changed the title RecursionError: maximum recursion depth exceeded since v0.5.0: RecursionError: maximum recursion depth exceeded Sep 9, 2020
@basnijholt basnijholt changed the title since v0.5.0: RecursionError: maximum recursion depth exceeded regression since v0.5.0: RecursionError: maximum recursion depth exceeded Sep 9, 2020
@raybellwaves
Copy link
Contributor

Are you able to provide a minimal example of what the file is? I see it's a .nc but I'm curious how it was created, e.g. share xr.show_versions(). In fact, can you test a simple .nc file? Seeing libraries like h5netcdf in the traceback makes this hard to debug.

@hayesgb
Copy link
Collaborator

hayesgb commented Sep 10, 2020

I may have found the bug. Can you test it with the branch labeled "readinto_branch"?

@raybellwaves
Copy link
Contributor

This worked for me ok:

$ conda create -n test_env python=3.8
$ conda activate test_env
$ pip install xarray adlfs scipy
$ python

>>> import xarray as xr
>>> import adlfs
>>> import fsspec
>>> 
>>> fs = adlfs.AzureBlobFileSystem(account_name='ACCOUNT_NAME', account_key='ACCOUNT_KEY')
>>> da = xr.DataArray(1)
>>> da.to_netcdf('da.nc')
>>> fs.put('da.nc', 'tmp/da.nc')
>>>
>>> url = "abfs://tmp/da.nc"
>>> STORAGE_OPTIONS = {'account_name': 'ACCOUNT_NAME', 'account_key': 'ACCOUNT_KEY'}
>>> with fsspec.open(url, **STORAGE_OPTIONS) as f:
...    ds = xr.open_dataset(f)
>>> ds.__xarray_dataarray_variable__.values
array(1)

@basnijholt
Copy link
Contributor Author

@hayesgb, yes, I can confirm that the code of the readinto_branch branch works for me too! 🎉

@hayesgb
Copy link
Collaborator

hayesgb commented Sep 10, 2020

Fixed in release v0.5.1

@hayesgb hayesgb closed this as completed Sep 10, 2020
@basnijholt
Copy link
Contributor Author

@hayesgb, thanks for the quick fix! 🎉

@basnijholt
Copy link
Contributor Author

@hayesgb, from time to time I still get this error:

WARNING:azure.core.pipeline.policies._distributed_tracing:Unable to start network span: maximum recursion depth exceeded in __instancecheck__
ERROR:aiohttp.internal:Exception in eof callback
Traceback (most recent call last):
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/streams.py", line 164, in on_eof
    callback()
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/client_reqrep.py", line 897, in _response_eof
    self._connection.release()
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/connector.py", line 177, in release
    self._connector._release(
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/connector.py", line 622, in _release
    self._release_acquired(key, protocol)
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/connector.py", line 608, in _release_acquired
    self._drop_acquired_per_host(key, proto)
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/connector.py", line 365, in _drop_acquired_per_host
    if key not in acquired_per_host:
  File "<attrs generated hash aiohttp.client_reqrep.ConnectionKey>", line 2, in __hash__
    return hash((
RecursionError: maximum recursion depth exceeded while calling a Python object
ERROR:aiohttp.internal:Exception in eof callback
Traceback (most recent call last):
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/streams.py", line 164, in on_eof
    callback()
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/client_reqrep.py", line 897, in _response_eof
    self._connection.release()
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/connector.py", line 179, in release
    should_close=self._protocol.should_close)
  File "/gscratch/home/a-banijh/miniconda3/envs/majoanalysis/lib/python3.8/site-packages/aiohttp/client_proto.py", line 48, in should_close
    not self._payload.is_eof() or self._upgraded):
RecursionError: maximum recursion depth exceeded

Not sure if it is the same. And I cannot easily reproduce it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants