Skip to content

Commit

Permalink
Honor option "WD_CACHE_DISABLE" also with FSSPEC-based cache
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Apr 25, 2021
1 parent ccbf5ee commit ecd786c
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 21 deletions.
35 changes: 20 additions & 15 deletions wetterdienst/util/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,10 @@

log = logging.getLogger()

# Python on Windows has no "fcntl", which is required by the dbm backend.
# TODO: Make cache backend configurable, e.g. optionally use Redis for running
# in multi-threaded environments.
platform = platform.system()
backend = "dogpile.cache.dbm"
if "WD_CACHE_DISABLE" in os.environ or platform == "Windows":
backend = "dogpile.cache.memory"
# Whether caching should be disabled at all.
WD_CACHE_DISABLE = "WD_CACHE_DISABLE" in os.environ

# Compute cache directory.
# Configure cache directory.
try:
cache_dir = os.environ["WD_CACHE_DIR"]
except KeyError:
Expand All @@ -33,13 +28,23 @@
# Ensure cache directories exist.
# FIXME: Get rid of this as it executes "os.makedirs()" on the module level.
# This is not really good style but it is needed for the dogpile setup.
cache_directories = [
os.path.join(cache_dir, "dogpile"),
os.path.join(cache_dir, "fsspec"),
]
for cache_directory in cache_directories:
if not os.path.exists(cache_directory):
os.makedirs(cache_directory)
if not WD_CACHE_DISABLE:
cache_directories = [
os.path.join(cache_dir, "dogpile"),
os.path.join(cache_dir, "fsspec"),
]
for cache_directory in cache_directories:
if not os.path.exists(cache_directory):
os.makedirs(cache_directory)

# Configure cache backend.
# TODO: Make cache backend configurable, e.g. optionally use Redis for running
# in multi-threaded environments.
platform = platform.system()
backend = "dogpile.cache.dbm"
# Python on Windows has no "fcntl", which is required by the dbm backend.
if WD_CACHE_DISABLE or platform == "Windows":
backend = "dogpile.cache.memory"


class CacheExpiry(Enum):
Expand Down
33 changes: 27 additions & 6 deletions wetterdienst/util/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
# v1: Global HTTP session object for custom implementation based on "requests".
session = requests.Session()

# Whether caching should be disabled at all.
WD_CACHE_DISABLE = "WD_CACHE_DISABLE" in os.environ


# v2: Remote filesystem access through FSSPEC.
class NetworkFilesystemManager:
Expand All @@ -42,12 +45,16 @@ def register(cls, ttl=CacheExpiry.NO_CACHE):
ttl_name, ttl_value = cls.resolve_ttl(ttl)
key = f"ttl-{ttl_name}"
real_cache_dir = os.path.join(cache_dir, "fsspec", key)
filesystem = WholeFileCacheFileSystem(
fs=HTTPFileSystem(use_listings_cache=True),
cache_storage=real_cache_dir,
expiry_time=ttl_value,
)
cls.filesystems[key] = filesystem
filesystem_real = HTTPFileSystem(use_listings_cache=True)
if WD_CACHE_DISABLE:
filesystem_effective = filesystem_real
else:
filesystem_effective = WholeFileCacheFileSystem(
fs=filesystem_real,
cache_storage=real_cache_dir,
expiry_time=ttl_value,
)
cls.filesystems[key] = filesystem_effective

@classmethod
def get(cls, ttl=CacheExpiry.NO_CACHE):
Expand Down Expand Up @@ -108,6 +115,20 @@ def list_remote_files_legacy(url: str, recursive: bool) -> List[str]:
def list_remote_files_fsspec(
url: str, recursive: bool = False, ttl: CacheExpiry = CacheExpiry.FILEINDEX
) -> List[str]:
"""
A function used to create a listing of all files of a given path on the server.
The default ttl with ``CacheExpiry.FILEINDEX`` is "5 minutes".
Args:
:param url: The URL which should be searched for files.
:param recursive: Definition if the function should iteratively list files
from sub folders.
:param ttl: The cache expiration time.
Returns:
A list of strings representing the files from the path.
"""

# Acquire filesystem instance.
filesystem = NetworkFilesystemManager.get(ttl=ttl)
Expand Down

0 comments on commit ecd786c

Please sign in to comment.