Skip to content

Commit

Permalink
fix: detect changed files on disk when reloading metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Nov 25, 2022
1 parent 4c84269 commit 1d948df
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
2.11.10
- fix: detect changed files on disk when reloading metadata
- setup: bump dclab from 0.46.4 to 0.47.0 (new bg_med feature
and other convenience methods)
2.11.9
- fix: include logs when exporting data (#126)
- setup: bump dclab from 0.46.2 to 0.46.4 (new features)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
description=description,
long_description=open('README.rst').read() if exists('README.rst') else '',
install_requires=["fcswrite>=0.5.1",
"dclab[dcor,lme4]>=0.46.4",
"dclab[dcor,lme4]>=0.47.0",
"h5py>=2.8.0",
"numpy>=1.21", # CVE-2021-33430
"pyqt5",
Expand Down
51 changes: 48 additions & 3 deletions shapeout2/meta_tool.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,55 @@
"""Convenience methods to retrieve meta data from .rtdc files"""
import functools
import pathlib

import dclab

import numpy as np


class dataset_monitoring_lru_cache:
    """Decorator for caching RT-DC data extracted from DCOR or files

    This is a modification of dclab.util.file_monitoring_lru_cache
    with an exception that when the `path` starts with "https://",
    then caching is done as well.
    """
    def __init__(self, maxsize=100):
        # Bounded cache so the key space cannot grow without limit.
        self.lru_cache = functools.lru_cache(maxsize=maxsize)
        self.cached_wrapper = None

    def __call__(self, func):
        @self.lru_cache
        def cached_wrapper(path, path_stats, *args, **kwargs):
            # `path_stats` is only part of the cache key; a change of
            # mtime/size on disk yields a new key and thus a cache miss.
            assert path_stats, "We need stat for validating the cache"
            return func(path, *args, **kwargs)

        @functools.wraps(func)
        def wrapper(path, *args, **kwargs):
            full_path = pathlib.Path(path).resolve()
            if full_path.exists():
                path_stat = full_path.stat()
                # Pass `path` and `path_stats` positionally. Passing them
                # as keywords alongside a non-empty `*args` would raise
                # "TypeError: got multiple values for argument 'path'",
                # because starred arguments are bound positionally first.
                return cached_wrapper(
                    full_path,
                    (path_stat.st_mtime_ns, path_stat.st_size),
                    *args,
                    **kwargs)
            elif isinstance(path, str) and path.startswith("https://"):
                # DCOR metadata does not change
                return cached_wrapper(path, "placeholder", *args, **kwargs)
            else:
                # Neither a file on disk nor a DCOR URL: do not cache.
                return func(path, *args, **kwargs)

        wrapper.cache_clear = cached_wrapper.cache_clear
        wrapper.cache_info = cached_wrapper.cache_info

        return wrapper


def get_info(path, section, key):
    """Return the metadata value `key` from `section` of the dataset config"""
    return get_rtdc_config(path)[section][key]
Expand All @@ -22,14 +67,14 @@ def get_repr(path, append_path=False):
return rep


@functools.lru_cache(maxsize=100)
@dataset_monitoring_lru_cache(maxsize=100)
def get_rtdc_config(path):
    """Return a copy of the configuration of the dataset at `path`

    Results are cached and invalidated when the file changes on disk.
    """
    with dclab.new_dataset(path) as ds:
        cfg = ds.config.copy()
    return cfg


@functools.lru_cache(maxsize=100)
@dataset_monitoring_lru_cache(maxsize=100)
def get_rtdc_features(path, scalar=True, only_loaded=False):
"""Return available features in a dataset"""
av_feat = []
Expand All @@ -56,7 +101,7 @@ def get_rtdc_features_bulk(paths, scalar=True):
return sorted(set(features))


@functools.lru_cache(maxsize=10000)
@dataset_monitoring_lru_cache(maxsize=10000)
def get_rtdc_features_minmax(path, *features):
"""Return dict with min/max of scalar features in a dataset"""
mmdict = {}
Expand Down
4 changes: 2 additions & 2 deletions shapeout2/session.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools
import hashlib
import io
import json
Expand All @@ -9,6 +8,7 @@
import zipfile

import dclab
from dclab.util import file_monitoring_lru_cache

from .pipeline import Dataslot, Filter, Pipeline, Plot
from ._version import version
Expand Down Expand Up @@ -162,7 +162,7 @@ def import_filter_set(path, pipeline, strict=False):
pipeline.add_filter(filt=filt)


@functools.lru_cache(maxsize=1000)
@file_monitoring_lru_cache(maxsize=1000)
def hash_file_partially(path, size=524288):
"""Hash parts of a file for basic identification
Expand Down

0 comments on commit 1d948df

Please sign in to comment.