Skip to content

Commit

Permalink
Merge pull request #394 from creare-com/feature/only-cache-data-source-outputs-default
Browse files Browse the repository at this point in the history

Feature/only cache data source outputs default
  • Loading branch information
mpu-creare authored Apr 17, 2020
2 parents f4ba19e + 7494a98 commit 0fcf40f
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 35 deletions.
8 changes: 4 additions & 4 deletions podpac/core/algorithm/test/test_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_multi_threading(self):

with podpac.settings:
podpac.settings.set_unsafe_eval(True)
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["DEFAULT_CACHE"] = []
podpac.settings["RAM_CACHE_ENABLED"] = False

Expand All @@ -78,7 +78,7 @@ def test_multi_threading_cache_race(self):
with podpac.settings:
podpac.settings["MULTITHREADING"] = True
podpac.settings["N_THREADS"] = 3
podpac.settings["CACHE_OUTPUT_DEFAULT"] = True
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = True
podpac.settings["DEFAULT_CACHE"] = ["ram"]
podpac.settings["RAM_CACHE_ENABLED"] = True
podpac.settings.set_unsafe_eval(True)
Expand Down Expand Up @@ -113,7 +113,7 @@ def test_multi_threading_stress_nthreads(self):
with podpac.settings:
podpac.settings["MULTITHREADING"] = True
podpac.settings["N_THREADS"] = 8
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["DEFAULT_CACHE"] = []
podpac.settings["RAM_CACHE_ENABLED"] = False
podpac.settings.set_unsafe_eval(True)
Expand All @@ -126,7 +126,7 @@ def test_multi_threading_stress_nthreads(self):
with podpac.settings:
podpac.settings["MULTITHREADING"] = True
podpac.settings["N_THREADS"] = 9 # 2 threads available after first 7
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["DEFAULT_CACHE"] = []
podpac.settings["RAM_CACHE_ENABLED"] = False
podpac.settings.set_unsafe_eval(True)
Expand Down
32 changes: 16 additions & 16 deletions podpac/core/algorithm/test/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ def test_auto_chunk(self):
node = Min(source=source)

with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = "auto"
node.eval(coords)

def test_chunked_fallback(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False

class First(Reduce):
def reduce(self, x):
Expand All @@ -75,7 +75,7 @@ class BaseTests(object):

def test_full(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = None

node = self.NodeClass(source=source)
Expand All @@ -91,15 +91,15 @@ def test_full(self):
def test_full_chunked(self):
with podpac.settings:
node = self.NodeClass(source=source, dims=coords.dims)
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = 500
output = node.eval(coords)
# xr.testing.assert_allclose(output, self.expected_full)
np.testing.assert_allclose(output.data, self.expected_full.data)

def test_lat_lon(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = None
node = self.NodeClass(source=source, dims=["lat", "lon"])
output = node.eval(coords)
Expand All @@ -108,7 +108,7 @@ def test_lat_lon(self):

def test_lat_lon_chunked(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = 500
node = self.NodeClass(source=source, dims=["lat", "lon"])
output = node.eval(coords)
Expand All @@ -117,7 +117,7 @@ def test_lat_lon_chunked(self):

def test_time(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = None
node = self.NodeClass(source=source, dims="time")
output = node.eval(coords)
Expand All @@ -126,7 +126,7 @@ def test_time(self):

def test_time_chunked(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = 500
node = self.NodeClass(source=source, dims="time")
output = node.eval(coords)
Expand All @@ -135,7 +135,7 @@ def test_time_chunked(self):

def test_multiple_outputs(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
podpac.settings["CHUNK_SIZE"] = None
node = self.NodeClass(source=multisource, dims=["lat", "lon"])
output = node.eval(coords)
Expand Down Expand Up @@ -278,15 +278,15 @@ class TestDayOfYear(object):

class F(DayOfYearWindow):
cache_output = tl.Bool(False)
cache_update = tl.Bool(True)
force_eval = tl.Bool(True)

def function(self, data, output):
return len(data)


class FM(DayOfYearWindow):
cache_output = tl.Bool(False)
cache_update = tl.Bool(True)
force_eval = tl.Bool(True)

def function(self, data, output):
return np.mean(data)
Expand All @@ -302,7 +302,7 @@ def test_doy_window1(self):
)

node = Arange()
nodedoywindow = F(source=node, window=1, cache_output=False, cache_update=True)
nodedoywindow = F(source=node, window=1, cache_output=False, force_eval=True)
o = nodedoywindow.eval(coords)

np.testing.assert_array_equal(o, [2, 2, 1, 1, 2, 2])
Expand All @@ -316,7 +316,7 @@ def test_doy_window2(self):
)

node = Arange()
nodedoywindow = F(source=node, window=2, cache_output=False, cache_update=True)
nodedoywindow = F(source=node, window=2, cache_output=False, force_eval=True)
o = nodedoywindow.eval(coords)

np.testing.assert_array_equal(o, [6, 5, 3, 3, 5, 6])
Expand All @@ -330,11 +330,11 @@ def test_doy_window2_mean_rescale_float(self):
)

node = Arange()
nodedoywindow = FM(source=node, window=2, cache_output=False, cache_update=True)
nodedoywindow = FM(source=node, window=2, cache_output=False, force_eval=True)
o = nodedoywindow.eval(coords)

nodedoywindow_s = FM(
source=node, window=2, cache_output=False, cache_update=True, scale_float=[0, coords.size], rescale=True
source=node, window=2, cache_output=False, force_eval=True, scale_float=[0, coords.size], rescale=True
)
o_s = nodedoywindow_s.eval(coords)

Expand All @@ -359,7 +359,7 @@ def test_doy_window2_mean_rescale_max_min(self):
source=node,
window=2,
cache_output=False,
cache_update=True,
force_eval=True,
scale_max=node_max,
scale_min=node_min,
rescale=False,
Expand Down
10 changes: 9 additions & 1 deletion podpac/core/data/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,10 @@ class DataSource(Node):
Default is 'numpy'
cache_coordinates : bool
Whether to cache coordinates using the podpac ``cache_ctrl``. Default False.
cache_output : bool
Should the node's output be cached? If not provided or None, uses default based on
settings["CACHE_DATASOURCE_OUTPUT_DEFAULT"]. If True, outputs will be cached and retrieved from cache. If False,
outputs will not be cached OR retrieved from cache (even if they exist in cache).
Notes
-----
Expand All @@ -156,6 +159,7 @@ class DataSource(Node):

coordinate_index_type = tl.Enum(["slice", "list", "numpy"], default_value="numpy") # , "xarray", "pandas"],
cache_coordinates = tl.Bool(False)
cache_output = tl.Bool()

# privates
_interpolation = tl.Instance(Interpolation)
Expand All @@ -173,6 +177,10 @@ def _default_interpolation(self):
self._set_interpolation()
return self._interpolation

@tl.default("cache_output")
def _cache_output_default(self):
return settings["CACHE_DATASOURCE_OUTPUT_DEFAULT"]

# ------------------------------------------------------------------------------------------------------------------
# Properties
# ------------------------------------------------------------------------------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions podpac/core/data/file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ class LoadFileMixin(S3Mixin):
Attributes
----------
cache_dataset : bool
Whether to cache the dataset after loading.
Default is False. Whether to cache the dataset after loading (as an optimization).
"""

cache_dataset = tl.Bool(True)
cache_dataset = tl.Bool(False)

@cached_property
def _dataset_caching_node(self):
Expand All @@ -88,7 +88,7 @@ def _dataset_caching_node(self):
@cached_property
def dataset(self):
# use the _dataset_caching_node "stub" here because the only node attr we care about is the source
if self._dataset_caching_node.has_cache(key="dataset"):
if self.cache_dataset and self._dataset_caching_node.has_cache(key="dataset"):
data = self._dataset_caching_node.get_cache(key="dataset")
with BytesIO(data) as f:
return self._open(BytesIO(data), cache=False)
Expand Down
2 changes: 1 addition & 1 deletion podpac/core/data/test/test_file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_cache_dataset(self):

with podpac.settings:
podpac.settings["DEFAULT_CACHE"] = ["ram"]
node = MockLoadFile(source="file:///%s" % path)
node = MockLoadFile(source="file:///%s" % path, cache_dataset=True)
node.dataset

# node caches dataset object
Expand Down
16 changes: 10 additions & 6 deletions podpac/core/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,13 @@ class Node(tl.HasTraits):
Attributes
----------
cache_output: bool
Should the node's output be cached? If not provided or None, uses default based on settings.
cache_update: bool
Default is True. Should the node's cached output be updated from the source data?
Should the node's output be cached? If not provided or None, uses default based on settings
(CACHE_NODE_OUTPUT_DEFAULT for general Nodes, and CACHE_DATASOURCE_OUTPUT_DEFAULT for DataSource nodes).
If True, outputs will be cached and retrieved from cache. If False, outputs will not be cached OR retrieved from cache (even if
they exist in cache).
force_eval: bool
    Default is False. Should the node's cached output be updated from the source data? If True, it ignores the cache
    when computing outputs but puts the results into the cache (thereby updating the cache).
cache_ctrl: :class:`podpac.core.cache.cache.CacheCtrl`
Class that controls caching. If not provided, uses default based on settings.
dtype : type
Expand Down Expand Up @@ -126,7 +130,7 @@ class Node(tl.HasTraits):

dtype = tl.Any(default_value=float)
cache_output = tl.Bool()
cache_update = tl.Bool(False)
force_eval = tl.Bool(False)
cache_ctrl = tl.Instance(CacheCtrl, allow_none=True)

# list of attribute names, used by __repr__ and __str__ to display minimal info about the node
Expand Down Expand Up @@ -157,7 +161,7 @@ def _validate_units(self, d):

@tl.default("cache_output")
def _cache_output_default(self):
return settings["CACHE_OUTPUT_DEFAULT"]
return settings["CACHE_NODE_OUTPUT_DEFAULT"]

@tl.default("cache_ctrl")
def _cache_ctrl_default(self):
Expand Down Expand Up @@ -969,7 +973,7 @@ def wrapper(self, coordinates, output=None):
key = cache_key
cache_coordinates = coordinates.transpose(*sorted(coordinates.dims)) # order agnostic caching

if not self.cache_update and self.has_cache(key, cache_coordinates):
if not self.force_eval and self.cache_output and self.has_cache(key, cache_coordinates):
data = self.get_cache(key, cache_coordinates)
if output is not None:
order = [dim for dim in output.dims if dim not in data.dims] + list(data.dims)
Expand Down
7 changes: 5 additions & 2 deletions podpac/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
"UNSAFE_EVAL_HASH": uuid.uuid4().hex, # unique id for running unsafe evaluations
# cache
"DEFAULT_CACHE": ["ram"],
"CACHE_OUTPUT_DEFAULT": True,
"CACHE_DATASOURCE_OUTPUT_DEFAULT": True,
"CACHE_NODE_OUTPUT_DEFAULT": False,
"RAM_CACHE_MAX_BYTES": 1e9, # ~1GB
"DISK_CACHE_MAX_BYTES": 10e9, # ~10GB
"S3_CACHE_MAX_BYTES": 10e9, # ~10GB
Expand Down Expand Up @@ -109,8 +110,10 @@ class PodpacSettings(dict):
Notification email for when AWS usage reaches 80% of the `AWS_BUDGET_AMOUNT`
DEFAULT_CACHE : list
Defines a default list of cache stores in priority order. Defaults to `['ram']`.
CACHE_OUTPUT_DEFAULT : bool
CACHE_NODE_OUTPUT_DEFAULT : bool
Default value for node ``cache_output`` trait. If True, the outputs of nodes (eval) will be automatically cached.
CACHE_DATASOURCE_OUTPUT_DEFAULT : bool
Default value for DataSource nodes ``cache_output`` trait. If True, the outputs of nodes (eval) will be automatically cached.
RAM_CACHE_MAX_BYTES : int
Maximum RAM cache size in bytes.
Note, for RAM cache only, the limit is applied to the total amount of RAM used by the python process;
Expand Down
4 changes: 2 additions & 2 deletions podpac/core/test/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ def test_output(self):

def test_cache_output(self):
with podpac.settings:
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False
node = Node()
assert not node.cache_output

podpac.settings["CACHE_OUTPUT_DEFAULT"] = True
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = True
node = Node()
assert node.cache_output

Expand Down

0 comments on commit 0fcf40f

Please sign in to comment.