Skip to content

Commit

Permalink
Merge e4c4a18 into 6d774e6
Browse files Browse the repository at this point in the history
  • Loading branch information
jmilloy committed Apr 15, 2020
2 parents 6d774e6 + e4c4a18 commit 07dd315
Show file tree
Hide file tree
Showing 13 changed files with 583 additions and 176 deletions.
135 changes: 68 additions & 67 deletions podpac/core/cache/cache_ctrl.py
Expand Up @@ -4,13 +4,19 @@

import podpac
from podpac.core.settings import settings

from podpac.core.cache.utils import CacheWildCard, CacheException
from podpac.core.cache.ram_cache_store import RamCacheStore
from podpac.core.cache.disk_cache_store import DiskCacheStore
from podpac.core.cache.s3_cache_store import S3CacheStore


_CACHE_STORES = {"ram": RamCacheStore, "disk": DiskCacheStore, "s3": S3CacheStore}

_CACHE_NAMES = {RamCacheStore: "ram", DiskCacheStore: "disk", S3CacheStore: "s3"}

_CACHE_MODES = ["ram", "disk", "network", "all"]


def get_default_cache_ctrl():
"""
Get the default CacheCtrl according to the settings.
Expand All @@ -27,46 +33,39 @@ def get_default_cache_ctrl():
return make_cache_ctrl(settings["DEFAULT_CACHE"])


def make_cache_ctrl(names):
    """
    Make a cache_ctrl from a list of cache store names.

    Arguments
    ---------
    names : str or list
        cache name or names, e.g. 'ram' or ['ram', 'disk'].
        Valid names are the keys of ``_CACHE_STORES`` ('ram', 'disk', 's3').

    Returns
    -------
    ctrl : CacheCtrl
        CacheCtrl using the specified cache stores, in the given order.

    Raises
    ------
    ValueError
        If any name is not a known cache store type.
    """

    # accept a single name as a convenience for the common one-store case
    if isinstance(names, six.string_types):
        names = [names]

    # validate all names before instantiating any store
    for name in names:
        if name not in _CACHE_STORES:
            raise ValueError("Unknown cache store type '%s', options are %s" % (name, list(_CACHE_STORES)))

    return CacheCtrl([_CACHE_STORES[name]() for name in names])


def clear_cache(mode=None):
def clear_cache(mode="all"):
"""
Clear the entire default cache_ctrl.
Arguments
---------
mode : str
determines what types of the `CacheStore` are affected: 'ram','disk','network','all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
"""

cache_ctrl = get_default_cache_ctrl()
def __init__(self, cache_stores=None):
    """
    Create a cache control for a list of cache stores.

    Parameters
    ----------
    cache_stores : list, optional
        list of CacheStore objects to manage, in the order that they should be interrogated.
        Defaults to an empty list (no caching).
    """

    # NOTE: a None sentinel is used instead of a mutable default argument ([]),
    # so instances never share (and cannot corrupt) a single default list.
    self._cache_stores = [] if cache_stores is None else cache_stores
    self._cache_mode = None

def _get_cache_stores(self, mode):
if mode is None:
mode = self._cache_mode
if mode is None:
mode = "all"
def __repr__(self):
    """Return a readable representation listing the managed cache store names."""
    store_names = self.cache_stores
    return "CacheCtrl(cache_stores={0})".format(store_names)

@property
def cache_stores(self):
    """Names ('ram', 'disk', 's3') of the managed cache stores, in interrogation order."""
    names = []
    for store in self._cache_stores:
        names.append(_CACHE_NAMES[type(store)])
    return names

def _get_cache_stores_by_mode(self, mode="all"):
    """Return the managed stores whose `cache_modes` contains `mode`, preserving order."""
    selected = []
    for store in self._cache_stores:
        if mode in store.cache_modes:
            selected.append(store)
    return selected

def put(self, node, data, key, coordinates=None, mode="all", update=True):
    """Cache data for specified node.

    Parameters
    ----------
    node : Node
        node requesting storage (must be a podpac.Node).
    data : any
        data to cache; passed through to the underlying cache stores.
    key : str
        unique key for this node and data; '*' is reserved and not allowed.
    coordinates : Coordinates, optional
        Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output
    mode : str
        determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
    update : bool
        If True existing data in cache will be updated with `data`, If False, error will be thrown if attempting put something into the cache with the same node, key, coordinates of an existing entry.

    Raises
    ------
    TypeError
        If node, key, or coordinates have invalid types.
    ValueError
        If mode is not a valid mode or key is '*'.
    """

    if not isinstance(node, podpac.Node):
        raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node))

    if not isinstance(key, six.string_types):
        raise TypeError("Invalid key (must be a string, not '%s')" % (type(key)))

    if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None:
        raise TypeError("Invalid coordinates (must be of type 'Coordinates', not '%s')" % type(coordinates))

    if mode not in _CACHE_MODES:
        raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode))

    # '*' is the removal wildcard (see rem); it cannot be used as a storage key
    if key == "*":
        raise ValueError("Invalid key ('*' is reserved)")

    # store in every cache store that supports the requested mode
    for c in self._get_cache_stores_by_mode(mode):
        c.put(node=node, data=data, key=key, coordinates=coordinates, update=update)

def get(self, node, key, coordinates=None, mode="all"):
    """Get cached data for this node.

    Parameters
    ----------
    node : Node
        node requesting the cached data (must be a podpac.Node).
    key : str
        key used when the data was cached; '*' is reserved and not allowed.
    coordinates : Coordinates, optional
        Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output
    mode : str
        determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.

    Returns
    -------
    data : any
        The cached data, from the first store (in interrogation order) that has it.

    Raises
    ------
    CacheException
        If the requested data is not in any of the selected cache stores.
    TypeError
        If node, key, or coordinates have invalid types.
    ValueError
        If mode is not a valid mode or key is '*'.
    """

    if not isinstance(node, podpac.Node):
        raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node))

    if not isinstance(key, six.string_types):
        raise TypeError("Invalid key (must be a string, not '%s')" % (type(key)))

    if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None:
        raise TypeError("Invalid coordinates (must be of type 'Coordinates', not '%s')" % type(coordinates))

    if mode not in _CACHE_MODES:
        raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode))

    if key == "*":
        raise ValueError("Invalid key ('*' is reserved)")

    # return from the first store that has the entry; earlier stores win
    for c in self._get_cache_stores_by_mode(mode):
        if c.has(node=node, key=key, coordinates=coordinates):
            return c.get(node=node, key=key, coordinates=coordinates)
    raise CacheException("Requested data is not in any cache stores.")

def has(self, node, key, coordinates=None, mode="all"):
    """Check for cached data for this node.

    Parameters
    ----------
    node : Node
        node to check for cached data (must be a podpac.Node).
    key : str
        key used when the data was cached; '*' is reserved and not allowed.
    coordinates: Coordinate, optional
        Coordinates for which cached object should be checked
    mode : str
        determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.

    Returns
    -------
    has_cache : bool
        True if any of the selected cache stores has the entry, False otherwise.

    Raises
    ------
    TypeError
        If node, key, or coordinates have invalid types.
    ValueError
        If mode is not a valid mode or key is '*'.
    """

    if not isinstance(node, podpac.Node):
        raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node))

    if not isinstance(key, six.string_types):
        raise TypeError("Invalid key (must be a string, not '%s')" % (type(key)))

    if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None:
        raise TypeError("Invalid coordinates (must be of type 'Coordinates', not '%s')" % type(coordinates))

    if mode not in _CACHE_MODES:
        raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode))

    if key == "*":
        raise ValueError("Invalid key ('*' is reserved)")

    # short-circuit on the first store that has the entry
    for c in self._get_cache_stores_by_mode(mode):
        if c.has(node=node, key=key, coordinates=coordinates):
            return True

    return False

def rem(self, node, key, coordinates=None, mode="all"):
    """Delete cached data for this node.

    Parameters
    ----------
    node : Node
        node whose cached data should be removed (must be a podpac.Node).
    key : str
        key used when the data was cached. Use `'*'` to match all keys.
    coordinates : Coordinates, str
        Delete only cached objects for these coordinates. Use `'*'` to match all coordinates.
    mode : str
        determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.

    Raises
    ------
    TypeError
        If node, key, or coordinates have invalid types.
    ValueError
        If mode is not a valid mode.
    """

    if not isinstance(node, podpac.Node):
        raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node))

    if not isinstance(key, six.string_types):
        raise TypeError("Invalid key (must be a string, not '%s')" % (type(key)))

    if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None and coordinates != "*":
        raise TypeError("Invalid coordinates (must be '*' or of type 'Coordinates', not '%s')" % type(coordinates))

    if mode not in _CACHE_MODES:
        raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode))

    # translate the '*' wildcard into the CacheWildCard sentinel the stores understand
    if key == "*":
        key = CacheWildCard()

    if coordinates == "*":
        coordinates = CacheWildCard()

    for c in self._get_cache_stores_by_mode(mode):
        c.rem(node=node, key=key, coordinates=coordinates)

def clear(self, mode="all"):
    """
    Clear all cached data.

    Parameters
    ------------
    mode : str
        determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.

    Raises
    ------
    ValueError
        If mode is not a valid mode.
    """

    if mode not in _CACHE_MODES:
        raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode))

    for c in self._get_cache_stores_by_mode(mode):
        c.clear()
2 changes: 1 addition & 1 deletion podpac/core/cache/cache_store.py
Expand Up @@ -29,7 +29,7 @@ def size(self):

raise NotImplementedError

def put(self, node, data, key, coordinates=None, update=False):
def put(self, node, data, key, coordinates=None, update=True):
"""Cache data for specified node.
Parameters
Expand Down
10 changes: 5 additions & 5 deletions podpac/core/cache/file_cache_store.py
Expand Up @@ -36,7 +36,7 @@ class FileCacheStore(CacheStore):
# public cache API methods
# -----------------------------------------------------------------------------------------------------------------

def put(self, node, data, key, coordinates=None, update=False):
def put(self, node, data, key, coordinates=None, update=True):
"""Cache data for specified node.
Parameters
Expand All @@ -54,10 +54,10 @@ def put(self, node, data, key, coordinates=None, update=False):
"""

# check for existing entry
if self.has(node, key, coordinates):
if not update:
raise CacheException("Cache entry already exists. Use `update=True` to overwrite.")
self.rem(node, key, coordinates)
if not update and self.has(node, key, coordinates):
raise CacheException("Cache entry already exists. Use `update=True` to overwrite.")

self.rem(node, key, coordinates)

# serialize
path_root = self._path_join(self._get_node_dir(node), self._get_filename(node, key, coordinates))
Expand Down
9 changes: 5 additions & 4 deletions podpac/core/cache/ram_cache_store.py
Expand Up @@ -55,7 +55,7 @@ def size(self):
process = psutil.Process(os.getpid())
return process.memory_info().rss # this is actually the total size of the process

def put(self, node, data, key, coordinates=None, update=False):
def put(self, node, data, key, coordinates=None, update=True):
"""Cache data for specified node.
Parameters
Expand All @@ -77,9 +77,10 @@ def put(self, node, data, key, coordinates=None, update=False):

full_key = self._get_full_key(node, key, coordinates)

if full_key in _thread_local.cache:
if not update:
raise CacheException("Cache entry already exists. Use update=True to overwrite.")
if not update and full_key in _thread_local.cache:
raise CacheException("Cache entry already exists. Use update=True to overwrite.")

self.rem(node, key, coordinates)

if self.max_size is not None and self.size >= self.max_size:
# # TODO removal policy
Expand Down

0 comments on commit 07dd315

Please sign in to comment.