Moved cache to ormcache

commit a9512f4f0f9b0ba87796f12539691e17e14e1797 1 parent d78be6e
David Cramer authored
6 README.rst
@@ -40,6 +40,12 @@ The row-level cache is primarily managed by the ``CachedModel`` class. This clas
* objects -- a CacheManager instance.
* nocache -- the default manager instance.
+Usage::
+
+ from ormcache.models import CachedModel
+ class Article(CachedModel):
+ ...
+
QuerySet Caches
---------------
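For context, a hypothetical sketch of the API this commit introduces, combining the row-level and queryset-level pieces (the ``Article`` model and its field are assumptions, not part of the commit)::

    from django.db import models
    from ormcache.models import CachedModel

    class Article(CachedModel):
        # maxlength, not max_length: this codebase targets pre-1.0 Django
        title = models.CharField(maxlength=255)

    # ``objects`` is a CacheManager added by CachedModel._prepare();
    # cache() returns a CachedQuerySet backed by CACHE_BACKEND.
    articles = Article.objects.cache(timeout=300).filter(title__startswith='Django')

    # ``nocache`` is a plain Manager that always hits the database.
    fresh = Article.nocache.all()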
2  ormcache/__init__.py
@@ -0,0 +1,2 @@
+from manager import CacheManager
+from models import CachedModel
19 ormcache/exceptions.py
@@ -0,0 +1,19 @@
+class CachedModelException(Exception): pass
+
+
+# Our invalidation classes
+class CacheInvalidationWarning(CachedModelException): pass
+
+class CacheMissingWarning(CacheInvalidationWarning):
+ """
+ CacheMissingWarning is raised when we fetch a queryset from the cache
+ and some of its objects are missing from the database.
+ """
+ pass
+
+class CacheExpiredWarning(CacheInvalidationWarning):
+ """
+ CacheExpiredWarning is raised when we fetch from the cache but the
+ pre-expiration timeout has already been hit.
+ """
+ pass
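These form a small hierarchy: catching ``CacheInvalidationWarning`` covers both subclasses. A hedged sketch of how a caller might use it (the ``Article`` model is assumed; note that ``CachedQuerySet._get_data`` below already catches ``CacheMissingWarning`` internally and resets the cache)::

    from ormcache.exceptions import CacheInvalidationWarning

    try:
        articles = Article.objects.cache().execute()
    except CacheInvalidationWarning:
        # Covers CacheMissingWarning and CacheExpiredWarning alike;
        # fall back to an uncached query.
        articles = list(Article.nocache.all())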
28 ormcache/manager.py
@@ -0,0 +1,28 @@
+from django.db.models.manager import Manager
+from query import CachedQuerySet
+
+class CacheManager(Manager):
+ """
+ A manager to store and retrieve cached objects using CACHE_BACKEND
+
+ <string key_prefix> -- the key prefix for all cached objects on this model. [default: db_table]
+ <int timeout> -- in seconds, the maximum time before data is invalidated. [default: DEFAULT_CACHE_TIME]
+ """
+ def __init__(self, *args, **kwargs):
+ self.key_prefix = kwargs.pop('key_prefix', None)
+ self.timeout = kwargs.pop('timeout', None)
+ super(CacheManager, self).__init__(*args, **kwargs)
+
+ def get_query_set(self):
+ return CachedQuerySet(model=self.model, timeout=self.timeout, key_prefix=self.key_prefix)
+
+ def cache(self, *args, **kwargs):
+ return self.get_query_set().cache(*args, **kwargs)
+
+ def clean(self, *args, **kwargs):
+ # Use reset() instead if you are using memcached, as clean() makes no sense there
+ # (it costs extra bandwidth when memcached will automatically clean itself).
+ return self.get_query_set().clean(*args, **kwargs)
+
+ def reset(self, *args, **kwargs):
+ return self.get_query_set().reset(*args, **kwargs)
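For illustration, a sketch of attaching the manager by hand with both options set (the model and values are assumed; ``CachedModel._prepare()`` below does this automatically with the defaults)::

    from django.db import models
    from ormcache.manager import CacheManager

    class Article(models.Model):
        title = models.CharField(maxlength=255)

        # Cache keys become 'articles:<hash>' and entries expire after 15 minutes.
        objects = CacheManager(key_prefix='articles', timeout=60 * 15)
        nocache = models.Manager()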
166 ormcache/models.py
@@ -0,0 +1,166 @@
+from django.db.models.manager import Manager
+# method_get_order, method_set_order and get_absolute_url are used by _prepare below
+from django.db.models.base import ModelBase, Model, method_get_order, method_set_order, get_absolute_url
+from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
+from django.db.models.fields import FieldDoesNotExist
+from django.db.models.options import Options
+from django.db.models import signals
+from django.db.models.loading import register_models, get_model
+from django.dispatch import dispatcher
+from django.utils.functional import curry
+from django.conf import settings
+
+from django.core.cache import cache
+
+import types
+import sys
+
+from manager import CacheManager
+from utils import get_cache_key_for_pk
+
+DEFAULT_CACHE_TIME = 60*60*60 # 60 hours -- the maximum time an item should stay in the cache
+
+# Signals rundown:
+# .cache(expire_on=['create', 'update', 'delete'])
+# use namespaces where possible so the cache key becomes key_name:expire_namespace(not always present):hash
+
+# for example, a call with no expires:
+# db_table:hash
+
+# a call with a delete expires:
+# db_table:0,0,0:hash
+
+# the numbers represent our current namespace level for the 3 expiration methods.
+# in order to do this, we'd have to actually store another cache key per model,
+# and to support threading, query that cache key every time we do any cached
+# queryset hit
+# e.g. cache.get('ns:db_table') = 0,0,0
+
+# when a new row is created, we'd set that to 1,0,0,
+# which would invalidate anything that had a create expiration set, because the
+# namespace changed and the old key is now invalid.
+
+# if you only had create expirations set,
+# your namespace would be :0: -- it's all about the queryset call; you still have
+# to call it the same way throughout your code
+
+# We can also add a table namespace, which says "delete everything", so our
+# cache key now becomes db_table:ns_count:0,0,0:hash
+# where the 0,0,0: part is optional
+
+# ns_count would be stored in the same ns:db_table key and starts at 0.
+# it would most likely only be incremented if you did a push to your site
+# and needed to wipe, say, all articles because the dataset changed.
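+
+# A worked example of the scheme (keys hypothetical; none of this is implemented yet):
+# cache.get('ns:app_article') -> '0,0,0' (create, update, delete levels)
+# a queryset cached with expire_on=['delete'] lives at 'app_article:0,0,0:<hash>'
+# deleting a row bumps the namespace to '0,0,1', so 'app_article:0,0,0:<hash>'
+# is simply never read again and falls out of the cache naturally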
+
+class CachedModelBase(ModelBase):
+ # TODO: find a way to not overwrite __new__ like this
+ def __new__(cls, name, bases, attrs):
+ # If this isn't a subclass of CachedModel, don't do anything special.
+ try:
+ if not filter(lambda b: issubclass(b, CachedModel), bases):
+ return super(CachedModelBase, cls).__new__(cls, name, bases, attrs)
+ except NameError:
+ # 'CachedModel' isn't defined yet, meaning we're looking at Django's own
+ # Model class, defined below.
+ return super(CachedModelBase, cls).__new__(cls, name, bases, attrs)
+
+ # Create the class.
+ new_class = type.__new__(cls, name, bases, {'__module__': attrs.pop('__module__')})
+ new_class.add_to_class('_meta', Options(attrs.pop('Meta', None)))
+ new_class.add_to_class('DoesNotExist', types.ClassType('DoesNotExist', (ObjectDoesNotExist,), {}))
+ new_class.add_to_class('MultipleObjectsReturned',
+ types.ClassType('MultipleObjectsReturned', (MultipleObjectsReturned, ), {}))
+
+ # Build complete list of parents
+ for base in bases:
+ # TODO: Checking for the presence of '_meta' is hackish.
+ if '_meta' in dir(base):
+ new_class._meta.parents.append(base)
+ new_class._meta.parents.extend(base._meta.parents)
+
+
+ if getattr(new_class._meta, 'app_label', None) is None:
+ # Figure out the app_label by looking one level up.
+ # For 'django.contrib.sites.models', this would be 'sites'.
+ model_module = sys.modules[new_class.__module__]
+ new_class._meta.app_label = model_module.__name__.split('.')[-2]
+
+ # Bail out early if we have already created this class.
+ m = get_model(new_class._meta.app_label, name, False)
+ if m is not None:
+ return m
+
+ # Add all attributes to the class.
+ for obj_name, obj in attrs.items():
+ new_class.add_to_class(obj_name, obj)
+
+ # Add Fields inherited from parents
+ for parent in new_class._meta.parents:
+ for field in parent._meta.fields:
+ # Only add parent fields if they aren't defined for this class.
+ try:
+ new_class._meta.get_field(field.name)
+ except FieldDoesNotExist:
+ field.contribute_to_class(new_class, field.name)
+
+ new_class._prepare()
+
+ register_models(new_class._meta.app_label, new_class)
+ # Because of the way imports happen (recursively), we may or may not be
+ # the first class for this model to register with the framework. There
+ # should only be one class for each model, so we must always return the
+ # registered version.
+ return get_model(new_class._meta.app_label, name, False)
+
+class CachedModel(Model):
+ """
+ A Model subclass whose instances and querysets are cached via CACHE_BACKEND.
+ """
+ __metaclass__ = CachedModelBase
+
+# objects = CacheManager()
+# nocache = Manager()
+
+ # Maybe this would work?
+ @classmethod
+ def _prepare(cls):
+ # TODO: How do we extend the parent classes classmethod properly?
+ # super(CachedModel, cls)._prepare() errors
+ opts = cls._meta
+ opts._prepare(cls)
+
+ if opts.order_with_respect_to:
+ cls.get_next_in_order = curry(cls._get_next_or_previous_in_order, is_next=True)
+ cls.get_previous_in_order = curry(cls._get_next_or_previous_in_order, is_next=False)
+ setattr(opts.order_with_respect_to.rel.to, 'get_%s_order' % cls.__name__.lower(), curry(method_get_order, cls))
+ setattr(opts.order_with_respect_to.rel.to, 'set_%s_order' % cls.__name__.lower(), curry(method_set_order, cls))
+
+ # Give the class a docstring -- its definition.
+ if cls.__doc__ is None:
+ cls.__doc__ = "%s(%s)" % (cls.__name__, ", ".join([f.attname for f in opts.fields]))
+
+ if hasattr(cls, 'get_absolute_url'):
+ cls.get_absolute_url = curry(get_absolute_url, opts, cls.get_absolute_url)
+
+ cls.add_to_class('objects', CacheManager())
+ cls.add_to_class('nocache', Manager())
+ cls.add_to_class('_default_manager', cls.nocache)
+ dispatcher.send(signal=signals.class_prepared, sender=cls)
+
+ @staticmethod
+ def _get_cache_key_for_pk(model, pk):
+ return get_cache_key_for_pk(model, pk)
+
+ @property
+ def cache_key(self):
+ return self._get_cache_key_for_pk(self.__class__, self.pk)
+
+ def save(self, *args, **kwargs):
+ super(CachedModel, self).save(*args, **kwargs)
+ # Cache after saving, so that newly created instances have a primary key.
+ cache.set(self._get_cache_key_for_pk(self.__class__, self.pk), self)
+
+ def delete(self, *args, **kwargs):
+ # TODO: create an option that tells the model whether or not it should
+ # do a cache.delete when the object is deleted. For memcached we
+ # wouldn't care about deleting.
+ cache.delete(self._get_cache_key_for_pk(self.__class__, self.pk))
+ super(CachedModel, self).delete(*args, **kwargs)
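To make the row-level flow concrete, a sketch of the behaviour the methods above give you (the ``Article`` model and its pk are assumed)::

    article = Article(title='Caching in Django')
    article.save()       # also does cache.set('<db_table>:<pk>', article)

    article.cache_key    # '<db_table>:<pk>', via get_cache_key_for_pk

    article.delete()     # also does cache.delete('<db_table>:<pk>')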
302 ormcache/query.py
@@ -0,0 +1,302 @@
+# EmptyResultSet and quote_only_if_word live in query.py in this era of Django
+from django.db.models.query import QuerySet, GET_ITERATOR_CHUNK_SIZE, EmptyResultSet, quote_only_if_word
+from django.db.models.fields import FieldDoesNotExist
+from django.db import backend, connection
+from django.core.cache import cache
+from django.conf import settings
+
+from utils import get_cache_key_for_pk
+from exceptions import CacheMissingWarning
+
+# TODO: if the query is passing pks then we need to make it pull the cache key from the model
+# and try to fetch that first
+# if there are additional filters to apply beyond pks we then filter those after we're already pulling the pks
+
+# TODO: should we also run these additional filters each time we pull back a ref list to check for validation?
+
+# TODO: all related field calls need to be removed and replaced with cache key sets of some sort
+# (just remove the join and make it do another qs.filter(pk__in) to pull them, which would do a get_many cache call)
+
+DEFAULT_CACHE_TIME = 60*60*24 # 24 hours
+
+class FauxCachedQuerySet(list):
+ """
+ We return a FauxCachedQuerySet (a plain list subclass) in place of a real
+ QuerySet when results are served from a CachedModel's cache.
+ """
+ pass
+
+class CachedQuerySet(QuerySet):
+ """
+ Extends the QuerySet object and caches results via CACHE_BACKEND.
+ """
+ def __init__(self, model=None, key_prefix=None, timeout=None, key_name=None, *args, **kwargs):
+ self._cache_keys = {}
+ self._cache_reset = False
+ self._cache_clean = False
+ if key_prefix:
+ self.cache_key_prefix = key_prefix
+ else:
+ if model:
+ self.cache_key_prefix = model._meta.db_table
+ else:
+ self.cache_key_prefix = ''
+ self.cache_key_name = key_name
+ if timeout:
+ self.cache_timeout = timeout
+ else:
+ self.cache_timeout = getattr(cache, 'default_timeout', getattr(settings, 'DEFAULT_CACHE_TIME', DEFAULT_CACHE_TIME))
+ QuerySet.__init__(self, model, *args, **kwargs)
+
+ def _clone(self, klass=None, **kwargs):
+ c = QuerySet._clone(self, klass, **kwargs)
+ c._cache_clean = kwargs.pop('_cache_clean', self._cache_clean)
+ c._cache_reset = kwargs.pop('_cache_reset', self._cache_reset)
+ c.cache_key_prefix = kwargs.pop('cache_key_prefix', self.cache_key_prefix)
+ c.cache_key_name = kwargs.pop('cache_key_name', self.cache_key_name)
+ c.cache_timeout = kwargs.pop('cache_timeout', self.cache_timeout)
+ c._cache_keys = {}
+ return c
+
+ def _get_sorted_clause_key(self):
+ return (isinstance(i, basestring) and i.lower().replace('`', '').replace("'", '') or str(tuple(sorted(i))) for i in self._get_sql_clause())
+
+ def _get_cache_key(self, extra=''):
+ # TODO: Need to figure out if this is the best use.
+ # Maybe we should use extra for cache_key_name, extra was planned for use
+ # in things like .count() as it's a different cache key than the normal queryset,
+ # but that also doesn't make sense because theoretically count() is already different
+ # sql so the sorted_sql_clause should have figured that out.
+ if self.cache_key_name is not None:
+ return '%s:%s' % (self.cache_key_prefix, self.cache_key_name)
+ if extra not in self._cache_keys:
+ self._cache_keys[extra] = '%s:%s:%s' % (self.cache_key_prefix, str(hash(''.join(self._get_sorted_clause_key()))), extra)
+ return self._cache_keys[extra]
+
+ def _prepare_queryset_for_cache(self, queryset):
+ """
+ This is where the magic happens. We need to first see if our result set
+ is in the cache. If it isn't, we need to do the query and set the cache
+ to (ModelClass, (*<pks>,), (*<select_related fields>,), <n keys>).
+ """
+ # TODO: make this split up large sets of data based on an option
+ # and sets the last param, keys, to how many datasets are stored
+ # in the cache to regenerate.
+ keys = tuple(obj.pk for obj in queryset)
+ if self._select_related:
+ if not self._max_related_depth:
+ fields = [f.name for f in self.model._meta.fields if f.rel and not f.null]
+ else:
+ # TODO: handle depth relate lookups
+ fields = ()
+ else:
+ fields = ()
+
+ return (queryset[0].__class__, keys, fields, 1)
+
+ def _get_queryset_from_cache(self, cache_object):
+ """
+ We transform the cache storage into an actual QuerySet object,
+ automagically handling the keys depth and select_related fields (again,
+ using the recursive methods of CachedQuerySet).
+
+ We effectively would just be doing a cache.multi_get(*pks), grabbing
+ the pks for each relation, e.g. user, and then doing a
+ CachedManager.objects.filter() on them. This also then makes that
+ queryset reusable. So the question is, should that queryset have been
+ reusable? It could be invalidated by some other code which we aren't
+ tying directly into the parent queryset, so maybe we can't do the
+ objects.filter() query here and we have to do it internally.
+ """
+ # TODO: make this work for people who have, and who don't have, instance caching
+ model, keys, fields, length = cache_object
+
+ results = self._get_objects_for_keys(model, keys)
+
+ if fields:
+ # TODO: optimize this so it's only one get_many call instead of one per select_related field
+ # XXX: this probably isn't handling depth beyond 1, didn't test even depth of 1 yet
+ for f in fields:
+ field = model._meta.get_field(f)
+ field_results = dict((r.id, r) for r in self._get_objects_for_keys(field.rel.to, [getattr(r, field.db_column) for r in results]))
+ for r in results:
+ setattr(r, f.name, field_results[getattr(r, field.db_column)])
+ return results
+
+ def _get_objects_for_keys(self, model, keys):
+ # First we fetch any keys that we can from the cache
+ result_map = cache.get_many([get_cache_key_for_pk(model, k) for k in keys])
+
+ # Now we need to compute which pks weren't present in the cache
+ # (get_many simply omits any keys it couldn't find)
+ missing = [k for k in keys if get_cache_key_for_pk(model, k) not in result_map]
+
+ # We no longer need to know what the keys were, so reduce to a plain list
+ results = list(result_map.values())
+ # Query for any missing objects
+ # TODO: should this only be doing the cache.set if it's from a CachedModel?
+ # if not then we need to expire it, hook signals?
+ objects = list(model._default_manager.filter(pk__in=missing))
+ for o in objects:
+ cache.set(o.cache_key, o)
+ results.extend(objects)
+
+ # Do a simple len() lookup (maybe we shouldn't rely on it returning the right
+ # number of objects)
+ cnt = len(missing) - len(objects)
+ if cnt:
+ raise CacheMissingWarning("%d objects missing in the database" % (cnt,))
+ return results
+
+ def _get_data(self):
+ ck = self._get_cache_key()
+ if self._result_cache is None or self._cache_clean or self._cache_reset:
+ if self._cache_clean:
+ cache.delete(ck)
+ return
+ if self._cache_reset:
+ result_cache = None
+ else:
+ result_cache = cache.get(ck)
+ if result_cache is None:
+ # We need to lookup the initial table queryset, without related
+ # fields selected. We then need to loop through each field which
+ # should be selected and doing another CachedQuerySet() call for
+ # each set of data.
+
+ # This will allow it to transparently, and recursively, handle
+ # all calls to the cache.
+
+ # We will use _prepare_queryset_for_cache to store it in the
+ # the cache, and _get_queryset_from_cache to pull it.
+
+ # Maybe we should override getstate and setstate instead?
+
+ # We first have to remove select_related values from the QuerySet
+ # as we don't want to pull these in to the dataset as they may already exist
+ # in memory.
+
+ # TODO: create a function that works w/ our patch and Django trunk which will
+ # grab the select_related fields for us given X model and (Y list or N depth).
+
+ # TODO: find a clean way to say "is this only matching pks?" if it is we wont
+ # need to store a result set in memory but we'll need to apply the filters by hand.
+ qs = QuerySet._clone(QuerySet(), **self.__dict__)
+ self._result_cache = qs._get_data()
+ self._cache_reset = False
+ # cache_timeout is in seconds (see count() below), so no unit conversion here
+ cache.set(ck, self._prepare_queryset_for_cache(self._result_cache), self.cache_timeout)
+ else:
+ try:
+ self._result_cache = self._get_queryset_from_cache(result_cache)
+ except CacheMissingWarning:
+ # When an object is missing we reset the cached list.
+ # TODO: this should be some kind of option at a global and model level.
+ return self.reset()._get_data()
+ return FauxCachedQuerySet(self._result_cache)
+
+ def execute(self):
+ """
+ Forces execution on the queryset
+ """
+ self._get_data()
+ return self
+
+ def get(self, *args, **kwargs):
+ """
+ Performs the SELECT and returns a single object matching the given
+ keyword arguments.
+ """
+ if self._cache_clean:
+ clone = self.filter(*args, **kwargs)
+ if not clone._order_by:
+ clone._order_by = ()
+ cache.delete(clone._get_cache_key())
+ else:
+ return QuerySet.get(self, *args, **kwargs)
+
+ def clean(self):
+ """
+ Removes queryset from the cache upon execution.
+ """
+ return self._clone(_cache_clean=True)
+
+ def count(self):
+ count = cache.get(self._get_cache_key('count'))
+ if count is None:
+ count = int(QuerySet.count(self))
+ cache.set(self._get_cache_key('count'), count, self.cache_timeout)
+ return count
+
+ def cache(self, *args, **kwargs):
+ """
+ Overrides CacheManager's options for this QuerySet.
+
+ <string key_prefix> -- the key prefix for all cached objects
+ on this model. [default: db_table]
+ <int timeout> -- in seconds, the maximum time before data is
+ invalidated.
+ <string key_name> -- the key suffix for this cached queryset
+ useful if you want to cache the same queryset with two expiration
+ methods.
+ """
+ return self._clone(cache_key_prefix=kwargs.pop('key_prefix', self.cache_key_prefix), cache_timeout=kwargs.pop('timeout', self.cache_timeout), cache_key_name=kwargs.pop('key_name', self.cache_key_name))
+
+ def reset(self):
+ """
+ Updates the queryset in the cache upon execution.
+ """
+ return self._clone(_cache_reset=True)
+
+ def values(self, *fields):
+ return self._clone(klass=CachedValuesQuerySet, _fields=fields)
+
+# need a better way to do this.. (will mix-ins work?)
+class CachedValuesQuerySet(CachedQuerySet):
+ def __init__(self, *args, **kwargs):
+ super(CachedValuesQuerySet, self).__init__(*args, **kwargs)
+ # select_related isn't supported in values().
+ self._select_related = False
+
+ def iterator(self):
+ try:
+ select, sql, params = self._get_sql_clause()
+ except EmptyResultSet:
+ raise StopIteration
+
+ # self._fields is a list of field names to fetch.
+ if self._fields:
+ #columns = [self.model._meta.get_field(f, many_to_many=False).column for f in self._fields]
+ if not self._select:
+ columns = [self.model._meta.get_field(f, many_to_many=False).column for f in self._fields]
+ else:
+ columns = []
+ for f in self._fields:
+ if f in [field.name for field in self.model._meta.fields]:
+ columns.append(self.model._meta.get_field(f, many_to_many=False).column)
+ elif f not in self._select:
+ raise FieldDoesNotExist('%s has no field named %r' % (self.model._meta.object_name, f))
+
+ field_names = self._fields
+ else: # Default to all fields.
+ columns = [f.column for f in self.model._meta.fields]
+ field_names = [f.column for f in self.model._meta.fields]
+
+ select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
+
+ # Add any additional SELECTs.
+ if self._select:
+ select.extend(['(%s) AS %s' % (quote_only_if_word(s[1]), backend.quote_name(s[0])) for s in self._select.items()])
+
+ # read_cursor()/write_cursor() assume a patched Django backend with
+ # master/slave support; stock Django only exposes connection.cursor().
+ if getattr(self, '_db_use_master', False):
+ cursor = connection.write_cursor()
+ else:
+ cursor = connection.read_cursor()
+ cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
+ while 1:
+ rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
+ if not rows:
+ raise StopIteration
+ for row in rows:
+ yield dict(zip(field_names, row))
+
+ def _clone(self, klass=None, **kwargs):
+ c = super(CachedValuesQuerySet, self)._clone(klass, **kwargs)
+ c._fields = self._fields[:]
+ return c
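Putting the queryset API together, an assumed usage sketch (the model, field, and key names are hypothetical)::

    qs = Article.objects.filter(title__startswith='Django')

    # An explicit key suffix lets two call sites share one cached queryset.
    weekly = qs.cache(key_name='django_articles', timeout=60 * 60)

    weekly.count()    # cached separately under the 'count' extra key
    weekly.reset()    # re-queries and overwrites the cached result set on execution
    weekly.clean()    # deletes the cached result set on execution (prefer reset() with memcached)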
2  ormcache/utils.py
@@ -0,0 +1,2 @@
+def get_cache_key_for_pk(model, pk):
+ return '%s:%s' % (model._meta.db_table, pk)
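For example, with a model whose db_table is ``app_article`` (name assumed)::

    >>> get_cache_key_for_pk(Article, 42)
    'app_article:42'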