From d65526d6886067a8ef368e5b02fce80e1e4c4903 Mon Sep 17 00:00:00 2001 From: Adrian Holovaty Date: Sun, 9 Oct 2005 00:55:08 +0000 Subject: [PATCH] Fixed #580 -- Added mega support for generating Vary headers, including some view decorators, and changed the CacheMiddleware to account for the Vary header. Also added GZipMiddleware and ConditionalGetMiddleware, which are no longer handled by CacheMiddleware itself. Also updated the cache.txt and middleware.txt docs. Thanks to Hugo and Sune for the excellent patches git-svn-id: http://code.djangoproject.com/svn/django/trunk@810 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/middleware/cache.py | 116 +++++++++------------ django/middleware/gzip.py | 24 +++++ django/middleware/http.py | 37 +++++++ django/middleware/sessions.py | 2 + django/views/decorators/cache.py | 70 +++---------- django/views/decorators/gzip.py | 6 ++ django/views/decorators/http.py | 9 ++ docs/cache.txt | 174 +++++++++++++++++++++++++------ docs/middleware.txt | 12 +++ 9 files changed, 297 insertions(+), 153 deletions(-) create mode 100644 django/middleware/gzip.py create mode 100644 django/middleware/http.py create mode 100644 django/views/decorators/gzip.py create mode 100644 django/views/decorators/http.py diff --git a/django/middleware/cache.py b/django/middleware/cache.py index 7f4057eec762a..8216c40ae16a5 100644 --- a/django/middleware/cache.py +++ b/django/middleware/cache.py @@ -1,88 +1,70 @@ +import copy from django.conf import settings from django.core.cache import cache +from django.utils.cache import get_cache_key, learn_cache_key, patch_response_headers from django.utils.httpwrappers import HttpResponseNotModified -from django.utils.text import compress_string -import datetime, md5 class CacheMiddleware: """ Cache middleware. If this is enabled, each Django-powered page will be - cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. Pages - with GET or POST parameters are not cached. 
+ cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. - If the cache is shared across multiple sites using the same Django - installation, set the CACHE_MIDDLEWARE_KEY_PREFIX to the name of the site, - or some other string that is unique to this Django instance, to prevent key - collisions. + Only parameter-less GET or HEAD-requests with status code 200 are cached. - This middleware will also make the following optimizations: + This middleware expects that a HEAD request is answered with a response + exactly like the corresponding GET request. - * If the CACHE_MIDDLEWARE_GZIP setting is True, the content will be - gzipped. + When a hit occurs, a shallow copy of the original response object is + returned from process_request. - * ETags will be added, using a simple MD5 hash of the page's content. + Pages will be cached based on the contents of the request headers + listed in the response's "Vary" header. This means that pages shouldn't + change their "Vary" header. + + This middleware also sets ETag, Last-Modified, Expires and Cache-Control + headers on the response object. """ + def __init__(self, cache_timeout=None, key_prefix=None): + self.cache_timeout = cache_timeout + if cache_timeout is None: + self.cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS + self.key_prefix = key_prefix + if key_prefix is None: + self.key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX + def process_request(self, request): - """ - Checks whether the page is already cached. If it is, returns the cached - version. Also handles ETag stuff. - """ - if request.GET or request.POST: - request._cache_middleware_set_cache = False + "Checks whether the page is already cached and returns the cached version if available." + if not request.META['REQUEST_METHOD'] in ('GET', 'HEAD') or request.GET: + request._cache_update_cache = False return None # Don't bother checking the cache. 
- accept_encoding = '' - if settings.CACHE_MIDDLEWARE_GZIP: - try: - accept_encoding = request.META['HTTP_ACCEPT_ENCODING'] - except KeyError: - pass - accepts_gzip = 'gzip' in accept_encoding - request._cache_middleware_accepts_gzip = accepts_gzip - - # This uses the same cache_key as views.decorators.cache.cache_page, - # so the cache can be shared. - cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % \ - (settings.CACHE_MIDDLEWARE_KEY_PREFIX, request.path, accepts_gzip) - request._cache_middleware_key = cache_key + cache_key = get_cache_key(request, self.key_prefix) + if cache_key is None: + request._cache_update_cache = True + return None # No cache information available, need to rebuild. response = cache.get(cache_key, None) if response is None: - request._cache_middleware_set_cache = True - return None - else: - request._cache_middleware_set_cache = False - # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet - try: - if_none_match = request.META['HTTP_IF_NONE_MATCH'] - except KeyError: - if_none_match = None - try: - if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE'] - except KeyError: - if_modified_since = None - if if_none_match is None and if_modified_since is None: - pass - elif if_none_match is not None and response['ETag'] != if_none_match: - pass - elif if_modified_since is not None and response['Last-Modified'] != if_modified_since: - pass - else: - return HttpResponseNotModified() - return response + request._cache_update_cache = True + return None # No cache information available, need to rebuild. + + request._cache_update_cache = False + return copy.copy(response) def process_response(self, request, response): - """ - Sets the cache, if needed. 
- """ - if request._cache_middleware_set_cache: - content = response.get_content_as_string(settings.DEFAULT_CHARSET) - if request._cache_middleware_accepts_gzip: - content = compress_string(content) - response.content = content - response['Content-Encoding'] = 'gzip' - response['ETag'] = md5.new(content).hexdigest() - response['Content-Length'] = '%d' % len(content) - response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT') - cache.set(request._cache_middleware_key, response, settings.CACHE_MIDDLEWARE_SECONDS) + "Sets the cache, if needed." + if not request._cache_update_cache: + # We don't need to update the cache, just return. + return response + if not request.META['REQUEST_METHOD'] == 'GET': + # This is a stronger requirement than above. It is needed + # because of interactions between this middleware and the + # HTTPMiddleware, which throws the body of a HEAD-request + # away before this middleware gets a chance to cache it. + return response + if not response.status_code == 200: + return response + patch_response_headers(response, self.cache_timeout) + cache_key = learn_cache_key(request, response, self.cache_timeout, self.key_prefix) + cache.set(cache_key, response, self.cache_timeout) return response diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py new file mode 100644 index 0000000000000..201bec20007f7 --- /dev/null +++ b/django/middleware/gzip.py @@ -0,0 +1,24 @@ +import re +from django.utils.text import compress_string +from django.utils.cache import patch_vary_headers + +re_accepts_gzip = re.compile(r'\bgzip\b') + +class GZipMiddleware: + """ + This middleware compresses content if the browser allows gzip compression. + It sets the Vary header accordingly, so that caches will base their storage + on the Accept-Encoding header. 
+ """ + def process_response(self, request, response): + patch_vary_headers(response, ('Accept-Encoding',)) + if response.has_header('Content-Encoding'): + return response + + ae = request.META.get('HTTP_ACCEPT_ENCODING', '') + if not re_accepts_gzip.search(ae): + return response + + response.content = compress_string(response.content) + response['Content-Encoding'] = 'gzip' + return response diff --git a/django/middleware/http.py b/django/middleware/http.py new file mode 100644 index 0000000000000..2bccd60903736 --- /dev/null +++ b/django/middleware/http.py @@ -0,0 +1,37 @@ +import datetime + +class ConditionalGetMiddleware: + """ + Handles conditional GET operations. If the response has a ETag or + Last-Modified header, and the request has If-None-Match or + If-Modified-Since, the response is replaced by an HttpNotModified. + + Removes the content from any response to a HEAD request. + + Also sets the Date and Content-Length response-headers. + """ + def process_response(self, request, response): + now = datetime.datetime.utcnow() + response['Date'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT') + if not response.has_header('Content-Length'): + response['Content-Length'] = str(len(response.content)) + + if response.has_header('ETag'): + if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None) + if if_none_match == response['ETag']: + response.status_code = 304 + response.content = '' + response['Content-Length'] = '0' + + if response.has_header('Last-Modified'): + last_mod = response['Last-Modified'] + if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None) + if if_modified_since == response['Last-Modified']: + response.status_code = 304 + response.content = '' + response['Content-Length'] = '0' + + if request.META['REQUEST_METHOD'] == 'HEAD': + response.content = '' + + return response diff --git a/django/middleware/sessions.py b/django/middleware/sessions.py index a588e3e95be97..42b2118410c20 100644 --- a/django/middleware/sessions.py +++ 
b/django/middleware/sessions.py @@ -1,5 +1,6 @@ from django.conf.settings import SESSION_COOKIE_NAME, SESSION_COOKIE_AGE, SESSION_COOKIE_DOMAIN from django.models.core import sessions +from django.utils.cache import patch_vary_headers import datetime TEST_COOKIE_NAME = 'testcookie' @@ -61,6 +62,7 @@ def process_request(self, request): def process_response(self, request, response): # If request.session was modified, or if response.session was set, save # those changes and set a session cookie. + patch_vary_headers(response, ('Cookie',)) try: modified = request.session.modified except AttributeError: diff --git a/django/views/decorators/cache.py b/django/views/decorators/cache.py index de80851363a59..09f9a0139f1a7 100644 --- a/django/views/decorators/cache.py +++ b/django/views/decorators/cache.py @@ -1,57 +1,17 @@ -from django.core.cache import cache -from django.utils.httpwrappers import HttpResponseNotModified -from django.utils.text import compress_string -from django.conf.settings import DEFAULT_CHARSET -import datetime, md5 +""" +Decorator for views that tries getting the page from the cache and +populates the cache if the page isn't in the cache yet. -def cache_page(view_func, cache_timeout, key_prefix=''): - """ - Decorator for views that tries getting the page from the cache and - populates the cache if the page isn't in the cache yet. Also takes care - of ETags and gzips the page if the client supports it. +The cache is keyed by the URL and some data from the headers. Additionally +there is the key prefix that is used to distinguish different cache areas +in a multi-site setup. You could use the sites.get_current().domain, for +example, as that is unique across a Django project. - The cache is keyed off of the page's URL plus the optional key_prefix - variable. Use key_prefix if your Django setup has multiple sites that - use cache; otherwise the cache for one site would affect the other. 
A good - example of key_prefix is to use sites.get_current().domain, because that's - unique across all Django instances on a particular server. - """ - def _check_cache(request, *args, **kwargs): - try: - accept_encoding = request.META['HTTP_ACCEPT_ENCODING'] - except KeyError: - accept_encoding = '' - accepts_gzip = 'gzip' in accept_encoding - cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % (key_prefix, request.path, accepts_gzip) - response = cache.get(cache_key, None) - if response is None: - response = view_func(request, *args, **kwargs) - content = response.get_content_as_string(DEFAULT_CHARSET) - if accepts_gzip: - content = compress_string(content) - response.content = content - response['Content-Encoding'] = 'gzip' - response['ETag'] = md5.new(content).hexdigest() - response['Content-Length'] = '%d' % len(content) - response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT') - cache.set(cache_key, response, cache_timeout) - else: - # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet - try: - if_none_match = request.META['HTTP_IF_NONE_MATCH'] - except KeyError: - if_none_match = None - try: - if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE'] - except KeyError: - if_modified_since = None - if if_none_match is None and if_modified_since is None: - pass - elif if_none_match is not None and response['ETag'] != if_none_match: - pass - elif if_modified_since is not None and response['Last-Modified'] != if_modified_since: - pass - else: - return HttpResponseNotModified() - return response - return _check_cache +Additionally, all headers from the response's Vary header will be taken into +account on caching -- just like the middleware does. 
+""" + +from django.utils.decorators import decorator_from_middleware +from django.middleware.cache import CacheMiddleware + +cache_page = decorator_from_middleware(CacheMiddleware) diff --git a/django/views/decorators/gzip.py b/django/views/decorators/gzip.py new file mode 100644 index 0000000000000..dc6edad049007 --- /dev/null +++ b/django/views/decorators/gzip.py @@ -0,0 +1,6 @@ +"Decorator for views that gzips pages if the client supports it." + +from django.utils.decorators import decorator_from_middleware +from django.middleware.gzip import GZipMiddleware + +gzip_page = decorator_from_middleware(GZipMiddleware) diff --git a/django/views/decorators/http.py b/django/views/decorators/http.py new file mode 100644 index 0000000000000..13062b630f95d --- /dev/null +++ b/django/views/decorators/http.py @@ -0,0 +1,9 @@ +""" +Decorator for views that supports conditional get on ETag and Last-Modified +headers. +""" + +from django.utils.decorators import decorator_from_middleware +from django.middleware.http import ConditionalGetMiddleware + +conditional_page = decorator_from_middleware(ConditionalGetMiddleware) diff --git a/docs/cache.txt b/docs/cache.txt index 0a7ee1c25af10..f690e5f90460b 100644 --- a/docs/cache.txt +++ b/docs/cache.txt @@ -2,25 +2,27 @@ Django's cache framework ======================== -So, you got slashdotted. Now what? +So, you got slashdotted_. Now what? Django's cache framework gives you three methods of caching dynamic pages in memory or in a database. You can cache the output of entire pages, you can cache only the pieces that are difficult to produce, or you can cache your entire site. +.. _slashdotted: http://en.wikipedia.org/wiki/Slashdot_effect + Setting up the cache ==================== -The cache framework is split into a set of "backends" that provide different -methods of caching data. 
There's a simple single-process memory cache (mostly -useful as a fallback) and a memcached_ backend (the fastest option, by far, if -you've got the RAM). +The cache framework allows for different "backends" -- different methods of +caching data. There's a simple single-process memory cache (mostly useful as a +fallback) and a memcached_ backend (the fastest option, by far, if you've got +the RAM). Before using the cache, you'll need to tell Django which cache backend you'd like to use. Do this by setting the ``CACHE_BACKEND`` in your settings file. -The CACHE_BACKEND setting is a "fake" URI (really an unregistered scheme). +The ``CACHE_BACKEND`` setting is a "fake" URI (really an unregistered scheme). Examples: ============================== =========================================== @@ -39,7 +41,7 @@ Examples: simple:/// A simple single-process memory cache; you probably don't want to use this except for testing. Note that this cache backend is - NOT threadsafe! + NOT thread-safe! locmem:/// A more sophisticated local memory cache; this is multi-process- and thread-safe. @@ -72,22 +74,24 @@ For example:: Invalid arguments are silently ignored, as are invalid values of known arguments. +.. _memcached: http://www.danga.com/memcached/ + The per-site cache ================== -Once the cache is set up, the simplest way to use the cache is to simply -cache your entire site. Just add ``django.middleware.cache.CacheMiddleware`` -to your ``MIDDLEWARE_CLASSES`` setting, as in this example:: +Once the cache is set up, the simplest way to use the cache is to cache your +entire site. Just add ``django.middleware.cache.CacheMiddleware`` to your +``MIDDLEWARE_CLASSES`` setting, as in this example:: MIDDLEWARE_CLASSES = ( "django.middleware.cache.CacheMiddleware", "django.middleware.common.CommonMiddleware", ) -Make sure it's the first entry in ``MIDDLEWARE_CLASSES``. (The order of -``MIDDLEWARE_CLASSES`` matters.) +(The order of ``MIDDLEWARE_CLASSES`` matters. 
See "Order of MIDDLEWARE_CLASSES" +below.) -Then, add the following three required settings: +Then, add the following three required settings to your Django settings file: * ``CACHE_MIDDLEWARE_SECONDS`` -- The number of seconds each page should be cached. @@ -102,16 +106,20 @@ Then, add the following three required settings: in the cache. That means subsequent requests won't have the overhead of zipping, and the cache will hold more pages because each one is smaller. -Pages with GET or POST parameters won't be cached. +The cache middleware caches every page that doesn't have GET or POST +parameters. Additionally, ``CacheMiddleware`` automatically sets a few headers +in each ``HttpResponse``: -The cache middleware also makes a few more optimizations: - -* Sets and deals with ``ETag`` headers. -* Sets the ``Content-Length`` header. * Sets the ``Last-Modified`` header to the current date/time when a fresh (uncached) version of the page is requested. +* Sets the ``Expires`` header to the current date/time plus the defined + ``CACHE_MIDDLEWARE_SECONDS``. +* Sets the ``Cache-Control`` header to give a max age for the page -- again, + from the ``CACHE_MIDDLEWARE_SECONDS`` setting. + +See the `middleware documentation`_ for more on middleware. -It doesn't matter where in the middleware stack you put the cache middleware. +.. _`middleware documentation`: http://www.djangoproject.com/documentation/middleware/ The per-page cache ================== @@ -134,25 +142,25 @@ Or, using Python 2.4's decorator syntax:: def slashdot_this(request): ... -This will cache the result of that view for 15 minutes. (The cache timeout is -in seconds.) +``cache_page`` takes a single argument: the cache timeout, in seconds. In the +above example, the result of the ``slashdot_this()`` view will be cached for 15 +minutes. The low-level cache API ======================= -There are times, however, that caching an entire rendered page doesn't gain -you very much. 
The Django developers have found it's only necessary to cache a -list of object IDs from an intensive database query, for example. In cases like -these, you can use the cache API to store objects in the cache with any level -of granularity you like. +Sometimes, however, caching an entire rendered page doesn't gain you very much. +For example, you may find it's only necessary to cache the result of an +intensive database query. In cases like this, you can use the low-level cache API to +store objects in the cache with any level of granularity you like. The cache API is simple:: - # the cache module exports a cache object that's automatically - # created from the CACHE_BACKEND setting + # The cache module exports a cache object that's automatically + # created from the CACHE_BACKEND setting. >>> from django.core.cache import cache - # The basic interface is set(key, value, timeout_seconds) and get(key) + # The basic interface is set(key, value, timeout_seconds) and get(key). >>> cache.set('my_key', 'hello, world!', 30) >>> cache.get('my_key') 'hello, world!' @@ -161,7 +169,7 @@ The cache API is simple:: >>> cache.get('my_key') None - # get() can take a default argument + # get() can take a default argument. >>> cache.get('my_key', 'has_expired') 'has_expired' @@ -183,4 +191,108 @@ The cache API is simple:: That's it. The cache has very few restrictions: You can cache any object that can be pickled safely, although keys must be strings. -.. _memcached: http://www.danga.com/memcached/ +Controlling cache: Using Vary headers +===================================== + +The Django cache framework works with `HTTP Vary headers`_ to allow developers +to instruct caching mechanisms to vary their cache contents depending on +request HTTP headers. + +Essentially, the ``Vary`` response HTTP header defines which request headers a +cache mechanism should take into account when building its cache key.
+ +By default, Django's cache system creates its cache keys using the requested +path -- e.g., ``"/stories/2005/jun/23/bank_robbed/"``. This means every request +to that URL will use the same cached version, regardless of user-agent +differences such as cookies or language preferences. + +That's where ``Vary`` comes in. + +If your Django-powered page outputs different content based on some difference +in request headers -- such as a cookie, or language, or user-agent -- you'll +need to use the ``Vary`` header to tell caching mechanisms that the page output +depends on those things. + +To do this in Django, use the convenient ``vary_on_headers`` view decorator, +like so:: + + from django.views.decorators.vary import vary_on_headers + + # Python 2.3 syntax. + def my_view(request): + ... + my_view = vary_on_headers(my_view, 'User-Agent') + + # Python 2.4 decorator syntax. + @vary_on_headers('User-Agent') + def my_view(request): + ... + +In this case, a caching mechanism (such as Django's own cache middleware) will +cache a separate version of the page for each unique user-agent. + +The advantage to using the ``vary_on_headers`` decorator rather than manually +setting the ``Vary`` header (using something like +``response['Vary'] = 'user-agent'``) is that the decorator adds to the ``Vary`` +header (which may already exist) rather than setting it from scratch. + +Note that you can pass multiple headers to ``vary_on_headers()``:: + + @vary_on_headers('User-Agent', 'Cookie') + def my_view(request): + ... + +Because varying on cookie is such a common case, there's a ``vary_on_cookie`` +decorator. These two views are equivalent:: + + @vary_on_cookie + def my_view(request): + ... + + @vary_on_headers('Cookie') + def my_view(request): + ... + +Also note that the headers you pass to ``vary_on_headers`` are not case +sensitive. ``"User-Agent"`` is the same thing as ``"user-agent"``.
+ +You can also use a helper function, ``patch_vary_headers()``, directly:: + + from django.utils.cache import patch_vary_headers + def my_view(request): + ... + response = render_to_response('template_name', context) + patch_vary_headers(response, ['Cookie']) + return response + +``patch_vary_headers`` takes an ``HttpResponse`` instance as its first argument +and a list/tuple of header names as its second argument. + +.. _`HTTP Vary headers`: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44 + +Other optimizations +=================== + +Django comes with a few other pieces of middleware that can help optimize your +apps' performance: + + * ``django.middleware.http.ConditionalGetMiddleware`` adds support for + conditional GET. This makes use of ``ETag`` and ``Last-Modified`` + headers. + + * ``django.middleware.gzip.GZipMiddleware`` compresses content for browsers + that understand gzip compression (all modern browsers). + +Order of MIDDLEWARE_CLASSES +=========================== + +If you use ``CacheMiddleware``, it's important to put it in the right place +within the ``MIDDLEWARE_CLASSES`` setting, because the cache middleware needs +to know the headers by which to vary the cache storage. Middleware always +adds something to the ``Vary`` response header when it can. + +Put the ``CacheMiddleware`` after any middlewares that might add something to +the ``Vary`` header. The following middlewares do so: + + * ``SessionMiddleware`` adds ``Cookie`` + * ``GZipMiddleware`` adds ``Accept-Encoding`` diff --git a/docs/middleware.txt b/docs/middleware.txt index f3901bb693bfa..21e62fa18cd88 100644 --- a/docs/middleware.txt +++ b/docs/middleware.txt @@ -88,6 +88,18 @@ Available middleware addresses defined in the ``INTERNAL_IPS`` setting. This is used by Django's automatic documentation system. +``django.middleware.gzip.GZipMiddleware`` + Compresses content for browsers that understand gzip compression (all + modern browsers).
+ +``django.middleware.http.ConditionalGetMiddleware`` + Handles conditional GET operations. If the response has an ``ETag`` or + ``Last-Modified`` header, and the request has ``If-None-Match`` or + ``If-Modified-Since``, the response is replaced by an ``HttpResponseNotModified``. + + Also removes the content from any response to a HEAD request and sets the + ``Date`` and ``Content-Length`` response-headers. + ``django.middleware.sessions.SessionMiddleware`` Enables session support. See the `session documentation`_.