Fixed #20536 -- rewrite of the file based cache backend

 * Safer for use in multiprocess environments (see the write sketch below)
 * Better random culling
 * Cache files use less disk space
 * Safer delete behavior

Also fixed #15806, fixed #15825.
commit 7be638390e18fcbfaaed638f9908673360c280d3 (1 parent: ac2d86f)
Authored by Jaap Roes; committed by akaariai
django/core/cache/backends/filebased.py (226 lines changed)
@@ -1,156 +1,156 @@
"File-based cache backend"
-
+import errno
+import glob
import hashlib
+import io
import os
-import shutil
+import random
+import tempfile
import time
+import zlib
+from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
+from django.core.files.move import file_move_safe
+from django.utils.encoding import force_bytes
try:
from django.utils.six.moves import cPickle as pickle
except ImportError:
import pickle
-from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
-from django.utils.encoding import force_bytes
-
class FileBasedCache(BaseCache):
+ cache_suffix = '.djcache'
+
def __init__(self, dir, params):
- BaseCache.__init__(self, params)
- self._dir = dir
- if not os.path.exists(self._dir):
- self._createdir()
+ super(FileBasedCache, self).__init__(params)
+ self._dir = os.path.abspath(dir)
+ self._createdir()
def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
- if self.has_key(key, version=version):
+ if self.has_key(key, version):
return False
-
- self.set(key, value, timeout, version=version)
+ self.set(key, value, timeout, version)
return True
def get(self, key, default=None, version=None):
- key = self.make_key(key, version=version)
- self.validate_key(key)
-
- fname = self._key_to_file(key)
- try:
- with open(fname, 'rb') as f:
- exp = pickle.load(f)
- now = time.time()
- if exp is not None and exp < now:
- self._delete(fname)
- else:
- return pickle.load(f)
- except (IOError, OSError, EOFError, pickle.PickleError):
- pass
+ fname = self._key_to_file(key, version)
+ if os.path.exists(fname):
+ try:
+ with io.open(fname, 'rb') as f:
+ if not self._is_expired(f):
+ return pickle.loads(zlib.decompress(f.read()))
+ except IOError as e:
+ if e.errno == errno.ENOENT:
+ pass # Cache file was removed after the exists check
return default
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
- key = self.make_key(key, version=version)
- self.validate_key(key)
-
- fname = self._key_to_file(key)
- dirname = os.path.dirname(fname)
-
- self._cull()
-
+ self._createdir() # Cache dir can be deleted at any time.
+ fname = self._key_to_file(key, version)
+ self._cull() # make some room if necessary
+ fd, tmp_path = tempfile.mkstemp(dir=self._dir)
+ renamed = False
try:
- if not os.path.exists(dirname):
- os.makedirs(dirname)
-
- with open(fname, 'wb') as f:
+ with io.open(fd, 'wb') as f:
expiry = self.get_backend_timeout(timeout)
- pickle.dump(expiry, f, pickle.HIGHEST_PROTOCOL)
- pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
- except (IOError, OSError):
- pass
+ f.write(pickle.dumps(expiry, -1))
+ f.write(zlib.compress(pickle.dumps(value), -1))
+ file_move_safe(tmp_path, fname, allow_overwrite=True)
+ renamed = True
+ finally:
+ if not renamed:
+ os.remove(tmp_path)
def delete(self, key, version=None):
- key = self.make_key(key, version=version)
- self.validate_key(key)
- try:
- self._delete(self._key_to_file(key))
- except (IOError, OSError):
- pass
+ self._delete(self._key_to_file(key, version))
def _delete(self, fname):
- os.remove(fname)
+ if not fname.startswith(self._dir) or not os.path.exists(fname):
+ return
try:
- # Remove the 2 subdirs if they're empty
- dirname = os.path.dirname(fname)
- os.rmdir(dirname)
- os.rmdir(os.path.dirname(dirname))
- except (IOError, OSError):
- pass
+ os.remove(fname)
+ except OSError as e:
+ # ENOENT can happen if the cache file is removed (by another
+ # process) after the os.path.exists check.
+ if e.errno != errno.ENOENT:
+ raise
def has_key(self, key, version=None):
- key = self.make_key(key, version=version)
- self.validate_key(key)
- fname = self._key_to_file(key)
- try:
- with open(fname, 'rb') as f:
- exp = pickle.load(f)
- now = time.time()
- if exp < now:
- self._delete(fname)
- return False
- else:
- return True
- except (IOError, OSError, EOFError, pickle.PickleError):
- return False
+ fname = self._key_to_file(key, version)
+ if os.path.exists(fname):
+ with io.open(fname, 'rb') as f:
+ return not self._is_expired(f)
+ return False
def _cull(self):
- if int(self._num_entries) < self._max_entries:
- return
-
- try:
- filelist = sorted(os.listdir(self._dir))
- except (IOError, OSError):
- return
-
+ """
+ When max_entries is reached, removes a random selection of entries
+ at a ratio of num_entries / cull_frequency. A value of 0 for
+ CULL_FREQUENCY means that the entire cache will be purged.
+ """
+ filelist = self._list_cache_files()
+ num_entries = len(filelist)
+ if num_entries < self._max_entries:
+ return # return early if no culling is required
if self._cull_frequency == 0:
- doomed = filelist
- else:
- doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
-
- for topdir in doomed:
- try:
- for root, _, files in os.walk(topdir):
- for f in files:
- self._delete(os.path.join(root, f))
- except (IOError, OSError):
- pass
+ return self.clear() # Clear the cache when CULL_FREQUENCY = 0
+ # Delete a random selection of entries
+ filelist = random.sample(filelist,
+ int(num_entries / self._cull_frequency))
+ for fname in filelist:
+ self._delete(fname)
def _createdir(self):
- try:
- os.makedirs(self._dir)
- except OSError:
- raise EnvironmentError("Cache directory '%s' does not exist and could not be created'" % self._dir)
-
- def _key_to_file(self, key):
+ if not os.path.exists(self._dir):
+ try:
+ os.makedirs(self._dir, 0o700)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise EnvironmentError(
+ "Cache directory '%s' does not exist "
+ "and could not be created'" % self._dir)
+
+ def _key_to_file(self, key, version=None):
+ """
+ Convert a key into a cache file path. Basically this is the
+ root cache path joined with the md5sum of the key and a suffix.
"""
- Convert the filename into an md5 string. We'll turn the first couple
- bits of the path into directory prefixes to be nice to filesystems
- that have problems with large numbers of files in a directory.
+ key = self.make_key(key, version=version)
+ self.validate_key(key)
+ return os.path.join(self._dir, ''.join(
+ [hashlib.md5(force_bytes(key)).hexdigest(), self.cache_suffix]))
- Thus, a cache key of "foo" gets turnned into a file named
- ``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
+ def clear(self):
+ """
+ Remove all the cache files.
"""
- path = hashlib.md5(force_bytes(key)).hexdigest()
- path = os.path.join(path[:2], path[2:4], path[4:])
- return os.path.join(self._dir, path)
+ if not os.path.exists(self._dir):
+ return
+ for fname in self._list_cache_files():
+ self._delete(fname)
- def _get_num_entries(self):
- count = 0
- for _, _, files in os.walk(self._dir):
- count += len(files)
- return count
- _num_entries = property(_get_num_entries)
+ def _is_expired(self, f):
+ """
+ Takes an open cache file and determines if it has expired;
+ deletes the file if it has passed its expiry time.
+ """
+ exp = pickle.load(f)
+ if exp is not None and exp < time.time():
+ f.close() # On Windows a file has to be closed before deleting
+ self._delete(f.name)
+ return True
+ return False
+
+ def _list_cache_files(self):
+ """
+ Get a list of paths to all the cache files. These are all the files
+ in the root cache dir that end with the cache_suffix.
+ """
+ if not os.path.exists(self._dir):
+ return []
+ filelist = [os.path.join(self._dir, fname) for fname
+ in glob.glob1(self._dir, '*%s' % self.cache_suffix)]
+ return filelist
- def clear(self):
- try:
- shutil.rmtree(self._dir)
- except (IOError, OSError):
- pass
# For backwards compatibility
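On disk, each entry is now a single flat file: the name is the md5 hex digest of the key (after make_key() adds the version) plus the .djcache suffix, and the body is the pickled expiry followed by the zlib-compressed pickled value. A read-only decoding sketch under those assumptions (the real backend's _is_expired() additionally deletes stale files):

    import hashlib
    import os
    import pickle
    import time
    import zlib

    def read_cache_file(cache_dir, key, default=None):
        # Same naming scheme as _key_to_file(); key versioning via
        # make_key() is omitted in this sketch.
        path = os.path.join(
            cache_dir, hashlib.md5(key.encode()).hexdigest() + '.djcache')
        with open(path, 'rb') as f:
            exp = pickle.load(f)   # consumes only the expiry header
            if exp is not None and exp < time.time():
                return default     # expired; the backend would delete it
            return pickle.loads(zlib.decompress(f.read()))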
docs/topics/cache.txt (14 lines changed)
@@ -253,10 +253,11 @@ model.
Filesystem caching
------------------
-To store cached items on a filesystem, use
-``"django.core.cache.backends.filebased.FileBasedCache"`` for
-:setting:`BACKEND <CACHES-BACKEND>`. For example, to store cached data in
-``/var/tmp/django_cache``, use this setting::
+The file-based backend serializes and stores each cache value as a separate
+file. To use this backend set :setting:`BACKEND <CACHES-BACKEND>` to
+``"django.core.cache.backends.filebased.FileBasedCache"`` and
+:setting:`LOCATION <CACHES-LOCATION>` to a suitable directory. For example,
+to store cached data in ``/var/tmp/django_cache``, use this setting::
CACHES = {
'default': {
@@ -265,7 +266,6 @@ To store cached items on a filesystem, use
}
}
-
If you're on Windows, put the drive letter at the beginning of the path,
like this::
@@ -286,10 +286,6 @@ above example, if your server runs as the user ``apache``, make sure the
directory ``/var/tmp/django_cache`` exists and is readable and writable by the
user ``apache``.
-Each cache value will be stored as a separate file whose contents are the
-cache data saved in a serialized ("pickled") format, using Python's ``pickle``
-module. Each file's name is the cache key, escaped for safe filesystem use.
-
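Putting the settings together, a complete configuration looks like the
following; the ``OPTIONS`` block is optional and shows the default cache
arguments (``MAX_ENTRIES`` 300, ``CULL_FREQUENCY`` 3)::

    CACHES = {
        'default': {
            'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
            'LOCATION': '/var/tmp/django_cache',
            'OPTIONS': {
                'MAX_ENTRIES': 300,    # cull once this many entries exist
                'CULL_FREQUENCY': 3,   # cull 1/3 of entries; 0 purges all
            },
        }
    }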
Local-memory caching
--------------------
tests/cache/tests.py (46 lines changed)
@@ -1076,32 +1076,34 @@ def setUp(self):
def tearDown(self):
self.cache.clear()
+ os.rmdir(self.dirname)
- def test_hashing(self):
- """Test that keys are hashed into subdirectories correctly"""
- self.cache.set("foo", "bar")
- key = self.cache.make_key("foo")
- keyhash = hashlib.md5(key.encode()).hexdigest()
- keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
- self.assertTrue(os.path.exists(keypath))
+ def test_cull(self):
+ self.perform_cull_test(50, 29)
- def test_subdirectory_removal(self):
- """
- Make sure that the created subdirectories are correctly removed when empty.
- """
- self.cache.set("foo", "bar")
- key = self.cache.make_key("foo")
- keyhash = hashlib.md5(key.encode()).hexdigest()
- keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
- self.assertTrue(os.path.exists(keypath))
+ def test_ignores_non_cache_files(self):
+ fname = os.path.join(self.dirname, 'not-a-cache-file')
+ with open(fname, 'w'):
+ os.utime(fname, None)
+ self.cache.clear()
+ self.assertTrue(os.path.exists(fname),
+ 'Expected cache.clear to ignore non cache files')
+ os.remove(fname)
- self.cache.delete("foo")
- self.assertTrue(not os.path.exists(keypath))
- self.assertTrue(not os.path.exists(os.path.dirname(keypath)))
- self.assertTrue(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
+ def test_clear_does_not_remove_cache_dir(self):
+ self.cache.clear()
+ self.assertTrue(os.path.exists(self.dirname),
+ 'Expected cache.clear to keep the cache dir')
- def test_cull(self):
- self.perform_cull_test(50, 29)
+ def test_creates_cache_dir_if_nonexistent(self):
+ os.rmdir(self.dirname)
+ self.cache.set('foo', 'bar')
+ self.assertTrue(os.path.exists(self.dirname))
+
+ def test_zero_cull(self):
+ # Regression test for #15806
+ self.cache = get_cache(self.backend_name, LOCATION=self.dirname, OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})
+ self.perform_cull_test(50, 19)
class CustomCacheKeyValidationTests(unittest.TestCase):
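With CULL_FREQUENCY set to 0, crossing MAX_ENTRIES now purges the whole cache, which the new _cull() implements by delegating to clear(). A standalone sketch of the same setup the test uses, via the get_cache call shown above (the LOCATION directory is illustrative):

    from django.core.cache import get_cache

    cache = get_cache(
        'django.core.cache.backends.filebased.FileBasedCache',
        LOCATION='/var/tmp/djcache-demo',   # hypothetical directory
        OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})

    for i in range(50):
        # once the 30-entry limit is hit, _cull() clears the cache,
        # so far fewer than 50 entries remain at the end
        cache.set('key-%d' % i, i)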