Fixed #6099: the filebased cache backend now uses md5 hashes of keys instead of sanitized filenames. For good measure, keys are partitioned into subdirectories using the first few bits of the hash. Thanks, sherbang.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@6887 bcc190cf-cafb-0310-a4f2-bffc1f526a37
commit acfff050ecd9764943bc4444ee09b8b26df38168 (1 parent: 91ca6f2)
Authored by @jacobian
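For reference, here is a minimal stand-alone sketch (not part of the commit) of the key-to-path scheme the message describes, assuming the Python 2 md5 module the patch itself uses; the helper name and cache directory below are illustrative:

import md5, os

def key_to_path(cache_dir, key):
    # Hash the key, then split the hex digest into two 2-character
    # directory prefixes plus the remainder as the filename.
    digest = md5.new(key.encode('utf-8')).hexdigest()
    return os.path.join(cache_dir, digest[:2], digest[2:4], digest[4:])

# "foo" hashes to acbd18db4cc2f85cedef654fccc4a4d8, so on a POSIX layout
# this prints /tmp/django_cache/ac/bd/18db4cc2f85cedef654fccc4a4d8
print key_to_path('/tmp/django_cache', 'foo')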
django/core/cache/backends/filebased.py (110 lines changed)
@@ -1,12 +1,12 @@
 "File-based cache backend"

+import md5
 import os, time
 try:
     import cPickle as pickle
 except ImportError:
     import pickle
 from django.core.cache.backends.base import BaseCache
-from django.utils.http import urlquote_plus

 class CacheClass(BaseCache):
     def __init__(self, dir, params):
@@ -29,24 +29,10 @@ def __init__(self, dir, params):
         self._createdir()

     def add(self, key, value, timeout=None):
-        fname = self._key_to_file(key)
-        if timeout is None:
-            timeout = self.default_timeout
-        try:
-            filelist = os.listdir(self._dir)
-        except (IOError, OSError):
-            self._createdir()
-            filelist = []
-        if len(filelist) > self._max_entries:
-            self._cull(filelist)
-        if os.path.basename(fname) not in filelist:
-            try:
-                f = open(fname, 'wb')
-                now = time.time()
-                pickle.dump(now + timeout, f, 2)
-                pickle.dump(value, f, 2)
-            except (IOError, OSError):
-                pass
+        if self.has_key(key):
+            return None
+
+        self.set(key, value, timeout)

     def get(self, key, default=None):
         fname = self._key_to_file(key)
@@ -56,7 +42,7 @@ def get(self, key, default=None):
             now = time.time()
             if exp < now:
                 f.close()
-                os.remove(fname)
+                self._delete(fname)
             else:
                 return pickle.load(f)
         except (IOError, OSError, EOFError, pickle.PickleError):
@@ -65,40 +51,74 @@ def get(self, key, default=None):

     def set(self, key, value, timeout=None):
         fname = self._key_to_file(key)
+        dirname = os.path.dirname(fname)
+
         if timeout is None:
             timeout = self.default_timeout
+
+        self._cull()
+
         try:
-            filelist = os.listdir(self._dir)
-        except (IOError, OSError):
-            self._createdir()
-            filelist = []
-        if len(filelist) > self._max_entries:
-            self._cull(filelist)
-        try:
+            if not os.path.exists(dirname):
+                os.makedirs(dirname)
+
             f = open(fname, 'wb')
             now = time.time()
-            pickle.dump(now + timeout, f, 2)
-            pickle.dump(value, f, 2)
+            pickle.dump(now + timeout, f, pickle.HIGHEST_PROTOCOL)
+            pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
         except (IOError, OSError):
             pass

     def delete(self, key):
         try:
-            os.remove(self._key_to_file(key))
+            self._delete(self._key_to_file(key))
+        except (IOError, OSError):
+            pass
+
+    def _delete(self, fname):
+        os.remove(fname)
+        try:
+            # Remove the 2 subdirs if they're empty
+            dirname = os.path.dirname(fname)
+            os.rmdir(dirname)
+            os.rmdir(os.path.dirname(dirname))
         except (IOError, OSError):
             pass

     def has_key(self, key):
-        return os.path.exists(self._key_to_file(key))
+        fname = self._key_to_file(key)
+        try:
+            f = open(fname, 'rb')
+            exp = pickle.load(f)
+            now = time.time()
+            if exp < now:
+                f.close()
+                self._delete(fname)
+                return False
+            else:
+                return True
+        except (IOError, OSError, EOFError, pickle.PickleError):
+            return False

-    def _cull(self, filelist):
+    def _cull(self):
+        if int(self._num_entries) < self._max_entries:
+            return
+
+        try:
+            filelist = os.listdir(self._dir)
+        except (IOError, OSError):
+            return
+
         if self._cull_frequency == 0:
             doomed = filelist
         else:
-            doomed = [k for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
-        for fname in doomed:
+            doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
+
+        for topdir in doomed:
             try:
-                os.remove(os.path.join(self._dir, fname))
+                for root, _, files in os.walk(topdir):
+                    for f in files:
+                        self._delete(os.path.join(root, f))
             except (IOError, OSError):
                 pass
@@ -109,4 +129,22 @@ def _createdir(self):
             raise EnvironmentError, "Cache directory '%s' does not exist and could not be created'" % self._dir

     def _key_to_file(self, key):
-        return os.path.join(self._dir, urlquote_plus(key))
+        """
+        Convert the key into an md5 hash. We'll turn the first couple of
+        bits of the hash into directory prefixes to be nice to filesystems
+        that have problems with large numbers of files in a directory.
+
+        Thus, a cache key of "foo" gets turned into a file named
+        ``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
+        """
+        path = md5.new(key.encode('utf-8')).hexdigest()
+        path = os.path.join(path[:2], path[2:4], path[4:])
+        return os.path.join(self._dir, path)
+
+    def _get_num_entries(self):
+        count = 0
+        for _,_,files in os.walk(self._dir):
+            count += len(files)
+        return count
+    _num_entries = property(_get_num_entries)
+
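Roughly, the new culling path works in two stages: _num_entries walks the whole tree and counts files, and _cull then empties every Nth top-level hash-prefix directory. A simplified stand-alone sketch of that logic (helper names here are illustrative, not the backend's API):

import os

def count_entries(cache_dir):
    # One cache entry per regular file anywhere under the cache dir,
    # mirroring the _get_num_entries property above.
    count = 0
    for _root, _dirs, files in os.walk(cache_dir):
        count += len(files)
    return count

def doomed_prefixes(cache_dir, cull_frequency):
    # The cull picks every cull_frequency-th entry of the top-level
    # listing (the hash-prefix directories); a frequency of 0 means
    # everything is doomed, matching _cull above.
    entries = os.listdir(cache_dir)
    if cull_frequency == 0:
        return [os.path.join(cache_dir, e) for e in entries]
    return [os.path.join(cache_dir, e)
            for i, e in enumerate(entries)
            if i % cull_frequency == 0]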
tests/regressiontests/cache/tests.py (58 lines changed)
@@ -3,8 +3,8 @@
 # Unit tests for cache framework
 # Uses whatever cache backend is set in the test settings file.

-import time, unittest
-
+import time
+import unittest
 from django.core.cache import cache
 from django.utils.cache import patch_vary_headers
 from django.http import HttpResponse
@@ -27,7 +27,7 @@ def test_add(self):
         cache.add("addkey1", "value")
         cache.add("addkey1", "newvalue")
         self.assertEqual(cache.get("addkey1"), "value")
-
+
     def test_non_existent(self):
         # get with non-existent keys
         self.assertEqual(cache.get("does_not_exist"), None)
@@ -76,10 +76,16 @@ def test_data_types(self):
         self.assertEqual(cache.get("stuff"), stuff)

     def test_expiration(self):
-        # expiration
-        cache.set('expire', 'very quickly', 1)
-        time.sleep(2)
-        self.assertEqual(cache.get("expire"), None)
+        cache.set('expire1', 'very quickly', 1)
+        cache.set('expire2', 'very quickly', 1)
+        cache.set('expire3', 'very quickly', 1)
+
+        time.sleep(2)
+        self.assertEqual(cache.get("expire1"), None)
+
+        cache.add("expire2", "newvalue")
+        self.assertEqual(cache.get("expire2"), "newvalue")
+        self.assertEqual(cache.has_key("expire3"), False)

     def test_unicode(self):
         stuff = {
@@ -92,6 +98,44 @@ def test_unicode(self):
             cache.set(key, value)
             self.assertEqual(cache.get(key), value)

+import os
+import md5
+import shutil
+import tempfile
+from django.core.cache.backends.filebased import CacheClass as FileCache
+
+class FileBasedCacheTests(unittest.TestCase):
+    """
+    Specific test cases for the file-based cache.
+    """
+    def setUp(self):
+        self.dirname = tempfile.mktemp()
+        os.mkdir(self.dirname)
+        self.cache = FileCache(self.dirname, {})
+
+    def tearDown(self):
+        shutil.rmtree(self.dirname)
+
+    def test_hashing(self):
+        """Test that keys are hashed into subdirectories correctly"""
+        self.cache.set("foo", "bar")
+        keyhash = md5.new("foo").hexdigest()
+        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
+        self.assert_(os.path.exists(keypath))
+
+    def test_subdirectory_removal(self):
+        """
+        Make sure that the created subdirectories are correctly removed when empty.
+        """
+        self.cache.set("foo", "bar")
+        keyhash = md5.new("foo").hexdigest()
+        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
+        self.assert_(os.path.exists(keypath))
+
+        self.cache.delete("foo")
+        self.assert_(not os.path.exists(keypath))
+        self.assert_(not os.path.exists(os.path.dirname(keypath)))
+        self.assert_(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
 class CacheUtils(unittest.TestCase):
     """TestCase for django.utils.cache functions."""
