Skip to content
Browse files

* sshelve performance improvements

  * update() is faster. Should be preferred over many individual inserts
  * clear() is faster
  • Loading branch information...
1 parent 8e92f6a commit 4649c3f4dd57b84311579da86b1274cc5b05e014 @jfennell jfennell committed Feb 7, 2011
Showing with 119 additions and 2 deletions.
  1. +5 −0 CHANGES.txt
  2. +1 −1 sqlite3dbm/__init__.py
  3. +1 −1 sqlite3dbm/dbm.py
  4. +29 −0 sqlite3dbm/sshelve.py
  5. +83 −0 tests/sshelve_test.py
View
5 CHANGES.txt
@@ -1,3 +1,8 @@
+### v0.1.3, 2011-02-07 -- Improve shelve update/clear performance
+* sshelve performance improvements
+ * update() is faster. Should be preferred over many individual inserts
+ * clear() is faster
+
### v0.1.2, 2011-01-28 -- Bump version number
* Do a full minor bump for pypi
View
2 sqlite3dbm/__init__.py
@@ -18,7 +18,7 @@
"""
__author__ = 'Jason Fennell <jfennell@yelp.com>'
-__version__ = '0.1.2'
+__version__ = '0.1.3'
import sqlite3dbm.dbm as dbm
import sqlite3dbm.sshelve as sshelve
View
2 sqlite3dbm/dbm.py
@@ -425,7 +425,7 @@ def kv_gen():
for k, v in kwargs.iteritems():
yield k, v
- rows = [(k, v) for k, v in kv_gen()]
+ rows = list(kv_gen())
# Do all the inserts in a single transaction for the sake of efficiency
# TODO: Compare performance of INSERT MANY to many INSERTS. Will
View
29 sqlite3dbm/sshelve.py
@@ -77,6 +77,35 @@ def select(self, *args):
for v in self.dict.select(*args)
]
+ # Performance override: we want to batch writes into one transaction
def update(self, *args, **kwargs):
    """Store many key/value pairs through one batched write.

    Each positional argument may be a mapping or an iterable of
    ``(key, value)`` pairs; keyword arguments are added last.  All pairs
    are collected up front so the underlying store receives a single
    ``update()`` call rather than one write per key.

    Mappings are recognised by duck typing (``iteritems()`` or ``keys()``),
    following the ``dict.update`` contract.  Dict-like objects that do not
    subclass ``dict`` are therefore read as mappings instead of being
    iterated as if they were a sequence of pairs.

    A non-mapping argument whose items are not pairs makes the unpacking
    below raise (``TypeError`` or ``ValueError``).  That happens while the
    pairs are still being collected, so nothing has been written yet.
    """
    inserts = []
    for arg in args:
        if hasattr(arg, 'iteritems'):
            pairs = arg.iteritems()
        elif hasattr(arg, 'keys'):
            # Mapping without iteritems(): look every key up explicitly.
            pairs = ((k, arg[k]) for k in arg.keys())
        else:
            pairs = arg
        # Unpack explicitly so malformed items fail here, before any write.
        for k, v in pairs:
            inserts.append((k, v))
    # kwargs is always a plain dict and the pairs are materialised anyway,
    # so items() is equivalent to iteritems() here.
    inserts.extend(kwargs.items())

    if self.writeback:
        # Keep the cache in step with what is about to be stored.
        self.cache.update(inserts)

    # Values are pickled; keys are handed to the store unchanged.
    self.dict.update([
        (k, dumps(v, protocol=self._protocol))
        for k, v in inserts
    ])
+
+ # Performance override: clear in one sqlite command
def clear(self):
    """Remove every entry using the backing store's bulk ``clear()``.

    When ``writeback`` is enabled the in-memory cache is emptied as well,
    so objects cached before the call do not outlive it.
    NOTE(review): stdlib ``shelve`` serves reads from ``cache`` first and
    writes it back on ``sync()``; presumably this class inherits that
    behaviour -- confirm against the class definition.
    """
    self.dict.clear()
    if self.writeback:
        self.cache.clear()
+
def open(filename, flag='c', mode=0666, protocol=None, writeback=False):
"""Open a persistent sqlite3-backed dictionary. The *filename* specified
is the path to the underlying database.
View
83 tests/sshelve_test.py
@@ -18,6 +18,7 @@
import os
import shutil
import tempfile
+import time
import testify
@@ -86,6 +87,34 @@ def test_get_many(self):
['fennell', droid, 0]
)
def test_update(self):
    """Every pair passed to update() can be read back by key."""
    droid = ['R2-D2', 'C-3P0']
    payload = {'jason': 'fennell', 'droid': droid, 'pi': 3.14}
    self.smap_shelf.update(payload)

    # Check the keys in a fixed order so the first failure is predictable.
    for key in ('jason', 'droid', 'pi'):
        testify.assert_equal(self.smap_shelf[key], payload[key])
+
def test_clear(self):
    """clear() leaves the shelf with no entries."""
    shelf = self.smap_shelf
    shelf.update({
        'jason': 'fennell',
        'droid': ['R2-D2', 'C-3P0'],
        'pi': 3.14,
    })

    # Confirm the data actually landed before wiping it.
    testify.assert_equal(shelf['jason'], 'fennell')
    testify.assert_equal(len(shelf), 3)

    shelf.clear()

    testify.assert_equal(len(shelf), 0)
    testify.assert_not_in('jason', shelf)
+
def test_preserves_unicode(self):
"""Be paranoid about unicode."""
k = u'café'.encode('utf-8')
@@ -111,5 +140,59 @@ def test_open(self):
smap_shelf['foo'] = ['bar', 'baz', 'qux']
testify.assert_equal(smap_shelf['foo'], ['bar', 'baz', 'qux'])
+
class TestShelfPerf(testify.TestCase):
    """Wall-clock comparison of batched update() against per-key inserts."""

    @testify.setup
    def create_environ(self):
        # Scratch directory that holds the database files for one test.
        self.tmpdir = tempfile.mkdtemp()

    @testify.teardown
    def teardown(self):
        shutil.rmtree(self.tmpdir)

    def test_update_perf(self):
        """update() should be faster than lots of individual inserts"""

        # Knobs that control how long this test takes vs. how accurate it is
        # This test *should not flake*, but if you run into problems then you
        # should increase `insert_per_iter` (the test will take longer though)
        num_iters = 5
        insert_per_iter = 300
        min_ratio = 10

        def setup_dbs(name):
            """Open `num_iters` shelves named `<name><i>` inside tmpdir."""
            pattern = name + '%d'
            db_paths = [
                os.path.join(self.tmpdir, pattern % i)
                for i in xrange(num_iters)
            ]
            return [sqlite3dbm.sshelve.open(path) for path in db_paths]

        # Every shelf opened below is recorded here so it can be closed
        # before teardown removes the directory that holds its file.
        opened = []
        try:
            update_dbs = setup_dbs('update')
            opened.extend(update_dbs)
            insert_dbs = setup_dbs('insert')
            opened.extend(insert_dbs)

            # Setup data
            insert_data = [
                ('foo%d' % i, 'bar%d' % i)
                for i in xrange(insert_per_iter)
            ]

            # Time updates
            update_start = time.time()
            for update_db in update_dbs:
                update_db.update(insert_data)
            update_time = time.time() - update_start

            # Time inserts
            insert_start = time.time()
            for insert_db in insert_dbs:
                for k, v in insert_data:
                    insert_db[k] = v
            insert_time = time.time() - insert_start

            # Inserts should take a substantially greater amount of time
            testify.assert_gt(insert_time, min_ratio*update_time)
        finally:
            # NOTE(review): close() is the stdlib shelve API; the shelves
            # returned by sshelve.open are presumed to provide it -- confirm.
            for db in opened:
                db.close()
+
+
if __name__ == '__main__':
testify.run()

0 comments on commit 4649c3f

Please sign in to comment.
Something went wrong with that request. Please try again.