Permalink
Browse files

Cache all password (pwd) and group (grp) database lookups.

Thanks to Jann Horn <jannhorn@googlemail.com> for determining that
even just caching user names and group names could provide a notable
performance improvement in some cases.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Reviewed-by: Zoran Zaric <zz@zoranzaric.de>
  • Loading branch information...
1 parent 042eaac commit 4b33b1406683f4ca4082aace980c753e19a71875 @rlbdv rlbdv committed Sep 23, 2012
Showing with 90 additions and 32 deletions.
  1. +75 −12 lib/bup/helpers.py
  2. +15 −20 lib/bup/metadata.py
View
@@ -1,7 +1,7 @@
"""Helper functions and classes for bup."""
import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
-import heapq, operator, time, platform
+import heapq, operator, time, platform, grp
from bup import _version, _helpers
import bup._helpers as _helpers
@@ -211,16 +211,82 @@ def is_superuser():
return os.geteuid() == 0
+def _cache_key_value(get_value, key, cache):
+ """Return (value, was_cached). If there is a value in the cache
+ for key, use that, otherwise, call get_value(key) which should
+ throw a KeyError if there is no value -- in which case the cached
+ and returned value will be None.
+ """
+ try: # Do we already have it (or know there wasn't one)?
+ value = cache[key]
+ return value, True
+ except KeyError:
+ pass
+ value = None
+ try:
+ cache[key] = value = get_value(key)
+ except KeyError:
+ cache[key] = None
+ return value, False
+
+
# Password database entries already looked up, keyed by uid and by
# user name.  A value of None records a lookup that found nothing.
_uid_to_pwd_cache = {}
_name_to_pwd_cache = {}

def pwd_from_uid(uid):
    """Return the password database entry for uid, or None if there
    is no such entry.  The result may be a cached value; an entry
    fetched for the first time is also stored under its pw_name in
    the by-name cache.
    """
    # The caches are only mutated, never rebound, so no "global" is needed.
    pw_entry, was_cached = _cache_key_value(pwd.getpwuid, uid,
                                            _uid_to_pwd_cache)
    if not was_cached and pw_entry:
        _name_to_pwd_cache[pw_entry.pw_name] = pw_entry
    return pw_entry
+
+
def pwd_from_name(name):
    """Return the password database entry for the user called name,
    or None if there is no such entry.  The result may be a cached
    value; an entry fetched for the first time is also stored under
    its pw_uid in the by-uid cache.
    """
    # The caches are only mutated, never rebound, so no "global" is needed.
    pw_entry, was_cached = _cache_key_value(pwd.getpwnam, name,
                                            _name_to_pwd_cache)
    if not was_cached and pw_entry:
        # NOTE(review): if several user names share one uid, this
        # makes a later pwd_from_uid() return whichever name was
        # looked up here -- confirm that is acceptable.
        _uid_to_pwd_cache[pw_entry.pw_uid] = pw_entry
    return pw_entry
+
+
# Group database entries already looked up, keyed by gid and by group
# name.  A value of None records a lookup that found nothing.
_gid_to_grp_cache = {}
_name_to_grp_cache = {}

def grp_from_gid(gid):
    """Return group database entry for gid (may be a cached value).
    Return None if no entry is found.
    """
    # The caches are only mutated, never rebound, so no "global" is needed.
    entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
    if entry and not cached:
        # Record the fresh entry under its name too, so a later
        # grp_from_name() for that name is served from the cache.
        _name_to_grp_cache[entry.gr_name] = entry
    return entry
+
+
def grp_from_name(name):
    """Return group database entry for name (may be a cached value).
    Return None if no entry is found.
    """
    # The caches are only mutated, never rebound, so no "global" is needed.
    entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
    if entry and not cached:
        # Record the fresh entry under its gid too, so a later
        # grp_from_gid() for that gid is served from the cache.
        # NOTE(review): if several group names share one gid, this
        # makes grp_from_gid() return whichever name was looked up
        # here -- confirm that is acceptable.
        _gid_to_grp_cache[entry.gr_gid] = entry
    return entry
+
+
_username = None
def username():
    """Get the user's login name.

    The name comes from the password database entry for os.getuid().
    If there is no such entry (or its name is empty), 'user<uid>' is
    used instead.  The result is kept in the module-level _username
    and reused by later calls.
    """
    global _username
    if not _username:
        uid = os.getuid()
        # pwd_from_uid() returns None rather than raising KeyError when
        # the uid has no entry, so check the entry before reading it.
        entry = pwd_from_uid(uid)
        name = entry.pw_name if entry else None
        _username = name or 'user%d' % uid
    return _username
@@ -230,14 +296,11 @@ def userfullname():
global _userfullname
if not _userfullname:
uid = os.getuid()
- try:
- entry = pwd.getpwuid(uid)
+ entry = pwd_from_uid(uid)
+ if entry:
_userfullname = entry[4].split(',')[0] or entry[0]
- except KeyError:
- pass
- finally:
- if not _userfullname:
- _userfullname = 'user%d' % uid
+ if not _userfullname:
+ _userfullname = 'user%d' % uid
return _userfullname
View
@@ -9,6 +9,7 @@
from bup import vint, xstat
from bup.drecurse import recursive_dirlist
from bup.helpers import add_error, mkdirp, log, is_superuser
+from bup.helpers import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
from bup.xstat import utime, lutime
try:
@@ -199,16 +200,12 @@ def _add_common(self, path, st):
self.mtime = st.st_mtime
self.ctime = st.st_ctime
self.user = self.group = ''
- # FIXME: should we be caching id -> user/group name mappings?
- # IIRC, tar uses some trick -- possibly caching the last pair.
- try:
- self.user = pwd.getpwuid(st.st_uid)[0]
- except KeyError, e:
- pass
- try:
- self.group = grp.getgrgid(st.st_gid)[0]
- except KeyError, e:
- pass
+ entry = pwd_from_uid(st.st_uid)
+ if entry:
+ self.user = entry.pw_name
+ entry = grp_from_gid(st.st_gid)
+ if entry:
+ self.group = entry.gr_name
self.mode = st.st_mode
def _same_common(self, other):
@@ -361,24 +358,22 @@ def _apply_common_rec(self, path, restore_numeric_ids=False):
gid = self.gid
if not restore_numeric_ids:
if self.uid != 0 and self.user:
- try:
- uid = pwd.getpwnam(self.user)[2]
- except KeyError:
- pass # Fall back to self.uid.
+ entry = pwd_from_name(self.user)
+ if entry:
+ uid = entry.pw_uid
if self.gid != 0 and self.group:
- try:
- gid = grp.getgrnam(self.group)[2]
- except KeyError:
- pass # Fall back to self.gid.
+ entry = grp_from_name(self.group)
+ if entry:
+ gid = entry.gr_gid
else: # not superuser - only consider changing the group/gid
user_gids = os.getgroups()
if self.gid in user_gids:
gid = self.gid
if not restore_numeric_ids and \
self.gid != 0 and \
- self.group in [grp.getgrgid(x)[0] for x in user_gids]:
+ self.group in [grp_from_gid(x).gr_name for x in user_gids]:
try:
- gid = grp.getgrnam(self.group)[2]
+ gid = grp_from_name(self.group).gr_gid
except KeyError:
pass # Fall back to gid.

0 comments on commit 4b33b14

Please sign in to comment.