Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #12 from andersk/deforkify

Use git cat-file --batch and git log --name-status to avoid repeated fork+exec
  • Loading branch information...
commit 722342ef03639415d7a1dc0230239d34cb97d988 2 parents d7a50e2 + 9541c2a
@hvr authored
Showing with 109 additions and 29 deletions.
  1. +87 −10 tracext/git/PyGIT.py
  2. +22 −19 tracext/git/git_fs.py
View
97 tracext/git/PyGIT.py
@@ -14,6 +14,7 @@
from threading import Lock
from subprocess import Popen, PIPE
from operator import itemgetter
+from contextlib import contextmanager
import cStringIO
import codecs
@@ -48,24 +49,34 @@ def __build_git_cmd(self, gitcmd, *args):
return cmd
+ def __pipe(self, git_cmd, *cmd_args, **kw):
+ if sys.platform == "win32":
+ return Popen(self.__build_git_cmd(git_cmd, *cmd_args), **kw)
+ else:
+ return Popen(self.__build_git_cmd(git_cmd, *cmd_args),
+ close_fds=True, **kw)
+
def __execute(self, git_cmd, *cmd_args):
"execute git command and return file-like object of stdout"
#print >>sys.stderr, "DEBUG:", git_cmd, cmd_args
- if sys.platform == "win32":
- p = Popen(self.__build_git_cmd(git_cmd, *cmd_args),
- stdin=None, stdout=PIPE, stderr=PIPE)
- else:
- p = Popen(self.__build_git_cmd(git_cmd, *cmd_args),
- stdin=None, stdout=PIPE, stderr=PIPE, close_fds=True)
+ p = self.__pipe(git_cmd, *cmd_args, stdout=PIPE, stderr=PIPE)
stdout_data, stderr_data = p.communicate()
#TODO, do something with p.returncode, e.g. raise exception
return stdout_data
+ def cat_file_batch(self):
+ return self.__pipe('cat-file', '--batch', stdin=PIPE, stdout=PIPE)
+
+ def log_pipe(self, *cmd_args):
+ return self.__pipe('log', *cmd_args, stdout=PIPE)
+
def __getattr__(self, name):
+ if name[0] == '_' or name in ['cat_file_batch', 'log_pipe']:
+ raise AttributeError, name
return partial(self.__execute, name.replace('_','-'))
__is_sha_pat = re.compile(r'[0-9A-Fa-f]*$')
@@ -254,6 +265,13 @@ def __init__(self, git_dir, log, git_bin='git', git_fs_encoding=None):
self.__commit_msg_cache = SizedDict(200)
self.__commit_msg_lock = Lock()
+ self.__cat_file_pipe = None
+
+ def __del__(self):
+ if self.__cat_file_pipe is not None:
+ self.__cat_file_pipe.stdin.close()
+ self.__cat_file_pipe.wait()
+
#
# cache handling
#
@@ -473,6 +491,20 @@ def head(self):
"get current HEAD commit id"
return self.verifyrev("HEAD")
+ def cat_file(self, kind, sha):
+ if self.__cat_file_pipe is None:
+ self.__cat_file_pipe = self.repo.cat_file_batch()
+
+ self.__cat_file_pipe.stdin.write(sha + '\n')
+ self.__cat_file_pipe.stdin.flush()
+ _sha, _type, _size = self.__cat_file_pipe.stdout.readline().split()
+
+ if _type != kind:
+ raise TracError("internal error (got unexpected object kind '%s')" % k)
+
+ size = int(_size)
+ return self.__cat_file_pipe.stdout.read(size + 1)[:size]
+
def verifyrev(self, rev):
"verify/lookup given revision object and return a sha id or None if lookup failed"
rev = str(rev)
@@ -494,7 +526,7 @@ def verifyrev(self, rev):
return rc
if rc in _rev_cache.tag_set:
- sha = self.repo.cat_file("tag", rc).split(None, 2)[:2]
+ sha = self.cat_file("tag", rc).split(None, 2)[:2]
if sha[0] != 'object':
self.logger.debug("unexpected result from 'git-cat-file tag %s'" % rc)
return None
@@ -604,7 +636,7 @@ def read_commit(self, commit_id):
return result[0], dict(result[1])
# cache miss
- raw = self.repo.cat_file("commit", commit_id)
+ raw = self.cat_file("commit", commit_id)
raw = unicode(raw, self.get_commit_encoding(), 'replace')
lines = raw.splitlines()
@@ -625,7 +657,7 @@ def read_commit(self, commit_id):
return result[0], dict(result[1])
def get_file(self, sha):
- return cStringIO.StringIO(self.repo.cat_file("blob", str(sha)))
+ return cStringIO.StringIO(self.cat_file("blob", str(sha)))
def get_obj_size(self, sha):
sha = str(sha)
@@ -685,7 +717,52 @@ def sync(self):
rev = self.repo.rev_list("--max-count=1", "--topo-order", "--all").strip()
return self.__rev_cache_sync(rev)
- def last_change(self, sha, path):
+ @contextmanager
+ def get_historian(self, sha, base_path):
+ p = []
+ change = {}
+ next_path = []
+
+ def name_status_gen():
+ p[:] = [self.repo.log_pipe('--pretty=format:%n%H', '--name-status',
+ sha, '--', base_path)]
+ f = p[0].stdout
+ for l in f:
+ if l == '\n': continue
+ old_sha = l.rstrip('\n')
+ for l in f:
+ if l == '\n': break
+ _, path = l.rstrip('\n').split('\t', 1)
+ while path not in change:
+ change[path] = old_sha
+ if next_path == [path]: yield old_sha
+ try:
+ path, _ = path.rsplit('/', 1)
+ except ValueError:
+ break
+ f.close()
+ p[0].terminate()
+ p[0].wait()
+ p[:] = []
+ while True: yield None
+ gen = name_status_gen()
+
+ def historian(path):
+ try:
+ return change[path]
+ except KeyError:
+ next_path[:] = [path]
+ return gen.next()
+ yield historian
+
+ if p:
+ p[0].stdout.close()
+ p[0].terminate()
+ p[0].wait()
+
+ def last_change(self, sha, path, historian=None):
+ if historian is not None:
+ return historian(path)
return self.repo.rev_list("--max-count=1",
sha, "--",
self._fs_from_unicode(path)).strip() or None
View
41 tracext/git/git_fs.py
@@ -387,8 +387,8 @@ def display_rev(self, rev):
def short_rev(self, rev):
return self.git.shortrev(self.normalize_rev(rev), min_len=self._shortrev_len)
- def get_node(self, path, rev=None):
- return GitNode(self, path, rev, self.log)
+ def get_node(self, path, rev=None, historian=None):
+ return GitNode(self, path, rev, self.log, None, historian)
def get_quickjump_entries(self, rev):
for bname, bsha in self.git.get_branches():
@@ -412,24 +412,26 @@ def get_changes(self, old_path, old_rev, new_path, new_rev, ignore_ancestry=0):
if old_path != new_path:
raise TracError("not supported in git_fs")
- for chg in self.git.diff_tree(old_rev, new_rev, self.normalize_path(new_path)):
- mode1, mode2, obj1, obj2, action, path, path2 = chg
+ with self.git.get_historian(old_rev, old_path.strip('/')) as old_historian:
+ with self.git.get_historian(new_rev, new_path.strip('/')) as new_historian:
+ for chg in self.git.diff_tree(old_rev, new_rev, self.normalize_path(new_path)):
+ mode1, mode2, obj1, obj2, action, path, path2 = chg
- kind = Node.FILE
- if mode2.startswith('04') or mode1.startswith('04'):
- kind = Node.DIRECTORY
+ kind = Node.FILE
+ if mode2.startswith('04') or mode1.startswith('04'):
+ kind = Node.DIRECTORY
- change = GitChangeset.action_map[action]
+ change = GitChangeset.action_map[action]
- old_node = None
- new_node = None
+ old_node = None
+ new_node = None
- if change != Changeset.ADD:
- old_node = self.get_node(path, old_rev)
- if change != Changeset.DELETE:
- new_node = self.get_node(path, new_rev)
+ if change != Changeset.ADD:
+ old_node = self.get_node(path, old_rev, old_historian)
+ if change != Changeset.DELETE:
+ new_node = self.get_node(path, new_rev, new_historian)
- yield old_node, new_node, kind, change
+ yield old_node, new_node, kind, change
def next_rev(self, rev, path=''):
return self.git.hist_next_revision(rev)
@@ -469,7 +471,7 @@ def sync(self, rev_callback=None, clean=None):
rev_callback(rev)
class GitNode(Node):
- def __init__(self, repos, path, rev, log, ls_tree_info=None):
+ def __init__(self, repos, path, rev, log, ls_tree_info=None, historian=None):
self.log = log
self.repos = repos
self.fs_sha = None # points to either tree or blobs
@@ -491,7 +493,7 @@ def __init__(self, repos, path, rev, log, ls_tree_info=None):
self.fs_perm, k, self.fs_sha, self.fs_size, _ = ls_tree_info
# fix-up to the last commit-rev that touched this node
- rev = repos.git.last_change(rev, p)
+ rev = repos.git.last_change(rev, p, historian)
if k == 'tree':
pass
@@ -537,8 +539,9 @@ def get_entries(self):
if not self.isdir:
return
- for ent in self.repos.git.ls_tree(self.rev, self.__git_path()):
- yield GitNode(self.repos, ent[-1], self.rev, self.log, ent)
+ with self.repos.git.get_historian(self.rev, self.path.strip('/')) as historian:
+ for ent in self.repos.git.ls_tree(self.rev, self.__git_path()):
+ yield GitNode(self.repos, ent[-1], self.rev, self.log, ent, historian)
def get_content_type(self):
if self.isdir:
Please sign in to comment.
Something went wrong with that request. Please try again.