Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Loading…

Use git cat-file --batch and git log --name-status to avoid repeated fork+exec #12

Merged
hvr merged 3 commits into hvr:master

2 participants

@andersk

This significantly improves performance, especially for a Git repository stored on a network filesystem with many loose objects, by launching fewer copies of git and hence reading the repository fewer times.

andersk added some commits
@andersk andersk GitCore: Abstract out __pipe
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
aa80f45
@andersk andersk Use git cat-file --batch, to avoid repeated fork+exec
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
0283ffb
@andersk andersk last_change: Use git log --name-status to avoid repeated fork+exec
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
9541c2a
@hvr
Owner

Good work, thanks!

@hvr hvr merged commit 722342e into hvr:master
@hvr
Owner

@andersk Could you maybe take a look at http://trac-hacks.org/ticket/9560? This commit seems to have broken Python 2.5 support.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Nov 18, 2011
  1. @andersk

    GitCore: Abstract out __pipe

    andersk authored
    Signed-off-by: Anders Kaseorg <andersk@mit.edu>
  2. @andersk

    Use git cat-file --batch, to avoid repeated fork+exec

    andersk authored
    Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Commits on Nov 19, 2011
  1. @andersk

    last_change: Use git log --name-status to avoid repeated fork+exec

    andersk authored
    Signed-off-by: Anders Kaseorg <andersk@mit.edu>
This page is out of date. Refresh to see the latest.
Showing with 109 additions and 29 deletions.
  1. +87 −10 tracext/git/PyGIT.py
  2. +22 −19 tracext/git/git_fs.py
View
97 tracext/git/PyGIT.py
@@ -14,6 +14,7 @@
from threading import Lock
from subprocess import Popen, PIPE
from operator import itemgetter
+from contextlib import contextmanager
import cStringIO
import codecs
@@ -48,24 +49,34 @@ def __build_git_cmd(self, gitcmd, *args):
return cmd
+    def __pipe(self, git_cmd, *cmd_args, **kw):
+        """Start ``git <git_cmd> <cmd_args...>`` and return the Popen object.
+
+        Keyword arguments are forwarded to Popen unchanged.  On every
+        platform except win32, ``close_fds=True`` is passed as well.
+        """
+        argv = self.__build_git_cmd(git_cmd, *cmd_args)
+        if sys.platform != "win32":
+            return Popen(argv, close_fds=True, **kw)
+        return Popen(argv, **kw)
+
def __execute(self, git_cmd, *cmd_args):
"execute git command and return file-like object of stdout"
#print >>sys.stderr, "DEBUG:", git_cmd, cmd_args
- if sys.platform == "win32":
- p = Popen(self.__build_git_cmd(git_cmd, *cmd_args),
- stdin=None, stdout=PIPE, stderr=PIPE)
- else:
- p = Popen(self.__build_git_cmd(git_cmd, *cmd_args),
- stdin=None, stdout=PIPE, stderr=PIPE, close_fds=True)
+ # keyword arguments are written before *cmd_args: placing them after
+ # it is a syntax error before Python 2.6
+ p = self.__pipe(git_cmd, stdout=PIPE, stderr=PIPE, *cmd_args)
stdout_data, stderr_data = p.communicate()
#TODO, do something with p.returncode, e.g. raise exception
return stdout_data
+    def cat_file_batch(self):
+        "start 'git cat-file --batch' and return the Popen object; its stdin and stdout are pipes"
+        batch_io = dict(stdin=PIPE, stdout=PIPE)
+        return self.__pipe('cat-file', '--batch', **batch_io)
+
+    def log_pipe(self, *cmd_args):
+        "start 'git log' with the given arguments and return the Popen object; its stdout is a pipe"
+        # the keyword argument is written before *cmd_args: placing it after
+        # *cmd_args is a syntax error before Python 2.6
+        return self.__pipe('log', stdout=PIPE, *cmd_args)
+
def __getattr__(self, name):
+ if name[0] == '_' or name in ['cat_file_batch', 'log_pipe']:
+ raise AttributeError, name
return partial(self.__execute, name.replace('_','-'))
__is_sha_pat = re.compile(r'[0-9A-Fa-f]*$')
@@ -254,6 +265,13 @@ def __init__(self, git_dir, log, git_bin='git', git_fs_encoding=None):
self.__commit_msg_cache = SizedDict(200)
self.__commit_msg_lock = Lock()
+ self.__cat_file_pipe = None
+
+    def __del__(self):
+        "close the stdin of the 'git cat-file --batch' process, if one was started, and wait for it to exit"
+        batch = self.__cat_file_pipe
+        if batch is None:
+            return
+        batch.stdin.close()
+        batch.wait()
+
#
# cache handling
#
@@ -473,6 +491,20 @@ def head(self):
"get current HEAD commit id"
return self.verifyrev("HEAD")
+    def cat_file(self, kind, sha):
+        """Return the raw content of object `sha`, which must be of type `kind`.
+
+        The object is requested from a 'git cat-file --batch' process that is
+        started on first use and kept in `self.__cat_file_pipe` for later
+        calls.
+
+        Raises TracError if the reply header does not have the three fields
+        "<sha> <type> <size>" (e.g. "<sha> missing"), or if the reported type
+        differs from `kind`.
+        """
+        if self.__cat_file_pipe is None:
+            self.__cat_file_pipe = self.repo.cat_file_batch()
+
+        self.__cat_file_pipe.stdin.write(sha + '\n')
+        self.__cat_file_pipe.stdin.flush()
+        header = self.__cat_file_pipe.stdout.readline().split()
+
+        # a header without exactly three fields is not followed by content
+        if len(header) != 3:
+            raise TracError("internal error (unexpected reply from "
+                            "'git cat-file --batch' for '%s')" % sha)
+        _sha, _type, _size = header
+
+        # always consume the content and the newline that follows it, so the
+        # pipe is positioned at the next reply even if an error is raised below
+        size = int(_size)
+        content = self.__cat_file_pipe.stdout.read(size + 1)[:size]
+
+        if _type != kind:
+            raise TracError("internal error (got unexpected object kind '%s')" % _type)
+
+        return content
+
def verifyrev(self, rev):
"verify/lookup given revision object and return a sha id or None if lookup failed"
rev = str(rev)
@@ -494,7 +526,7 @@ def verifyrev(self, rev):
return rc
if rc in _rev_cache.tag_set:
- sha = self.repo.cat_file("tag", rc).split(None, 2)[:2]
+ sha = self.cat_file("tag", rc).split(None, 2)[:2]
if sha[0] != 'object':
self.logger.debug("unexpected result from 'git-cat-file tag %s'" % rc)
return None
@@ -604,7 +636,7 @@ def read_commit(self, commit_id):
return result[0], dict(result[1])
# cache miss
- raw = self.repo.cat_file("commit", commit_id)
+ raw = self.cat_file("commit", commit_id)
raw = unicode(raw, self.get_commit_encoding(), 'replace')
lines = raw.splitlines()
@@ -625,7 +657,7 @@ def read_commit(self, commit_id):
return result[0], dict(result[1])
def get_file(self, sha):
- return cStringIO.StringIO(self.repo.cat_file("blob", str(sha)))
+ return cStringIO.StringIO(self.cat_file("blob", str(sha)))
def get_obj_size(self, sha):
sha = str(sha)
@@ -685,7 +717,52 @@ def sync(self):
rev = self.repo.rev_list("--max-count=1", "--topo-order", "--all").strip()
return self.__rev_cache_sync(rev)
- def last_change(self, sha, path):
+    @contextmanager
+    def get_historian(self, sha, base_path):
+        """Context manager yielding a ``historian(path)`` lookup function.
+
+        ``historian(path)`` returns the id of the first commit in the output
+        of ``git log --name-status <sha> -- <base_path>`` that lists `path`
+        or something below it, or None if the log ends without one.  All
+        lookups share a single 'git log' process: it is started by the first
+        lookup that is not already cached, and its output is read only as far
+        as needed, with the results cached in `change`.
+
+        The 'git log' process is stopped when the ``with`` block is left,
+        including when it is left through an exception.
+        """
+        p = []          # holds the Popen object while 'git log' is running
+        change = {}     # path -> id of the first commit seen that lists it
+        next_path = []  # holds the path the pending historian() call asked for
+
+        def stop_log():
+            # NOTE: Popen.terminate() requires Python >= 2.6
+            if p:
+                p[0].stdout.close()
+                p[0].terminate()
+                p[0].wait()
+                p[:] = []
+
+        def name_status_gen():
+            p[:] = [self.repo.log_pipe('--pretty=format:%n%H', '--name-status',
+                                       sha, '--', base_path)]
+            f = p[0].stdout
+            for l in f:
+                if l == '\n': continue
+                old_sha = l.rstrip('\n')
+                # name-status lines of this commit, up to the next blank line
+                for l in f:
+                    if l == '\n': break
+                    _, path = l.rstrip('\n').split('\t', 1)
+                    # record the commit for the path and for each parent
+                    # directory that has not been seen yet
+                    while path not in change:
+                        change[path] = old_sha
+                        if next_path == [path]: yield old_sha
+                        try:
+                            path, _ = path.rsplit('/', 1)
+                        except ValueError:
+                            break
+            # log exhausted: every later lookup of an unknown path gives None
+            stop_log()
+            while True: yield None
+        gen = name_status_gen()
+
+        def historian(path):
+            try:
+                return change[path]
+            except KeyError:
+                next_path[:] = [path]
+                return gen.next()
+
+        try:
+            yield historian
+        finally:
+            # runs on normal exit and when the with-body raises
+            stop_log()
+
+ def last_change(self, sha, path, historian=None):
+ if historian is not None:
+ return historian(path)
return self.repo.rev_list("--max-count=1",
sha, "--",
self._fs_from_unicode(path)).strip() or None
View
41 tracext/git/git_fs.py
@@ -387,8 +387,8 @@ def display_rev(self, rev):
def short_rev(self, rev):
return self.git.shortrev(self.normalize_rev(rev), min_len=self._shortrev_len)
- def get_node(self, path, rev=None):
- return GitNode(self, path, rev, self.log)
+ def get_node(self, path, rev=None, historian=None):
+ return GitNode(self, path, rev, self.log, None, historian)
def get_quickjump_entries(self, rev):
for bname, bsha in self.git.get_branches():
@@ -412,24 +412,26 @@ def get_changes(self, old_path, old_rev, new_path, new_rev, ignore_ancestry=0):
if old_path != new_path:
raise TracError("not supported in git_fs")
- for chg in self.git.diff_tree(old_rev, new_rev, self.normalize_path(new_path)):
- mode1, mode2, obj1, obj2, action, path, path2 = chg
+ with self.git.get_historian(old_rev, old_path.strip('/')) as old_historian:
+ with self.git.get_historian(new_rev, new_path.strip('/')) as new_historian:
+ for chg in self.git.diff_tree(old_rev, new_rev, self.normalize_path(new_path)):
+ mode1, mode2, obj1, obj2, action, path, path2 = chg
- kind = Node.FILE
- if mode2.startswith('04') or mode1.startswith('04'):
- kind = Node.DIRECTORY
+ kind = Node.FILE
+ if mode2.startswith('04') or mode1.startswith('04'):
+ kind = Node.DIRECTORY
- change = GitChangeset.action_map[action]
+ change = GitChangeset.action_map[action]
- old_node = None
- new_node = None
+ old_node = None
+ new_node = None
- if change != Changeset.ADD:
- old_node = self.get_node(path, old_rev)
- if change != Changeset.DELETE:
- new_node = self.get_node(path, new_rev)
+ if change != Changeset.ADD:
+ old_node = self.get_node(path, old_rev, old_historian)
+ if change != Changeset.DELETE:
+ new_node = self.get_node(path, new_rev, new_historian)
- yield old_node, new_node, kind, change
+ yield old_node, new_node, kind, change
def next_rev(self, rev, path=''):
return self.git.hist_next_revision(rev)
@@ -469,7 +471,7 @@ def sync(self, rev_callback=None, clean=None):
rev_callback(rev)
class GitNode(Node):
- def __init__(self, repos, path, rev, log, ls_tree_info=None):
+ def __init__(self, repos, path, rev, log, ls_tree_info=None, historian=None):
self.log = log
self.repos = repos
self.fs_sha = None # points to either tree or blobs
@@ -491,7 +493,7 @@ def __init__(self, repos, path, rev, log, ls_tree_info=None):
self.fs_perm, k, self.fs_sha, self.fs_size, _ = ls_tree_info
# fix-up to the last commit-rev that touched this node
- rev = repos.git.last_change(rev, p)
+ rev = repos.git.last_change(rev, p, historian)
if k == 'tree':
pass
@@ -537,8 +539,9 @@ def get_entries(self):
if not self.isdir:
return
- for ent in self.repos.git.ls_tree(self.rev, self.__git_path()):
- yield GitNode(self.repos, ent[-1], self.rev, self.log, ent)
+ with self.repos.git.get_historian(self.rev, self.path.strip('/')) as historian:
+ for ent in self.repos.git.ls_tree(self.rev, self.__git_path()):
+ yield GitNode(self.repos, ent[-1], self.rev, self.log, ent, historian)
def get_content_type(self):
if self.isdir:
Something went wrong with that request. Please try again.