Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Use git cat-file --batch and git log --name-status to avoid repeated fork+exec #12

Merged
merged 3 commits over 2 years ago

2 participants

Anders Kaseorg Herbert Valerio Riedel
Anders Kaseorg

This significantly improves performance, especially for a Git repository stored on a network filesystem with many loose objects, by launching fewer copies of git and hence reading the repository fewer times.

added some commits October 20, 2011
Anders Kaseorg GitCore: Abstract out __pipe
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
aa80f45
Anders Kaseorg Use git cat-file --batch, to avoid repeated fork+exec
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
0283ffb
Anders Kaseorg last_change: Use git log --name-status to avoid repeated fork+exec
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
9541c2a
Herbert Valerio Riedel
Owner

Good work, thanks!

Herbert Valerio Riedel (hvr) merged commit 722342e on November 19, 2011
Herbert Valerio Riedel hvr closed this November 19, 2011
Herbert Valerio Riedel
Owner

@andersk Could you maybe take a look at http://trac-hacks.org/ticket/9560? This commit seems to have broken Python 2.5 support.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 3 unique commits by 1 author.

Nov 18, 2011
Anders Kaseorg GitCore: Abstract out __pipe
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
aa80f45
Anders Kaseorg Use git cat-file --batch, to avoid repeated fork+exec
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
0283ffb
Anders Kaseorg last_change: Use git log --name-status to avoid repeated fork+exec
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
9541c2a
This page is out of date. Refresh to see the latest.
97  tracext/git/PyGIT.py
@@ -14,6 +14,7 @@
14 14
 from threading import Lock
15 15
 from subprocess import Popen, PIPE
16 16
 from operator import itemgetter
  17
+from contextlib import contextmanager
17 18
 import cStringIO
18 19
 import codecs
19 20
 
@@ -48,24 +49,34 @@ def __build_git_cmd(self, gitcmd, *args):
48 49
 
49 50
         return cmd
50 51
 
  52
+    def __pipe(self, git_cmd, *cmd_args, **kw):
  53
+        if sys.platform == "win32":
  54
+            return Popen(self.__build_git_cmd(git_cmd, *cmd_args), **kw)
  55
+        else:
  56
+            return Popen(self.__build_git_cmd(git_cmd, *cmd_args),
  57
+                         close_fds=True, **kw)
  58
+
51 59
     def __execute(self, git_cmd, *cmd_args):
52 60
         "execute git command and return file-like object of stdout"
53 61
 
54 62
         #print >>sys.stderr, "DEBUG:", git_cmd, cmd_args
55 63
 
56  
-        if sys.platform == "win32":
57  
-            p = Popen(self.__build_git_cmd(git_cmd, *cmd_args),
58  
-                      stdin=None, stdout=PIPE, stderr=PIPE)
59  
-        else:
60  
-            p = Popen(self.__build_git_cmd(git_cmd, *cmd_args),
61  
-                      stdin=None, stdout=PIPE, stderr=PIPE, close_fds=True)
  64
+        p = self.__pipe(git_cmd, *cmd_args, stdout=PIPE, stderr=PIPE)
62 65
 
63 66
         stdout_data, stderr_data = p.communicate()
64 67
         #TODO, do something with p.returncode, e.g. raise exception
65 68
 
66 69
         return stdout_data
67 70
 
  71
+    def cat_file_batch(self):
  72
+        return self.__pipe('cat-file', '--batch', stdin=PIPE, stdout=PIPE)
  73
+
  74
+    def log_pipe(self, *cmd_args):
  75
+        return self.__pipe('log', *cmd_args, stdout=PIPE)
  76
+
68 77
     def __getattr__(self, name):
  78
+        if name[0] == '_' or name in ['cat_file_batch', 'log_pipe']:
  79
+            raise AttributeError, name
69 80
         return partial(self.__execute, name.replace('_','-'))
70 81
 
71 82
     __is_sha_pat = re.compile(r'[0-9A-Fa-f]*$')
@@ -254,6 +265,13 @@ def __init__(self, git_dir, log, git_bin='git', git_fs_encoding=None):
254 265
         self.__commit_msg_cache = SizedDict(200)
255 266
         self.__commit_msg_lock = Lock()
256 267
 
  268
+        self.__cat_file_pipe = None
  269
+
  270
+    def __del__(self):
  271
+        if self.__cat_file_pipe is not None:
  272
+            self.__cat_file_pipe.stdin.close()
  273
+            self.__cat_file_pipe.wait()
  274
+
257 275
     #
258 276
     # cache handling
259 277
     #
@@ -473,6 +491,20 @@ def head(self):
473 491
         "get current HEAD commit id"
474 492
         return self.verifyrev("HEAD")
475 493
 
  494
+    def cat_file(self, kind, sha):
  495
+        if self.__cat_file_pipe is None:
  496
+            self.__cat_file_pipe = self.repo.cat_file_batch()
  497
+
  498
+        self.__cat_file_pipe.stdin.write(sha + '\n')
  499
+        self.__cat_file_pipe.stdin.flush()
  500
+        _sha, _type, _size = self.__cat_file_pipe.stdout.readline().split()
  501
+
  502
+        if _type != kind:
  503
+            raise TracError("internal error (got unexpected object kind '%s')" % k)
  504
+
  505
+        size = int(_size)
  506
+        return self.__cat_file_pipe.stdout.read(size + 1)[:size]
  507
+
476 508
     def verifyrev(self, rev):
477 509
         "verify/lookup given revision object and return a sha id or None if lookup failed"
478 510
         rev = str(rev)
@@ -494,7 +526,7 @@ def verifyrev(self, rev):
494 526
             return rc
495 527
 
496 528
         if rc in _rev_cache.tag_set:
497  
-            sha = self.repo.cat_file("tag", rc).split(None, 2)[:2]
  529
+            sha = self.cat_file("tag", rc).split(None, 2)[:2]
498 530
             if sha[0] != 'object':
499 531
                 self.logger.debug("unexpected result from 'git-cat-file tag %s'" % rc)
500 532
                 return None
@@ -604,7 +636,7 @@ def read_commit(self, commit_id):
604 636
                 return result[0], dict(result[1])
605 637
 
606 638
             # cache miss
607  
-            raw = self.repo.cat_file("commit", commit_id)
  639
+            raw = self.cat_file("commit", commit_id)
608 640
             raw = unicode(raw, self.get_commit_encoding(), 'replace')
609 641
             lines = raw.splitlines()
610 642
 
@@ -625,7 +657,7 @@ def read_commit(self, commit_id):
625 657
             return result[0], dict(result[1])
626 658
 
627 659
     def get_file(self, sha):
628  
-        return cStringIO.StringIO(self.repo.cat_file("blob", str(sha)))
  660
+        return cStringIO.StringIO(self.cat_file("blob", str(sha)))
629 661
 
630 662
     def get_obj_size(self, sha):
631 663
         sha = str(sha)
@@ -685,7 +717,52 @@ def sync(self):
685 717
         rev = self.repo.rev_list("--max-count=1", "--topo-order", "--all").strip()
686 718
         return self.__rev_cache_sync(rev)
687 719
 
688  
-    def last_change(self, sha, path):
  720
+    @contextmanager
  721
+    def get_historian(self, sha, base_path):
  722
+        p = []
  723
+        change = {}
  724
+        next_path = []
  725
+
  726
+        def name_status_gen():
  727
+            p[:] = [self.repo.log_pipe('--pretty=format:%n%H', '--name-status',
  728
+                                       sha, '--', base_path)]
  729
+            f = p[0].stdout
  730
+            for l in f:
  731
+                if l == '\n': continue
  732
+                old_sha = l.rstrip('\n')
  733
+                for l in f:
  734
+                    if l == '\n': break
  735
+                    _, path = l.rstrip('\n').split('\t', 1)
  736
+                    while path not in change:
  737
+                        change[path] = old_sha
  738
+                        if next_path == [path]: yield old_sha
  739
+                        try:
  740
+                            path, _ = path.rsplit('/', 1)
  741
+                        except ValueError:
  742
+                            break
  743
+            f.close()
  744
+            p[0].terminate()
  745
+            p[0].wait()
  746
+            p[:] = []
  747
+            while True: yield None
  748
+        gen = name_status_gen()
  749
+
  750
+        def historian(path):
  751
+            try:
  752
+                return change[path]
  753
+            except KeyError:
  754
+                next_path[:] = [path]
  755
+                return gen.next()
  756
+        yield historian
  757
+
  758
+        if p:
  759
+            p[0].stdout.close()
  760
+            p[0].terminate()
  761
+            p[0].wait()
  762
+
  763
+    def last_change(self, sha, path, historian=None):
  764
+        if historian is not None:
  765
+            return historian(path)
689 766
         return self.repo.rev_list("--max-count=1",
690 767
                                   sha, "--",
691 768
                                   self._fs_from_unicode(path)).strip() or None
41  tracext/git/git_fs.py
@@ -387,8 +387,8 @@ def display_rev(self, rev):
387 387
     def short_rev(self, rev):
388 388
         return self.git.shortrev(self.normalize_rev(rev), min_len=self._shortrev_len)
389 389
 
390  
-    def get_node(self, path, rev=None):
391  
-        return GitNode(self, path, rev, self.log)
  390
+    def get_node(self, path, rev=None, historian=None):
  391
+        return GitNode(self, path, rev, self.log, None, historian)
392 392
 
393 393
     def get_quickjump_entries(self, rev):
394 394
         for bname, bsha in self.git.get_branches():
@@ -412,24 +412,26 @@ def get_changes(self, old_path, old_rev, new_path, new_rev, ignore_ancestry=0):
412 412
         if old_path != new_path:
413 413
             raise TracError("not supported in git_fs")
414 414
 
415  
-        for chg in self.git.diff_tree(old_rev, new_rev, self.normalize_path(new_path)):
416  
-            mode1, mode2, obj1, obj2, action, path, path2 = chg
  415
+        with self.git.get_historian(old_rev, old_path.strip('/')) as old_historian:
  416
+            with self.git.get_historian(new_rev, new_path.strip('/')) as new_historian:
  417
+                for chg in self.git.diff_tree(old_rev, new_rev, self.normalize_path(new_path)):
  418
+                    mode1, mode2, obj1, obj2, action, path, path2 = chg
417 419
 
418  
-            kind = Node.FILE
419  
-            if mode2.startswith('04') or mode1.startswith('04'):
420  
-                kind = Node.DIRECTORY
  420
+                    kind = Node.FILE
  421
+                    if mode2.startswith('04') or mode1.startswith('04'):
  422
+                        kind = Node.DIRECTORY
421 423
 
422  
-            change = GitChangeset.action_map[action]
  424
+                    change = GitChangeset.action_map[action]
423 425
 
424  
-            old_node = None
425  
-            new_node = None
  426
+                    old_node = None
  427
+                    new_node = None
426 428
 
427  
-            if change != Changeset.ADD:
428  
-                old_node = self.get_node(path, old_rev)
429  
-            if change != Changeset.DELETE:
430  
-                new_node = self.get_node(path, new_rev)
  429
+                    if change != Changeset.ADD:
  430
+                        old_node = self.get_node(path, old_rev, old_historian)
  431
+                    if change != Changeset.DELETE:
  432
+                        new_node = self.get_node(path, new_rev, new_historian)
431 433
 
432  
-            yield old_node, new_node, kind, change
  434
+                    yield old_node, new_node, kind, change
433 435
 
434 436
     def next_rev(self, rev, path=''):
435 437
         return self.git.hist_next_revision(rev)
@@ -469,7 +471,7 @@ def sync(self, rev_callback=None, clean=None):
469 471
                 rev_callback(rev)
470 472
 
471 473
 class GitNode(Node):
472  
-    def __init__(self, repos, path, rev, log, ls_tree_info=None):
  474
+    def __init__(self, repos, path, rev, log, ls_tree_info=None, historian=None):
473 475
         self.log = log
474 476
         self.repos = repos
475 477
         self.fs_sha = None # points to either tree or blobs
@@ -491,7 +493,7 @@ def __init__(self, repos, path, rev, log, ls_tree_info=None):
491 493
             self.fs_perm, k, self.fs_sha, self.fs_size, _ = ls_tree_info
492 494
 
493 495
             # fix-up to the last commit-rev that touched this node
494  
-            rev = repos.git.last_change(rev, p)
  496
+            rev = repos.git.last_change(rev, p, historian)
495 497
 
496 498
             if k == 'tree':
497 499
                 pass
@@ -537,8 +539,9 @@ def get_entries(self):
537 539
         if not self.isdir:
538 540
             return
539 541
 
540  
-        for ent in self.repos.git.ls_tree(self.rev, self.__git_path()):
541  
-            yield GitNode(self.repos, ent[-1], self.rev, self.log, ent)
  542
+        with self.repos.git.get_historian(self.rev, self.path.strip('/')) as historian:
  543
+            for ent in self.repos.git.ls_tree(self.rev, self.__git_path()):
  544
+                yield GitNode(self.repos, ent[-1], self.rev, self.log, ent, historian)
542 545
 
543 546
     def get_content_type(self):
544 547
         if self.isdir:
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.