Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

GitPlugin: significantly reduced memory usage of revision cache by cutting down usage of `set` objects in favor of tuples in cache data structures, and aggressively reusing sha strings
  • Loading branch information...
commit 4d39ea4491a5eac11cde087bc09883479adad247 1 parent d06a37a
Herbert Valerio Riedel authored February 24, 2008

Showing 1 changed file with 106 additions and 14 deletions. Show diff stats Hide diff stats

  1. 120  tracext/git/PyGIT.py
120  tracext/git/PyGIT.py
@@ -113,7 +113,7 @@ def getInstance(self):
113 113
         return self.__inst
114 114
 
115 115
 class Storage:
116  
-    __SREV_MIN = 6 # minimum short-rev length
  116
+    __SREV_MIN = 4 # minimum short-rev length
117 117
 
118 118
     @staticmethod
119 119
     def git_version():
@@ -184,15 +184,25 @@ def get_commits(self):
184 184
                 for revs in self.repo.rev_parse("--tags").readlines():
185 185
                     new_tags.add(revs.strip())
186 186
 
  187
+                # helper for reusing strings
  188
+                __rev_seen = {}
  189
+                def __rev_reuse(rev):
  190
+                    rev = str(rev)
  191
+                    return __rev_seen.setdefault(rev, rev)
  192
+
187 193
                 for revs in self.repo.rev_list("--parents", "--all").readlines():
188 194
                     revs = revs.strip().split()
189 195
 
  196
+                    revs = map(__rev_reuse, revs)
  197
+
190 198
                     rev = revs[0]
191 199
 
192 200
                     # shortrev "hash" map
193  
-                    new_sdb.setdefault(rev[:self.__SREV_MIN], set()).add(rev)
  201
+                    srev_key = int(rev[:self.__SREV_MIN], 16)
  202
+                    assert srev_key >= 0 and srev_key <= 0xffff
  203
+                    new_sdb.setdefault(srev_key, []).append(rev)
194 204
 
195  
-                    parents = set(revs[1:])
  205
+                    parents = tuple(revs[1:])
196 206
 
197 207
                     ord_rev += 1
198 208
 
@@ -205,23 +215,51 @@ def get_commits(self):
205 215
                         assert _children
206 216
                         assert not _parents
207 217
                         assert _ord_rev == 0
208  
-                        new_db[rev] = (_children, parents, ord_rev)
209 218
                     else:
210  
-                        new_db[rev] = (set(), parents, ord_rev)
  219
+                        _children = []
  220
+
  221
+                    # create/update entry
  222
+                    new_db[rev] = _children, parents, ord_rev
211 223
 
212 224
                     # update all parents(rev)'s children
213 225
                     for parent in parents:
214  
-                        if new_db.has_key(parent):
215  
-                            new_db[parent][0].add(rev)
216  
-                        else:
217  
-                            new_db[parent] = (set([rev]), set(), 0) # dummy ordinal_id
  226
+                        # by default, a dummy ordinal_id is used for the mean-time
  227
+                        _children, _parents, _ord_rev = new_db.setdefault(parent, ([], [], 0))
  228
+                        if rev not in _children:
  229
+                            _children.append(rev)
  230
+
  231
+                __rev_seen = None
  232
+
  233
+                # convert children lists to tuples
  234
+                tmp = {}
  235
+                while True:
  236
+                    try:
  237
+                        k,v = new_db.popitem()
  238
+                        tmp[k] = tuple(v[0]),v[1],v[2]
  239
+                    except KeyError:
  240
+                        break
  241
+                assert len(new_db) == 0
  242
+                new_db = tmp
218 243
 
  244
+                # convert sdb either to dict or array depending on size
  245
+                tmp = [()]*(max(new_sdb.keys())+1) if len(new_sdb) > 5000 else {}
  246
+
  247
+                while True:
  248
+                    try:
  249
+                        k,v = new_sdb.popitem()
  250
+                        tmp[k] = tuple(v)
  251
+                    except KeyError:
  252
+                        break
  253
+
  254
+                assert len(new_sdb) == 0
  255
+                new_sdb = tmp
  256
+
  257
+                # atomically update self._commit_db
219 258
                 self._commit_db = new_db, parent, new_tags, new_sdb
220 259
                 self.last_youngest_rev = youngest
221 260
                 self.logger.debug("rebuilt commit tree db for %d with %d entries" % (id(self),len(new_db)))
222 261
 
223  
-            assert self._commit_db[1] is not None
224  
-            assert self._commit_db[0] is not None
  262
+            assert all([ e is not None for e in self._commit_db])
225 263
 
226 264
             return self._commit_db[0]
227 265
 
@@ -315,7 +353,8 @@ def shortrev(self, rev):
315 353
             return rev
316 354
 
317 355
         srev = rev[:self.__SREV_MIN]
318  
-        srevs = sdb[srev]
  356
+        srev_key = int(srev, 16)
  357
+        srevs = set(sdb[srev_key])
319 358
 
320 359
         if len(srevs) == 1:
321 360
             return srev # we already got a unique id
@@ -428,7 +467,7 @@ def children_recursive(self, sha):
428 467
             p = work_list.popleft()
429 468
             yield p
430 469
 
431  
-            _children = db[p][0] - seen
  470
+            _children = set(db[p][0]) - seen
432 471
 
433 472
             seen.update(_children)
434 473
             work_list.extend(_children)
@@ -535,18 +574,54 @@ def __chg_tuple():
535 574
         if chg:
536 575
             yield __chg_tuple()
537 576
 
  577
+############################################################################
  578
+############################################################################
  579
+############################################################################
  580
+
538 581
 if __name__ == '__main__':
539 582
     import sys, logging, timeit
540 583
 
541 584
     print "git version [%s]" % str(Storage.git_version())
542 585
 
  586
+    # custom linux hack reading `/proc/<PID>/statm`
  587
+    if sys.platform == "linux2":
  588
+        __pagesize = os.sysconf('SC_PAGESIZE')
  589
+
  590
+        def proc_statm(pid = os.getpid()):
  591
+            __proc_statm = '/proc/%d/statm' % pid
  592
+            try:
  593
+                t = open(__proc_statm)
  594
+                result = t.read().split()
  595
+                t.close()
  596
+                assert len(result) == 7
  597
+                return tuple([ __pagesize*int(p) for p in result ])
  598
+            except:
  599
+                raise RuntimeError("failed to get memory stats")
  600
+
  601
+    else: # not linux2
  602
+        print "WARNING - meminfo.proc_statm() not available"
  603
+        def proc_statm():
  604
+            return (0,)*7
  605
+
  606
+    print "statm =", proc_statm()
  607
+    __data_size = proc_statm()[5]
  608
+
  609
+    def print_data_usage():
  610
+        print "DATA:", proc_statm()[5] - __data_size
  611
+
  612
+    print_data_usage()
  613
+
543 614
     g = Storage(sys.argv[1], logging)
544 615
 
  616
+    print_data_usage()
  617
+
545 618
     print "[%s]" % g.head()
546 619
     print g.ls_tree(g.head())
547 620
     print "--------------"
  621
+    print_data_usage()
548 622
     print g.read_commit(g.head())
549 623
     print "--------------"
  624
+    print_data_usage()
550 625
     p = g.parents(g.head())
551 626
     print list(p)
552 627
     print "--------------"
@@ -558,11 +633,12 @@ def __chg_tuple():
558 633
     print g.get_branches()
559 634
     print "--------------"
560 635
     print g.hist_prev_revision(g.oldest_rev()), g.oldest_rev(), g.hist_next_revision(g.oldest_rev())
561  
-
  636
+    print_data_usage()
562 637
     print "--------------"
563 638
     p = g.youngest_rev()
564 639
     print g.hist_prev_revision(p), p, g.hist_next_revision(p)
565 640
     print "--------------"
  641
+
566 642
     p = g.head()
567 643
     for i in range(-5,5):
568 644
         print i, g.history_relative_rev(p, i)
@@ -583,7 +659,9 @@ def check4loops(head):
583 659
 
584 660
     #p = g.head()
585 661
     #revs = [ g.history_relative_rev(p, i) for i in range(0,10) ]
  662
+    print_data_usage()
586 663
     revs = g.get_commits().keys()
  664
+    print_data_usage()
587 665
 
588 666
     def shortrev_test():
589 667
         for i in revs:
@@ -599,3 +677,17 @@ def shortrev_test():
599 677
     #print len(check4loops(g.oldest_rev()))
600 678
 
601 679
     #print len(list(g.children_recursive(g.oldest_rev())))
  680
+
  681
+    print_data_usage()
  682
+
  683
+    # perform typical trac operations:
  684
+
  685
+    if 0:
  686
+        print "--------------"
  687
+        rev = g.head()
  688
+        for mode,type,sha,name in g.ls_tree(rev):
  689
+            [last_rev] = g.history(rev, name, limit=1)
  690
+            s = g.get_obj_size(sha) if type == "blob" else 0
  691
+            msg = g.read_commit(last_rev)
  692
+
  693
+            print "%s %s %10d [%s]" % (type, last_rev, s, name)

0 notes on commit 4d39ea4

Please sign in to comment.
Something went wrong with that request. Please try again.