Skip to content
This repository has been archived by the owner on May 22, 2019. It is now read-only.

Commit

Permalink
Add reachable heads for each revision to revision cache
Browse files Browse the repository at this point in the history
Also switch to `git rev-list --topo-order` traversal in order to avoid
a 2nd pass in Python.
  • Loading branch information
hvr committed Oct 17, 2010
1 parent 23afc8e commit 694f993
Showing 1 changed file with 71 additions and 54 deletions.
125 changes: 71 additions & 54 deletions tracext/git/PyGIT.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,12 @@ def getInstance(self):
% (("","weak ")[is_weak], id(self.__inst), self.__repo))
return self.__inst


class Storage:
"""
High-level wrapper around GitCore with in-memory caching
"""

__SREV_MIN = 4 # minimum short-rev length

@staticmethod
Expand Down Expand Up @@ -236,88 +241,96 @@ def __rev_cache_sync(self, youngest_rev=None):
return need_update

def get_rev_cache(self):
"""
Retrieve revision cache
may rebuild cache on the fly if required
returns tuple(youngest_rev, oldest_rev, rev_dict, tag_dict, short_rev_dict, branch_dict)
"""

with self.__rev_cache_lock:
if self.__rev_cache is None: # can be cleared by Storage.__rev_cache_sync()
self.logger.debug("triggered rebuild of commit tree db for %d" % id(self))
new_db = {}
new_sdb = {}
new_tags = set([])

youngest = None
oldest = None
for revs in self.repo.rev_parse("--tags").splitlines():
new_tags.add(revs.strip())
new_db = {} # db
new_sdb = {} # short_rev db

# helper for reusing strings
__rev_seen = {}
def __rev_reuse(rev):
rev = str(rev)
return __rev_seen.setdefault(rev, rev)

rev = ord_rev = 0
for revs in self.repo.rev_list("--parents", "--all").splitlines():
revs = revs.strip().split()
new_tags = set(__rev_reuse(rev.strip()) for rev in self.repo.rev_parse("--tags").splitlines())

revs = map(__rev_reuse, revs)
new_branches = [(k, __rev_reuse(v)) for k, v in self._get_branches()]
head_revs = set(v for _, v in new_branches)

rev = ord_rev = 0
for ord_rev, revs in enumerate(self.repo.rev_list("--parents",
"--topo-order",
"--all").splitlines()):
revs = map(__rev_reuse, revs.strip().split())

rev = revs[0]

# first rev seen is assumed to be the youngest one
if not ord_rev:
youngest = rev

# shortrev "hash" map
srev_key = self.__rev_key(rev)
new_sdb.setdefault(srev_key, []).append(rev)

# parents
parents = tuple(revs[1:])

ord_rev += 1

# first rev seen is assumed to be the youngest one (and has ord_rev=1)
if not youngest:
youngest = rev

# new_db[rev] = (children(rev), parents(rev), ordinal_id(rev))
if new_db.has_key(rev):
_children,_parents,_ord_rev = new_db[rev]
# new_db[rev] = (children(rev), parents(rev), ordinal_id(rev), rheads(rev))
if rev in new_db:
# (incomplete) entry was already created by children
_children, _parents, _ord_rev, _rheads = new_db[rev]
assert _children
assert not _parents
assert _ord_rev == 0
else:

if rev in head_revs and rev not in _rheads:
_rheads.append(rev)

else: # new entry
_children = []
_rheads = [rev] if rev in head_revs else []

# create/update entry
new_db[rev] = _children, parents, ord_rev
# create/update entry -- transform lists into tuples since entry will be final
new_db[rev] = tuple(_children), tuple(parents), ord_rev + 1, tuple(_rheads)

# update all parents(rev)'s children
# update parents(rev)s
for parent in parents:
# by default, a dummy ordinal_id is used for the mean-time
_children, _parents, _ord_rev = new_db.setdefault(parent, ([], [], 0))
_children, _parents, _ord_rev, _rheads2 = new_db.setdefault(parent, ([], [], 0, []))

# update parent(rev)'s children
if rev not in _children:
_children.append(rev)

# update parent(rev)'s rheads
for rev in _rheads:
if rev not in _rheads2:
_rheads2.append(rev)

# last rev seen is assumed to be the oldest one (with highest ord_rev)
oldest = rev

__rev_seen = None

assert len(new_db) == ord_rev

# convert children lists to tuples
tmp = {}
try:
while True:
k,v = new_db.popitem()
assert v[2] > 0
tmp[k] = tuple(v[0]),v[1],v[2]
except KeyError:
pass

assert len(new_db) == 0
new_db = tmp

# convert sdb either to dict or array depending on size
tmp = [()]*(max(new_sdb.keys())+1) if len(new_sdb) > 5000 else {}

try:
while True:
k,v = new_sdb.popitem()
k, v = new_sdb.popitem()
tmp[k] = tuple(v)
except KeyError:
pass
Expand All @@ -326,17 +339,32 @@ def __rev_reuse(rev):
new_sdb = tmp

# atomically update self.__rev_cache
self.__rev_cache = youngest, oldest, new_db, new_tags, new_sdb
self.logger.debug("rebuilt commit tree db for %d with %d entries" % (id(self),len(new_db)))
self.__rev_cache = youngest, oldest, new_db, new_tags, new_sdb, new_branches
self.logger.debug("rebuilt commit tree db for %d with %d entries" % (id(self), len(new_db)))

assert all(e is not None for e in self.__rev_cache) or not any(self.__rev_cache)

return self.__rev_cache
# with self.__rev_cache_lock

# tuple: youngest_rev, oldest_rev, rev_dict, tag_dict, short_rev_dict
# tuple: youngest_rev, oldest_rev, rev_dict, tag_dict, short_rev_dict, branch_dict
rev_cache = property(get_rev_cache)

def _get_branches(self):
"returns list of (local) branches, with active (= HEAD) one being the first item"
result=[]
for e in self.repo.branch("-v", "--no-abbrev").splitlines():
(bname,bsha)=e[1:].strip().split()[:2]
if e.startswith('*'):
result.insert(0,(bname,bsha))
else:
result.append((bname,bsha))
return result

def get_branches(self):
"returns list of (local) branches, with active (= HEAD) one being the first item"
return self.rev_cache[5]

def get_commits(self):
return self.rev_cache[2]

Expand Down Expand Up @@ -467,17 +495,6 @@ def fullrev(self, srev):

return None

def get_branches(self):
"returns list of (local) branches, with active (= HEAD) one being the first item"
result=[]
for e in self.repo.branch("-v", "--no-abbrev").splitlines():
(bname,bsha)=e[1:].strip().split()[:2]
if e.startswith('*'):
result.insert(0,(bname,bsha))
else:
result.append((bname,bsha))
return result

def get_tags(self):
return [e.strip() for e in self.repo.tag("-l").splitlines()]

Expand Down Expand Up @@ -594,7 +611,7 @@ def all_revs(self):
return self.get_commits().iterkeys()

def sync(self):
rev = self.repo.rev_list("--max-count=1", "--all").strip()
rev = self.repo.rev_list("--max-count=1", "--topo-order", "--all").strip()
return self.__rev_cache_sync(rev)

def last_change(self, sha, path):
Expand Down

0 comments on commit 694f993

Please sign in to comment.