Skip to content

Commit

Permalink
Refactor to prepare for caching overhaul. Refs #248
Browse files Browse the repository at this point in the history
  • Loading branch information
jonashaag committed Mar 28, 2020
1 parent 5b697de commit e21cf6a
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 100 deletions.
157 changes: 61 additions & 96 deletions klaus/repo.py
Expand Up @@ -17,50 +17,44 @@
NOT_SET = '__not_set__'


def cached_call(key, validator, producer, _cache={}):
data, old_validator = _cache.get(key, (None, NOT_SET))
if old_validator != validator:
data = producer()
_cache[key] = (data, validator)
return data


class FancyRepo(dulwich.repo.Repo):
"""A wrapper around Dulwich's Repo that adds some helper methods."""
@property
def name(self):
return repo_human_name(self.path)

def getref(self, k, default=NOT_SET):
try:
return self[k]
except KeyError:
if default is not NOT_SET:
return default
else:
raise

def get_refs_as_dict(self, base=None):
return self.refs.as_dict(base)

def get_resolved_refs_as_dict(self, base=None, resolve_default=NOT_SET):
res = {}
for k, v in self.get_refs_as_dict(base).items():
v = self.getref(v, default=None)
if v is None and resolve_default is NOT_SET:
# Skip unresolvable refs when no default is given.
pass
else:
res[k] = v or resolve_default
return res

# TODO: factor out stuff into dulwich
def get_last_updated_at(self):
"""Get datetime of last commit to this repository."""
# Cache result to speed up repo_list.html template.
# If self.get_refs() has changed, we should invalidate the cache.
all_refs = self.get_refs()
return cached_call(
key=(id(self), 'get_last_updated_at'),
validator=all_refs,
producer=lambda: self._get_last_updated_at(all_refs)
)

def _get_last_updated_at(self, all_refs):
resolveable_refs = []
for ref_hash in all_refs:
try:
resolveable_refs.append(self[ref_hash])
except KeyError:
# Whoops. The ref points at a non-existant object
pass
resolveable_refs.sort(
key=lambda obj:getattr(obj, 'commit_time', float('-inf')),
reverse=True
)
for ref in resolveable_refs:
# Find the latest ref that has a commit_time; tags do not
# have a commit time
if hasattr(ref, "commit_time"):
return ref.commit_time
return None
commit_times = [getattr(obj, 'commit_time', float('-inf'))
for obj in self.get_resolved_refs_as_dict().values()]
if commit_times:
return max(commit_times)
else:
return None

@property
def cloneurl(self):
Expand All @@ -78,21 +72,6 @@ def get_description(self):
"""Like Dulwich's `get_description`, but returns None if the file
contains Git's default text "Unnamed repository[...]".
"""
# Cache result to speed up repo_list.html template.
# If description file mtime has changed, we should invalidate the cache.
description_file = os.path.join(self._controldir, 'description')
try:
description_mtime = os.stat(os.path.join(self._controldir, 'description')).st_mtime
except OSError:
description_mtime = None

return cached_call(
key=(id(self), 'get_description'),
validator=description_mtime,
producer=self._get_description
)

def _get_description(self):
description = super(FancyRepo, self).get_description()
if description:
description = force_unicode(description)
Expand All @@ -104,9 +83,9 @@ def get_commit(self, rev):
for prefix in ['refs/heads/', 'refs/tags/', '']:
key = prefix + rev
try:
obj = self[encode_for_git(key)]
obj = self.getref(encode_for_git(key))
if isinstance(obj, dulwich.objects.Tag):
obj = self[obj.object[1]]
obj = self.getref(obj.object[1])
return obj
except KeyError:
pass
Expand All @@ -129,22 +108,27 @@ def get_ref_names_ordered_by_last_commit(self, prefix, exclude=None):
"""Return a list of ref names that begin with `prefix`, ordered by the
time they have been committed to last.
"""
def get_commit_time(refname):
try:
obj = self[refs[refname]]
except KeyError:
# Default to 0, i.e. sorting refs that point at non-existant
# objects last.
def get_commit_time(obj):
if obj is None:
# Put refs that point to non-existent objects last.
return 0
if isinstance(obj, dulwich.objects.Tag):
elif isinstance(obj, dulwich.objects.Tag):
return obj.tag_time
return obj.commit_time
else:
return obj.commit_time

refs = self.refs.as_dict(encode_for_git(prefix))
refs = self.get_resolved_refs_as_dict(
encode_for_git(prefix),
resolve_default=None
)
if exclude:
refs.pop(prefix + exclude, None)
sorted_names = sorted(refs.keys(), key=get_commit_time, reverse=True)
return [decode_from_git(ref) for ref in sorted_names]
sorted_refs = sorted(
refs.items(),
key=lambda item: get_commit_time(item[1]),
reverse=True
)
return [decode_from_git(name) for name, _ in sorted_refs]

def get_branch_names(self, exclude=None):
"""Return a list of branch names of this repo, ordered by the time they
Expand All @@ -158,8 +142,8 @@ def get_tag_names(self):

def get_tag_and_branch_shas(self):
"""Return a list of SHAs of all tags and branches."""
tag_shas = self.refs.as_dict(b'refs/tags/').values()
branch_shas = self.refs.as_dict(b'refs/heads/').values()
tag_shas = self.get_refs_as_dict('refs/tags/').values()
branch_shas = self.get_refs_as_dict('refs/heads/').values()
return set(tag_shas) | set(branch_shas)

def history(self, commit, path=None, max_commits=None, skip=0):
Expand Down Expand Up @@ -187,7 +171,7 @@ def history(self, commit, path=None, max_commits=None, skip=0):

output = subprocess.check_output(cmd, cwd=os.path.abspath(self.path))
sha1_sums = output.strip().split(b'\n')
return [self[sha1] for sha1 in sha1_sums]
return [self.getref(sha1) for sha1 in sha1_sums]

def blame(self, commit, path):
"""Return a 'git blame' list for the file at `path`: For each line in
Expand All @@ -197,18 +181,24 @@ def blame(self, commit, path):
cmd = ['git', 'blame', '-ls', '--root', decode_from_git(commit.id), '--', path]
output = subprocess.check_output(cmd, cwd=os.path.abspath(self.path))
sha1_sums = [line[:40] for line in output.strip().split(b'\n')]
return [None if self[sha1] is None else decode_from_git(self[sha1].id) for sha1 in sha1_sums]
lines = []
for sha1 in sha1_sums:
obj = self.getref(sha1, None)
if obj is not None:
obj = decode_from_git(obj.id)
lines.append(obj)
return lines

def get_blob_or_tree(self, commit, path):
"""Return the Git tree or blob object for `path` at `commit`."""
try:
(mode, oid) = tree_lookup_path(self.__getitem__, commit.tree,
(mode, oid) = tree_lookup_path(self.getref, commit.tree,
encode_for_git(path))
except NotTreeError:
# Some part of the path was a file where a folder was expected.
# Example: path="/path/to/foo.txt" but "to" is a file in "/path".
raise KeyError
return self[oid]
return self.getref(oid)

def listdir(self, commit, path):
"""Return a list of submodules, directories and files in given
Expand Down Expand Up @@ -243,7 +233,7 @@ def commit_diff(self, commit):
from klaus.utils import guess_is_binary

if commit.parents:
parent_tree = self[commit.parents[0]].tree
parent_tree = self.getref(commit.parents[0]).tree
else:
parent_tree = None

Expand Down Expand Up @@ -290,38 +280,13 @@ def commit_diff(self, commit):

def raw_commit_diff(self, commit):
if commit.parents:
parent_tree = self[commit.parents[0]].tree
parent_tree = self.getref(commit.parents[0]).tree
else:
parent_tree = None
bytesio = io.BytesIO()
dulwich.patch.write_tree_diff(bytesio, self.object_store, parent_tree, commit.tree)
return bytesio.getvalue()

def freeze(self):
return FrozenFancyRepo(self)


class FrozenFancyRepo(object):
"""A special version of FancyRepo that assumes the underlying Git
repository does not change. Used for performance optimizations.
"""
def __init__(self, repo):
self.__repo = repo
self.__last_updated_at = NOT_SET

def __setattr__(self, name, value):
if not name.startswith('_FrozenFancyRepo__'):
raise TypeError("Can't set %s attribute on FrozenFancyRepo" % name)
super(FrozenFancyRepo, self).__setattr__(name, value)

def __getattr__(self, name):
return getattr(self.__repo, name)

def fast_get_last_updated_at(self):
if self.__last_updated_at is NOT_SET:
self.__last_updated_at = self.__repo.get_last_updated_at()
return self.__last_updated_at


class InvalidRepo:
"""Represent an invalid repository and store pertinent data."""
Expand Down
2 changes: 1 addition & 1 deletion klaus/templates/repo_list.html
Expand Up @@ -16,7 +16,7 @@ <h2>
</h2>
<ul class=repolist>
{% for repo in repos %}
{% set last_updated_at = repo.fast_get_last_updated_at() %}
{% set last_updated_at = repo.get_last_updated_at() %}
{% set description = repo.get_description() %}
<li>
<a
Expand Down
5 changes: 2 additions & 3 deletions klaus/views.py
Expand Up @@ -37,9 +37,8 @@ def repo_list():
sort_key = lambda repo: repo.name
else:
order_by = 'last_updated'
sort_key = lambda repo: (-(repo.fast_get_last_updated_at() or -1), repo.name)
repos = sorted([repo.freeze() for repo in current_app.valid_repos.values()],
key=sort_key)
sort_key = lambda repo: (-(repo.get_last_updated_at() or -1), repo.name)
repos = sorted(current_app.valid_repos.values(), key=sort_key)
invalid_repos = sorted(current_app.invalid_repos.values(), key=lambda repo: repo.name)
return render_template('repo_list.html', repos=repos, invalid_repos=invalid_repos,
order_by=order_by, base_href=None)
Expand Down

0 comments on commit e21cf6a

Please sign in to comment.