Skip to content

Commit

Permalink
RF: Reimplement GitRepo.get_branch_commits() with git rev-list
Browse files Browse the repository at this point in the history
and add '_' function name suffix to indicate its generator-behavior.

This furthers #2970
  • Loading branch information
mih committed Feb 20, 2020
1 parent bc05be3 commit f2a0ee1
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 102 deletions.
6 changes: 3 additions & 3 deletions datalad/distribution/publish.py
Expand Up @@ -330,7 +330,7 @@ def _publish_dataset(ds, remote, refspec, paths, annex_copy_options, force=False
# changes
if not diff and is_annex_repo:
try:
git_annex_commit = next(ds.repo.get_branch_commits('git-annex'))
git_annex_commit = next(ds.repo.get_branch_commits_('git-annex'))
except StopIteration:
git_annex_commit = None
#diff = _get_remote_diff(ds, [], git_annex_commit, remote, 'git-annex')
Expand Down Expand Up @@ -547,7 +547,7 @@ def _get_remote_diff(ds, current_commit, remote, remote_branch_name):
lgr.debug("Testing for changes with respect to '%s' of remote '%s'",
remote_branch_name, remote)
if current_commit is None:
current_commit = ds.repo.repo.commit()
current_commit = ds.repo.get_hexsha()
remote_ref = ds.repo.repo.remotes[remote].refs[remote_branch_name]
# XXX: ATM nothing calls this function with a non-empty `paths` arg
#if paths:
Expand All @@ -560,7 +560,7 @@ def _get_remote_diff(ds, current_commit, remote, remote_branch_name):
#else:
# if commits differ at all
lgr.debug("Since no paths provided, comparing commits")
diff = current_commit != remote_ref.commit
diff = current_commit != remote_ref.commit.hexsha
else:
lgr.debug("Remote '%s' has no branch matching %r. Will publish",
remote, remote_branch_name)
Expand Down
2 changes: 1 addition & 1 deletion datalad/distribution/tests/test_create_test_dataset.py
Expand Up @@ -88,4 +88,4 @@ def test_hierarchy(topdir):
# each one should have 2 commits (but the last one)-- one for file and
# another one for sub-dataset
repo = GitRepo(ds)
eq_(len(list(repo.get_branch_commits())), 1 + int(ids<2))
eq_(len(list(repo.get_branch_commits_())), 1 + int(ids<2))
72 changes: 36 additions & 36 deletions datalad/distribution/tests/test_publish.py
Expand Up @@ -133,8 +133,8 @@ def test_publish_simple(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# don't fail when doing it again
res = publish(dataset=source, to="target")
Expand All @@ -143,10 +143,10 @@ def test_publish_simple(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
eq_(list(target.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))

# 'target/master' should be tracking branch at this point, so
# try publishing without `to`:
Expand All @@ -163,15 +163,15 @@ def test_publish_simple(origin, src_path, dst_path):
eq_(res, [source])

ok_clean_git(dst_path, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
# Since git-annex 6.20170220, post-receive hook gets triggered
# which results in entry being added for that repo into uuid.log on remote
# end since then finally git-annex senses that it needs to init that remote,
# so it might have 1 more commit than local.
# see https://github.com/datalad/datalad/issues/1319
ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
set(target.get_branch_commits("git-annex"))))
ok_(set(source.repo.get_branch_commits_("git-annex")).issubset(
set(target.get_branch_commits_("git-annex"))))

eq_(source.repo.fsck(), source.repo.fsck(remote='target'))

Expand All @@ -198,8 +198,8 @@ def test_publish_plain_git(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# don't fail when doing it again
res = publish(dataset=source, to="target")
Expand All @@ -208,8 +208,8 @@ def test_publish_plain_git(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# some modification:
with open(opj(src_path, 'test_mod_file'), "w") as f:
Expand All @@ -222,8 +222,8 @@ def test_publish_plain_git(origin, src_path, dst_path):
eq_(res, [source])

ok_clean_git(dst_path, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# amend and change commit msg in order to test for force push:
source.repo.commit("amended", options=['--amend'])
Expand Down Expand Up @@ -299,32 +299,32 @@ def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path, sub
eq_({r['path'] for r in res},
{src_path, sub1.path, sub2.path})

eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
eq_(list(sub1_target.get_branch_commits("master")),
list(sub1.get_branch_commits("master")))
eq_(list(sub1_target.get_branch_commits("git-annex")),
list(sub1.get_branch_commits("git-annex")))
eq_(list(sub2_target.get_branch_commits("master")),
list(sub2.get_branch_commits("master")))
eq_(list(sub2_target.get_branch_commits("git-annex")),
list(sub2.get_branch_commits("git-annex")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
eq_(list(target.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))
eq_(list(sub1_target.get_branch_commits_("master")),
list(sub1.get_branch_commits_("master")))
eq_(list(sub1_target.get_branch_commits_("git-annex")),
list(sub1.get_branch_commits_("git-annex")))
eq_(list(sub2_target.get_branch_commits_("master")),
list(sub2.get_branch_commits_("master")))
eq_(list(sub2_target.get_branch_commits_("git-annex")),
list(sub2.get_branch_commits_("git-annex")))

# we are tracking origin but origin has different git-annex, since we
# cloned from it, so it is not aware of our git-annex
neq_(list(origin.repo.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
neq_(list(origin.repo.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))
# So if we first publish to it recursively, we would update
# all sub-datasets since git-annex branch would need to be pushed
res_ = publish(dataset=source, recursive=True)
assert_result_count(res_, 1, status='ok', path=source.path)
assert_result_count(res_, 1, status='ok', path=sub1.path)
assert_result_count(res_, 1, status='ok', path=sub2.path)
# and now should carry the same state for git-annex
eq_(list(origin.repo.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
eq_(list(origin.repo.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))

# test for publishing with --since. By default since no changes, nothing pushed
res_ = publish(dataset=source, recursive=True)
Expand Down Expand Up @@ -444,8 +444,8 @@ def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_c
eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
# XXX master was not checked out in dst!

eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
# TODO: last commit in git-annex branch differs. Probably fine,
# but figure out, when exactly to expect this for proper testing:
# yoh: they differ because local annex records information about now
Expand All @@ -454,8 +454,8 @@ def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_c
# different commits. I do not observe such behavior of remote having git-annex
# automagically updated in older clones
# which do not have post-receive hook on remote side
eq_(list(target.get_branch_commits("git-annex"))[1:],
list(source.repo.get_branch_commits("git-annex"))[1:])
eq_(list(target.get_branch_commits_("git-annex"))[1:],
list(source.repo.get_branch_commits_("git-annex"))[1:])

# we need compare target/master:
target.checkout("master")
Expand Down
14 changes: 7 additions & 7 deletions datalad/interface/tests/test_add_archive_content.py
Expand Up @@ -470,9 +470,9 @@ def test_add_delete_after_and_drop(self):
with assert_raises(Exception), \
swallow_logs():
self.annex.whereis(key1, key=True, output='full')
commits_prior = list(self.annex.get_branch_commits('git-annex'))
commits_prior = list(self.annex.get_branch_commits_('git-annex'))
add_archive_content('1.tar', annex=self.annex, strip_leading_dirs=True, delete_after=True)
commits_after = list(self.annex.get_branch_commits('git-annex'))
commits_after = list(self.annex.get_branch_commits_('git-annex'))
# There should be a single commit for all additions +1 to initiate datalad-archives gh-1258
# If faking dates, there should be another +1 because
# annex.alwayscommit isn't set to false.
Expand Down Expand Up @@ -504,15 +504,15 @@ def test_add_delete_after_and_drop_subdir(self):
with chpwd(self.annex.path):
# was failing since deleting without considering if tarball
# was extracted in that tarball directory
commits_prior_master = list(self.annex.get_branch_commits())
commits_prior = list(self.annex.get_branch_commits('git-annex'))
commits_prior_master = list(self.annex.get_branch_commits_())
commits_prior = list(self.annex.get_branch_commits_('git-annex'))
add_out = add_archive_content(
opj('subdir', '1.tar'),
delete_after=True,
drop_after=True)
ok_clean_git(self.annex.path)
commits_after_master = list(self.annex.get_branch_commits())
commits_after = list(self.annex.get_branch_commits('git-annex'))
commits_after_master = list(self.annex.get_branch_commits_())
commits_after = list(self.annex.get_branch_commits_('git-annex'))
# There should be a single commit for all additions +1 to
# initiate datalad-archives gh-1258. If faking dates,
# there should be another +1 because annex.alwayscommit
Expand All @@ -534,7 +534,7 @@ def test_add_delete_after_and_drop_subdir(self):
drop_after=True,
allow_dirty=True)
ok_clean_git(self.annex.path, untracked=['dummy.txt'])
assert_equal(len(list(self.annex.get_branch_commits())),
assert_equal(len(list(self.annex.get_branch_commits_())),
len(commits_prior_master))

# there should be no .datalad temporary files hanging around
Expand Down
52 changes: 13 additions & 39 deletions datalad/support/gitrepo.py
Expand Up @@ -2685,11 +2685,8 @@ def set_remote_url(self, name, url, push=False):
var = 'remote.{0}.{1}'.format(name, 'pushurl' if push else 'url')
self.config.set(var, url, where='local', reload=True)

def get_branch_commits(self, branch=None, limit=None, stop=None, value=None):
"""Return GitPython's commits for the branch
Pretty much similar to what 'git log <branch>' does.
It is a generator which returns top commits first
def get_branch_commits_(self, branch=None, limit=None, stop=None):
"""Return commit hexshas for a branch
Parameters
----------
Expand All @@ -2702,44 +2699,21 @@ def get_branch_commits(self, branch=None, limit=None, stop=None, value=None):
stop: str, optional
hexsha of the commit at which stop reporting (matched one is not
reported either)
value: None | 'hexsha', optional
What to yield. If None - entire commit object is yielded, if 'hexsha'
only its hexsha
"""
Yields
------
str
"""
cmd = ['rev-list']
if limit == 'left-only':
cmd.append('--left-only')
if not branch:
branch = self.get_active_branch()

try:
_branch = self.repo.branches[branch]
except IndexError:
raise MissingBranchError(self, branch,
[b.name for b in self.repo.branches])

fvalue = {None: lambda x: x, 'hexsha': lambda x: x.hexsha}[value]

if not limit:
def gen():
# traverse doesn't yield original commit
co = _branch.commit
yield co
for co_ in co.traverse():
yield co_
elif limit == 'left-only':
# we need a custom implementation since couldn't figure out how to
# do with .traversal
def gen():
co = _branch.commit
while co:
yield co
co = co.parents[0] if co.parents else None
else:
raise ValueError(limit)

for c in gen():
if stop and c.hexsha == stop:
cmd.append(branch)
for r in self.call_git_items_(cmd):
if stop and stop == r:
return
yield fvalue(c)
yield r

def checkout(self, name, options=None):
"""
Expand Down
4 changes: 2 additions & 2 deletions datalad/support/tests/test_annexrepo.py
Expand Up @@ -2060,8 +2060,8 @@ def test_fake_dates(path):
ar = AnnexRepo(path, create=True, fake_dates=True)
timestamp = ar.config.obtain("datalad.fake-dates-start") + 1
# Commits from the "git annex init" call are one second ahead.
for commit in ar.get_branch_commits("git-annex"):
eq_(timestamp, commit.committed_date)
for commit in ar.get_branch_commits_("git-annex"):
eq_(timestamp, int(ar.format_commit('%ct', commit)))
assert_in("timestamp={}s".format(timestamp),
ar.call_git(["cat-file", "blob", "git-annex:uuid.log"]))

Expand Down
18 changes: 4 additions & 14 deletions datalad/support/tests/test_gitrepo.py
Expand Up @@ -670,7 +670,7 @@ def test_GitRepo_ssh_push(repo_path, remote_path):
repo.push(remote="ssh-remote", refspec="ssh-test", force=True)
# correct commit message in remote:
assert_in("amended",
list(remote_repo.get_branch_commits('ssh-test'))[-1].summary)
list(remote_repo.get_branch_commits_('ssh-test'))[-1].summary)


@with_tempfile
Expand Down Expand Up @@ -900,28 +900,18 @@ def test_GitRepo_get_merge_base(src):


@with_tempfile(mkdir=True)
def test_GitRepo_git_get_branch_commits(src):
def test_GitRepo_git_get_branch_commits_(src):

repo = GitRepo(src, create=True)
with open(op.join(src, 'file.txt'), 'w') as f:
f.write('load')
repo.add('*')
repo.commit('committing')

commits_default = list(repo.get_branch_commits())
commits = list(repo.get_branch_commits('master'))
commits_default = list(repo.get_branch_commits_())
commits = list(repo.get_branch_commits_('master'))
eq_(commits, commits_default)

eq_(len(commits), 1)
commits_stop0 = list(repo.get_branch_commits(stop=commits[0].hexsha))
eq_(commits_stop0, [])
commits_hexsha = list(repo.get_branch_commits(value='hexsha'))
commits_hexsha_left = list(repo.get_branch_commits(value='hexsha', limit='left-only'))
eq_([commits[0].hexsha], commits_hexsha)
# our unittest is rudimentary ;-)
eq_(commits_hexsha_left, commits_hexsha)
repo.precommit() # to stop all the batched processes for swallow_outputs
raise SkipTest("TODO: Was more of a smoke test -- improve testing")


@with_testrepos(flavors=['local'])
Expand Down

0 comments on commit f2a0ee1

Please sign in to comment.