Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RF: No GitPython use in publish #4171

Merged
merged 2 commits into from Feb 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 8 additions & 20 deletions datalad/distribution/publish.py
Expand Up @@ -330,11 +330,11 @@ def _publish_dataset(ds, remote, refspec, paths, annex_copy_options, force=False
# changes
if not diff and is_annex_repo:
try:
git_annex_commit = next(ds.repo.get_branch_commits('git-annex'))
git_annex_commit = next(ds.repo.get_branch_commits_('git-annex'))
except StopIteration:
git_annex_commit = None
#diff = _get_remote_diff(ds, [], git_annex_commit, remote, 'git-annex')
diff = _get_remote_diff(ds, git_annex_commit, remote, 'git-annex')
diff = _get_remote_diff(ds.repo, git_annex_commit, remote, 'git-annex')
if diff:
lgr.info("Will publish updated git-annex")

Expand Down Expand Up @@ -539,28 +539,16 @@ def _get_remote_info(ds_path, ds_remote_info, to, missing):
ds_remote_info[ds_path] = {'remote': to}



def _get_remote_diff(ds, current_commit, remote, remote_branch_name):
#def _get_remote_diff(ds, paths, current_commit, remote, remote_branch_name):
def _get_remote_diff(repo, current_commit, remote, remote_branch_name):
"""Helper to check if remote has different state of the branch"""
if remote_branch_name in ds.repo.repo.remotes[remote].refs:
remote_ref = '/'.join((remote, remote_branch_name))
if remote_ref in repo.get_remote_branches():
lgr.debug("Testing for changes with respect to '%s' of remote '%s'",
remote_branch_name, remote)
if current_commit is None:
current_commit = ds.repo.repo.commit()
remote_ref = ds.repo.repo.remotes[remote].refs[remote_branch_name]
# XXX: ATM nothing calls this function with a non-empty `paths` arg
#if paths:
# # if there were custom paths, we will look at the diff
# lgr.debug("Since paths provided, looking at diff")
# diff = current_commit.diff(
# remote_ref,
# paths=paths
# )
#else:
# if commits differ at all
lgr.debug("Since no paths provided, comparing commits")
diff = current_commit != remote_ref.commit
current_commit = repo.get_hexsha()
remote_ref = repo.get_hexsha(remote_ref)
diff = current_commit != remote_ref
else:
lgr.debug("Remote '%s' has no branch matching %r. Will publish",
remote, remote_branch_name)
Expand Down
2 changes: 1 addition & 1 deletion datalad/distribution/tests/test_create_test_dataset.py
Expand Up @@ -88,4 +88,4 @@ def test_hierarchy(topdir):
# each one should have 2 commits (but the last one)-- one for file and
# another one for sub-dataset
repo = GitRepo(ds)
eq_(len(list(repo.get_branch_commits())), 1 + int(ids<2))
eq_(len(list(repo.get_branch_commits_())), 1 + int(ids<2))
72 changes: 36 additions & 36 deletions datalad/distribution/tests/test_publish.py
Expand Up @@ -133,8 +133,8 @@ def test_publish_simple(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# don't fail when doing it again
res = publish(dataset=source, to="target")
Expand All @@ -143,10 +143,10 @@ def test_publish_simple(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
eq_(list(target.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))

# 'target/master' should be tracking branch at this point, so
# try publishing without `to`:
Expand All @@ -163,15 +163,15 @@ def test_publish_simple(origin, src_path, dst_path):
eq_(res, [source])

ok_clean_git(dst_path, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
# Since git-annex 6.20170220, post-receive hook gets triggered
# which results in entry being added for that repo into uuid.log on remote
# end since then finally git-annex senses that it needs to init that remote,
# so it might have 1 more commit than local.
# see https://github.com/datalad/datalad/issues/1319
ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
set(target.get_branch_commits("git-annex"))))
ok_(set(source.repo.get_branch_commits_("git-annex")).issubset(
set(target.get_branch_commits_("git-annex"))))

eq_(source.repo.fsck(), source.repo.fsck(remote='target'))

Expand All @@ -198,8 +198,8 @@ def test_publish_plain_git(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# don't fail when doing it again
res = publish(dataset=source, to="target")
Expand All @@ -208,8 +208,8 @@ def test_publish_plain_git(origin, src_path, dst_path):

ok_clean_git(source.repo, annex=None)
ok_clean_git(target, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# some modification:
with open(opj(src_path, 'test_mod_file'), "w") as f:
Expand All @@ -222,8 +222,8 @@ def test_publish_plain_git(origin, src_path, dst_path):
eq_(res, [source])

ok_clean_git(dst_path, annex=None)
eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))

# amend and change commit msg in order to test for force push:
source.repo.commit("amended", options=['--amend'])
Expand Down Expand Up @@ -299,32 +299,32 @@ def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path, sub
eq_({r['path'] for r in res},
{src_path, sub1.path, sub2.path})

eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
eq_(list(sub1_target.get_branch_commits("master")),
list(sub1.get_branch_commits("master")))
eq_(list(sub1_target.get_branch_commits("git-annex")),
list(sub1.get_branch_commits("git-annex")))
eq_(list(sub2_target.get_branch_commits("master")),
list(sub2.get_branch_commits("master")))
eq_(list(sub2_target.get_branch_commits("git-annex")),
list(sub2.get_branch_commits("git-annex")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
eq_(list(target.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))
eq_(list(sub1_target.get_branch_commits_("master")),
list(sub1.get_branch_commits_("master")))
eq_(list(sub1_target.get_branch_commits_("git-annex")),
list(sub1.get_branch_commits_("git-annex")))
eq_(list(sub2_target.get_branch_commits_("master")),
list(sub2.get_branch_commits_("master")))
eq_(list(sub2_target.get_branch_commits_("git-annex")),
list(sub2.get_branch_commits_("git-annex")))

# we are tracking origin but origin has different git-annex, since we
# cloned from it, so it is not aware of our git-annex
neq_(list(origin.repo.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
neq_(list(origin.repo.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))
# So if we first publish to it recursively, we would update
# all sub-datasets since git-annex branch would need to be pushed
res_ = publish(dataset=source, recursive=True)
assert_result_count(res_, 1, status='ok', path=source.path)
assert_result_count(res_, 1, status='ok', path=sub1.path)
assert_result_count(res_, 1, status='ok', path=sub2.path)
# and now should carry the same state for git-annex
eq_(list(origin.repo.get_branch_commits("git-annex")),
list(source.repo.get_branch_commits("git-annex")))
eq_(list(origin.repo.get_branch_commits_("git-annex")),
list(source.repo.get_branch_commits_("git-annex")))

# test for publishing with --since. By default since no changes, nothing pushed
res_ = publish(dataset=source, recursive=True)
Expand Down Expand Up @@ -444,8 +444,8 @@ def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_c
eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
# XXX master was not checked out in dst!

eq_(list(target.get_branch_commits("master")),
list(source.repo.get_branch_commits("master")))
eq_(list(target.get_branch_commits_("master")),
list(source.repo.get_branch_commits_("master")))
# TODO: last commit in git-annex branch differs. Probably fine,
# but figure out, when exactly to expect this for proper testing:
# yoh: they differ because local annex records information about now
Expand All @@ -454,8 +454,8 @@ def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_c
# different commits. I do not observe such behavior of remote having git-annex
# automagically updated in older clones
# which do not have post-receive hook on remote side
eq_(list(target.get_branch_commits("git-annex"))[1:],
list(source.repo.get_branch_commits("git-annex"))[1:])
eq_(list(target.get_branch_commits_("git-annex"))[1:],
list(source.repo.get_branch_commits_("git-annex"))[1:])

# we need compare target/master:
target.checkout("master")
Expand Down
14 changes: 7 additions & 7 deletions datalad/interface/tests/test_add_archive_content.py
Expand Up @@ -470,9 +470,9 @@ def test_add_delete_after_and_drop(self):
with assert_raises(Exception), \
swallow_logs():
self.annex.whereis(key1, key=True, output='full')
commits_prior = list(self.annex.get_branch_commits('git-annex'))
commits_prior = list(self.annex.get_branch_commits_('git-annex'))
add_archive_content('1.tar', annex=self.annex, strip_leading_dirs=True, delete_after=True)
commits_after = list(self.annex.get_branch_commits('git-annex'))
commits_after = list(self.annex.get_branch_commits_('git-annex'))
# There should be a single commit for all additions +1 to initiate datalad-archives gh-1258
# If faking dates, there should be another +1 because
# annex.alwayscommit isn't set to false.
Expand Down Expand Up @@ -504,15 +504,15 @@ def test_add_delete_after_and_drop_subdir(self):
with chpwd(self.annex.path):
# was failing since deleting without considering if tarball
# was extracted in that tarball directory
commits_prior_master = list(self.annex.get_branch_commits())
commits_prior = list(self.annex.get_branch_commits('git-annex'))
commits_prior_master = list(self.annex.get_branch_commits_())
commits_prior = list(self.annex.get_branch_commits_('git-annex'))
add_out = add_archive_content(
opj('subdir', '1.tar'),
delete_after=True,
drop_after=True)
ok_clean_git(self.annex.path)
commits_after_master = list(self.annex.get_branch_commits())
commits_after = list(self.annex.get_branch_commits('git-annex'))
commits_after_master = list(self.annex.get_branch_commits_())
commits_after = list(self.annex.get_branch_commits_('git-annex'))
# There should be a single commit for all additions +1 to
# initiate datalad-archives gh-1258. If faking dates,
# there should be another +1 because annex.alwayscommit
Expand All @@ -534,7 +534,7 @@ def test_add_delete_after_and_drop_subdir(self):
drop_after=True,
allow_dirty=True)
ok_clean_git(self.annex.path, untracked=['dummy.txt'])
assert_equal(len(list(self.annex.get_branch_commits())),
assert_equal(len(list(self.annex.get_branch_commits_())),
len(commits_prior_master))

# there should be no .datalad temporary files hanging around
Expand Down
52 changes: 13 additions & 39 deletions datalad/support/gitrepo.py
Expand Up @@ -2685,11 +2685,8 @@ def set_remote_url(self, name, url, push=False):
var = 'remote.{0}.{1}'.format(name, 'pushurl' if push else 'url')
self.config.set(var, url, where='local', reload=True)

def get_branch_commits(self, branch=None, limit=None, stop=None, value=None):
"""Return GitPython's commits for the branch

Pretty much similar to what 'git log <branch>' does.
It is a generator which returns top commits first
def get_branch_commits_(self, branch=None, limit=None, stop=None):
"""Return commit hexshas for a branch

Parameters
----------
Expand All @@ -2702,44 +2699,21 @@ def get_branch_commits(self, branch=None, limit=None, stop=None, value=None):
stop: str, optional
hexsha of the commit at which stop reporting (matched one is not
reported either)
value: None | 'hexsha', optional
What to yield. If None - entire commit object is yielded, if 'hexsha'
only its hexsha
"""

Yields
------
str
"""
cmd = ['rev-list']
if limit == 'left-only':
cmd.append('--left-only')
if not branch:
branch = self.get_active_branch()

try:
_branch = self.repo.branches[branch]
except IndexError:
raise MissingBranchError(self, branch,
[b.name for b in self.repo.branches])

fvalue = {None: lambda x: x, 'hexsha': lambda x: x.hexsha}[value]

if not limit:
def gen():
# traverse doesn't yield original commit
co = _branch.commit
yield co
for co_ in co.traverse():
yield co_
elif limit == 'left-only':
# we need a custom implementation since couldn't figure out how to
# do with .traversal
def gen():
co = _branch.commit
while co:
yield co
co = co.parents[0] if co.parents else None
else:
raise ValueError(limit)

for c in gen():
if stop and c.hexsha == stop:
cmd.append(branch)
for r in self.call_git_items_(cmd):
if stop and stop == r:
return
yield fvalue(c)
yield r

def checkout(self, name, options=None):
"""
Expand Down
4 changes: 2 additions & 2 deletions datalad/support/tests/test_annexrepo.py
Expand Up @@ -2060,8 +2060,8 @@ def test_fake_dates(path):
ar = AnnexRepo(path, create=True, fake_dates=True)
timestamp = ar.config.obtain("datalad.fake-dates-start") + 1
# Commits from the "git annex init" call are one second ahead.
for commit in ar.get_branch_commits("git-annex"):
eq_(timestamp, commit.committed_date)
for commit in ar.get_branch_commits_("git-annex"):
eq_(timestamp, int(ar.format_commit('%ct', commit)))
assert_in("timestamp={}s".format(timestamp),
ar.call_git(["cat-file", "blob", "git-annex:uuid.log"]))

Expand Down
21 changes: 7 additions & 14 deletions datalad/support/tests/test_gitrepo.py
Expand Up @@ -670,7 +670,10 @@ def test_GitRepo_ssh_push(repo_path, remote_path):
repo.push(remote="ssh-remote", refspec="ssh-test", force=True)
# correct commit message in remote:
assert_in("amended",
list(remote_repo.get_branch_commits('ssh-test'))[-1].summary)
remote_repo.format_commit(
'%s',
list(remote_repo.get_branch_commits_('ssh-test'))[-1]
))


@with_tempfile
Expand Down Expand Up @@ -900,28 +903,18 @@ def test_GitRepo_get_merge_base(src):


@with_tempfile(mkdir=True)
def test_GitRepo_git_get_branch_commits(src):
def test_GitRepo_git_get_branch_commits_(src):

repo = GitRepo(src, create=True)
with open(op.join(src, 'file.txt'), 'w') as f:
f.write('load')
repo.add('*')
repo.commit('committing')

commits_default = list(repo.get_branch_commits())
commits = list(repo.get_branch_commits('master'))
commits_default = list(repo.get_branch_commits_())
commits = list(repo.get_branch_commits_('master'))
eq_(commits, commits_default)

eq_(len(commits), 1)
commits_stop0 = list(repo.get_branch_commits(stop=commits[0].hexsha))
eq_(commits_stop0, [])
commits_hexsha = list(repo.get_branch_commits(value='hexsha'))
commits_hexsha_left = list(repo.get_branch_commits(value='hexsha', limit='left-only'))
eq_([commits[0].hexsha], commits_hexsha)
# our unittest is rudimentary ;-)
eq_(commits_hexsha_left, commits_hexsha)
repo.precommit() # to stop all the batched processes for swallow_outputs
raise SkipTest("TODO: Was more of a smoke test -- improve testing")


@with_testrepos(flavors=['local'])
Expand Down