Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First batch of GitPython pruning #2902

Merged
merged 15 commits into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 8 additions & 23 deletions datalad/interface/rerun.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from datalad.consts import PRE_INIT_COMMIT_SHA

from datalad.support.constraints import EnsureNone, EnsureStr
from datalad.support.gitrepo import GitCommandError
from datalad.support.param import Parameter
from datalad.support.json_py import load_stream

Expand Down Expand Up @@ -212,7 +211,7 @@ def __call__(
message="branch '{}' already exists".format(branch))
return

if not commit_exists(ds, revision + "^"):
if not ds.repo.commit_exists(revision + "^"):
# Only a single commit is reachable from `revision`. In
# this case, --since has no effect on the range construction.
revrange = revision
Expand Down Expand Up @@ -244,7 +243,7 @@ def __call__(
def _revs_as_results(dset, revs):
for rev in revs:
res = get_status_dict("run", ds=dset, commit=rev)
full_msg = dset.repo.repo.git.show(rev, "--format=%B", "--no-patch")
full_msg = dset.repo.format_commit("%B", rev)
try:
msg, info = get_run_info(dset, full_msg)
except ValueError as exc:
Expand Down Expand Up @@ -294,7 +293,7 @@ def _rerun_as_results(dset, revrange, since, branch, onto, message):
# that, regardless of if and how --since is specified, the effective
# value for --since is the parent of the first revision.
onto = results[0]["commit"] + "^"
if not commit_exists(dset, onto):
if not dset.repo.commit_exists(onto):
# This is unlikely to happen in the wild because it means that the
# first commit is a datalad run commit. Just abort rather than
# trying to checkout on orphan branch or something like that.
Expand All @@ -314,20 +313,15 @@ def _rerun_as_results(dset, revrange, since, branch, onto, message):
status="ok")

def rev_is_ancestor(rev):
try:
dset.repo.repo.git.merge_base("--is-ancestor", rev, start_point)
except GitCommandError:
# Revision is NOT an ancestor of the starting point.
return False
return True
return dset.repo.is_ancestor(rev, start_point)

# We want to skip revs before the starting point and pick those after.
to_pick = set(dropwhile(rev_is_ancestor, [r["commit"] for r in results]))

def skip_or_pick(hexsha, result, msg):
pick = hexsha in to_pick
result["rerun_action"] = "pick" if pick else "skip"
shortrev = dset.repo.repo.git.rev_parse("--short", hexsha)
shortrev = dset.repo.get_hexsha(hexsha, short=True)
result["message"] = (
"%s %s; %s",
shortrev, msg, "cherry picking" if pick else "skipping")
Expand Down Expand Up @@ -407,8 +401,7 @@ def _report(dset, results):
res["diff"] = list(res["diff"])
# Add extra information that is useful in the report but not
# needed for the rerun.
out = dset.repo.repo.git.show(
"--no-patch", "--format=%an%x00%aI", res["commit"])
out = dset.repo.format_commit("%an%x00%aI", res["commit"])
res["author"], res["date"] = out.split("\0")
yield res

Expand All @@ -428,7 +421,7 @@ def fn(dset, results):
ofh.write(header.format(
script=script,
since="" if since is None else " --since=" + since,
revision=dset.repo.repo.git.rev_parse(revision),
revision=dset.repo.get_hexsha(revision),
ds='dataset {} at '.format(dset.id) if dset.id else '',
path=dset.path))

Expand Down Expand Up @@ -537,7 +530,7 @@ def diff_revision(dataset, revision="HEAD"):
-------
Generator that yields AnnotatePaths instances
"""
if commit_exists(dataset, revision + "^"):
if dataset.repo.commit_exists(revision + "^"):
revrange = "{rev}^..{rev}".format(rev=revision)
else:
# No other commits are reachable from this revision. Diff
Expand All @@ -557,11 +550,3 @@ def new_or_modified(diff_results):
if r.get('type') == 'file' and r.get('state') in ['added', 'modified']:
r.pop('status', None)
yield r


def commit_exists(dataset, commit):
try:
dataset.repo.repo.git.rev_parse("--verify", commit + "^{commit}")
except:
return False
return True
3 changes: 2 additions & 1 deletion datalad/interface/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,8 @@ def run_command(cmd, dataset=None, inputs=None, outputs=None, expand=None,
msg = assure_bytes(msg)

if not rerun_info and cmd_exitcode:
msg_path = opj(relpath(ds.repo.repo.git_dir), "COMMIT_EDITMSG")
msg_path = relpath(opj(ds.repo.path, ds.repo.get_git_dir(ds.repo),
"COMMIT_EDITMSG"))
with open(msg_path, "wb") as ofh:
ofh.write(msg)
lgr.info("The command had a non-zero exit code. "
Expand Down
18 changes: 9 additions & 9 deletions datalad/interface/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,16 @@ def test_rerun_onto(path):
# same (but detached) place.
ds.rerun(revision="static", onto="static")
ok_(ds.repo.get_active_branch() is None)
eq_(ds.repo.repo.git.rev_parse("HEAD"),
ds.repo.repo.git.rev_parse("static"))
eq_(ds.repo.get_hexsha(),
ds.repo.get_hexsha("static"))

# If we run the "static" change from the same "base", we end up
# with a new commit.
ds.repo.checkout("master")
ds.rerun(revision="static", onto="static^")
ok_(ds.repo.get_active_branch() is None)
neq_(ds.repo.repo.git.rev_parse("HEAD"),
ds.repo.repo.git.rev_parse("static"))
neq_(ds.repo.get_hexsha(),
ds.repo.get_hexsha("static"))
assert_result_count(ds.diff(revision="HEAD..static"), 0)
for revrange in ["..static", "static.."]:
assert_result_count(
Expand All @@ -282,8 +282,8 @@ def test_rerun_onto(path):
ds.repo.checkout("master")
ds.rerun(onto="HEAD")
ok_(ds.repo.get_active_branch() is None)
neq_(ds.repo.repo.git.rev_parse("HEAD"),
ds.repo.repo.git.rev_parse("master"))
neq_(ds.repo.get_hexsha(),
ds.repo.get_hexsha("master"))

# An empty `onto` means use the parent of the first revision.
ds.repo.checkout("master")
Expand All @@ -300,7 +300,7 @@ def test_rerun_onto(path):
eq_(ds.repo.get_active_branch(), "from-base")
assert_result_count(ds.diff(revision="master..from-base"), 0)
eq_(ds.repo.get_merge_base(["static", "from-base"]),
ds.repo.repo.git.rev_parse("static^"))
ds.repo.get_hexsha("static^"))


@ignore_nose_capturing_stdout
Expand Down Expand Up @@ -391,7 +391,7 @@ def test_run_failure(path):
eq_(hexsha_initial, ds.repo.get_hexsha())
ok_(ds.repo.dirty)

msgfile = opj(ds.repo.repo.git_dir, "COMMIT_EDITMSG")
msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
ok_exists(msgfile)

ds.add(".", save=False)
Expand Down Expand Up @@ -449,7 +449,7 @@ def test_rerun_branch(path):
assert_result_count(
ds.repo.repo.git.rev_list(revrange).split(), 3)
eq_(ds.repo.get_merge_base(["master", "rerun"]),
ds.repo.repo.git.rev_parse("prerun"))
ds.repo.get_hexsha("prerun"))

# Start rerun branch at tip of current branch.
ds.repo.checkout("master")
Expand Down
2 changes: 1 addition & 1 deletion datalad/interface/tests/test_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def test_save_message_file(path):
"msg": "add foo"})
ds.add("foo", save=False)
ds.save(message_file=opj(ds.path, "msg"))
assert_equal(ds.repo.repo.git.show("--format=%s", "--no-patch"),
assert_equal(ds.repo.format_commit("%s"),
"add foo")


Expand Down
94 changes: 83 additions & 11 deletions datalad/support/gitrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,32 +1341,61 @@ def get_indexed_files(self):
return [x[0] for x in self.cmd_call_wrapper(
self.repo.index.entries.keys)]

def get_hexsha(self, object=None):
"""Return a hexsha for a given object. If None - of current HEAD
def format_commit(self, fmt, commitish=None):
"""Return `git show` output for `commitish`.

Parameters
----------
object: str, optional
Any type of Git object identifier. See `git show`.
fmt : str
A format string accepted by `git show`.
commitish: str, optional
Any commit identifier (defaults to "HEAD").

Returns
-------
str or, if there are not commits yet, None.
"""
cmd = ['git', 'show', '--no-patch', "--format=%H"]
if object:
cmd.append(object)
cmd = ['git', 'show', '-z', '--no-patch', '--format=' + fmt]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, another one without -z. Thx for catching that!

if commitish is not None:
cmd.append(commitish + "^{commit}")
# make sure Git takes our argument as a revision
cmd.append('--')
try:
stdout, stderr = self._git_custom_command(
'', cmd, expect_stderr=True, expect_fail=True)
except CommandError as e:
if 'bad revision' in e.stderr:
raise ValueError("Unknown object identifier: %s" % object)
raise ValueError("Unknown commit identifier: %s" % commitish)
elif 'does not have any commits yet' in e.stderr:
return None
else:
raise e
stdout = stdout.splitlines()
assert(len(stdout) == 1)
return stdout[0]
# This trailing null is coming from the -z above, which avoids the
# newline that Git would append to the output. We could drop -z and
# strip the newline directly, but then we'd have to worry about
# compatibility across platforms.
return stdout.rsplit("\0", 1)[0]

def get_hexsha(self, commitish=None, short=False):
"""Return a hexsha for a given commitish.

Parameters
----------
commitish : str, optional
Any identifier that refers to a commit (defaults to "HEAD").
short : bool, optional
Return the abbreviated form of the hexsha.

Returns
-------
str or, if there are not commits yet, None.
"""
stdout = self.format_commit("%{}".format('h' if short else 'H'),
commitish)
if stdout is not None:
stdout = stdout.splitlines()
assert(len(stdout) == 1)
return stdout[0]

@normalize_paths(match_return_type=False)
def get_last_commit_hash(self, files):
Expand All @@ -1384,6 +1413,29 @@ def get_last_commit_hash(self, files):
return None
raise

def commit_exists(self, commitish):
"""Does `commitish` exist in the repo?

Parameters
----------
commitish : str
A commit or an object that can be dereferenced to one.

Returns
-------
bool
"""
try:
# Note: The peeling operator "^{commit}" is required so that
# rev-parse doesn't succeed if passed a full hexsha that is valid
# but doesn't exist.
self._git_custom_command(
"", ["git", "rev-parse", "--verify", commitish + "^{commit}"],
expect_fail=True)
except CommandError:
return False
return True

def get_merge_base(self, commitishes):
"""Get a merge base hexsha

Expand Down Expand Up @@ -1419,6 +1471,26 @@ def get_merge_base(self, commitishes):
assert(len(bases) == 1) # we do not do 'all' yet
return bases[0].hexsha

def is_ancestor(self, reva, revb):
"""Is `reva` an ancestor of `revb`?

Parameters
----------
reva, revb : str
Revisions.

Returns
-------
bool
"""
try:
self._git_custom_command(
"", ["git", "merge-base", "--is-ancestor", reva, revb],
expect_fail=True)
except CommandError:
return False
return True

def get_commit_date(self, branch=None, date='authored'):
"""Get the date stamp of the last commit (in a branch or head otherwise)

Expand Down
12 changes: 11 additions & 1 deletion datalad/support/tests/test_gitrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,8 @@ def _get_inodes(repo):
return dict(
[(os.path.join(*o.split(os.sep)[-2:]),
os.stat(o).st_ino)
for o in glob(os.path.join(repo.repo.git_dir,
for o in glob(os.path.join(repo.path,
repo.get_git_dir(repo),
'objects', '*', '*'))])

origin_inodes = _get_inodes(repo)
Expand Down Expand Up @@ -1240,6 +1241,15 @@ def test_gitattributes(path):
})


@with_tempfile(mkdir=True)
def test_get_hexsha_tag(path):
gr = GitRepo(path, create=True)
gr.commit(msg="msg", options=["--allow-empty"])
gr.tag("atag", message="atag msg")
# get_hexsha() dereferences a tag to a commit.
eq_(gr.get_hexsha("atag"), gr.get_hexsha())


@with_tempfile(mkdir=True)
def test_get_tags(path):
from mock import patch
Expand Down
2 changes: 1 addition & 1 deletion datalad/support/tests/test_repodates.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_check_dates(path):
ar.commit("add foo")
ar.tag("foo-tag", "tag before refdate")
# We can't use ar.get_tags because that returns the commit's hexsha,
# not the tag's.
# not the tag's, and ar.get_hexsha is limited to commit objects.
foo_tag = ar.repo.git.rev_parse("foo-tag")
# Make a lightweight tag to make sure `tag_dates` doesn't choke on it.
ar.tag("light")
Expand Down