Skip to content

Commit

Permalink
Merge commit '0.16.6-17-g9e147092b' into merge-maint
Browse files Browse the repository at this point in the history
* commit '0.16.6-17-g9e147092b': (27 commits)
  Upload coverage from extension tests
  limit chardet version to <5.0.0
  limit chardet version to <5.0.0
  implement call_git with call_git_items_
  refactor runner.utils tests
  keep line endings intact as long as possible
  add a test for call_git_items_ results
  do not skip file-URL tests on windows
  adapt tests to modified GitRepo.call_git-semantic
  Update RST changelog
  Update CHANGELOG.md
  BF(workaround): skip test_ria_postclonecfg on OSX for now
  prevent result rendering in recursive search-call
  DOC: Finalize adjustment of the docstring for _call_git to be factually correct
  refactor thread safety tests
  refactor thread-safety tests
  remove unnecessary assignment
  fix exception recording
  remove debug print, use test's member check
  use assert_raises from datalad test utils
  ...

 Conflicts:
	.github/workflows/test_extensions.yml - we added custom installation of nose in master and that conflicted with changes in maint
	datalad/core/distributed/tests/test_clone.py - just a comment conflicted, not clear why git was not smart enough
	datalad/dataset/tests/test_gitrepo.py - also removed unused subprocess import and adjusted signature of new tests to make kwargs
  • Loading branch information
yarikoptic committed Jul 1, 2022
2 parents 797dde3 + 9e14709 commit 05e6cfa
Show file tree
Hide file tree
Showing 14 changed files with 413 additions and 105 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/test_extensions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements-devel.txt
# we are using pytest here but extensions still tested with nose
pip install nose
- name: Install ${{ matrix.extension }} extension
run: |
Expand All @@ -65,3 +66,9 @@ jobs:
python -m nose -s -v --with-cov --cover-package datalad $(echo ${{ matrix.extension }} | tr '-' '_')
# TODO: later whenever some extensions would migrate to pytest -- use pytest
# python -m pytest -c ../tox.ini -s -v --cov=datalad --pyargs $(echo ${{ matrix.extension }} | tr '-' '_')
python -m coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
directory: __testhome__
fail_ci_if_error: false
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# 0.16.6 (Tue Jun 14 2022)

#### 🐛 Bug Fix

- Prevent duplicated result rendering when searching in default datasets [#6765](https://github.com/datalad/datalad/pull/6765) ([@christian-monch](https://github.com/christian-monch))
- BF(workaround): skip test_ria_postclonecfg on OSX for now ([@yarikoptic](https://github.com/yarikoptic))
- BF(workaround to #6759): if saving credential failed, just log error and continue [#6762](https://github.com/datalad/datalad/pull/6762) ([@yarikoptic](https://github.com/yarikoptic))
- Prevent reentry of a runner instance [#6737](https://github.com/datalad/datalad/pull/6737) ([@christian-monch](https://github.com/christian-monch))

#### Authors: 2

- Christian Mönch ([@christian-monch](https://github.com/christian-monch))
- Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic))

---

# 0.16.5 (Wed Jun 08 2022)

#### 🐛 Bug Fix
Expand Down
44 changes: 33 additions & 11 deletions datalad/dataset/gitrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def _generator_call_git(self,
----------
sep : str, optional
Use `sep` as line separator. Does not create an empty last line if
the input ends on sep.
the input ends on sep. The lines contain the separator, if it exists.
All other parameters match those described for `call_git`.
Expand Down Expand Up @@ -338,14 +338,14 @@ def pipe_data_received(self, fd, data):
env=env)

line_splitter = {
STDOUT_FILENO: LineSplitter(sep),
STDERR_FILENO: LineSplitter(sep)
STDOUT_FILENO: LineSplitter(sep, keep_ends=True),
STDERR_FILENO: LineSplitter(sep, keep_ends=True)
}

for file_no, content in generator:
if file_no in (STDOUT_FILENO, STDERR_FILENO):
for line in line_splitter[file_no].process(content):
yield file_no, line + "\n"
yield file_no, line
else:
raise ValueError(f"unknown file number: {file_no}")

Expand All @@ -364,9 +364,10 @@ def _call_git(self,
"""Allows for calling arbitrary commands.
Internal helper to the call_git*() methods.
Unlike call_git, _call_git returns both stdout and stderr.
The parameters, return value, and raised exceptions match those
documented for `call_git`.
documented for `call_git`, with the exception of env, which allows to
specify the custom environment (variables) to be used.
"""
runner = self._git_runner
stderr_log_level = {True: 5, False: 11}[expect_stderr]
Expand Down Expand Up @@ -429,12 +430,13 @@ def call_git(self, args, files=None,
------
CommandError if the call exits with a non-zero status.
"""
return "\n".join(
return "".join(
self.call_git_items_(args,
files,
expect_stderr=expect_stderr,
expect_fail=expect_fail,
read_only=read_only))
read_only=read_only,
keep_ends=True))

def call_git_items_(self,
args,
Expand All @@ -443,7 +445,8 @@ def call_git_items_(self,
expect_fail=False,
env=None,
read_only=False,
sep=None):
sep=None,
keep_ends=False):
"""
Call git, yield output lines when available. Output lines are split
at line ends or `sep` if `sep` is not None.
Expand All @@ -458,7 +461,20 @@ def call_git_items_(self,
Returns
-------
Generator that yields stdout items.
Generator that yields stdout items, i.e. lines with the line ending or
separator removed.
Please note, this method is meant to be used to process output that is
meant for 'interactive' interpretation. It is not intended to return
stdout from a command like "git cat-file". The reason is that
it strips of the line endings (or separator) from the result lines,
unless 'keep_ends' is True. If 'keep_ends' is False, you will not know
which line ending was stripped (if 'separator' is None) or whether a
line ending (or separator) was stripped at all, because the last line
may not have a line ending (or separator).
If you want to reliably recreate the output set 'keep_ends' to True and
"".join() the result, or use 'GitRepo.call_git()' instead.
Raises
------
Expand All @@ -481,7 +497,13 @@ def call_git_items_(self,
env=env,
sep=sep):
if file_no == STDOUT_FILENO:
yield line.rstrip("\n")
if keep_ends is True:
yield line
else:
if sep:
yield line.rstrip(sep)
else:
yield line.rstrip()
else:
stderr_lines.append(line)

Expand Down
56 changes: 56 additions & 0 deletions datalad/dataset/tests/test_gitrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,59 @@ def test_get_dot_git(emptycase=None, gitdircase=None, barecase=None, gitfilecase

eq_(_get_dot_git(gitfilecase, resolved=r),
(gitfilecase.resolve() if r else gitfilecase) / 'subdir')


file1_content = "file1 content\n"
file2_content = "file2 content\0"
example_tree = {
"file1": file1_content,
"file2": file2_content
}


def _create_test_gitrepo(temp_dir):
repo = GitRepo(temp_dir)
repo.init()
repo.call_git(["add", "."])
repo.call_git(["commit", "-m", "test commit"])

hash_keys = tuple(
repo.call_git(["hash-object", file_name]).strip()
for file_name in ("file1", "file2")
)
return repo, hash_keys


@with_tree(tree=example_tree)
def test_call_git_items(temp_dir=None):
# check proper handling of separator in call_git_items_
repo, (hash1, hash2) = _create_test_gitrepo(temp_dir)

expected_tree_lines = (
f'100644 blob {hash1}\tfile1',
f'100644 blob {hash2}\tfile2'
)

assert_equal(
expected_tree_lines,
tuple(repo.call_git_items_(["ls-tree", "HEAD"]))
)

assert_equal(
expected_tree_lines,
tuple(repo.call_git_items_(["ls-tree", "-z", "HEAD"], sep="\0"))
)


@with_tree(tree=example_tree)
def test_call_git_call_git_items_identity(temp_dir=None):
# Ensure that git_call() and "".join(call_git_items_(..., keep_ends=True))
# yield the same result and that the result is identical to the file content

repo, hash_keys = _create_test_gitrepo(temp_dir)
args = ["cat-file", "-p"]
for hash_key, content in zip(hash_keys, (file1_content, file2_content)):
r_item = "".join(repo.call_git_items_(args + [hash_key], keep_ends=True))
r_no_item = repo.call_git(args, [hash_key])
assert_equal(r_item, r_no_item)
assert_equal(r_item, content)
5 changes: 4 additions & 1 deletion datalad/downloaders/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,10 @@ def _ask_field_value(self, f, instructions=None):

def _ask_and_set(self, f, instructions=None):
v = self._ask_field_value(f, instructions=instructions)
self.set(**{f: v})
try:
self.set(**{f: v})
except Exception as e:
lgr.error("Failed to record credential field %r: %s", f, CapturedException(e))
return v

def enter_new(self, instructions=None, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion datalad/metadata/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def _search_from_virgin_install(dataset, query):
"Performing search using DataLad superdataset %r",
default_ds.path
)
for res in default_ds.search(query):
for res in default_ds.search(query, result_renderer="disabled"):
yield res
return
else:
Expand Down
23 changes: 22 additions & 1 deletion datalad/runner/nonasyncrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import enum
import logging
import subprocess
import threading
import time
from collections import deque
from collections.abc import Generator
Expand Down Expand Up @@ -115,6 +116,7 @@ def send(self, _):
# state: GeneratorState.process_exited.
if len(self.result_queue) > 0:
return self.result_queue.popleft()
self.runner.generator = None
raise StopIteration(self.return_code)

def throw(self, exception_type, value=None, trace_back=None):
Expand Down Expand Up @@ -237,11 +239,14 @@ def __init__(self,
self.result = None
self.process_removed = False
self.return_code = None
self.generator = None

self.last_touched = dict()
self.active_file_numbers = set()
self.stall_check_interval = 10

self.initialization_lock = threading.Lock()

def _check_result(self):
if self.exception_on_error is True:
if self.return_code != 0:
Expand All @@ -262,6 +267,13 @@ def run(self) -> Union[Any, Generator]:
"""
Run the command as specified in __init__.
This method is not re-entrant. Furthermore, if the protocol is a
subclass of `GeneratorMixIn`, and the generator has not been
exhausted, i.e. it has not raised `StopIteration`, this method should
not be called again. If it is called again before the generator is
exhausted, a `RuntimeError` is raised. In the non-generator case, a
second caller will be suspended until the first caller has returned.
Returns
-------
Any
Expand Down Expand Up @@ -291,6 +303,13 @@ def run(self) -> Union[Any, Generator]:
# get the return code of the process
result = gen.return_code
"""
with self.initialization_lock:
return self._locked_run()

def _locked_run(self) -> Union[Any, Generator]:
if self.generator is not None:
raise RuntimeError("ThreadedRunner.run() was re-entered")

if isinstance(self.stdin, (int, IO, type(None))):
# We will not write anything to stdin. If the caller passed a
# file-like he can write to it from a different thread.
Expand Down Expand Up @@ -346,6 +365,7 @@ def run(self) -> Union[Any, Generator]:
"information."
)
raise

self.process_running = True
self.active_file_numbers.add(None)

Expand Down Expand Up @@ -430,7 +450,8 @@ def run(self) -> Union[Any, Generator]:
self.process_waiting_thread.start()

if issubclass(self.protocol_class, GeneratorMixIn):
return _ResultGenerator(self, self.protocol.result_queue)
self.generator = _ResultGenerator(self, self.protocol.result_queue)
return self.generator

return self.process_loop()

Expand Down

0 comments on commit 05e6cfa

Please sign in to comment.