Merge commit '0.16.6-17-g9e147092b' into merge-maint

* commit '0.16.6-17-g9e147092b': (27 commits) Upload coverage from extension tests limit chardet version to <5.0.0 limit chardet version to <5.0.0 implement call_git with call_git_items_ refactor runner.utils tests keep line endings intact as long as possible add a test for call_git_items_ results do not skip file-URL tests on windows adapt tests to modified GitRepo.call_git-semantic Update RST changelog Update CHANGELOG.md BF(workaround): skip test_ria_postclonecfg on OSX for now prevent result rendering in recursive search-call DOC: Finalize adjustment of the docstring for _call_git to be factually correct refactor thread safety tests refactor thread-safety tests remove unnecessary assignment fix exception recording remove debug print, use test's member check use assert_raises from datalad test utils ... Conflicts: .github/workflows/test_extensions.yml - we added custom installation of nose in master and that conflicted with changes in maint datalad/core/distributed/tests/test_clone.py - just a comment conflicted, not clear why git was not smart enough datalad/dataset/tests/test_gitrepo.py - also removed unused subprocess import and adjusted signature of new tests to make kwargs
datalad · Jul 1, 2022 · 05e6cfa · 05e6cfa
2 parents 797dde3 + 9e14709
commit 05e6cfa
Show file tree

Hide file tree

Showing 14 changed files with 413 additions and 105 deletions.
diff --git a/.github/workflows/test_extensions.yml b/.github/workflows/test_extensions.yml
@@ -38,7 +38,8 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
+        pip install -r requirements-devel.txt
+        # we are using pytest here but extensions still tested with nose
         pip install nose
     - name: Install ${{ matrix.extension }} extension
       run: |
@@ -65,3 +66,9 @@ jobs:
         python -m nose -s -v --with-cov --cover-package datalad $(echo ${{ matrix.extension }} | tr '-' '_')
         # TODO: later whenever some extensions would migrate to pytest -- use pytest
         # python -m pytest -c ../tox.ini -s -v --cov=datalad --pyargs $(echo ${{ matrix.extension }} | tr '-' '_')
+        python -m coverage xml
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v3
+      with:
+        directory: __testhome__
+        fail_ci_if_error: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,19 @@
+# 0.16.6 (Tue Jun 14 2022)
+
+#### 🐛 Bug Fix
+
+- Prevent duplicated result rendering when searching in default datasets [#6765](https://github.com/datalad/datalad/pull/6765) ([@christian-monch](https://github.com/christian-monch))
+- BF(workaround): skip test_ria_postclonecfg on OSX for now ([@yarikoptic](https://github.com/yarikoptic))
+- BF(workaround to #6759): if saving credential failed, just log error and continue [#6762](https://github.com/datalad/datalad/pull/6762) ([@yarikoptic](https://github.com/yarikoptic))
+- Prevent reentry of a runner instance [#6737](https://github.com/datalad/datalad/pull/6737) ([@christian-monch](https://github.com/christian-monch))
+
+#### Authors: 2
+
+- Christian Mönch ([@christian-monch](https://github.com/christian-monch))
+- Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic))
+
+---
+
 # 0.16.5 (Wed Jun 08 2022)
 
 #### 🐛 Bug Fix

diff --git a/datalad/dataset/gitrepo.py b/datalad/dataset/gitrepo.py
@@ -282,7 +282,7 @@ def _generator_call_git(self,
         ----------
         sep : str, optional
           Use `sep` as line separator. Does not create an empty last line if
-          the input ends on sep.
+          the input ends on sep. The lines contain the separator, if it exists.
 
         All other parameters match those described for `call_git`.
 
@@ -338,14 +338,14 @@ def pipe_data_received(self, fd, data):
                 env=env)
 
         line_splitter = {
-            STDOUT_FILENO: LineSplitter(sep),
-            STDERR_FILENO: LineSplitter(sep)
+            STDOUT_FILENO: LineSplitter(sep, keep_ends=True),
+            STDERR_FILENO: LineSplitter(sep, keep_ends=True)
         }
 
         for file_no, content in generator:
             if file_no in (STDOUT_FILENO, STDERR_FILENO):
                 for line in line_splitter[file_no].process(content):
-                    yield file_no, line + "\n"
+                    yield file_no, line
             else:
                 raise ValueError(f"unknown file number: {file_no}")
 
@@ -364,9 +364,10 @@ def _call_git(self,
         """Allows for calling arbitrary commands.
 
         Internal helper to the call_git*() methods.
-
+        Unlike call_git, _call_git returns both stdout and stderr.
         The parameters, return value, and raised exceptions match those
-        documented for `call_git`.
+        documented for `call_git`, with the exception of env, which allows to
+        specify the custom environment (variables) to be used.
         """
         runner = self._git_runner
         stderr_log_level = {True: 5, False: 11}[expect_stderr]
@@ -429,12 +430,13 @@ def call_git(self, args, files=None,
         ------
         CommandError if the call exits with a non-zero status.
         """
-        return "\n".join(
+        return "".join(
             self.call_git_items_(args,
                                  files,
                                  expect_stderr=expect_stderr,
                                  expect_fail=expect_fail,
-                                 read_only=read_only))
+                                 read_only=read_only,
+                                 keep_ends=True))
 
     def call_git_items_(self,
                         args,
@@ -443,7 +445,8 @@ def call_git_items_(self,
                         expect_fail=False,
                         env=None,
                         read_only=False,
-                        sep=None):
+                        sep=None,
+                        keep_ends=False):
         """
         Call git, yield output lines when available. Output lines are split
         at line ends or `sep` if `sep` is not None.
@@ -458,7 +461,20 @@ def call_git_items_(self,
 
         Returns
         -------
-        Generator that yields stdout items.
+        Generator that yields stdout items, i.e. lines with the line ending or
+        separator removed.
+
+        Please note, this method is meant to be used to process output that is
+        meant for 'interactive' interpretation. It is not intended to return
+        stdout from a command like "git cat-file". The reason is that
+        it strips of the line endings (or separator) from the result lines,
+        unless 'keep_ends' is True. If 'keep_ends' is False, you will not know
+        which line ending was stripped (if 'separator' is None) or whether a
+        line ending (or separator) was stripped at all, because the last line
+        may not have a line ending (or separator).
+
+        If you want to reliably recreate the output set 'keep_ends' to True and
+        "".join() the result, or use 'GitRepo.call_git()' instead.
 
         Raises
         ------
@@ -481,7 +497,13 @@ def call_git_items_(self,
                                                     env=env,
                                                     sep=sep):
                 if file_no == STDOUT_FILENO:
-                    yield line.rstrip("\n")
+                    if keep_ends is True:
+                        yield line
+                    else:
+                        if sep:
+                            yield line.rstrip(sep)
+                        else:
+                            yield line.rstrip()
                 else:
                     stderr_lines.append(line)
 

diff --git a/datalad/dataset/tests/test_gitrepo.py b/datalad/dataset/tests/test_gitrepo.py
@@ -320,3 +320,59 @@ def test_get_dot_git(emptycase=None, gitdircase=None, barecase=None, gitfilecase
 
         eq_(_get_dot_git(gitfilecase, resolved=r),
             (gitfilecase.resolve() if r else gitfilecase) / 'subdir')
+
+
+file1_content = "file1 content\n"
+file2_content = "file2 content\0"
+example_tree = {
+    "file1": file1_content,
+    "file2": file2_content
+}
+
+
+def _create_test_gitrepo(temp_dir):
+    repo = GitRepo(temp_dir)
+    repo.init()
+    repo.call_git(["add", "."])
+    repo.call_git(["commit", "-m", "test commit"])
+
+    hash_keys = tuple(
+        repo.call_git(["hash-object", file_name]).strip()
+        for file_name in ("file1", "file2")
+    )
+    return repo, hash_keys
+
+
+@with_tree(tree=example_tree)
+def test_call_git_items(temp_dir=None):
+    # check proper handling of separator in call_git_items_
+    repo, (hash1, hash2) = _create_test_gitrepo(temp_dir)
+
+    expected_tree_lines = (
+        f'100644 blob {hash1}\tfile1',
+        f'100644 blob {hash2}\tfile2'
+    )
+
+    assert_equal(
+        expected_tree_lines,
+        tuple(repo.call_git_items_(["ls-tree", "HEAD"]))
+    )
+
+    assert_equal(
+        expected_tree_lines,
+        tuple(repo.call_git_items_(["ls-tree", "-z", "HEAD"], sep="\0"))
+    )
+
+
+@with_tree(tree=example_tree)
+def test_call_git_call_git_items_identity(temp_dir=None):
+    # Ensure that git_call() and "".join(call_git_items_(..., keep_ends=True))
+    # yield the same result and that the result is identical to the file content
+
+    repo, hash_keys = _create_test_gitrepo(temp_dir)
+    args = ["cat-file", "-p"]
+    for hash_key, content in zip(hash_keys, (file1_content, file2_content)):
+        r_item = "".join(repo.call_git_items_(args + [hash_key], keep_ends=True))
+        r_no_item = repo.call_git(args, [hash_key])
+        assert_equal(r_item, r_no_item)
+        assert_equal(r_item, content)
diff --git a/datalad/downloaders/credentials.py b/datalad/downloaders/credentials.py
@@ -140,7 +140,10 @@ def _ask_field_value(self, f, instructions=None):
 
     def _ask_and_set(self, f, instructions=None):
         v = self._ask_field_value(f, instructions=instructions)
-        self.set(**{f: v})
+        try:
+            self.set(**{f: v})
+        except Exception as e:
+            lgr.error("Failed to record credential field %r: %s", f, CapturedException(e))
         return v
 
     def enter_new(self, instructions=None, **kwargs):

diff --git a/datalad/metadata/search.py b/datalad/metadata/search.py
@@ -301,7 +301,7 @@ def _search_from_virgin_install(dataset, query):
             "Performing search using DataLad superdataset %r",
             default_ds.path
         )
-        for res in default_ds.search(query):
+        for res in default_ds.search(query, result_renderer="disabled"):
             yield res
         return
     else:

diff --git a/datalad/runner/nonasyncrunner.py b/datalad/runner/nonasyncrunner.py
@@ -13,6 +13,7 @@
 import enum
 import logging
 import subprocess
+import threading
 import time
 from collections import deque
 from collections.abc import Generator
@@ -115,6 +116,7 @@ def send(self, _):
             # state: GeneratorState.process_exited.
             if len(self.result_queue) > 0:
                 return self.result_queue.popleft()
+            self.runner.generator = None
             raise StopIteration(self.return_code)
 
     def throw(self, exception_type, value=None, trace_back=None):
@@ -237,11 +239,14 @@ def __init__(self,
         self.result = None
         self.process_removed = False
         self.return_code = None
+        self.generator = None
 
         self.last_touched = dict()
         self.active_file_numbers = set()
         self.stall_check_interval = 10
 
+        self.initialization_lock = threading.Lock()
+
     def _check_result(self):
         if self.exception_on_error is True:
             if self.return_code != 0:
@@ -262,6 +267,13 @@ def run(self) -> Union[Any, Generator]:
         """
         Run the command as specified in __init__.
 
+        This method is not re-entrant. Furthermore, if the protocol is a
+        subclass of `GeneratorMixIn`, and the generator has not been
+        exhausted, i.e. it has not raised `StopIteration`, this method should
+        not be called again. If it is called again before the generator is
+        exhausted, a `RuntimeError` is raised. In the non-generator case, a
+        second caller will be suspended until the first caller has returned.
+
         Returns
         -------
         Any
@@ -291,6 +303,13 @@ def run(self) -> Union[Any, Generator]:
                # get the return code of the process
                result = gen.return_code
         """
+        with self.initialization_lock:
+            return self._locked_run()
+
+    def _locked_run(self) -> Union[Any, Generator]:
+        if self.generator is not None:
+            raise RuntimeError("ThreadedRunner.run() was re-entered")
+
         if isinstance(self.stdin, (int, IO, type(None))):
             # We will not write anything to stdin. If the caller passed a
             # file-like he can write to it from a different thread.
@@ -346,6 +365,7 @@ def run(self) -> Union[Any, Generator]:
                     "information."
                 )
             raise
+
         self.process_running = True
         self.active_file_numbers.add(None)
 
@@ -430,7 +450,8 @@ def run(self) -> Union[Any, Generator]:
         self.process_waiting_thread.start()
 
         if issubclass(self.protocol_class, GeneratorMixIn):
-            return _ResultGenerator(self, self.protocol.result_queue)
+            self.generator = _ResultGenerator(self, self.protocol.result_queue)
+            return self.generator
 
         return self.process_loop()