From 093f8a0f78d7f7fd20dae8f4739371cc75bd8d70 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Dec 2018 11:59:10 -0500 Subject: [PATCH 1/4] RF: uniform and consistent open(wb) for create_tree across pythons --- datalad/utils.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/datalad/utils.py b/datalad/utils.py index 0978b5739c..c2e9aba99b 100644 --- a/datalad/utils.py +++ b/datalad/utils.py @@ -2132,15 +2132,8 @@ def create_tree(path, tree, archives_leading_dir=True, remove_existing=False): archives_leading_dir=archives_leading_dir, remove_existing=remove_existing) else: - if PY2: - open_kwargs = {'mode': "w"} - if isinstance(load, text_type): - load = load.encode('utf-8') - else: - open_kwargs = {'mode': "w", 'encoding': "utf-8"} - - with open(full_name, **open_kwargs) as f: - f.write(load) + with open_func(full_name, "wb") as f: + f.write(assure_bytes(load, 'utf-8')) if executable: os.chmod(full_name, os.stat(full_name).st_mode | stat.S_IEXEC) From fd4753244a42b01275a5872e88a06a54b5363045 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Dec 2018 11:53:22 -0500 Subject: [PATCH 2/4] ENH: support pure .gz (not .tar.gz) files for create_tree and ok_file_has_content --- datalad/tests/test_utils.py | 24 ++++++++++++++++++++++-- datalad/tests/utils.py | 24 ++++++++++++++++++------ datalad/utils.py | 4 ++++ 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/datalad/tests/test_utils.py b/datalad/tests/test_utils.py index dbe287a8c7..66760b85ca 100644 --- a/datalad/tests/test_utils.py +++ b/datalad/tests/test_utils.py @@ -63,6 +63,7 @@ from ..utils import map_items from ..utils import unlink from ..utils import CMD_MAX_ARG +from ..utils import create_tree from ..support.annexrepo import AnnexRepo from nose.tools import ( @@ -91,7 +92,8 @@ from .utils import skip_if_no_module from .utils import ( probe_known_failure, skip_known_failure, known_failure, known_failure_v6, - known_failure_direct_mode, skip_if 
+ known_failure_direct_mode, skip_if, + ok_file_has_content ) @@ -1186,4 +1188,22 @@ def test_CMD_MAX_ARG(): # 100 is arbitrarily large small integer ;) # if fails -- we are unlikely to be able to work on this system # and something went really wrong! - assert_greater(CMD_MAX_ARG, 100) \ No newline at end of file + assert_greater(CMD_MAX_ARG, 100) + + +@with_tempfile(mkdir=True) +def test_create_tree(path): + content = u"мама мыла раму" + create_tree(path, OrderedDict([ + ('1', content), + ('sd', OrderedDict( + [ + # right away an obscure case where we have both 1 and 1.gz + ('1', content*2), + ('1.gz', content*3), + ] + )), + ])) + ok_file_has_content(op.join(path, '1'), content) + ok_file_has_content(op.join(path, 'sd', '1'), content*2) + ok_file_has_content(op.join(path, 'sd', '1.gz'), content*3, decompress=True) \ No newline at end of file diff --git a/datalad/tests/utils.py b/datalad/tests/utils.py index 316c43f7b9..6d6a5a013e 100644 --- a/datalad/tests/utils.py +++ b/datalad/tests/utils.py @@ -9,6 +9,7 @@ """Miscellaneous utilities to assist with testing""" import glob +import gzip import inspect import shutil import stat @@ -391,19 +392,30 @@ def ok_exists(path): assert exists(path), 'path %s does not exist' % path -def ok_file_has_content(path, content, strip=False, re_=False, **kwargs): +def ok_file_has_content(path, content, strip=False, re_=False, + decompress=False, **kwargs): """Verify that file exists and has expected content""" ok_exists(path) - with open(path, 'r') as f: - content_ = f.read() + if decompress: + if path.endswith('.gz'): + open_func = gzip.open + else: + raise NotImplementedError("Don't know how to decompress %s" % path) + else: + open_func = open + + with open_func(path, 'rb') as f: + file_content = f.read() + if isinstance(content, text_type): + file_content = assure_unicode(file_content) if strip: - content_ = content_.strip() + file_content = file_content.strip() if re_: - assert_re_in(content, content_, **kwargs) + 
assert_re_in(content, file_content, **kwargs) else: - assert_equal(content, content_, **kwargs) + assert_equal(content, file_content, **kwargs) # diff --git a/datalad/utils.py b/datalad/utils.py index c2e9aba99b..2aa396928a 100644 --- a/datalad/utils.py +++ b/datalad/utils.py @@ -21,6 +21,7 @@ import platform import gc import glob +import gzip import string import wrapt @@ -2132,6 +2133,9 @@ def create_tree(path, tree, archives_leading_dir=True, remove_existing=False): archives_leading_dir=archives_leading_dir, remove_existing=remove_existing) else: + open_func = open + if full_name.endswith('.gz'): + open_func = gzip.open with open_func(full_name, "wb") as f: f.write(assure_bytes(load, 'utf-8')) if executable: From f4e98dbe260b3735abc7f088439262d74bde0f12 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 4 Dec 2018 09:24:58 -0500 Subject: [PATCH 3/4] BF: leave the scope of "open" right after reading On windows, if any of the subsequent tests fail, harness would fail to remove temporary directory since it would still be "busy". 
There is no need to stay within the open() context. --- datalad/tests/utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/datalad/tests/utils.py b/datalad/tests/utils.py index 6d6a5a013e..cbab003a19 100644 --- a/datalad/tests/utils.py +++ b/datalad/tests/utils.py @@ -406,16 +406,17 @@ def ok_file_has_content(path, content, strip=False, re_=False, with open_func(path, 'rb') as f: file_content = f.read() - if isinstance(content, text_type): - file_content = assure_unicode(file_content) - if strip: - file_content = file_content.strip() + if isinstance(content, text_type): + file_content = assure_unicode(file_content) - if re_: - assert_re_in(content, file_content, **kwargs) - else: - assert_equal(content, file_content, **kwargs) + if strip: + file_content = file_content.strip() + + if re_: + assert_re_in(content, file_content, **kwargs) + else: + assert_equal(content, file_content, **kwargs) # From d110255eb4136605edeb989a5edfcc889fb20abb Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 4 Dec 2018 09:58:50 -0500 Subject: [PATCH 4/4] BF(workaround): manually replace os.linesep with \n upon "rb" in ok_file_has_content --- datalad/tests/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datalad/tests/utils.py b/datalad/tests/utils.py index cbab003a19..8c0c40a787 100644 --- a/datalad/tests/utils.py +++ b/datalad/tests/utils.py @@ -410,6 +410,12 @@ def ok_file_has_content(path, content, strip=False, re_=False, if isinstance(content, text_type): file_content = assure_unicode(file_content) + if os.linesep != '\n': + # for consistent comparisons etc. Apparently when reading in `b` mode + # on Windows we would also get \r + # https://github.com/datalad/datalad/pull/3049#issuecomment-444128715 + file_content = file_content.replace(os.linesep, '\n') + if strip: file_content = file_content.strip()