Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

addurls: Fixes to subpath handling #3561

Merged
merged 3 commits into from Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 20 additions & 2 deletions datalad/plugin/addurls.py
Expand Up @@ -176,7 +176,7 @@ def get_subpaths(filename):

spaths = []
for part in filename.split("//")[:-1]:
path = os.path.join(*(spaths + [part]))
path = os.path.join(spaths[-1], part) if spaths else part
spaths.append(path)
return filename.replace("//", os.path.sep), spaths

Expand Down Expand Up @@ -373,6 +373,24 @@ def add_extra_filename_values(filename_format, rows, urls, dry_run):
"Finished requesting file names")


def sort_paths(paths):
"""Sort `paths` by directory level and then alphabetically.

Parameters
----------
paths : iterable of str

Returns
-------
Generator of sorted paths.
"""
def level_and_name(p):
return p.count(os.path.sep), p

for path in sorted(paths, key=level_and_name):
yield path


def extract(stream, input_type, url_format="{0}", filename_format="{1}",
exclude_autometa=None, meta=None,
dry_run=False, missing_value=None):
Expand Down Expand Up @@ -440,7 +458,7 @@ def extract(stream, input_type, url_format="{0}", filename_format="{1}",
RepFormatter(colidx_to_name, missing_value).format,
filename_format)
subpaths = _format_filenames(format_filename, rows_with_url, infos)
return infos, subpaths
return infos, list(sort_paths(subpaths))


@with_result_progress("Adding URLs")
Expand Down
52 changes: 45 additions & 7 deletions datalad/plugin/tests/test_addurls.py
Expand Up @@ -12,6 +12,7 @@
import json
import logging
import os
import os.path as op
import tempfile

from mock import patch
Expand Down Expand Up @@ -117,15 +118,36 @@ def test_clean_meta_args():


def test_get_subpaths():
for fname, expect in [("no/dbl/slash", ("no/dbl/slash", [])),
("p1//n", ("p1/n", ["p1"])),
("p1//p2/p3//n", ("p1/p2/p3/n",
["p1", "p1/p2/p3"])),
("//n", ("/n", [""])),
("n//", ("n/", ["n"]))]:
for fname, expect in [
(op.join("no", "dbl", "slash"),
(op.join("no", "dbl", "slash"), [])),
("p1//n",
(op.join("p1", "n"), ["p1"])),
(op.join("p1//p2", "p3//n"),
(op.join("p1", "p2", "p3", "n"),
["p1", op.join("p1", "p2", "p3")])),
(op.join("p1//p2", "p3//p4", "p5//", "n"),
(op.join("p1", "p2", "p3", "p4", "p5", "n"),
["p1",
op.join("p1", "p2", "p3"),
op.join("p1", "p2", "p3", "p4", "p5")])),
("//n", (op.sep + "n", [""])),
("n//", ("n" + op.sep, ["n"]))]:
eq_(au.get_subpaths(fname), expect)


def test_sort_paths():
paths = [op.join("x", "a", "b"),
"z",
op.join("y", "b"),
op.join("y", "a")]
expected = ["z",
op.join("y", "a"),
op.join("y", "b"),
op.join("x", "a", "b")]
eq_(list(au.sort_paths(paths)), expected)


def test_is_legal_metafield():
for legal in ["legal", "0", "legal_"]:
assert_true(au.is_legal_metafield(legal))
Expand Down Expand Up @@ -213,7 +235,7 @@ def test_extract():
filename_format="{age_group}//{now_dead}//{name}.csv")

eq_(subpaths,
{"kid", "kid/no", "adult", "adult/yes", "adult/no"})
["adult", "kid", "adult/no", "adult/yes", "kid/no"])

eq_([d["url"] for d in info],
["will_1.com", "bob_2.com", "scott_1.com", "max_2.com"])
Expand Down Expand Up @@ -536,3 +558,19 @@ def version_fn(url):
for fname, info in whereis.items():
eq_(info[ds.repo.WEB_UUID]['urls'],
["{}udir/{}.dat.v1".format(self.url, fname)])

@with_tempfile(mkdir=True)
def test_addurls_deeper(self, path):
ds = Dataset(path).create(force=True)
ds.addurls(
self.json_file, "{url}",
"{subdir}//adir/{subdir}-again//other-ds//bdir/{name}")
eq_(set(ds.subdatasets(recursive=True, result_xfm="relpaths")),
{"foo",
"bar",
op.join("foo", "adir", "foo-again"),
op.join("bar", "adir", "bar-again"),
op.join("foo", "adir", "foo-again", "other-ds"),
op.join("bar", "adir", "bar-again", "other-ds")})
ok_exists(os.path.join(
ds.path, "foo", "adir", "foo-again", "other-ds", "bdir", "a"))