OPT: group datasets and perform patch.object for always_commit once per dataset

I bet it does not take much time to patch.object, but it still does take time,
so why waste it?
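
For a rough sense of the overhead in question, a standalone sketch that times one patch/unpatch cycle (Repo here is a hypothetical stand-in, not datalad's AnnexRepo; absolute numbers vary by machine):

import timeit
from unittest.mock import patch

class Repo:
    always_commit = True

repo = Repo()

def one_patch_cycle():
    # One context-manager enter/exit: set the attribute, then restore it.
    with patch.object(repo, "always_commit", False):
        pass

# Likely on the order of microseconds per cycle; multiplied by thousands
# of files per dataset it becomes measurable, hence patching once per
# dataset instead of once per file.
print(timeit.timeit(one_patch_cycle, number=100_000))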
yarikoptic committed Sep 3, 2020
1 parent cd315f9 commit 6c7b4a5
Showing 1 changed file with 16 additions and 8 deletions.
datalad/plugin/addurls.py (24 changes: 16 additions & 8 deletions)
@@ -556,16 +556,24 @@ def add_meta(rows):
     """
     from unittest.mock import patch
 
+    # OPT: group by dataset first so to not patch/unpatch always_commit
+    # per each file of which we could have thousands
+    from collections import defaultdict
+    dss_rows = defaultdict(list)
     for row in rows:
-        ds, filename = row["ds"], row["ds_filename"]
+        dss_rows[row["ds"]].append(row)
+
+    for ds, ds_rows in dss_rows.items():
         with patch.object(ds.repo, "always_commit", False):
-            lgr.debug("Adding metadata to %s in %s", filename, ds.path)
-            for a in ds.repo.set_metadata_(filename, add=row["meta_args"]):
-                res = annexjson2result(a, ds, type="file", logger=lgr)
-                # Don't show all added metadata for the file because that
-                # could quickly flood the output.
-                del res["message"]
-                yield res
+            for row in ds_rows:
+                filename = row["ds_filename"]
+                lgr.debug("Adding metadata to %s in %s", filename, ds.path)
+                for a in ds.repo.set_metadata_(filename, add=row["meta_args"]):
+                    res = annexjson2result(a, ds, type="file", logger=lgr)
+                    # Don't show all added metadata for the file because that
+                    # could quickly flood the output.
+                    del res["message"]
+                    yield res
 
 
 @build_doc

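Hoisting the with patch.object(...) block out of the per-row loop preserves behavior because the context manager sets the attribute on entry and restores the original on exit, so every row processed inside the block still sees always_commit as False. A minimal sketch of that restore semantics (again with a stand-in Repo class, not datalad code):

from unittest.mock import patch

class Repo:
    always_commit = True

repo = Repo()
with patch.object(repo, "always_commit", False):
    assert repo.always_commit is False  # patched for all work in the block
assert repo.always_commit is True       # original value restored on exit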