Skip to content

Commit

Permalink
OPT: do not sort, maintain a set of prior hits
Browse files Browse the repository at this point in the history
Seems to provide some (~5%) performance benefit

x     4.57±0.02ms       4.19±0.1ms     0.92  paths.get_parent_paths.time_allsubmods_toplevel [hopa/virtualenv-py2.7]
x     5.52±0.05ms      5.07±0.06ms     0.92  paths.get_parent_paths.time_allsubmods_toplevel [hopa/virtualenv-py3.7]
x     3.85±0.06ms      3.79±0.04ms     0.98  paths.get_parent_paths.time_allsubmods_toplevel_only [hopa/virtualenv-py2.7]
x     4.82±0.03ms      4.64±0.03ms     0.96  paths.get_parent_paths.time_allsubmods_toplevel_only [hopa/virtualenv-py3.7]
x         257±3ns          258±5ns     1.00  paths.get_parent_paths.time_no_submods [hopa/virtualenv-py2.7]
x         243±1ns          250±5ns     1.03  paths.get_parent_paths.time_no_submods [hopa/virtualenv-py3.7]
x     3.33±0.04ms      3.20±0.01ms     0.96  paths.get_parent_paths.time_one_submod_subdir [hopa/virtualenv-py2.7]
x     4.11±0.04ms      4.07±0.02ms     0.99  paths.get_parent_paths.time_one_submod_subdir [hopa/virtualenv-py3.7]
x     3.36±0.04ms      3.18±0.01ms     0.95  paths.get_parent_paths.time_one_submod_toplevel [hopa/virtualenv-py2.7]
x      4.19±0.2ms      4.04±0.03ms     0.96  paths.get_parent_paths.time_one_submod_toplevel [hopa/virtualenv-py3.7]
  • Loading branch information
yarikoptic authored and kyleam committed Jul 31, 2019
1 parent ba12ba9 commit 2ac968e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
2 changes: 1 addition & 1 deletion benchmarks/support/path.py
Expand Up @@ -14,7 +14,7 @@ def setup(self):
# and some hierarchy of submodules
self.nfiles = 40 # per each construct
self.nsubmod = 30 # at two levels
self.toplevel_submods = sorted(['submod%d' % i for i in range(self.nsubmod)])
self.toplevel_submods = ['submod%d' % i for i in range(self.nsubmod)]
self.posixpaths = \
['file%d' % i for i in range(self.nfiles)] + \
['subdir/anotherfile%d' % i for i in range(self.nfiles)]
Expand Down
19 changes: 12 additions & 7 deletions datalad/support/path.py
Expand Up @@ -145,9 +145,9 @@ def get_parent_paths(paths, parents, only_with_parents=False):
Returns
-------
A sorted list of paths where some entries replaced with their "parents"
without duplicates. So for 'a/b' and 'a/c' with a being among parents, there
will be a single 'a'
A list of paths (without duplicates), where some entries are replaced with
their "parents". So for 'a/b' and 'a/c' with 'a' being
among parents, there will be a single 'a'
"""
# Let's do an early check even though then we would skip the checks on paths
# being relative etc
Expand All @@ -173,7 +173,8 @@ def get_parent_paths(paths, parents, only_with_parents=False):
# Could be an ordered dict but no need
parent_lengths = [(l, parent_lengths[l]) for l in sorted(parent_lengths, reverse=True)]

res = set()
res = []
seen = set()

for path in paths: # O(len(paths)) - unavoidable but could be parallelized!
# Sanity check -- should not be too expensive
Expand All @@ -183,13 +184,17 @@ def get_parent_paths(paths, parents, only_with_parents=False):
continue # no directory deep enough
candidate_parent = path[:parent_length]
if candidate_parent in parents_: # O(log(len(parents))) but expected one less due to per length handling
res.add(candidate_parent)
if candidate_parent not in seen:
res.append(candidate_parent)
seen.add(candidate_parent)
break # it is!
else: # no hits
if not only_with_parents:
res.add(path)
if path not in seen:
res.append(path)
seen.add(path)

return sorted(res) # TODO: keep it as set? should we retain original order?
return res


def _get_parent_paths_check(path):
Expand Down

0 comments on commit 2ac968e

Please sign in to comment.