Skip to content

Commit

Permalink
Merge pull request #120 from jaraco/list-perf
Browse files Browse the repository at this point in the history
Listdir performance optimization - separate class
  • Loading branch information
jaraco committed Apr 16, 2017
2 parents 093eca8 + 4b9e000 commit 3743c20
Show file tree
Hide file tree
Showing 3 changed files with 217 additions and 8 deletions.
12 changes: 12 additions & 0 deletions CHANGES.rst
@@ -1,3 +1,15 @@
10.3
----

- #115: Added a new performance-optimized implementation
for listdir operations, optimizing ``listdir``, ``walk``,
``walkfiles``, ``walkdirs``, and ``fnmatch``, presented
as the ``FastPath`` class.

Please direct feedback on this implementation to the ticket,
especially if the performance benefits justify it replacing
the default ``Path`` class.

10.2
----

Expand Down
186 changes: 186 additions & 0 deletions path.py
Expand Up @@ -1712,3 +1712,189 @@ class CaseInsensitivePattern(text_type):
@property
def normcase(self):
return __import__('ntpath').normcase


class FastPath(Path):
    """
    Performance optimized version of Path for use
    on embedded platforms and other systems with limited
    CPU. See #115 and #116 for background.

    The public ``walk*`` methods normalize the pattern once (see
    ``__prepare``) and hand the prepared pattern and ``normcase``
    function to private recursive helpers, so that the preparation is
    not repeated for every entry visited.

    NOTE(review): the helpers are invoked on child paths through
    name-mangled attributes (``child.__fnmatch`` resolves to
    ``child._FastPath__fnmatch``), which presumes that children produced
    by ``self / name`` are instances of this class or a subclass --
    confirm against ``Path``.
    """

    def listdir(self, pattern=None):
        """
        Return a list of the entries in this directory as path objects.

        `pattern` - optional filename pattern with wildcards, for
            example ``'*.py'``. When given, only entries whose name
            matches are returned. ``None`` returns every entry.
        """
        children = os.listdir(self)
        if pattern is None:
            return [self / child for child in children]

        pattern, normcase = self.__prepare(pattern)
        return [
            self / child
            for child in children
            # Match on the bare name before joining, so the join is only
            # performed for entries that are actually returned.
            if self._next_class(child).__fnmatch(pattern, normcase)
        ]

    def walk(self, pattern=None, errors='strict'):
        """
        Return an iterator over the files and directories found,
        recursively, beneath this directory.

        `pattern` - optional filename pattern; only matching entries
            are yielded, but every directory is still descended into.
        `errors` - what to do when a directory cannot be listed or an
            entry cannot be inspected: ``'strict'`` re-raises the
            exception, ``'warn'`` issues a ``TreeWalkWarning``,
            ``'ignore'`` continues silently. A callable accepting the
            error message may be supplied instead.

        Raises ``ValueError`` if `errors` is neither callable nor one of
        the three policy names.
        """
        def strict(msg):
            # Always invoked from inside an ``except`` block, so a bare
            # ``raise`` re-raises the exception being handled.
            raise

        def warn(msg):
            warnings.warn(msg, TreeWalkWarning)

        def ignore(msg):
            pass

        handlers = dict(strict=strict, warn=warn, ignore=ignore)
        if not callable(errors):
            # Compare against the policy names only; membership in a
            # tuple uses equality, so unhashable values are rejected
            # with ValueError rather than TypeError.
            if errors not in tuple(handlers):
                raise ValueError("invalid errors parameter")
            errors = handlers[errors]

        # Test against None (not truthiness) so that the helper never
        # receives a non-None pattern together with ``normcase=None``.
        if pattern is not None:
            pattern, normcase = self.__prepare(pattern)
        else:
            normcase = None

        return self.__walk(pattern, normcase, errors)

    def __walk(self, pattern, normcase, errors):
        """
        Prepared version of walk.

        `pattern` and `normcase` are the values returned by
        ``__prepare`` (or ``None`` for both); `errors` is a callable
        accepting an error message.
        """
        try:
            children = self.listdir()
        except Exception:
            exc = sys.exc_info()[1]
            errors("Unable to list directory '%s': %s" % (self, exc))
            return

        for child in children:
            if pattern is None or child.__fnmatch(pattern, normcase):
                yield child
            try:
                isdir = child.isdir()
            except Exception:
                exc = sys.exc_info()[1]
                errors("Unable to access '%s': %s" % (child, exc))
                # The handler chose to continue: do not descend.
                isdir = False

            if isdir:
                for item in child.__walk(pattern, normcase, errors):
                    yield item

    def walkdirs(self, pattern=None, errors='strict'):
        """
        Return an iterator over the subdirectories found, recursively,
        beneath this directory.

        `pattern` - optional filename pattern; only matching directories
            are yielded, but every directory is still descended into.
        `errors` - ``'strict'``, ``'warn'`` or ``'ignore'``; controls
            what happens when a directory cannot be listed.

        Raises ``ValueError`` for any other `errors` value.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        if pattern is not None:
            pattern, normcase = self.__prepare(pattern)
        else:
            normcase = None

        return self.__walkdirs(pattern, normcase, errors)

    def __walkdirs(self, pattern, normcase, errors):
        """ Prepared version of walkdirs """
        try:
            dirs = self.dirs()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in dirs:
            if pattern is None or child.__fnmatch(pattern, normcase):
                yield child
            for subsubdir in child.__walkdirs(pattern, normcase, errors):
                yield subsubdir

    def walkfiles(self, pattern=None, errors='strict'):
        """
        Return an iterator over the files found, recursively, beneath
        this directory.

        `pattern` - optional filename pattern; only matching files are
            yielded. Directories are descended into regardless.
        `errors` - ``'strict'``, ``'warn'`` or ``'ignore'``; controls
            what happens when a directory cannot be listed or an entry
            cannot be inspected.

        Raises ``ValueError`` for any other `errors` value.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        if pattern is not None:
            pattern, normcase = self.__prepare(pattern)
        else:
            normcase = None

        return self.__walkfiles(pattern, normcase, errors)

    def __walkfiles(self, pattern, normcase, errors):
        """ Prepared version of walkfiles """
        try:
            children = self.listdir()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in children:
            try:
                isfile = child.isfile()
                # Only pay for the second check when it can matter.
                isdir = not isfile and child.isdir()
            except Exception:
                if errors == 'ignore':
                    continue
                elif errors == 'warn':
                    # Report the entry that failed, not its parent.
                    warnings.warn(
                        "Unable to access '%s': %s"
                        % (child, sys.exc_info()[1]),
                        TreeWalkWarning)
                    continue
                else:
                    raise

            if isfile:
                if pattern is None or child.__fnmatch(pattern, normcase):
                    yield child
            elif isdir:
                for f in child.__walkfiles(pattern, normcase, errors):
                    yield f

    def __fnmatch(self, pattern, normcase):
        """ Return ``True`` if `self.name` matches the given `pattern`,
        prepared version.

        `pattern` - A filename pattern with wildcards,
            for example ``'*.py'``. The pattern is expected to be
            normcase'd already.
        `normcase` - A function used to normalize the filename before
            matching.

        .. seealso:: :func:`Path.fnmatch`
        """
        return fnmatch.fnmatchcase(normcase(self.name), pattern)

    def __prepare(self, pattern, normcase=None):
        """ Prepare a fnmatch pattern for use with ``FastPath.__fnmatch``.

        `pattern` - A filename pattern with wildcards,
            for example ``'*.py'``. If the pattern has a `normcase`
            attribute, that function is used for normalization unless
            `normcase` is passed explicitly.
        `normcase` - (optional) A function used to normalize the pattern
            and filename before matching. Defaults to the pattern's
            `normcase` attribute if present, otherwise to
            ``self.module.normcase``.

        Returns a ``(pattern, normcase)`` tuple in which `pattern` has
        already been normalized.

        .. seealso:: :func:`FastPath.__fnmatch`
        """
        if not normcase:
            normcase = getattr(pattern, 'normcase', self.module.normcase)
        pattern = normcase(pattern)
        return pattern, normcase

    def fnmatch(self, pattern, normcase=None):
        """
        Return ``True`` if `self.name` matches the given `pattern`.

        `pattern` - A filename pattern with wildcards, for example
            ``'*.py'``.
        `normcase` - (optional) A function used to normalize the pattern
            and filename before matching; see ``__prepare`` for the
            default.

        Raises ``ValueError`` if `pattern` is empty or ``None``.
        """
        if not pattern:
            raise ValueError("No pattern provided")

        pattern, normcase = self.__prepare(pattern, normcase)
        return self.__fnmatch(pattern, normcase)
27 changes: 19 additions & 8 deletions test_path.py
Expand Up @@ -31,17 +31,27 @@
import pytest

import path
from path import Path, tempdir
from path import tempdir
from path import CaseInsensitivePattern as ci
from path import SpecialResolver
from path import Multi

Path = None


def p(**choices):
    """
    Select, from the keyword arguments supplied, the value whose
    keyword equals ``os.name`` for the running platform.
    """
    platform = os.name
    return choices[platform]


@pytest.fixture(autouse=True, params=[path.Path, path.FastPath])
def path_class(request, monkeypatch):
    """
    Run each test once per Path implementation by temporarily
    rebinding this module's global ``Path`` name to the class
    selected for the current parametrization.
    """
    module_namespace = globals()
    monkeypatch.setitem(module_namespace, 'Path', request.param)


class TestBasics:
def test_relpath(self):
root = Path(p(nt='C:\\', posix='/'))
Expand Down Expand Up @@ -789,17 +799,17 @@ def test_chdir_or_cd(self, tmpdir):


class TestSubclass:
class PathSubclass(Path):
pass

def test_subclass_produces_same_class(self):
"""
When operations are invoked on a subclass, they should produce another
instance of that subclass.
"""
p = self.PathSubclass('/foo')
class PathSubclass(Path):
pass
p = PathSubclass('/foo')
subdir = p / 'bar'
assert isinstance(subdir, self.PathSubclass)
assert isinstance(subdir, PathSubclass)


class TestTempDir:
Expand All @@ -809,7 +819,7 @@ def test_constructor(self):
One should be able to readily construct a temporary directory
"""
d = tempdir()
assert isinstance(d, Path)
assert isinstance(d, path.Path)
assert d.exists()
assert d.isdir()
d.rmdir()
Expand All @@ -822,7 +832,7 @@ def test_next_class(self):
"""
d = tempdir()
sub = d / 'subdir'
assert isinstance(sub, Path)
assert isinstance(sub, path.Path)
d.rmdir()

def test_context_manager(self):
Expand Down Expand Up @@ -1084,7 +1094,8 @@ def test_for_class(self):
cls = Multi.for_class(Path)
assert issubclass(cls, Path)
assert issubclass(cls, Multi)
assert cls.__name__ == 'MultiPath'
expected_name = 'Multi' + Path.__name__
assert cls.__name__ == expected_name

def test_detect_no_pathsep(self):
"""
Expand Down

0 comments on commit 3743c20

Please sign in to comment.