Skip to content

Commit

Permalink
GH-114847: Speed up `posixpath.realpath()`
Browse files: browse the repository at this point in the history
Apply the following optimizations to `posixpath.realpath()`:

- Remove use of recursion
- Directly construct child paths rather than using `join()`
- Use `os.getcwd()` / `os.getcwdb()` rather than `abspath()`
- Use `startswith(sep)` rather than `isabs()`
  • Loading branch information
barneygale committed Feb 1, 2024
1 parent 282e44f commit dfe0320
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 35 deletions.
83 changes: 48 additions & 35 deletions Lib/posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,49 +432,46 @@ def realpath(filename, *, strict=False):
"""Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
filename = os.fspath(filename)
path, ok = _joinrealpath(filename[:0], filename, strict, {})
return abspath(path)

# Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path.
def _joinrealpath(path, rest, strict, seen):
if isinstance(path, bytes):
if isinstance(filename, bytes):
sep = b'/'
curdir = b'.'
pardir = b'..'
getcwd = os.getcwdb
else:
sep = '/'
curdir = '.'
pardir = '..'

if isabs(rest):
rest = rest[1:]
path = sep

while rest:
name, _, rest = rest.partition(sep)
getcwd = os.getcwd

seen = {}
stack = []
querying = True
path = sep if filename.startswith(sep) else getcwd()
for part in reversed(filename.split(sep)):
stack.append((False, part))

while stack:
is_symlink, name = stack.pop()
if is_symlink:
# resolved symlink
seen[name] = path
continue
if not name or name == curdir:
# current dir
continue
if name == pardir:
# parent dir
if path:
path, name = split(path)
if name == pardir:
path = join(path, pardir, pardir)
newpath, name = split(path)
if name == pardir:
path = path + sep + pardir
else:
path = pardir
path = newpath
continue
newpath = join(path, name)
try:
st = os.lstat(newpath)
except OSError:
if strict:
raise
is_link = False
if len(path) == 1:
newpath = path + name
else:
is_link = stat.S_ISLNK(st.st_mode)
if not is_link:
newpath = path + sep + name
if not querying:
path = newpath
continue
# Resolve the symbolic link
Expand All @@ -490,14 +487,30 @@ def _joinrealpath(path, rest, strict, seen):
os.stat(newpath)
else:
# Return already resolved part + rest of the path unchanged.
return join(newpath, rest), False
seen[newpath] = None # not resolved symlink
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
if not ok:
return join(path, rest), False
seen[newpath] = path # resolved symlink
path = newpath
querying = False
continue
try:
st = os.lstat(newpath)
if not stat.S_ISLNK(st.st_mode):
path = newpath
continue
target = os.readlink(newpath)
except OSError:
if strict:
raise
else:
path = newpath
querying = False
continue

return path, True
seen[newpath] = None # not resolved symlink
if target.startswith(sep):
path = sep
stack.append((True, newpath))
for part in reversed(target.split(sep)):
stack.append((False, part))
return path


supports_unicode_filenames = (sys.platform == 'darwin')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up :func:`os.path.realpath` on non-Windows platforms.

0 comments on commit dfe0320

Please sign in to comment.