Skip to content

Commit

Permalink
Fix / Handle long paths on Windows (Python) (#433)
Browse files Browse the repository at this point in the history
* Add a test case to make sure the Shim loader can handle long paths on Windows

* Include tests for loading resources in the shim loader tests

* Add a unit test for the model loader where the checkout dir has a long path

* Add unit tests for the Git repos checking out into directories with long paths

* Add unit tests for the PyPi repos checking out into directories with long paths

* Add repo tests for the local repo implementation

* Fix handling of Windows long paths for Git and PyPi repos

* Fix try_clean_dir() in utils module and add windows_unc_path()

* Fix handling long paths on Windows in the model loader

* Update repos test after changes in utils module

* Fix one Python warning

* Add test cases for long paths in the file storage test suite

* Use UNC paths for local storage on Windows

* Clean up long file names in local storage tests

* On Windows, sanitise UNC root paths for logging

* Tidy up some test naming
  • Loading branch information
martin-traverse committed Jul 11, 2024
1 parent 890d8a1 commit 11686e9
Show file tree
Hide file tree
Showing 13 changed files with 408 additions and 68 deletions.
19 changes: 12 additions & 7 deletions tracdap-runtime/python/src/tracdap/rt/_impl/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,24 @@ def __init__(self, sys_config: _cfg.RuntimeConfig, scratch_dir: pathlib.Path):
self.__log = _util.logger_for_object(self)

self.__scratch_dir = scratch_dir.joinpath("models")
self.__scratch_dir.mkdir(exist_ok=True, parents=False, mode=0o750)

self.__repos = _repos.RepositoryManager(sys_config)
self.__scopes: tp.Dict[str, ModelLoader._ScopeState] = dict()

safe_scratch_dir = _util.windows_unc_path(self.__scratch_dir)
safe_scratch_dir.mkdir(exist_ok=True, parents=False, mode=0o750)

def create_scope(self, scope: str):

try:

self.__log.info(f"Creating model scope [{scope}]")

scope_scratch_dir = self.__scratch_dir.joinpath(scope)
scope_scratch_dir.mkdir(exist_ok=False, parents=False, mode=0o750)
scope_dir = self.__scratch_dir.joinpath(scope)

scope_state = ModelLoader._ScopeState(scope_scratch_dir)
safe_scope_dir = _util.windows_unc_path(scope_dir)
safe_scope_dir.mkdir(exist_ok=False, parents=False, mode=0o750)

scope_state = ModelLoader._ScopeState(scope_dir)
self.__scopes[scope] = scope_state

except FileExistsError as e:
Expand Down Expand Up @@ -152,7 +155,9 @@ def load_model_class(self, scope: str, model_def: _meta.ModelDefinition) -> _api
# What gets cached is the checkout, which may contain multiple packages depending on the repo type

else:
checkout_dir.mkdir(mode=0o750, parents=True, exist_ok=False)
safe_checkout_dir = _util.windows_unc_path(checkout_dir)
safe_checkout_dir.mkdir(mode=0o750, parents=True, exist_ok=False)

package_dir = repo.do_checkout(model_def, checkout_dir)

scope_state.code_cache[code_cache_key] = checkout_dir
Expand Down Expand Up @@ -189,7 +194,7 @@ def scan_model(self, model_stub: _meta.ModelDefinition, model_class: _api.TracMo

try:

model: _api.TracModel = object.__new__(model_class)
model: _api.TracModel = _api.TracModel.__new__(model_class)
model_class.__init__(model)

attributes = model.define_attributes()
Expand Down
4 changes: 4 additions & 0 deletions tracdap-runtime/python/src/tracdap/rt/_impl/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ def __init__(self, storage_key: str, storage_config: _cfg.PluginConfig, fs: pa_f
fs_impl = "arrow"
fs_root = fs.base_path

# On Windows, sanitise UNC root paths for logging
if _util.is_windows() and fs_root.startswith("//?/"):
fs_root = fs_root[4:]

# If this is an FSSpec implementation, take the protocol from FSSpec as the FS type
base_fs = fs.base_fs
if isinstance(base_fs, pa_fs.PyFileSystem):
Expand Down
32 changes: 20 additions & 12 deletions tracdap-runtime/python/src/tracdap/rt/_impl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,27 +264,25 @@ def get_args(metaclass: type):

def try_clean_dir(dir_path: pathlib.Path, remove: bool = False) -> bool:

normalized_path = windows_unc_path(dir_path)

return __try_clean_dir(normalized_path, remove)


def __try_clean_dir(normalized_path, remove):

clean_ok = True
normalized_path = dir_path.resolve()

for item in normalized_path.iterdir():

if item.is_dir():
clean_ok &= try_clean_dir(item, remove=True)
clean_ok &= __try_clean_dir(item, remove=True)

else:
try:
# Windows MAX_PATH = 260 characters, including the drive letter and terminating nul character
# In Python the path string does not include a nul, so we need to limit to 259 characters
if is_windows() and len(str(item)) >= 259 and not str(item).startswith("\\\\?\\"):
unc_item = pathlib.Path("\\\\?\\" + str(item))
unc_item.unlink()
return True
else:
item.unlink()
return True
item.unlink()
except Exception as e: # noqa
return False
clean_ok = False

if remove:
try:
Expand All @@ -294,6 +292,16 @@ def try_clean_dir(dir_path: pathlib.Path, remove: bool = False) -> bool:
return False


def windows_unc_path(path: pathlib.Path) -> pathlib.Path:

# Convert a path to its UNC form on Windows

if is_windows() and not str(path).startswith("\\\\?\\"):
return pathlib.Path("\\\\?\\" + str(path.resolve()))
else:
return path


def error_details_from_trace(trace: tb.StackSummary):
last_frame = trace[len(trace) - 1]
filename = pathlib.PurePath(last_frame.filename).name
Expand Down
11 changes: 11 additions & 0 deletions tracdap-runtime/python/src/tracdap/rt/_plugins/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# And we don't want to put them .ext, those are public APIs that need to be maintained

import logging
import pathlib
import platform
import urllib.parse
import typing as tp
Expand Down Expand Up @@ -180,6 +181,16 @@ def is_windows():
return __IS_WINDOWS


def windows_unc_path(path: pathlib.Path) -> pathlib.Path:

# Convert a path to its UNC form on Windows

if is_windows() and not str(path).startswith("\\\\?\\"):
return pathlib.Path("\\\\?\\" + str(path.resolve()))
else:
return path


def logger_for_object(obj: object) -> logging.Logger:
return logger_for_class(obj.__class__)

Expand Down
8 changes: 7 additions & 1 deletion tracdap-runtime/python/src/tracdap/rt/_plugins/repo_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def _do_native_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pat

self._log.info(f"Checkout mechanism: [native]")

# Using windows_safe_path() to create UNC paths does not always work with Windows native Git
# So, use the regular checkout_dir, and set core.longpaths = true once the repo is created
# This will fail if the path for the repo config file exceeds the Windows MAX_PATH length
# I.e. checkout_dir/.git/config

git_cli = ["git", "-C", str(checkout_dir)]

git_cmds = [
Expand Down Expand Up @@ -169,7 +174,8 @@ def _do_python_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pat

self._log.info("=> git init")

repo = git_repo.Repo.init(str(checkout_dir))
safe_checkout_dir = _helpers.windows_unc_path(checkout_dir)
repo = git_repo.Repo.init(str(safe_checkout_dir))
self._apply_config_from_properties(repo)

# Set up origin
Expand Down
3 changes: 2 additions & 1 deletion tracdap-runtime/python/src/tracdap/rt/_plugins/repo_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ def do_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pathlib.Pat

self._log.info(f"Downloaded [{len(content) / 1024:.1f}] KB in [{elapsed.total_seconds():.1f}] seconds")

safe_checkout_dir = _helpers.windows_unc_path(checkout_dir)
download_whl = zipfile.ZipFile(io.BytesIO(download_req.content))
download_whl.extractall(checkout_dir)
download_whl.extractall(safe_checkout_dir)

self._log.info(f"Unpacked [{len(download_whl.filelist)}] files")
self._log.info(f"PyPI checkout succeeded for {model_def.package} {model_def.version}")
Expand Down
21 changes: 13 additions & 8 deletions tracdap-runtime/python/src/tracdap/rt/_plugins/storage_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ def has_file_storage(self) -> bool:

def get_arrow_native(self) -> afs.SubTreeFileSystem:
root_fs = afs.LocalFileSystem()
return afs.SubTreeFileSystem(str(self._root_path), root_fs)
# Use a UNC root path on Windows to avoid max path length issues
sub_tree_path = _helpers.windows_unc_path(self._root_path)
return afs.SubTreeFileSystem(str(sub_tree_path), root_fs)

def get_file_storage(self) -> IFileStorage:

Expand Down Expand Up @@ -140,10 +142,9 @@ def __init__(self, config: cfg.PluginConfig, options: dict = None):
self._properties = config.properties
self._options = options # Not used

self._root_path = LocalStorageProvider.check_root_path(self._properties, self._log)

def _get_root(self):
return self._root_path
# Use a UNC root path on Windows to avoid max path length issues
self._raw_root_path = LocalStorageProvider.check_root_path(self._properties, self._log)
self._root_path = _helpers.windows_unc_path(self._raw_root_path)

def exists(self, storage_path: str) -> bool:

Expand Down Expand Up @@ -358,8 +359,12 @@ def _resolve_path(self, storage_path: str, operation_name: str, allow_root_dir:
if relative_path.is_absolute():
raise ex.EStorageValidation(f"Storage path is not relative: {operation_name} [{storage_path}]")

root_path = self._root_path
absolute_path = self._root_path.joinpath(relative_path).resolve(False)
# UNC paths on Windows have different behaviour for join / resolve
# Work on the raw path, then convert back to UNC afterward
# For other OSes, this is a no-op

root_path = self._raw_root_path
absolute_path = self._raw_root_path.joinpath(relative_path).resolve(False)

# is_relative_to only supported in Python 3.9+, we need to support 3.7
if absolute_path != root_path and root_path not in absolute_path.parents:
Expand All @@ -368,7 +373,7 @@ def _resolve_path(self, storage_path: str, operation_name: str, allow_root_dir:
if absolute_path == root_path and not allow_root_dir:
raise ex.EStorageValidation(f"Illegal operation for storage root: {operation_name} [{storage_path}]")

return absolute_path
return _helpers.windows_unc_path(absolute_path)

except ValueError as e:

Expand Down
16 changes: 13 additions & 3 deletions tracdap-runtime/python/test/tracdap_test/rt/impl/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ def test_load_integrated_ok(self):

def test_load_local_ok(self):

self._test_load_local(self.test_scope)

def test_load_local_long_path_ok(self):

long_path_scope = "long_" + "A" * 250

self._test_load_local(long_path_scope)

def _test_load_local(self, test_scope):

example_repo_url = pathlib.Path(__file__) \
.parent \
.joinpath("../../../../../..") \
Expand All @@ -130,16 +140,16 @@ def test_load_local_ok(self):
)

loader = models.ModelLoader(sys_config, self.scratch_dir)
loader.create_scope(self.test_scope)
loader.create_scope(test_scope)

model_class = loader.load_model_class(self.test_scope, stub_model_def)
model_class = loader.load_model_class(test_scope, stub_model_def)
model = model_class()

self.assertIsInstance(model_class, api.TracModel.__class__)
self.assertIsInstance(model, model_class)
self.assertIsInstance(model, api.TracModel)

loader.destroy_scope(self.test_scope)
loader.destroy_scope(test_scope)

def test_load_git_ok(self):

Expand Down
Loading

0 comments on commit 11686e9

Please sign in to comment.