From b57a2ef3aa6028ccc6cfaabadf627c8ba7c30a7d Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sun, 26 Apr 2026 17:33:54 +0800 Subject: [PATCH] [python] Fix pathlib.Path producing backslash-separated paths on Windows --- .../pypaimon/filesystem/pyarrow_file_io.py | 12 ++++----- paimon-python/pypaimon/tests/file_io_test.py | 25 +++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py index db06eb6f99a6..b5e03c015154 100644 --- a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py +++ b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py @@ -21,7 +21,7 @@ import subprocess import uuid from datetime import datetime, timezone -from pathlib import Path +from pathlib import PurePosixPath from typing import Any, Dict, List, Optional from urllib.parse import splitport, urlparse @@ -235,7 +235,7 @@ def new_output_stream(self, path: str): if parent_dir and not self.exists(parent_dir): self.mkdirs(parent_dir) else: - parent_dir = Path(path_str).parent + parent_dir = PurePosixPath(path_str).parent if str(parent_dir) and not self.exists(str(parent_dir)): self.mkdirs(str(parent_dir)) @@ -326,7 +326,7 @@ def mkdirs(self, path: str) -> bool: def rename(self, src: str, dst: str) -> bool: dst_str = self.to_filesystem_path(dst) - dst_parent = Path(dst_str).parent + dst_parent = PurePosixPath(dst_str).parent if str(dst_parent) and not self.exists(str(dst_parent)): self.mkdirs(str(dst_parent)) @@ -342,8 +342,8 @@ def rename(self, src: str, dst: str) -> bool: return False # Make it compatible with HadoopFileIO: if dst is an existing directory, # dst=dst/srcFileName - src_name = Path(src_str).name - dst_str = str(Path(dst_str) / src_name) + src_name = PurePosixPath(src_str).name + dst_str = str(PurePosixPath(dst_str) / src_name) final_dst_info = self._get_file_info(dst_str) if final_dst_info.type != pafs.FileType.NotFound: return False @@ -402,7 +402,7 @@ def 
copy_file(self, source_path: str, target_path: str, overwrite: bool = False) source_str = self.to_filesystem_path(source_path) target_str = self.to_filesystem_path(target_path) - target_parent = Path(target_str).parent + target_parent = PurePosixPath(target_str).parent if str(target_parent) and not self.exists(str(target_parent)): self.mkdirs(str(target_parent)) diff --git a/paimon-python/pypaimon/tests/file_io_test.py b/paimon-python/pypaimon/tests/file_io_test.py index d39d0c6461e9..86c4c28f82aa 100644 --- a/paimon-python/pypaimon/tests/file_io_test.py +++ b/paimon-python/pypaimon/tests/file_io_test.py @@ -464,5 +464,30 @@ def test_try_to_write_atomic(self): finally: shutil.rmtree(temp_dir, ignore_errors=True) + def test_path_on_windows(self): + oss_io = PyArrowFileIO("oss://test-bucket/warehouse", Options({ + OssOptions.OSS_ENDPOINT.key(): 'oss-cn-hangzhou.aliyuncs.com', + OssOptions.OSS_ACCESS_KEY_ID.key(): 'test-key', + OssOptions.OSS_ACCESS_KEY_SECRET.key(): 'test-secret', + OssOptions.OSS_IMPL.key(): 'legacy', + })) + mock_fs = MagicMock() + mock_fs.get_file_info.return_value = [MagicMock(type=pafs.FileType.NotFound)] + mock_fs.create_dir = MagicMock() + mock_fs.open_output_stream.return_value = MagicMock() + mock_fs.move = MagicMock() + mock_fs.copy_file = MagicMock() + oss_io.filesystem = mock_fs + + oss_io.new_output_stream("oss://test-bucket/db.db/tbl/bucket-0/data.parquet") + oss_io.rename("oss://test-bucket/db.db/tbl/old.parquet", + "oss://test-bucket/db.db/tbl/new.parquet") + oss_io.copy_file("oss://test-bucket/db.db/tbl/src.parquet", + "oss://test-bucket/db.db/tbl/dst.parquet") + + for call in mock_fs.create_dir.call_args_list: + self.assertNotIn("\\", call[0][0], f"backslash in path: {call[0][0]}") + + if __name__ == '__main__': unittest.main()