Skip to content

Commit

Permalink
ARROW-8146: [C++] Add per-filesystem facility to sanitize a path
Browse files Browse the repository at this point in the history
Some filesystem types (e.g. local filesystem on Windows) might allow slightly different semantics for paths on the user side.  This function allows bridging the gap (but it has to be called explicitly).

Closes #6657 from pitrou/ARROW-8146-normalize-path

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou committed Mar 19, 2020
1 parent 3e40cc7 commit 6fe8c18
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 11 deletions.
28 changes: 18 additions & 10 deletions cpp/src/arrow/filesystem/filesystem.cc
Expand Up @@ -115,6 +115,8 @@ std::string FileInfo::extension() const {

FileSystem::~FileSystem() {}

Result<std::string> FileSystem::NormalizePath(std::string path) { return path; }

Result<std::vector<FileInfo>> FileSystem::GetTargetInfos(
const std::vector<std::string>& paths) {
std::vector<FileInfo> res;
Expand All @@ -137,15 +139,18 @@ Status FileSystem::DeleteFiles(const std::vector<std::string>& paths) {
//////////////////////////////////////////////////////////////////////////
// SubTreeFileSystem implementation

// FIXME EnsureTrailingSlash works on abstract paths... but we will be
// passing a concrete path, e.g. "C:" on Windows.

SubTreeFileSystem::SubTreeFileSystem(const std::string& base_path,
std::shared_ptr<FileSystem> base_fs)
: base_path_(EnsureTrailingSlash(base_path)), base_fs_(base_fs) {}
: base_path_(NormalizeBasePath(base_path, base_fs).ValueOrDie()), base_fs_(base_fs) {}

SubTreeFileSystem::~SubTreeFileSystem() {}

Result<std::string> SubTreeFileSystem::NormalizeBasePath(
std::string base_path, const std::shared_ptr<FileSystem>& base_fs) {
ARROW_ASSIGN_OR_RAISE(base_path, base_fs->NormalizePath(std::move(base_path)));
return EnsureTrailingSlash(std::move(base_path));
}

std::string SubTreeFileSystem::PrependBase(const std::string& s) const {
if (s.empty()) {
return base_path_;
Expand All @@ -163,25 +168,28 @@ Status SubTreeFileSystem::PrependBaseNonEmpty(std::string* s) const {
}
}

Status SubTreeFileSystem::StripBase(const std::string& s, std::string* out) const {
Result<std::string> SubTreeFileSystem::StripBase(const std::string& s) const {
auto len = base_path_.length();
// Note base_path_ ends with a slash (if not empty)
if (s.length() >= len && s.substr(0, len) == base_path_) {
*out = s.substr(len);
return Status::OK();
return s.substr(len);
} else {
return Status::UnknownError("Underlying filesystem returned path '", s,
"', which is not a subpath of '", base_path_, "'");
}
}

Status SubTreeFileSystem::FixInfo(FileInfo* info) const {
std::string fixed_path;
RETURN_NOT_OK(StripBase(info->path(), &fixed_path));
info->set_path(fixed_path);
ARROW_ASSIGN_OR_RAISE(auto fixed_path, StripBase(info->path()));
info->set_path(std::move(fixed_path));
return Status::OK();
}

Result<std::string> SubTreeFileSystem::NormalizePath(std::string path) {
ARROW_ASSIGN_OR_RAISE(auto normalized, base_fs_->NormalizePath(PrependBase(path)));
return StripBase(std::move(normalized));
}

Result<FileInfo> SubTreeFileSystem::GetTargetInfo(const std::string& path) {
ARROW_ASSIGN_OR_RAISE(FileInfo info, base_fs_->GetTargetInfo(PrependBase(path)));
RETURN_NOT_OK(FixInfo(&info));
Expand Down
16 changes: 15 additions & 1 deletion cpp/src/arrow/filesystem/filesystem.h
Expand Up @@ -162,6 +162,12 @@ class ARROW_EXPORT FileSystem {

virtual std::string type_name() const = 0;

/// Normalize path for the given filesystem
///
/// The default implementation of this method is a no-op, but subclasses
/// may allow normalizing irregular path forms (such as Windows local paths).
virtual Result<std::string> NormalizePath(std::string path);

/// Get info for the given target.
///
/// Any symlink is automatically dereferenced, recursively.
Expand Down Expand Up @@ -246,12 +252,15 @@ class ARROW_EXPORT FileSystem {
/// "escape" the subtree and access other parts of the underlying filesystem.
class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
public:
// This constructor may abort if base_path is invalid.
explicit SubTreeFileSystem(const std::string& base_path,
std::shared_ptr<FileSystem> base_fs);
~SubTreeFileSystem() override;

std::string type_name() const override { return "subtree"; }

Result<std::string> NormalizePath(std::string path) override;

/// \cond FALSE
using FileSystem::GetTargetInfo;
using FileSystem::GetTargetInfos;
Expand Down Expand Up @@ -280,13 +289,18 @@ class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
const std::string& path) override;

protected:
SubTreeFileSystem() {}

const std::string base_path_;
std::shared_ptr<FileSystem> base_fs_;

std::string PrependBase(const std::string& s) const;
Status PrependBaseNonEmpty(std::string* s) const;
Status StripBase(const std::string& s, std::string* out) const;
Result<std::string> StripBase(const std::string& s) const;
Status FixInfo(FileInfo* info) const;

static Result<std::string> NormalizeBasePath(
std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
};

/// \brief A FileSystem implementation that delegates to another
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/filesystem/localfs.cc
Expand Up @@ -240,6 +240,11 @@ LocalFileSystem::LocalFileSystem(const LocalFileSystemOptions& options)

LocalFileSystem::~LocalFileSystem() {}

Result<std::string> LocalFileSystem::NormalizePath(std::string path) {
ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
return fn.ToString();
}

Result<FileInfo> LocalFileSystem::GetTargetInfo(const std::string& path) {
ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
return StatFile(fn.ToNative());
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/filesystem/localfs.h
Expand Up @@ -50,6 +50,8 @@ class ARROW_EXPORT LocalFileSystem : public FileSystem {

std::string type_name() const override { return "local"; }

Result<std::string> NormalizePath(std::string path) override;

/// \cond FALSE
using FileSystem::GetTargetInfo;
using FileSystem::GetTargetInfos;
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/arrow/filesystem/localfs_test.cc
Expand Up @@ -229,6 +229,8 @@ class TestLocalFS : public LocalFSTestMixin {
ASSERT_EQ(size, expected_size);
}

static void CheckNormalizePath(const std::shared_ptr<FileSystem>& fs) {}

protected:
PathFormatter path_formatter_;
std::shared_ptr<LocalFileSystem> local_fs_;
Expand All @@ -251,6 +253,27 @@ TYPED_TEST(TestLocalFS, CorrectPathExists) {
this->CheckConcreteFile(this->temp_dir_->path().ToString() + "abc", data_size);
}

TYPED_TEST(TestLocalFS, NormalizePath) {
#ifdef _WIN32
ASSERT_OK_AND_EQ("AB/CD", this->local_fs_->NormalizePath("AB\\CD"));
ASSERT_OK_AND_EQ("/AB/CD", this->local_fs_->NormalizePath("\\AB\\CD"));
ASSERT_OK_AND_EQ("C:DE/fgh", this->local_fs_->NormalizePath("C:DE\\fgh"));
ASSERT_OK_AND_EQ("C:/DE/fgh", this->local_fs_->NormalizePath("C:\\DE\\fgh"));
ASSERT_OK_AND_EQ("//some/share/AB",
this->local_fs_->NormalizePath("\\\\some\\share\\AB"));
#else
ASSERT_OK_AND_EQ("AB\\CD", this->local_fs_->NormalizePath("AB\\CD"));
#endif
}

TYPED_TEST(TestLocalFS, NormalizePathThroughSubtreeFS) {
#ifdef _WIN32
ASSERT_OK_AND_EQ("AB/CD", this->fs_->NormalizePath("AB\\CD"));
#else
ASSERT_OK_AND_EQ("AB\\CD", this->fs_->NormalizePath("AB\\CD"));
#endif
}

TYPED_TEST(TestLocalFS, FileSystemFromUriFile) {
// Concrete test with actual file
const auto uri_string = "file:" + this->local_path_;
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/filesystem/test_util.cc
Expand Up @@ -172,6 +172,12 @@ void GenericFileSystemTest::TestEmpty(FileSystem* fs) {
ASSERT_EQ(files.size(), 0);
}

void GenericFileSystemTest::TestNormalizePath(FileSystem* fs) {
// Canonical abstract paths should go through unchanged
ASSERT_OK_AND_EQ("AB", fs->NormalizePath("AB"));
ASSERT_OK_AND_EQ("AB/CD/efg", fs->NormalizePath("AB/CD/efg"));
}

void GenericFileSystemTest::TestCreateDir(FileSystem* fs) {
ASSERT_OK(fs->CreateDir("AB"));
ASSERT_OK(fs->CreateDir("AB/CD/EF")); // Recursive
Expand Down Expand Up @@ -845,6 +851,7 @@ void GenericFileSystemTest::TestOpenInputFile(FileSystem* fs) {
void GenericFileSystemTest::FUNC_NAME() { FUNC_NAME(GetEmptyFileSystem().get()); }

GENERIC_FS_TEST_DEFINE(TestEmpty)
GENERIC_FS_TEST_DEFINE(TestNormalizePath)
GENERIC_FS_TEST_DEFINE(TestCreateDir)
GENERIC_FS_TEST_DEFINE(TestDeleteDir)
GENERIC_FS_TEST_DEFINE(TestDeleteDirContents)
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/filesystem/test_util.h
Expand Up @@ -99,6 +99,7 @@ class ARROW_EXPORT GenericFileSystemTest {
virtual ~GenericFileSystemTest();

void TestEmpty();
void TestNormalizePath();
void TestCreateDir();
void TestDeleteDir();
void TestDeleteDirContents();
Expand Down Expand Up @@ -136,6 +137,7 @@ class ARROW_EXPORT GenericFileSystemTest {
virtual bool have_flaky_directory_tree_deletion() const { return false; }

void TestEmpty(FileSystem* fs);
void TestNormalizePath(FileSystem* fs);
void TestCreateDir(FileSystem* fs);
void TestDeleteDir(FileSystem* fs);
void TestDeleteDirContents(FileSystem* fs);
Expand All @@ -159,6 +161,7 @@ class ARROW_EXPORT GenericFileSystemTest {

#define GENERIC_FS_TEST_FUNCTIONS_MACROS(TEST_MACRO, TEST_CLASS) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, Empty) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, NormalizePath) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CreateDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteDirContents) \
Expand Down

0 comments on commit 6fe8c18

Please sign in to comment.