Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cpp/src/arrow/filesystem/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ std::string FileStats::base_name() const {
return internal::GetAbstractPathParent(path_).second;
}

// Debugging aid: streams a FileStats as "FileStats(<type>, <path>)".
// Size and mtime are intentionally not part of the output.
std::ostream& operator<<(std::ostream& os, const FileStats& stats) {
  os << "FileStats(";
  os << stats.type();
  os << ", ";
  os << stats.path();
  os << ")";
  return os;
}

//////////////////////////////////////////////////////////////////////////
// FileSystem default method implementations

Expand Down
9 changes: 9 additions & 0 deletions cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,20 @@ struct ARROW_EXPORT FileStats {
TimePoint mtime() const { return mtime_; }
void set_mtime(TimePoint mtime) { mtime_ = mtime; }

// Two FileStats are equal only when type, path, size and mtime all match.
// Fields are compared in that order and the comparison stops at the first
// mismatch.
bool operator==(const FileStats& other) const {
  if (!(type() == other.type())) {
    return false;
  }
  if (!(path() == other.path())) {
    return false;
  }
  if (!(size() == other.size())) {
    return false;
  }
  return mtime() == other.mtime();
}

protected:
FileType type_ = FileType::Unknown;
std::string path_;
int64_t size_ = kNoSize;
TimePoint mtime_ = kNoTime;
};

/// \brief Write a short debugging representation of a FileStats to a stream
///
/// The output has the form `FileStats(<type>, <path>)`.
ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileStats&);

/// \brief EXPERIMENTAL: file selector
struct ARROW_EXPORT Selector {
// The directory in which to select files.
Expand All @@ -124,6 +131,8 @@ struct ARROW_EXPORT Selector {
bool allow_non_existent = false;
// Whether to recurse into subdirectories.
bool recursive = false;
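// The maximum number of subdirectory levels to recurse into
// (0 lists only the direct children of base_dir).
// Only taken into account when `recursive` is true.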
int32_t max_recursion = INT32_MAX;

Selector() {}
};
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/arrow/filesystem/localfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ Status StatFile(const std::string& path, FileStats* out) {
#endif

Status StatSelector(const NativePathString& path, const Selector& select,
std::vector<FileStats>* out) {
int32_t nesting_depth, std::vector<FileStats>* out) {
bfs::path p(path);

if (select.allow_non_existent) {
Expand All @@ -209,8 +209,9 @@ Status StatSelector(const NativePathString& path, const Selector& select,
if (st.type() != FileType::NonExistent) {
out->push_back(std::move(st));
}
if (select.recursive && st.type() == FileType::Directory) {
RETURN_NOT_OK(StatSelector(ns, select, out));
if (nesting_depth < select.max_recursion && select.recursive &&
st.type() == FileType::Directory) {
RETURN_NOT_OK(StatSelector(ns, select, nesting_depth + 1, out));
}
}
BOOST_FILESYSTEM_CATCH
Expand All @@ -235,7 +236,7 @@ Status LocalFileSystem::GetTargetStats(const Selector& select,
PlatformFilename fn;
RETURN_NOT_OK(PlatformFilename::FromString(select.base_dir, &fn));
out->clear();
return StatSelector(fn.ToNative(), select, out);
return StatSelector(fn.ToNative(), select, 0, out);
}

Status LocalFileSystem::CreateDir(const std::string& path, bool recursive) {
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/arrow/filesystem/mockfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,16 @@ class MockFileSystem::Impl {
return (consumed == parts.size() - 1) ? entry : nullptr;
}

void GatherStats(const std::string& base_path, Directory& base_dir, bool recursive,
void GatherStats(const Selector& select, const std::string& base_path,
Directory& base_dir, int32_t nesting_depth,
std::vector<FileStats>* stats) {
for (const auto& pair : base_dir.entries) {
Entry* child = pair.second.get();
stats->push_back(child->GetStats(base_path));
if (recursive && child->is_dir()) {
if (select.recursive && nesting_depth < select.max_recursion && child->is_dir()) {
Directory& child_dir = child->as_dir();
std::string child_path = stats->back().path();
GatherStats(std::move(child_path), child_dir, recursive, stats);
GatherStats(select, std::move(child_path), child_dir, nesting_depth + 1, stats);
}
}
}
Expand Down Expand Up @@ -510,7 +511,7 @@ Status MockFileSystem::GetTargetStats(const Selector& selector,
return NotADir(selector.base_dir);
}

impl_->GatherStats(selector.base_dir, base_dir->as_dir(), selector.recursive, out);
impl_->GatherStats(selector, selector.base_dir, base_dir->as_dir(), 0, out);
return Status::OK();
}

Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -955,8 +955,10 @@ class S3FileSystem::Impl {
ListObjectsV2(bucket, key, std::move(handle_results), std::move(handle_error)));

// Recurse
for (const auto& child_key : child_keys) {
RETURN_NOT_OK(Walk(select, bucket, child_key, nesting_depth + 1, out));
if (select.recursive && nesting_depth < select.max_recursion) {
for (const auto& child_key : child_keys) {
RETURN_NOT_OK(Walk(select, bucket, child_key, nesting_depth + 1, out));
}
}

// If no contents were found, perhaps it's an empty "directory",
Expand Down
94 changes: 94 additions & 0 deletions cpp/src/arrow/filesystem/test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "arrow/filesystem/test_util.h"
#include "arrow/io/interfaces.h"
#include "arrow/testing/gtest_util.h"

using ::testing::ElementsAre;

namespace arrow {
namespace fs {

Expand Down Expand Up @@ -646,6 +649,96 @@ void GenericFileSystemTest::TestGetTargetStatsSelector(FileSystem* fs) {
ASSERT_RAISES(IOError, fs->GetTargetStats(s, &stats));
}

// Test helper: builds the FileStats expected for a regular file at `path`.
// Only the type and path are set; size and mtime keep their default values.
FileStats File(std::string path) {
  FileStats expected;
  expected.set_type(FileType::File);
  expected.set_path(path);
  return expected;
}

// Test helper: builds the FileStats expected for a directory at `path`.
// Only the type and path are set; size and mtime keep their default values.
FileStats Dir(std::string path) {
  FileStats expected;
  expected.set_type(FileType::Directory);
  expected.set_path(path);
  return expected;
}

// Test helper: runs `fs->GetTargetStats(s, &stats)`, then normalizes and sorts
// the results so they can be compared against values built with File() / Dir().
//
// \param fs     filesystem under test
// \param s      selector describing what to list
// \param stats  receives the listing; mtime and size of every entry are reset
//               to kNoTime / kNoSize and the entries are sorted with SortStats()
//
// NOTE(review): ASSERT_OK is presumably a fatal gtest-style assertion; if so, a
// failure here only returns from this helper, not from the calling test --
// confirm and consider wrapping calls in ASSERT_NO_FATAL_FAILURE.
void GetSortedStats(FileSystem* fs, Selector s, std::vector<FileStats>& stats) {
  ASSERT_OK(fs->GetTargetStats(s, &stats));
  // Clear mtime & size for easier testing.
  // A plain range-for avoids relying on ADL to find an unqualified `for_each`
  // and avoids a lambda parameter shadowing the selector `s`.
  for (FileStats& entry : stats) {
    entry.set_mtime(kNoTime);
    entry.set_size(kNoSize);
  }
  SortStats(&stats);
}

// Checks how Selector::recursive and Selector::max_recursion interact, using
// this fixture tree:
//
//   00.file
//   01/01.file
//   01/02/02.file
//   01/02/03/03.file
//   01/02/03/04/04.file
//   AA/AA.file
void GenericFileSystemTest::TestGetTargetStatsSelectorWithRecursion(FileSystem* fs) {
  // Build the fixture tree.
  ASSERT_OK(fs->CreateDir("01/02/03/04"));
  ASSERT_OK(fs->CreateDir("AA"));
  CreateFile(fs, "00.file", "00");
  CreateFile(fs, "01/01.file", "01");
  CreateFile(fs, "AA/AA.file", "aa");
  CreateFile(fs, "01/02/02.file", "02");
  CreateFile(fs, "01/02/03/03.file", "03");
  CreateFile(fs, "01/02/03/04/04.file", "04");

  std::vector<FileStats> stats;
  Selector selector;

  // Case 1: non-recursive listing of the root, default max_recursion.
  selector.base_dir = "";
  selector.recursive = false;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats, ElementsAre(File("00.file"), Dir("01"), Dir("AA")));

  // Case 2: `recursive == false` wins over a large max_recursion.
  selector.max_recursion = 9000;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats, ElementsAre(File("00.file"), Dir("01"), Dir("AA")));

  // Case 3: recursive, but a depth limit of 0 means no traversal.
  selector.recursive = true;
  selector.max_recursion = 0;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats, ElementsAre(File("00.file"), Dir("01"), Dir("AA")));

  // Case 4: one level below the root.
  selector.recursive = true;
  selector.max_recursion = 1;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats,
              ElementsAre(File("00.file"), Dir("01"), File("01/01.file"),
                          Dir("01/02"), Dir("AA"), File("AA/AA.file")));

  // Case 5: two levels below the root.
  selector.recursive = true;
  selector.max_recursion = 2;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats,
              ElementsAre(File("00.file"), Dir("01"), File("01/01.file"),
                          Dir("01/02"), File("01/02/02.file"), Dir("01/02/03"),
                          Dir("AA"), File("AA/AA.file")));

  // Case 6: non-recursive listing of a subdirectory.
  selector.base_dir = "01";
  selector.recursive = false;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats, ElementsAre(File("01/01.file"), Dir("01/02")));

  // Case 7: the depth limit is counted from base_dir, not from the root.
  selector.base_dir = "01";
  selector.recursive = true;
  selector.max_recursion = 1;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats,
              ElementsAre(File("01/01.file"), Dir("01/02"), File("01/02/02.file"),
                          Dir("01/02/03")));

  // Case 8: all-in -- unlimited depth from the root returns the whole tree.
  selector.base_dir = "";
  selector.recursive = true;
  selector.max_recursion = INT32_MAX;
  GetSortedStats(fs, selector, stats);
  EXPECT_THAT(stats,
              ElementsAre(File("00.file"), Dir("01"), File("01/01.file"),
                          Dir("01/02"), File("01/02/02.file"), Dir("01/02/03"),
                          File("01/02/03/03.file"), Dir("01/02/03/04"),
                          File("01/02/03/04/04.file"), Dir("AA"),
                          File("AA/AA.file")));
}

void GenericFileSystemTest::TestOpenOutputStream(FileSystem* fs) {
std::shared_ptr<io::OutputStream> stream;
int64_t position = -1;
Expand Down Expand Up @@ -790,6 +883,7 @@ GENERIC_FS_TEST_DEFINE(TestCopyFile)
GENERIC_FS_TEST_DEFINE(TestGetTargetStatsSingle)
GENERIC_FS_TEST_DEFINE(TestGetTargetStatsVector)
GENERIC_FS_TEST_DEFINE(TestGetTargetStatsSelector)
GENERIC_FS_TEST_DEFINE(TestGetTargetStatsSelectorWithRecursion)
GENERIC_FS_TEST_DEFINE(TestOpenOutputStream)
GENERIC_FS_TEST_DEFINE(TestOpenAppendStream)
GENERIC_FS_TEST_DEFINE(TestOpenInputStream)
Expand Down
35 changes: 19 additions & 16 deletions cpp/src/arrow/filesystem/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class ARROW_EXPORT GenericFileSystemTest {
void TestGetTargetStatsSingle();
void TestGetTargetStatsVector();
void TestGetTargetStatsSelector();
void TestGetTargetStatsSelectorWithRecursion();
void TestOpenOutputStream();
void TestOpenAppendStream();
void TestOpenInputStream();
Expand Down Expand Up @@ -131,6 +132,7 @@ class ARROW_EXPORT GenericFileSystemTest {
void TestGetTargetStatsSingle(FileSystem* fs);
void TestGetTargetStatsVector(FileSystem* fs);
void TestGetTargetStatsSelector(FileSystem* fs);
void TestGetTargetStatsSelectorWithRecursion(FileSystem* fs);
void TestOpenOutputStream(FileSystem* fs);
void TestOpenAppendStream(FileSystem* fs);
void TestOpenInputStream(FileSystem* fs);
Expand All @@ -140,22 +142,23 @@ class ARROW_EXPORT GenericFileSystemTest {
#define GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, NAME) \
TEST_MACRO(TEST_CLASS, NAME) { this->Test##NAME(); }

#define GENERIC_FS_TEST_FUNCTIONS_MACROS(TEST_MACRO, TEST_CLASS) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, Empty) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CreateDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteDirContents) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteFile) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteFiles) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, MoveFile) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, MoveDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CopyFile) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsSingle) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsVector) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsSelector) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenOutputStream) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenAppendStream) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputStream) \
#define GENERIC_FS_TEST_FUNCTIONS_MACROS(TEST_MACRO, TEST_CLASS) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, Empty) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CreateDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteDirContents) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteFile) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, DeleteFiles) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, MoveFile) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, MoveDir) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CopyFile) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsSingle) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsVector) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsSelector) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetTargetStatsSelectorWithRecursion) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenOutputStream) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenAppendStream) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputStream) \
GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputFile)

#define GENERIC_FS_TEST_FUNCTIONS(TEST_CLASS) \
Expand Down