Skip to content

Commit

Permalink
Combine GCSReadBenchmark and S3ReadBenchmark with ReadBenchmark and m…
Browse files Browse the repository at this point in the history
…ove to benchmarks folder (#8813)

Summary:
It is redundant to maintain different versions of ReadBenchmark per filesystem.
GCSReadBenchmark and S3ReadBenchmark are the same.
As discussed here facebookincubator/velox#8694, users will
find it easier to work with benchmarks if they are in one location.

Pull Request resolved: facebookincubator/velox#8813

Reviewed By: xiaoxmeng

Differential Revision: D54314538

Pulled By: pedroerp

fbshipit-source-id: 4fd3ee3d339304fafcd73a25dc8ae823d2472529
  • Loading branch information
majetideepak authored and facebook-github-bot committed Feb 28, 2024
1 parent 17457a8 commit c155ce9
Show file tree
Hide file tree
Showing 18 changed files with 159 additions and 448 deletions.
1 change: 1 addition & 0 deletions velox/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ target_link_libraries(velox_benchmark_builder gtest)

if(${VELOX_ENABLE_BENCHMARKS})
add_subdirectory(tpch)
add_subdirectory(filesystem)
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ target_link_libraries(velox_read_benchmark_lib

add_executable(velox_read_benchmark ReadBenchmarkMain.cpp)

target_link_libraries(velox_read_benchmark PRIVATE velox_read_benchmark_lib)
target_link_libraries(velox_read_benchmark PRIVATE velox_read_benchmark_lib velox_hive_config velox_s3fs velox_hdfs velox_abfs velox_gcs)
149 changes: 149 additions & 0 deletions velox/benchmarks/filesystem/ReadBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/benchmarks/filesystem/ReadBenchmark.h"

#include <algorithm>
#include <cctype>
#include <fstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>

#include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h"
#include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h"
#include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h"
#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h"
#include "velox/core/Config.h"

// Command-line flags for the read benchmark. Defaults are the second argument
// of each definition; the third argument is the text shown by --help.

// Input selection.
DEFINE_string(path, "", "Path of the input file");
DEFINE_int64(
    file_size_gb,
    0,
    "Limits the test to the first --file_size_gb "
    "of --path. 0 means use the whole file");

// Execution setup.
DEFINE_int32(num_threads, 16, "Test parallelism");
DEFINE_int32(seed, 0, "Random seed, 0 means no seed");
DEFINE_bool(odirect, false, "Use O_DIRECT");

// Read pattern. When --bytes is 0 a predefined set of patterns is run instead
// of the single pattern described by --bytes/--gap/--num_in_run.
DEFINE_int32(
    bytes,
    0,
    "If 0, runs through a set of predefined read patterns. "
    "If non-0, this is the size of a single read. The reads are "
    "made in --num_in_run consecutive batches with --gap bytes between each read");
DEFINE_int32(gap, 0, "Gap between consecutive reads if --bytes is non-0");
DEFINE_int32(
    num_in_run,
    10,
    "Number of consecutive reads of --bytes separated by --gap bytes");
DEFINE_int32(
    measurement_size,
    100 << 20,
    "Total reads per thread when measuring throughput for a "
    "--bytes/--gap/--num_in_run combination");

// Optional properties file passed to the file system when opening --path.
DEFINE_string(config, "", "Path of the config file");

namespace {
// Flag validator: accepts 'value' only if it holds at least one character.
// The flag name is part of the validator signature but is not consulted.
bool notEmpty(const char* /*flagName*/, const std::string& value) {
  return value.size() > 0;
}
} // namespace

// Registers notEmpty as the gflags validator for --path, so an empty path
// value fails validation.
DEFINE_validator(path, &notEmpty);

namespace facebook::velox {

std::shared_ptr<Config> readConfig(const std::string& filePath) {
std::ifstream configFile(filePath);
if (!configFile.is_open()) {
throw std::runtime_error(
fmt::format("Couldn't open config file {} for reading.", filePath));
}

std::unordered_map<std::string, std::string> properties;
std::string line;
while (getline(configFile, line)) {
line.erase(std::remove_if(line.begin(), line.end(), isspace), line.end());
if (line[0] == '#' || line.empty()) {
continue;
}
auto delimiterPos = line.find('=');
auto name = line.substr(0, delimiterPos);
auto value = line.substr(delimiterPos + 1);
properties.emplace(name, value);
}

return std::make_shared<facebook::velox::core::MemConfig>(properties);
}

// Initialize a LocalReadFile instance for the specified 'path'.
void ReadBenchmark::initialize() {
executor_ = std::make_unique<folly::IOThreadPoolExecutor>(FLAGS_num_threads);
if (FLAGS_odirect) {
int32_t o_direct =
#ifdef linux
O_DIRECT;
#else
0;
#endif
fd_ = open(
FLAGS_path.c_str(),
O_CREAT | O_RDWR | (FLAGS_odirect ? o_direct : 0),
S_IRUSR | S_IWUSR);
if (fd_ < 0) {
LOG(ERROR) << "Could not open " << FLAGS_path;
exit(1);
}
readFile_ = std::make_unique<LocalReadFile>(fd_);
} else {
filesystems::registerLocalFileSystem();
filesystems::registerS3FileSystem();
filesystems::registerGCSFileSystem();
filesystems::registerHdfsFileSystem();
filesystems::abfs::registerAbfsFileSystem();
std::shared_ptr<Config> config;
if (!FLAGS_config.empty()) {
config = readConfig(FLAGS_config);
}
auto fs = filesystems::getFileSystem(FLAGS_path, config);
readFile_ = fs->openFileForRead(FLAGS_path);
fileSize_ = readFile_->size();
if (FLAGS_file_size_gb) {
fileSize_ = std::min<uint64_t>(FLAGS_file_size_gb << 30, fileSize_);
}
}

if (fileSize_ <= FLAGS_measurement_size) {
LOG(ERROR) << "File size " << fileSize_ << " is <= then --measurement_size "
<< FLAGS_measurement_size;
exit(1);
}
if (FLAGS_seed) {
rng_.seed(FLAGS_seed);
}
}

void ReadBenchmark::finalize() {
filesystems::finalizeS3FileSystem();
}

// Runs the benchmark. A non-zero --bytes selects the single pattern described
// by --bytes/--gap/--num_in_run; otherwise a fixed list of patterns is run in
// order.
void ReadBenchmark::run() {
  if (FLAGS_bytes != 0) {
    modes(FLAGS_bytes, FLAGS_gap, FLAGS_num_in_run);
    return;
  }

  // Arguments passed to modes(), in the same positions as
  // (--bytes, --gap, --num_in_run) above.
  struct Pattern {
    int bytes;
    int gap;
    int numInRun;
  };
  static constexpr Pattern kPatterns[] = {
      {1100, 0, 10},
      {1100, 1200, 10},
      {16 * 1024, 0, 10},
      {16 * 1024, 10000, 10},
      {1000000, 0, 8},
      {1000000, 100000, 8},
  };
  for (const auto& pattern : kPatterns) {
    modes(pattern.bytes, pattern.gap, pattern.numInRun);
  }
}
} // namespace facebook::velox
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ DECLARE_int32(gap);
DECLARE_int32(num_in_run);

DECLARE_int32(measurement_size);
DECLARE_string(config);

namespace facebook::velox {

Expand All @@ -62,46 +63,9 @@ class ReadBenchmark {
public:
virtual ~ReadBenchmark() = default;

// Initialize a LocalReadFile instance for the specified 'path'.
virtual void initialize() {
executor_ =
std::make_unique<folly::IOThreadPoolExecutor>(FLAGS_num_threads);
if (FLAGS_odirect) {
int32_t o_direct =
#ifdef linux
O_DIRECT;
#else
0;
#endif
fd_ = open(
FLAGS_path.c_str(),
O_CREAT | O_RDWR | (FLAGS_odirect ? o_direct : 0),
S_IRUSR | S_IWUSR);
if (fd_ < 0) {
LOG(ERROR) << "Could not open " << FLAGS_path;
exit(1);
}
readFile_ = std::make_unique<LocalReadFile>(fd_);
virtual void initialize();

} else {
filesystems::registerLocalFileSystem();
auto lfs = filesystems::getFileSystem(FLAGS_path, nullptr);
readFile_ = lfs->openFileForRead(FLAGS_path);
}
fileSize_ = readFile_->size();
if (FLAGS_file_size_gb) {
fileSize_ = std::min<uint64_t>(FLAGS_file_size_gb << 30, fileSize_);
}

if (fileSize_ <= FLAGS_measurement_size) {
LOG(ERROR) << "File size " << fileSize_
<< " is <= then --measurement_size " << FLAGS_measurement_size;
exit(1);
}
if (FLAGS_seed) {
rng_.seed(FLAGS_seed);
}
}
virtual void finalize();

void clearCache() {
#ifdef linux
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "velox/common/file/benchmark/ReadBenchmark.h"
#include "velox/benchmarks/filesystem/ReadBenchmark.h"

using namespace facebook::velox;

Expand All @@ -28,4 +28,5 @@ int main(int argc, char** argv) {
ReadBenchmark bm;
bm.initialize();
bm.run();
bm.finalize();
}
3 changes: 0 additions & 3 deletions velox/common/file/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,3 @@ target_link_libraries(
if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()
if(${VELOX_ENABLE_BENCHMARKS})
add_subdirectory(benchmark)
endif()
68 changes: 0 additions & 68 deletions velox/common/file/benchmark/ReadBenchmark.cpp

This file was deleted.

2 changes: 1 addition & 1 deletion velox/connectors/hive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

add_library(velox_hive_config OBJECT HiveConfig.cpp)
target_link_libraries(velox_hive_config velox_exception)
target_link_libraries(velox_hive_config velox_core velox_exception)

add_subdirectory(iceberg)

Expand Down
4 changes: 0 additions & 4 deletions velox/connectors/hive/storage_adapters/gcs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ if(VELOX_ENABLE_GCS)
add_subdirectory(tests)
endif()

if(${VELOX_ENABLE_BENCHMARKS})
add_subdirectory(benchmark)
endif()

if(${VELOX_ENABLE_EXAMPLES})
add_subdirectory(examples)
endif()
Expand Down

This file was deleted.

Loading

0 comments on commit c155ce9

Please sign in to comment.