Skip to content
Permalink
Browse files
Use ARROW_RETURN_NOT_OK (and friends) instead of ASSERT_OK (and friends) in recipes (#52)

* Modified existing recipes to use a status-returning method to contain the recipe, so that we can use ARROW_RETURN_NOT_OK instead of ASSERT_OK in our examples.
  • Loading branch information
westonpace committed Mar 17, 2022
1 parent 3ebd1e0 commit 695216cd7fbdac2b9bd08e49b7a6f98cccb946d2
Showing 3 changed files with 50 additions and 32 deletions.
@@ -20,7 +20,7 @@

#include "common.h"

TEST(BasicArrow, ReturnNotOkNoMacro) {
arrow::Status ReturnNotOkMacro() {
StartRecipe("ReturnNotOkNoMacro");
std::function<arrow::Status()> test_fn = [] {
arrow::NullBuilder builder;
@@ -40,10 +40,11 @@ TEST(BasicArrow, ReturnNotOkNoMacro) {
arrow::Status st = test_fn();
rout << st << std::endl;
EndRecipe("ReturnNotOkNoMacro");
ASSERT_FALSE(st.ok());
EXPECT_FALSE(st.ok());
return arrow::Status::OK();
}

TEST(BasicArrow, ReturnNotOk) {
arrow::Status ReturnNotOk() {
StartRecipe("ReturnNotOk");
std::function<arrow::Status()> test_fn = [] {
arrow::NullBuilder builder;
@@ -55,5 +56,10 @@ TEST(BasicArrow, ReturnNotOk) {
arrow::Status st = test_fn();
rout << st << std::endl;
EndRecipe("ReturnNotOk");
ASSERT_FALSE(st.ok());
EXPECT_FALSE(st.ok());
return arrow::Status::OK();
}

TEST(BasicArrow, ReturnNotOkNoMacro) { ASSERT_OK(ReturnNotOkMacro()); }

TEST(BasicArrow, ReturnNotOk) { ASSERT_OK(ReturnNotOk()); }
@@ -20,36 +20,43 @@

#include "common.h"

TEST(CreatingArrowObjects, CreateArrays) {
arrow::Status CreatingArrays() {
StartRecipe("CreatingArrays");
arrow::Int32Builder builder;
ASSERT_OK(builder.Append(1));
ASSERT_OK(builder.Append(2));
ASSERT_OK(builder.Append(3));
ASSERT_OK_AND_ASSIGN(std::shared_ptr<arrow::Array> arr, builder.Finish())
ARROW_RETURN_NOT_OK(builder.Append(1));
ARROW_RETURN_NOT_OK(builder.Append(2));
ARROW_RETURN_NOT_OK(builder.Append(3));
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> arr, builder.Finish())
rout << arr->ToString() << std::endl;
EndRecipe("CreatingArrays");
return arrow::Status::OK();
}

arrow::Status CreatingArraysPtr() {
StartRecipe("CreatingArraysPtr");
// Raw pointers
arrow::Int64Builder long_builder = arrow::Int64Builder();
std::array<int64_t, 4> values = {1, 2, 3, 4};
ASSERT_OK(long_builder.AppendValues(values.data(), values.size()));
ASSERT_OK_AND_ASSIGN(arr, long_builder.Finish());
ARROW_RETURN_NOT_OK(long_builder.AppendValues(values.data(), values.size()));
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> arr, long_builder.Finish());
rout << arr->ToString() << std::endl;

// Vectors
arrow::StringBuilder str_builder = arrow::StringBuilder();
std::vector<std::string> strvals = {"x", "y", "z"};
ASSERT_OK(str_builder.AppendValues(strvals));
ASSERT_OK_AND_ASSIGN(arr, str_builder.Finish());
ARROW_RETURN_NOT_OK(str_builder.AppendValues(strvals));
ARROW_ASSIGN_OR_RAISE(arr, str_builder.Finish());
rout << arr->ToString() << std::endl;

// Iterators
arrow::DoubleBuilder dbl_builder = arrow::DoubleBuilder();
std::set<double> dblvals = {1.1, 1.1, 2.3};
ASSERT_OK(dbl_builder.AppendValues(dblvals.begin(), dblvals.end()));
ASSERT_OK_AND_ASSIGN(arr, dbl_builder.Finish());
ARROW_RETURN_NOT_OK(dbl_builder.AppendValues(dblvals.begin(), dblvals.end()));
ARROW_ASSIGN_OR_RAISE(arr, dbl_builder.Finish());
rout << arr->ToString() << std::endl;
EndRecipe("CreatingArraysPtr");
return arrow::Status::OK();
}

TEST(CreatingArrowObjects, CreatingArraysTest) { ASSERT_OK(CreatingArrays()); }
TEST(CreatingArrowObjects, CreatingArraysPtrTest) { ASSERT_OK(CreatingArraysPtr()); }
@@ -46,7 +46,8 @@ class DatasetReadingTest : public ::testing::Test {
std::make_shared<arrow::dataset::ParquetFileFormat>();

arrow::dataset::FileSystemDatasetWriteOptions write_options;
write_options.existing_data_behavior = arrow::dataset::ExistingDataBehavior::kDeleteMatchingPartitions;
write_options.existing_data_behavior =
arrow::dataset::ExistingDataBehavior::kDeleteMatchingPartitions;
write_options.filesystem = std::move(fs);
write_options.partitioning = std::move(partitioning);
write_options.base_dir = airquality_partitioned_dir_;
@@ -76,9 +77,9 @@ class DatasetReadingTest : public ::testing::Test {
std::string airquality_partitioned_dir_;
};

TEST_F(DatasetReadingTest, DatasetRead) {
arrow::Status DatasetRead(const std::string& airquality_basedir) {
StartRecipe("ListPartitionedDataset");
const std::string& directory_base = airquality_basedir();
const std::string& directory_base = airquality_basedir;

// Create a filesystem
std::shared_ptr<arrow::fs::LocalFileSystem> fs =
@@ -94,8 +95,8 @@ TEST_F(DatasetReadingTest, DatasetRead) {

// List out the files so we can see how our data is partitioned.
// This step is not necessary for reading a dataset
ASSERT_OK_AND_ASSIGN(std::vector<arrow::fs::FileInfo> file_infos,
fs->GetFileInfo(selector));
ARROW_ASSIGN_OR_RAISE(std::vector<arrow::fs::FileInfo> file_infos,
fs->GetFileInfo(selector));
int num_printed = 0;
for (const auto& path : file_infos) {
if (path.IsFile()) {
@@ -106,7 +107,6 @@ TEST_F(DatasetReadingTest, DatasetRead) {
}
}
}

EndRecipe("ListPartitionedDataset");
StartRecipe("CreatingADataset");
// Create a file format which describes the format of the files.
@@ -117,26 +117,26 @@ TEST_F(DatasetReadingTest, DatasetRead) {
std::make_shared<arrow::dataset::ParquetFileFormat>();

// Create a partitioning factory. A partitioning factory will be used by a dataset
// factory to infer the partitioning schema from the filenames. All we need to specify
// is the flavor of partitioning which, in our case, is "hive".
// factory to infer the partitioning schema from the filenames. All we need to
// specify is the flavor of partitioning which, in our case, is "hive".
//
// Alternatively, we could manually create a partitioning scheme from a schema. This is
// typically not necessary for hive partitioning as inference works well.
// Alternatively, we could manually create a partitioning scheme from a schema. This
// is typically not necessary for hive partitioning as inference works well.
std::shared_ptr<arrow::dataset::PartitioningFactory> partitioning_factory =
arrow::dataset::HivePartitioning::MakeFactory();

arrow::dataset::FileSystemFactoryOptions options;
options.partitioning = partitioning_factory;

// Create a dataset factory
ASSERT_OK_AND_ASSIGN(
ARROW_ASSIGN_OR_RAISE(
std::shared_ptr<arrow::dataset::DatasetFactory> dataset_factory,
arrow::dataset::FileSystemDatasetFactory::Make(fs, selector, format, options));

// Create the dataset, this will scan the dataset directory to find all the files
// and may scan some file metadata in order to determine the dataset schema.
ASSERT_OK_AND_ASSIGN(std::shared_ptr<arrow::dataset::Dataset> dataset,
dataset_factory->Finish());
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::dataset::Dataset> dataset,
dataset_factory->Finish());

rout << "We discovered the following schema for the dataset:" << std::endl
<< std::endl
@@ -146,14 +146,19 @@ TEST_F(DatasetReadingTest, DatasetRead) {

// Create a scanner
arrow::dataset::ScannerBuilder scanner_builder(dataset);
ASSERT_OK(scanner_builder.UseThreads(true));
ASSERT_OK_AND_ASSIGN(std::shared_ptr<arrow::dataset::Scanner> scanner,
scanner_builder.Finish());
ARROW_RETURN_NOT_OK(scanner_builder.UseThreads(true));
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::dataset::Scanner> scanner,
scanner_builder.Finish());

// Scan the dataset. There are a variety of other methods available on the scanner as
// well
ASSERT_OK_AND_ASSIGN(std::shared_ptr<arrow::Table> table, scanner->ToTable());
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Table> table, scanner->ToTable());
rout << "Read in a table with " << table->num_rows() << " rows and "
<< table->num_columns() << " columns";
EndRecipe("ScanningADataset");
return arrow::Status::OK();
}

// Verify the dataset-reading recipe succeeds against the fixture's
// partitioned airquality dataset.
// NOTE(review): assumes airquality_basedir() is a fixture accessor returning
// the partitioned-dataset path — confirm against the fixture definition.
TEST_F(DatasetReadingTest, TestDatasetRead) {
  const std::string& base_dir = airquality_basedir();
  ASSERT_OK(DatasetRead(base_dir));
}

0 comments on commit 695216c

Please sign in to comment.