-
Notifications
You must be signed in to change notification settings - Fork 4k
Description
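This issue (the schema's key-value metadata is missing from the dataset read back after `FileSystemDataset::Write`) can be reproduced with the following code.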
auto format = std::make_shared();
auto fs = std::make_sharedfs::internal::MockFileSystem(fs::kNoTime);
FileSystemDatasetWriteOptions write_options;
write_options.file_write_options = format->DefaultWriteOptions();
write_options.filesystem = fs;
write_options.base_dir = "root";
write_options.partitioning = std::make_shared(schema({}));
write_options.basename_template = "{i}.parquet";
auto metadata =
std::shared_ptr(new KeyValueMetadata({"foo"}, {"bar"}));
auto dataset_schema = schema({field("a", int64())}, metadata);
RecordBatchVector batches{
ConstantArrayGenerator::Zeroes(kRowsPerBatch, dataset_schema)};
ASSERT_EQ(0, batches[0]>column(0)>null_count());
auto dataset = std::make_shared(dataset_schema, batches);
ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
ASSERT_OK(scanner_builder->Project(
{compute::call("add", {compute::field_ref("a"), compute::literal(1)})},
{"a_plus_one"}));
ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
// Before write the schema has the metadata info.
ASSERT_EQ(1, dataset_schema->HasMetadata());
ASSERT_OK(FileSystemDataset::Write(write_options, scanner));
ASSERT_OK_AND_ASSIGN(auto dataset_factory, FileSystemDatasetFactory::Make(
fs, {"root/0.parquet"}, format, {}));
ASSERT_OK_AND_ASSIGN(auto written_dataset, dataset_factory->Finish(FinishOptions{}));
// After write the schema does not has the metadata info.
ASSERT_EQ(0, written_dataset->schema()->HasMetadata());
Reporter: Ke Jia
Note: This issue was originally created as ARROW-18140. Please see the migration documentation for further details.