-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GH-37042: [MATLAB] Implement Feather V1 Writer using new MATLAB Inter…
…face APIs (#37043) ### Rationale for this change Now that we have the basic building blocks for tabular IO in the MATLAB Interface (`Array`, `Schema`, `RecordBatch`), we can implement a Feather V1 writer in terms of the new APIs. This is the first in a series of pull requests in which we will work on replacing the legacy feather V1 infrastructure with a new implementation that uses the MATLAB Interface APIs. A side effect of doing this work is that we can eventually delete a lot of legacy build infrastructure and code. ### What changes are included in this PR? 1. Added a new class called `arrow.internal.io.feather.Writer` which can be used to write feather V1 files. It has one public property named `Filename` and one public method `write`. Below is an example of its usage: ```matlab >> T = table([1; 2; 3], single([10; 11; 12])); T = 3×2 table Var1 Var2 ____ ____ 1 10 2 11 3 12 >> filename = "/tmp/table.feather"; >> writer = arrow.internal.io.feather.Writer(filename) writer = Writer with properties: Filename: "/tmp/table.feather" >> writer.write(T); ``` 2. Added an `unwrap` method to `proxy::RecordBatch` so that the `FeatherWriter::write` method can access the underlying `RecordBatch` from the proxy. 3. Changed the `SetAccess` and `GetAccess` of the `Proxy` property on `arrow.tabular.RecordBatch` to `private` and `public`, respectively. ### Are these changes tested? Yes, added a new test file called `tRoundTrip.m` in the `matlab/test/arrow/io/feather` folder. ### Are there any user-facing changes? No. ### Future Directions 1. Add a new class for reading feather V1 files (See #37041). 2. Integrate this class in the public `featherwrite` function. 3. Once this class is integrated with `featherwrite`, we can delete the legacy build infrastructure and source code. * Closes: #37042 Authored-by: Sarah Gilmore <sgilmore@mathworks.com> Signed-off-by: Kevin Gurney <kgurney@mathworks.com>
- Loading branch information
1 parent
3c00b08
commit 71329ce
Showing
10 changed files
with
247 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include "arrow/matlab/io/feather/proxy/feather_writer.h" | ||
#include "arrow/matlab/tabular/proxy/record_batch.h" | ||
#include "arrow/matlab/error/error.h" | ||
|
||
#include "arrow/result.h" | ||
#include "arrow/table.h" | ||
#include "arrow/util/utf8.h" | ||
|
||
#include "arrow/io/file.h" | ||
#include "arrow/ipc/feather.h" | ||
|
||
#include "libmexclass/proxy/ProxyManager.h" | ||
|
||
namespace arrow::matlab::io::feather::proxy { | ||
|
||
// Constructs a writer proxy that stores a copy of the target file path and
// registers the getFilename and write methods via REGISTER_METHOD.
FeatherWriter::FeatherWriter(const std::string& filename)
    : filename(filename) {
  REGISTER_METHOD(FeatherWriter, getFilename);
  REGISTER_METHOD(FeatherWriter, write);
}
|
||
// Factory for FeatherWriter proxies.
//
// Expects constructor_arguments[0] to be a struct array whose first element
// has a "Filename" string field. The filename is converted from UTF-16 to
// UTF-8 before the proxy is created; a failed conversion is reported through
// MATLAB_ASSIGN_OR_ERROR with error::UNICODE_CONVERSION_ERROR_ID.
libmexclass::proxy::MakeResult FeatherWriter::make(
    const libmexclass::proxy::FunctionArguments& constructor_arguments) {
  namespace mda = ::matlab::data;

  mda::StructArray args = constructor_arguments[0];
  const mda::StringArray filename_array = args[0]["Filename"];
  const std::u16string utf16_name = std::u16string(filename_array[0]);

  MATLAB_ASSIGN_OR_ERROR(const auto utf8_name,
                         arrow::util::UTF16StringToUTF8(utf16_name),
                         error::UNICODE_CONVERSION_ERROR_ID);

  return std::make_shared<FeatherWriter>(utf8_name);
}
|
||
// Returns the stored filename to the caller as a scalar string array.
//
// The UTF-8 filename member is converted to UTF-16 and written to
// context.outputs[0]. A failed conversion is reported through
// MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT with error::UNICODE_CONVERSION_ERROR_ID.
void FeatherWriter::getFilename(libmexclass::proxy::method::Context& context) {
  namespace mda = ::matlab::data;

  MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16,
                                      arrow::util::UTF8StringToUTF16(filename),
                                      context,
                                      error::UNICODE_CONVERSION_ERROR_ID);

  mda::ArrayFactory array_factory;
  context.outputs[0] = array_factory.createScalar(filename_utf16);
}
|
||
// Writes a RecordBatch to this writer's file as a Feather V1 file.
//
// Expects context.inputs[0] to be a struct array whose first element has a
// uint64 "RecordBatchProxyID" field identifying a RecordBatch proxy held by the
// ProxyManager. Each failing step is reported through the
// MATLAB_*_WITH_CONTEXT macros with the corresponding error identifier.
void FeatherWriter::write(libmexclass::proxy::method::Context& context) {
  namespace mda = ::matlab::data;
  mda::StructArray opts = context.inputs[0];
  const mda::TypedArray<uint64_t> record_batch_proxy_id_mda = opts[0]["RecordBatchProxyID"];
  const uint64_t record_batch_proxy_id = record_batch_proxy_id_mda[0];

  // NOTE(review): the proxy is neither null-checked nor type-checked before the
  // cast; presumably the caller guarantees the ID refers to a RecordBatch
  // proxy -- confirm.
  auto proxy = libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id);
  auto record_batch_proxy = std::static_pointer_cast<arrow::matlab::tabular::proxy::RecordBatch>(proxy);
  auto record_batch = record_batch_proxy->unwrap();

  // The Feather writer operates on tables, so wrap the single batch in one.
  MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto table,
                                      arrow::Table::FromRecordBatches({record_batch}),
                                      context,
                                      error::TABLE_FROM_RECORD_BATCH);

  MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr<arrow::io::OutputStream> output_stream,
                                      arrow::io::FileOutputStream::Open(filename),
                                      context,
                                      error::FAILED_TO_OPEN_FILE_FOR_WRITE);

  // Specify the feather file format version as V1
  arrow::ipc::feather::WriteProperties write_props;
  write_props.version = arrow::ipc::feather::kFeatherV1Version;

  MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(ipc::feather::WriteTable(*table, output_stream.get(), write_props),
                                      context,
                                      error::FEATHER_FAILED_TO_WRITE_TABLE);

  // Close the stream explicitly so that a failure while closing the file is
  // reported to the caller; a Status produced during destruction of the stream
  // could not be propagated.
  MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(output_stream->Close(),
                                      context,
                                      error::FEATHER_FAILED_TO_WRITE_TABLE);
}
} |
41 changes: 41 additions & 0 deletions
41
matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once | ||
|
||
#include "arrow/status.h" | ||
|
||
#include "libmexclass/proxy/Proxy.h" | ||
|
||
namespace arrow::matlab::io::feather::proxy { | ||
|
||
class FeatherWriter : public libmexclass::proxy::Proxy { | ||
public: | ||
FeatherWriter(const std::string& filename); | ||
|
||
~FeatherWriter() {} | ||
|
||
static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); | ||
|
||
protected: | ||
void getFilename(libmexclass::proxy::method::Context& context); | ||
void write(libmexclass::proxy::method::Context& context); | ||
|
||
private: | ||
const std::string filename; | ||
}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
%WRITER Class for writing feather V1 files. | ||
|
||
% Licensed to the Apache Software Foundation (ASF) under one or more | ||
% contributor license agreements. See the NOTICE file distributed with | ||
% this work for additional information regarding copyright ownership. | ||
% The ASF licenses this file to you under the Apache License, Version | ||
% 2.0 (the "License"); you may not use this file except in compliance | ||
% with the License. You may obtain a copy of the License at | ||
% | ||
% http://www.apache.org/licenses/LICENSE-2.0 | ||
% | ||
% Unless required by applicable law or agreed to in writing, software | ||
% distributed under the License is distributed on an "AS IS" BASIS, | ||
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
% implied. See the License for the specific language governing | ||
% permissions and limitations under the License. | ||
classdef Writer < matlab.mixin.Scalar
    % Writer wraps a FeatherWriter proxy object. The Filename property is
    % read from the proxy, and write converts a MATLAB table to an
    % arrow.tabular.RecordBatch before handing it to the proxy.

    properties(Hidden, SetAccess=private, GetAccess=public)
        % Proxy object created by arrow.internal.proxy.create.
        Proxy
    end

    properties(Dependent)
        % Name of the file to write, as reported by the proxy.
        Filename
    end

    methods
        function obj = Writer(filename)
            % Create a Writer for the file named FILENAME.
            %
            % FILENAME must be a nonmissing, nonempty text scalar. The
            % "string" class in the validation converts character vectors
            % to a string scalar before the size check, and ensures the
            % Filename value handed to the proxy is always a string array.
            arguments
                filename(1, 1) string {mustBeNonmissing, mustBeNonzeroLengthText}
            end

            args = struct(Filename=filename);
            proxyName = "arrow.io.feather.proxy.FeatherWriter";
            obj.Proxy = arrow.internal.proxy.create(proxyName, args);
        end

        function write(obj, T)
            % Write T to the file as feather V1. T is converted to a
            % RecordBatch with arrow.recordbatch, and the ID of its proxy
            % is passed to the writer proxy.
            rb = arrow.recordbatch(T);
            args = struct(RecordBatchProxyID=rb.Proxy.ID);
            obj.Proxy.write(args);
        end

        function filename = get.Filename(obj)
            % Query the proxy for the filename.
            filename = obj.Proxy.getFilename();
        end
    end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
%TROUNDTRIP Round trip tests for feather. | ||
|
||
% Licensed to the Apache Software Foundation (ASF) under one or more | ||
% contributor license agreements. See the NOTICE file distributed with | ||
% this work for additional information regarding copyright ownership. | ||
% The ASF licenses this file to you under the Apache License, Version | ||
% 2.0 (the "License"); you may not use this file except in compliance | ||
% with the License. You may obtain a copy of the License at | ||
% | ||
% http://www.apache.org/licenses/LICENSE-2.0 | ||
% | ||
% Unless required by applicable law or agreed to in writing, software | ||
% distributed under the License is distributed on an "AS IS" BASIS, | ||
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
% implied. See the License for the specific language governing | ||
% permissions and limitations under the License. | ||
classdef tRoundTrip < matlab.unittest.TestCase
    % Writes a table with arrow.internal.io.feather.Writer, reads it back
    % with featherread, and checks that the two tables are equal.

    methods(TestClassSetup)
        % Delete once arrow.internal.io.feather.Reader is submitted.
        function addFeatherFunctionsToMATLABPath(testCase)
            import matlab.unittest.fixtures.PathFixture
            % Add Feather test utilities to the MATLAB path. The folder is
            % resolved relative to this file rather than the current
            % folder, so the test does not depend on where it is run from.
            testFolder = fileparts(mfilename('fullpath'));
            utilFolder = fullfile(testFolder, '..', '..', '..', 'util');
            testCase.applyFixture(PathFixture(utilFolder));
            % arrow.cpp.call must be on the MATLAB path.
            testCase.assertTrue(~isempty(which('arrow.cpp.call')), ...
                '''arrow.cpp.call'' must be on the MATLAB path. Use ''addpath'' to add folders to the MATLAB path.');
        end
    end

    methods(Test)
        function Basic(testCase)
            % Round trip a table with one double and one single variable.
            import matlab.unittest.fixtures.TemporaryFolderFixture

            fixture = testCase.applyFixture(TemporaryFolderFixture);
            filename = fullfile(fixture.Folder, "temp.feather");

            DoubleVar = [10; 20; 30; 40];
            SingleVar = single([10; 15; 20; 25]);
            tWrite = table(DoubleVar, SingleVar);

            featherwrite(tWrite, filename);
            tRead = featherread(filename);
            % verifyEqual takes (actual, expected): the table read back is
            % the actual value and the table written is the expected one.
            testCase.verifyEqual(tRead, tWrite);
        end
    end
end
|
||
function featherwrite(T, filename)
    % Local helper: write table T to FILENAME using
    % arrow.internal.io.feather.Writer.
    featherWriter = arrow.internal.io.feather.Writer(filename);
    write(featherWriter, T);
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters