Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-35411: [MATLAB] Create a templated C++ Proxy Class for Numeric Arrays #35479

Merged
merged 22 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
41768f0
Create a NumericArray class templated on CType to use as the proxy
sgilmore10 May 4, 2023
ebb44e9
Add missing header file numeric_array.h
sgilmore10 May 4, 2023
9b8f7b0
Scoping PR to Float64
sgilmore10 May 4, 2023
cdc4672
Rename Print() to ToString() and return a string array from C++.
sgilmore10 May 4, 2023
fd84dcf
Add ToMatlab() method to NumericArray Proxy Class.
sgilmore10 May 4, 2023
180b832
Add basic tests for double() method for Float64Array
sgilmore10 May 4, 2023
42ffe1a
Add test proxy class for creating arrow Arrays not backed by MATLAB
sgilmore10 May 5, 2023
438ca97
Remove test class for generating arrow arrays not backed by MATLAB.
sgilmore10 May 5, 2023
f1588b1
Add file that was excluded in the previous changelist
sgilmore10 May 5, 2023
ec1a61c
Enable creating Float64Array by copying the original Matlab array
sgilmore10 May 5, 2023
14078a8
Create Array Proxy Interface for shared functionality
sgilmore10 May 5, 2023
c4f82a6
1. Add basic tests for edge case values (NaN, (+)(-)inf, realmin, rea…
sgilmore10 May 5, 2023
5728984
Delete temporary method.
sgilmore10 May 5, 2023
281933d
Delete out of date comment in cmake file
sgilmore10 May 5, 2023
26657a4
Fix incorrect logic deciding when to make a deep copy or not in the C++
sgilmore10 May 8, 2023
3be33d1
Fix indentation in comment
sgilmore10 May 9, 2023
c0b6104
Use auto instead of bool
sgilmore10 May 9, 2023
9d929a2
Use auto instead of size_t
sgilmore10 May 9, 2023
6c2455a
Remove extra spaces
sgilmore10 May 9, 2023
eb111f4
Use auto and make_shared to create shared_ptr<arrow::Buffer>
sgilmore10 May 9, 2023
f264676
Fix indentation
sgilmore10 May 9, 2023
983ff90
Use ArrayData to construct Arrays.
sgilmore10 May 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions matlab/src/cpp/arrow/matlab/array/proxy/array.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/matlab/array/proxy/array.h"

namespace arrow::matlab::array::proxy {

// Constructs the base Array proxy. The body only registers the ToString and
// ToMatlab member functions; constructor_arguments is not read here and the
// `array` member is not assigned by this constructor.
// NOTE(review): REGISTER_METHOD is presumably provided by libmexclass and
// exposes the named member function to callers of the proxy -- confirm.
Array::Array(const libmexclass::proxy::FunctionArguments& constructor_arguments) {

// Register Proxy methods.
REGISTER_METHOD(Array, ToString);
REGISTER_METHOD(Array, ToMatlab);

}

// Produces the text returned by the wrapped arrow::Array's ToString() and
// stores it, as a MATLAB scalar, in the first output slot of `context`.
void Array::ToString(libmexclass::proxy::method::Context& context) {
    // TODO: handle non-ascii characters
    const auto text = array->ToString();

    ::matlab::data::ArrayFactory mda_factory;
    context.outputs[0] = mda_factory.createScalar(text);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,27 @@
// specific language governing permissions and limitations
// under the License.

#include "float64_array.h"
#pragma once

#include "arrow/array.h"

#include "libmexclass/proxy/Proxy.h"

namespace arrow::matlab::array::proxy {
void Float64Array::Print(libmexclass::proxy::method::Context& context) {
// TODO: Return an MDA string representation of the Arrow array.
std::cout << array->ToString() << std::endl;

// Abstract base class for proxies that hold an arrow::Array.
//
// ToString is declared here as a regular member function; ToMatlab is pure
// virtual, so every concrete subclass has to provide its own conversion.
class Array : public libmexclass::proxy::Proxy {
    public:
        Array(const libmexclass::proxy::FunctionArguments& constructor_arguments);

        virtual ~Array() = default;

    protected:
        // Proxy method: string representation of the held array.
        void ToString(libmexclass::proxy::method::Context& context);

        // Proxy method: conversion of the held array to a MATLAB array.
        virtual void ToMatlab(libmexclass::proxy::method::Context& context) = 0;

        // The Arrow array this proxy refers to.
        std::shared_ptr<arrow::Array> array;
};

}
} // namespace arrow::matlab::array::proxy
60 changes: 0 additions & 60 deletions matlab/src/cpp/arrow/matlab/array/proxy/float64_array.h

This file was deleted.

97 changes: 97 additions & 0 deletions matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once


#include "arrow/array.h"
#include "arrow/array/data.h"
#include "arrow/array/util.h"

#include "arrow/builder.h"
#include "arrow/type_traits.h"

#include "arrow/matlab/array/proxy/array.h"

#include "libmexclass/proxy/Proxy.h"

namespace arrow::matlab::array::proxy {

// Array proxy for Arrow numeric arrays whose element type is CType.
//
// Constructor arguments (positional):
//   [0] MATLAB array of CType holding the values.
//   [1] MATLAB logical array; its first element chooses between copying the
//       values through an Arrow builder (true) and wrapping the MATLAB
//       array's data pointer in an arrow::Buffer without copying (false).
template<typename CType>
class NumericArray : public arrow::matlab::array::proxy::Array {
    public:
        NumericArray(const libmexclass::proxy::FunctionArguments& constructor_arguments)
            : arrow::matlab::array::proxy::Array(constructor_arguments) {
            using ArrowType = typename arrow::CTypeTraits<CType>::ArrowType;
            using BuilderType = typename arrow::CTypeTraits<CType>::BuilderType;

            // Unpack the two MATLAB arrays passed by the caller.
            const ::matlab::data::TypedArray<CType> values_mda = constructor_arguments[0];
            const ::matlab::data::TypedArray<bool> deep_copy_mda = constructor_arguments[1];

            // Raw pointer to the first element of the MATLAB array.
            auto first_element(values_mda.cbegin());
            auto mda_data = first_element.operator->();

            const auto copy_requested = deep_copy_mda[0];

            if (copy_requested) {
                // Deep copy: append every value to a builder and keep the
                // array it produces.
                BuilderType array_builder;
                auto append_status = array_builder.AppendValues(mda_data, values_mda.getNumberOfElements());

                // TODO: handle error case
                // If appending or finishing fails, `array` is left unassigned.
                if (!append_status.ok()) {
                    return;
                }

                auto finish_result = array_builder.Finish();
                if (finish_result.ok()) {
                    array = *finish_result;
                }
                return;
            }

            // Shallow path: describe the existing memory to Arrow.
            const auto arrow_type = arrow::CTypeTraits<CType>::type_singleton();
            const auto num_values = static_cast<int64_t>(values_mda.getNumberOfElements()); // cast size_t to int64_t

            // Do not make a copy when creating arrow::Buffer
            // NOTE(review): the buffer only points at the MATLAB data; the MATLAB
            // class appears to keep the source array alive for this case -- confirm.
            auto values_buffer = std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(mda_data),
                                                                 sizeof(CType) * values_mda.getNumberOfElements());

            // TODO: Implement null support
            std::shared_ptr<arrow::Buffer> validity_buffer = nullptr;

            array = arrow::MakeArray(arrow::ArrayData::Make(arrow_type, num_values, {validity_buffer, values_buffer}));
        }

    protected:
        // Copies the Arrow array's raw values into a new N-by-1 MATLAB array of
        // CType and stores it in the first output slot of `context`.
        void ToMatlab(libmexclass::proxy::method::Context& context) override {
            using ArrowArrayType = typename arrow::CTypeTraits<CType>::ArrayType;

            const auto row_count = static_cast<size_t>(array->length());
            const auto typed_array = std::static_pointer_cast<ArrowArrayType>(array);

            // Half-open range [first, last) over the raw values.
            const CType* const first = typed_array->raw_values();
            const CType* const last = first + row_count;

            ::matlab::data::ArrayFactory mda_factory;

            // Constructs a TypedArray from the raw values. Makes a copy.
            ::matlab::data::TypedArray<CType> column = mda_factory.createArray({row_count, 1}, first, last);
            context.outputs[0] = column;
        }
};

}
4 changes: 2 additions & 2 deletions matlab/src/cpp/arrow/matlab/proxy/factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/matlab/array/proxy/float64_array.h"
#include "arrow/matlab/array/proxy/numeric_array.h"

#include "factory.h"

Expand All @@ -26,7 +26,7 @@ namespace arrow::matlab::proxy {
std::shared_ptr<Proxy> Factory::make_proxy(const ClassName& class_name, const FunctionArguments& constructor_arguments) {

// Register MATLAB Proxy classes with corresponding C++ Proxy classes.
REGISTER_PROXY(arrow.array.proxy.Float64Array, arrow::matlab::array::proxy::Float64Array);
REGISTER_PROXY(arrow.array.proxy.Float64Array, arrow::matlab::array::proxy::NumericArray<double>);

// TODO: Decide what to do in the case that there isn't a Proxy match.
std::cout << "Did not find a matching C++ proxy for: " + class_name << std::endl;
Expand Down
27 changes: 20 additions & 7 deletions matlab/src/matlab/+arrow/+array/Float64Array.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,38 @@
Proxy
end

properties (Access=private)
properties (Hidden, SetAccess=private)
MatlabArray
end

methods
function obj = Float64Array(matlabArray)
obj.MatlabArray = matlabArray;
obj.Proxy = libmexclass.proxy.Proxy("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {obj.MatlabArray});
function obj = Float64Array(data, opts)
    % FLOAT64ARRAY Construct a Float64Array from a MATLAB double array.
    %
    %   data          - real, nonsparse, 2-D double array. Unless it is
    %                   empty it must also be a vector.
    %   opts.DeepCopy - logical scalar, default false. It is forwarded to
    %                   the C++ proxy as the second constructor argument.
    %                   When false, data is also stored in MatlabArray.
    arguments
        data
        % Validate here so that only a logical scalar ever reaches the C++
        % proxy, which reads this argument as a logical array and uses its
        % first element.
        opts.DeepCopy (1,1) logical = false
    end

    validateattributes(data, "double", ["2d", "nonsparse", "real"]);
    if ~isempty(data), validateattributes(data, "double", "vector"); end
    % Store a reference to the array if not doing a deep copy
    if (~opts.DeepCopy), obj.MatlabArray = data; end
    obj.Proxy = libmexclass.proxy.Proxy("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {data, opts.DeepCopy});
end

function Print(obj)
obj.Proxy.Print();
function data = double(obj)
% Return the array's values as a MATLAB array by calling the proxy's
% ToMatlab method.
data = obj.Proxy.ToMatlab();
end
end

methods (Access=protected)
function displayScalarObject(obj)
obj.Print();
disp(obj.ToString());
end
end

methods (Access=private)
function str = ToString(obj)
% Return the string representation produced by the proxy's ToString method.
str = obj.Proxy.ToString();
end
end
end
81 changes: 72 additions & 9 deletions matlab/test/arrow/array/tFloat64Array.m
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
% implied. See the License for the specific language governing
% permissions and limitations under the License.

properties (TestParameter)
MakeDeepCopy = {true false}
end

methods(TestClassSetup)
function verifyOnMatlabPath(testCase)
% arrow.array.Float64Array must be on the MATLAB path.
Expand All @@ -24,18 +28,77 @@ function verifyOnMatlabPath(testCase)
end
end

methods(TestMethodSetup)
function setupTempWorkingDirectory(testCase)
import matlab.unittest.fixtures.WorkingFolderFixture;
testCase.applyFixture(WorkingFolderFixture);
end
end

methods(Test)
function Basic(testCase)
A = arrow.array.Float64Array([1, 2, 3]);
function Basic(testCase, MakeDeepCopy)
    % Construction succeeds and yields an object of the expected class.
    arrowArray = arrow.array.Float64Array([1, 2, 3], DeepCopy=MakeDeepCopy);
    actualClass = string(class(arrowArray));
    testCase.verifyEqual(actualClass, "arrow.array.Float64Array");
end

function ShallowCopy(testCase)
    % By default, Float64Array does not create a deep copy on
    % construction when constructed from a MATLAB array. Instead,
    % it stores a shallow copy of the array to keep the memory alive.
    storedValues = [1 2 3];
    convertedValues = [1 2 3]';

    % DeepCopy left at its default value.
    defaultArray = arrow.array.Float64Array([1, 2, 3]);
    testCase.verifyEqual(defaultArray.MatlabArray, storedValues);
    testCase.verifyEqual(double(defaultArray), convertedValues);

    % DeepCopy=false supplied explicitly.
    explicitArray = arrow.array.Float64Array([1, 2, 3], DeepCopy=false);
    testCase.verifyEqual(explicitArray.MatlabArray, storedValues);
    testCase.verifyEqual(double(explicitArray), convertedValues);
end

function DeepCopy(testCase)
    % With DeepCopy=true the MATLAB array must not be stored on the
    % object, while the values remain retrievable via double().
    arrowArray = arrow.array.Float64Array([1, 2, 3], DeepCopy=true);
    storedArray = arrowArray.MatlabArray;
    testCase.verifyEqual(storedArray, []);
    testCase.verifyEqual(double(arrowArray), [1 2 3]');
end

function Double(testCase, MakeDeepCopy)
    % double() returns the values for scalar, vector and empty inputs.

    % Scalar input
    scalarArray = arrow.array.Float64Array(100, DeepCopy=MakeDeepCopy);
    testCase.verifyEqual(double(scalarArray), 100);

    % Vector input comes back as a column vector
    vectorArray = arrow.array.Float64Array([1 2 3], DeepCopy=MakeDeepCopy);
    testCase.verifyEqual(double(vectorArray), [1 2 3]');

    % Empty input comes back as a 0-by-1 double
    emptyArray = arrow.array.Float64Array([], DeepCopy=MakeDeepCopy);
    testCase.verifyEqual(double(emptyArray), double.empty(0, 1));
end

function MinValue(testCase, MakeDeepCopy)
    % realmin survives the round trip unchanged.
    arrowArray = arrow.array.Float64Array(realmin, DeepCopy=MakeDeepCopy);
    roundTripped = double(arrowArray);
    testCase.verifyEqual(roundTripped, realmin);
end

function MaxValue(testCase, MakeDeepCopy)
    % realmax survives the round trip unchanged.
    arrowArray = arrow.array.Float64Array(realmax, DeepCopy=MakeDeepCopy);
    roundTripped = double(arrowArray);
    testCase.verifyEqual(roundTripped, realmax);
end

function InfValues(testCase, MakeDeepCopy)
    % Positive and negative infinity survive the round trip.
    arrowArray = arrow.array.Float64Array([Inf -Inf], DeepCopy=MakeDeepCopy);
    roundTripped = double(arrowArray);
    testCase.verifyEqual(roundTripped, [Inf -Inf]');
end

function ErrorIfComplex(testCase, MakeDeepCopy)
    % Complex input must be rejected by the constructor.
    constructFromComplex = @() arrow.array.Float64Array([10 + 1i, 4], DeepCopy=MakeDeepCopy);
    testCase.verifyError(constructFromComplex, "MATLAB:expectedReal");
end

function ErrorIfSparse(testCase, MakeDeepCopy)
    % Sparse input must be rejected by the constructor.
    constructFromSparse = @() arrow.array.Float64Array(sparse(ones([10 1])), DeepCopy=MakeDeepCopy);
    testCase.verifyError(constructFromSparse, "MATLAB:expectedNonsparse");
end
end
end
Loading