Skip to content

Commit

Permalink
GH-36560: [MATLAB] Remove the DeepCopy name-value pair from `arrow.ar…
Browse files Browse the repository at this point in the history
…ray.<Numeric>Array` constructors (#36561)

### Rationale for this change

We initially added the `DeepCopy` name-value pair to the numeric array class constructors for testing purposes. When `DeepCopy=true`, the proxy classes copy the data from the MATLAB array when creating the underlying `std::shared_ptr<arrow::NumericArray<CType>>`. By default, we don't make a copy and instead store the original array as a property named `MatlabArray`. Doing so keeps the backing memory of the arrow array alive and avoids a copy.

### What changes are included in this PR?
`DeepCopy` is no longer accepted as a name-value pair by the constructors of the numeric array classes. 

### Are these changes tested?
No tests were needed.

### Are there any user-facing changes?

This is technically a user facing change, but the MATLAB interface is still experimental and under active development. We don't expect anyone to be affected by this change.

* Closes: #36560

Authored-by: Sarah Gilmore <sgilmore@mathworks.com>
Signed-off-by: Kevin Gurney <kgurney@mathworks.com>
  • Loading branch information
sgilmore10 committed Jul 10, 2023
1 parent 82e4338 commit 5c4ba87
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 112 deletions.
37 changes: 9 additions & 28 deletions matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include "arrow/array/data.h"
#include "arrow/array/util.h"

#include "arrow/builder.h"
#include "arrow/type_traits.h"

#include "arrow/matlab/array/proxy/array.h"
Expand Down Expand Up @@ -50,39 +49,21 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
// Get the mxArray from constructor arguments
const ::matlab::data::TypedArray<CType> numeric_mda = opts[0]["MatlabArray"];
const ::matlab::data::TypedArray<bool> valid_mda = opts[0]["Valid"];
const ::matlab::data::TypedArray<bool> make_copy = opts[0]["DeepCopy"];

// Get raw pointer of mxArray
auto it(numeric_mda.cbegin());
auto dt = it.operator->();

const auto make_deep_copy = make_copy[0];
const auto data_type = arrow::CTypeTraits<CType>::type_singleton();
const auto length = static_cast<int64_t>(numeric_mda.getNumberOfElements()); // cast size_t to int64_t

if (make_deep_copy) {
// Get the unpacked validity bitmap (if it exists)
auto unpacked_validity_bitmap = bit::extract_ptr(valid_mda);

BuilderType builder;

auto status = builder.AppendValues(dt, numeric_mda.getNumberOfElements(), unpacked_validity_bitmap);
MATLAB_ERROR_IF_NOT_OK(status, error::APPEND_VALUES_ERROR_ID);

MATLAB_ASSIGN_OR_ERROR(auto array, builder.Finish(), error::BUILD_ARRAY_ERROR_ID);

return std::make_shared<arrow::matlab::array::proxy::NumericArray<CType>>(array);

} else {
const auto data_type = arrow::CTypeTraits<CType>::type_singleton();
const auto length = static_cast<int64_t>(numeric_mda.getNumberOfElements()); // cast size_t to int64_t

// Do not make a copy when creating arrow::Buffer
auto data_buffer = std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(dt),
sizeof(CType) * numeric_mda.getNumberOfElements());
// Pack the validity bitmap values.
MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, bit::packValid(valid_mda), error::BITPACK_VALIDITY_BITMAP_ERROR_ID);
auto array_data = arrow::ArrayData::Make(data_type, length, {packed_validity_bitmap, data_buffer});
return std::make_shared<arrow::matlab::array::proxy::NumericArray<CType>>(arrow::MakeArray(array_data));
}
// Do not make a copy when creating arrow::Buffer
auto data_buffer = std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(dt),
sizeof(CType) * numeric_mda.getNumberOfElements());
// Pack the validity bitmap values.
MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, bit::packValid(valid_mda), error::BITPACK_VALIDITY_BITMAP_ERROR_ID);
auto array_data = arrow::ArrayData::Make(data_type, length, {packed_validity_bitmap, data_buffer});
return std::make_shared<arrow::matlab::array::proxy::NumericArray<CType>>(arrow::MakeArray(array_data));
}

protected:
Expand Down
17 changes: 8 additions & 9 deletions matlab/src/matlab/+arrow/+array/NumericArray.m
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,23 @@
end

methods
function obj = NumericArray(data, type, proxyName, opts, nullOpts)
function obj = NumericArray(data, type, proxyName, opts)
arguments
data
type(1, 1) string
proxyName(1, 1) string
opts.DeepCopy(1, 1) logical = false
nullOpts.InferNulls(1, 1) logical = true
nullOpts.Valid
opts.InferNulls(1, 1) logical = true
opts.Valid
end
arrow.args.validateTypeAndShape(data, type);
validElements = arrow.args.parseValidElements(data, nullOpts);
opts = struct(MatlabArray=data, Valid=validElements, DeepCopy=opts.DeepCopy);
validElements = arrow.args.parseValidElements(data, opts);
opts = struct(MatlabArray=data, Valid=validElements);
obj@arrow.array.Array("Name", proxyName, "ConstructorArguments", {opts});
obj.MatlabArray = cast(obj.MatlabArray, type);
% Store a reference to the array if not doing a deep copy
if ~opts.DeepCopy, obj.MatlabArray = data; end
% Store a reference to the array
obj.MatlabArray = data;
end

function matlabArray = toMATLAB(obj)
matlabArray = obj.Proxy.toMATLAB();
matlabArray(~obj.Valid) = obj.NullSubstitutionValue;
Expand Down
69 changes: 26 additions & 43 deletions matlab/test/arrow/array/hNumericArray.m
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@
ArrowType(1, 1)
end

properties (TestParameter)
MakeDeepCopy = {true false}
end

methods(TestClassSetup)
function verifyOnMatlabPath(tc)
% Verify the arrow array class is on the MATLAB Search Path.
Expand All @@ -41,103 +37,90 @@ function verifyOnMatlabPath(tc)
end

methods(Test)
function BasicTest(tc, MakeDeepCopy)
A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1 2 3]), DeepCopy=MakeDeepCopy);
function BasicTest(tc)
A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1 2 3]));
className = string(class(A));
tc.verifyEqual(className, tc.ArrowArrayClassName);
end

function ShallowCopyTest(tc)
% By default, NumericArrays do not create a deep copy on
% construction when constructed from a MATLAB array. Instead,
% it stores a shallow copy of the array keep the memory alive.
% NumericArrays stores a shallow copy of the array keep the
% memory alive.
A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1, 2, 3]));
tc.verifyEqual(A.MatlabArray, tc.MatlabArrayFcn([1, 2, 3]));
tc.verifyEqual(toMATLAB(A), tc.MatlabArrayFcn([1 2 3]'));

A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1, 2, 3]), DeepCopy=false);
tc.verifyEqual(A.MatlabArray, tc.MatlabArrayFcn([1 2 3]));
tc.verifyEqual(toMATLAB(A), tc.MatlabArrayFcn([1 2 3]'));
end

function DeepCopyTest(tc)
% Verify NumericArrays does not store shallow copy of the
% MATLAB array if DeepCopy=true was supplied.
A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1, 2, 3]), DeepCopy=true);
tc.verifyEqual(A.MatlabArray, tc.MatlabArrayFcn([]));
tc.verifyEqual(toMATLAB(A), tc.MatlabArrayFcn([1 2 3]'));
end

function ToMATLAB(tc, MakeDeepCopy)
function ToMATLAB(tc)
% Create array from a scalar
A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(100), DeepCopy=MakeDeepCopy);
A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(100));
data = toMATLAB(A1);
tc.verifyEqual(data, tc.MatlabArrayFcn(100));

% Create array from a vector
A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1 2 3]), DeepCopy=MakeDeepCopy);
A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1 2 3]));
data = toMATLAB(A2);
tc.verifyEqual(data, tc.MatlabArrayFcn([1 2 3]'));

% Create a Float64Array from an empty double vector
A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([]), DeepCopy=MakeDeepCopy);
A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([]));
data = toMATLAB(A3);
tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
end

function MatlabConversion(tc, MakeDeepCopy)
function MatlabConversion(tc)
% Tests the type-specific conversion methods, e.g. single for
% arrow.array.Float32Array, double for array.array.Float64Array

% Create array from a scalar
A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(100), DeepCopy=MakeDeepCopy);
A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(100));
data = tc.MatlabConversionFcn(A1);
tc.verifyEqual(data, tc.MatlabArrayFcn(100));

% Create array from a vector
A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1 2 3]), DeepCopy=MakeDeepCopy);
A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1 2 3]));
data = tc.MatlabConversionFcn(A2);
tc.verifyEqual(data, tc.MatlabArrayFcn([1 2 3]'));

% Create an array from an empty vector
A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([]), DeepCopy=MakeDeepCopy);
A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([]));
data = tc.MatlabConversionFcn(A3);
tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
end

function MinValueTest(tc, MakeDeepCopy)
A = tc.ArrowArrayConstructor(tc.MinValue, DeepCopy=MakeDeepCopy);
function MinValueTest(tc)
A = tc.ArrowArrayConstructor(tc.MinValue);
tc.verifyEqual(toMATLAB(A), tc.MinValue);
end

function MaxValueTest(tc, MakeDeepCopy)
A1 = tc.ArrowArrayConstructor(tc.MaxValue, DeepCopy=MakeDeepCopy);
function MaxValueTest(tc)
A1 = tc.ArrowArrayConstructor(tc.MaxValue);
tc.verifyEqual(toMATLAB(A1), tc.MaxValue);
end

function ErrorIfComplex(tc, MakeDeepCopy)
fcn = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn([10 + 1i, 4]), DeepCopy=MakeDeepCopy);
function ErrorIfComplex(tc)
fcn = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn([10 + 1i, 4]));
tc.verifyError(fcn, "MATLAB:expectedReal");
end

function ErrorIfNonVector(tc, MakeDeepCopy)
function ErrorIfNonVector(tc)
data = tc.MatlabArrayFcn([1 2 3 4 5 6 7 8 9]);
data = reshape(data, 3, 1, 3);
fcn = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn(data), DeepCopy=MakeDeepCopy);
fcn = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn(data));
tc.verifyError(fcn, "MATLAB:expectedVector");
end

function ErrorIfEmptyArrayIsNotTwoDimensional(tc, MakeDeepCopy)
function ErrorIfEmptyArrayIsNotTwoDimensional(tc)
data = tc.MatlabArrayFcn(reshape([], [1 0 0]));
fcn = @() tc.ArrowArrayConstructor(data, DeepCopy=MakeDeepCopy);
fcn = @() tc.ArrowArrayConstructor(data);
tc.verifyError(fcn, "MATLAB:expected2D");
end

function LogicalValidNVPair(tc, MakeDeepCopy)
function LogicalValidNVPair(tc)
% Verify the expected elements are treated as null when Valid
% is provided as a logical array
data = tc.MatlabArrayFcn([1 2 3 4]);
arrowArray = tc.ArrowArrayConstructor(data, Valid=[false true true false], DeepCopy=MakeDeepCopy);
arrowArray = tc.ArrowArrayConstructor(data, Valid=[false true true false]);

expectedData = data';
expectedData([1 4]) = tc.NullSubstitutionValue;
Expand All @@ -146,11 +129,11 @@ function LogicalValidNVPair(tc, MakeDeepCopy)
tc.verifyEqual(arrowArray.Valid, [false; true; true; false]);
end

function NumericValidNVPair(tc, MakeDeepCopy)
function NumericValidNVPair(tc)
% Verify the expected elements are treated as null when Valid
% is provided as a array of indices
data = tc.MatlabArrayFcn([1 2 3 4]);
arrowArray = tc.ArrowArrayConstructor(data, Valid=[2 4], DeepCopy=MakeDeepCopy);
arrowArray = tc.ArrowArrayConstructor(data, Valid=[2 4]);

expectedData = data';
expectedData([1 3]) = tc.NullSubstitutionValue;
Expand Down
30 changes: 15 additions & 15 deletions matlab/test/arrow/array/tFloat32Array.m
Original file line number Diff line number Diff line change
Expand Up @@ -28,68 +28,68 @@
end

methods(Test)
function InfValues(testCase, MakeDeepCopy)
A1 = arrow.array.Float32Array(single([Inf -Inf]), DeepCopy=MakeDeepCopy);
function InfValues(testCase)
A1 = arrow.array.Float32Array(single([Inf -Inf]));
data = single(A1);
testCase.verifyEqual(data, single([Inf -Inf]'));
end

function ValidBasic(testCase, MakeDeepCopy)
function ValidBasic(testCase)
% Create a MATLAB array with one null value (i.e. one NaN).
% Verify NaN is considered a null value by default.
matlabArray = single([1, NaN, 3]');
arrowArray = arrow.array.Float32Array(matlabArray, DeepCopy=MakeDeepCopy);
arrowArray = arrow.array.Float32Array(matlabArray);
expectedValid = [true, false, true]';
testCase.verifyEqual(arrowArray.Valid, expectedValid);
end

function InferNulls(testCase, MakeDeepCopy)
function InferNulls(testCase)
matlabArray = single([1, NaN, 3]);

% Verify NaN is treated as a null value when InferNulls=true.
arrowArray1 = arrow.array.Float32Array(matlabArray, InferNulls=true, DeepCopy=MakeDeepCopy);
arrowArray1 = arrow.array.Float32Array(matlabArray, InferNulls=true);
expectedValid1 = [true false true]';
testCase.verifyEqual(arrowArray1.Valid, expectedValid1);
testCase.verifyEqual(toMATLAB(arrowArray1), matlabArray');

% Verify NaN is not treated as a null value when InferNulls=false.
arrowArray2 = arrow.array.Float32Array(matlabArray, InferNulls=false, DeepCopy=MakeDeepCopy);
arrowArray2 = arrow.array.Float32Array(matlabArray, InferNulls=false);
expectedValid2 = [true true true]';
testCase.verifyEqual(arrowArray2.Valid, expectedValid2);
testCase.verifyEqual(toMATLAB(arrowArray2), matlabArray');
end

function ValidNoNulls(testCase, MakeDeepCopy)
function ValidNoNulls(testCase)
% Create a MATLAB array with no null values (i.e. no NaNs).
matlabArray = single([1, 2, 3]');
arrowArray = arrow.array.Float32Array(matlabArray, DeepCopy=MakeDeepCopy);
arrowArray = arrow.array.Float32Array(matlabArray);
expectedValid = [true, true, true]';
testCase.verifyEqual(arrowArray.Valid, expectedValid);
end

function ValidAllNulls(testCase, MakeDeepCopy)
function ValidAllNulls(testCase)
% Create a MATLAB array with all null values (i.e. all NaNs).
matlabArray = single([NaN, NaN, NaN]');
arrowArray = arrow.array.Float32Array(matlabArray, DeepCopy=MakeDeepCopy);
arrowArray = arrow.array.Float32Array(matlabArray);
expectedValid = [false, false, false]';
testCase.verifyEqual(arrowArray.Valid, expectedValid);
end

function EmptyArrayValidBitmap(testCase, MakeDeepCopy)
function EmptyArrayValidBitmap(testCase)
% Create an empty 0x0 MATLAB array.
matlabArray = single.empty(0, 0);
arrowArray = arrow.array.Float32Array(matlabArray, DeepCopy=MakeDeepCopy);
arrowArray = arrow.array.Float32Array(matlabArray);
expectedValid = logical.empty(0, 1);
testCase.verifyEqual(arrowArray.Valid, expectedValid);

% Create an empty 0x1 MATLAB array.
matlabArray = single.empty(0, 1);
arrowArray = arrow.array.Float32Array(matlabArray, DeepCopy=MakeDeepCopy);
arrowArray = arrow.array.Float32Array(matlabArray);
testCase.verifyEqual(arrowArray.Valid, expectedValid);

% Create an empty 1x0 MATLAB array.
matlabArray = single.empty(1, 0);
arrowArray = arrow.array.Float32Array(matlabArray, DeepCopy=MakeDeepCopy);
arrowArray = arrow.array.Float32Array(matlabArray);
testCase.verifyEqual(arrowArray.Valid, expectedValid);
end

Expand Down
Loading

0 comments on commit 5c4ba87

Please sign in to comment.