Skip to content

Commit

Permalink
GH-37628: [MATLAB] Implement isequal for the arrow.tabular.Table
Browse files Browse the repository at this point in the history
…MATLAB class (#37629)

### Rationale for this change

Following on from #37474, #37446, #37525, and #37627, we should implement `isequal` for the `arrow.tabular.Table` MATLAB class.

### What changes are included in this PR?

1. Add new function `arrow.tabular.internal.isequal` that both `arrow.tabular.RecordBatch` and `arrow.tabular.Table` can use to implement their `isequal` methods.
2. Modified `arrow.tabular.RecordBatch` to use the new `isequal` package function to implement its `isequal` method.
3. Implemented the `isequal` method for `arrow.tabular.Table` using the new `isequal` package function.

### Are these changes tested?

Yes, added `isequal` unit tests to `tTable.m`

### Are there any user-facing changes?

Yes. Users can now compare `arrow.tabular.Table`s using `isequal`:

```matlab
>> t1 = table(1, "A", false, VariableNames=["Number",  "String", "Logical"]);
>> t2 = table([1; 2], ["A"; "B"], [false; false], VariableNames=["Number",  "String", "Logical"]); 
>> tbl1 = arrow.table(t1);
>> tbl2 = arrow.table(t2);
>> tbl3 = arrow.table(t1);

>> isequal(tbl1, tbl2)

ans =

  logical

   0

>> isequal(tbl1, tbl3)

ans =

  logical

   1
```

* Closes: #37628

Authored-by: Sarah Gilmore <sgilmore@mathworks.com>
Signed-off-by: Kevin Gurney <kgurney@mathworks.com>
  • Loading branch information
sgilmore10 committed Sep 8, 2023
1 parent 0e6b8c5 commit f1d2fc9
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 39 deletions.
60 changes: 60 additions & 0 deletions matlab/src/matlab/+arrow/+tabular/+internal/isequal.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
%ISEQUAL Utility function used by both arrow.tabular.RecordBatch and
%arrow.tabular.Table to implement the isequal method.

% Licensed to the Apache Software Foundation (ASF) under one or more
% contributor license agreements. See the NOTICE file distributed with
% this work for additional information regarding copyright ownership.
% The ASF licenses this file to you under the Apache License, Version
% 2.0 (the "License"); you may not use this file except in compliance
% with the License. You may obtain a copy of the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS,
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
% implied. See the License for the specific language governing
% permissions and limitations under the License.

function tf = isequal(tabularObj, varargin)
    % Returns true only when every object in varargin
    %   1. is an instance of tabularObj's class (as reported by isa),
    %   2. has a Schema equal to tabularObj.Schema, and
    %   3. has columns equal to tabularObj's columns at every index.
    % Otherwise returns false. At least one object to compare against
    % must be supplied.
    narginchk(2, inf);

    % An argument that is not an instance of tabularObj's class can
    % never be equal to tabularObj.
    expectedClass = string(class(tabularObj));
    hasExpectedClass = cellfun(@(other) isa(other, expectedClass), varargin);
    if ~all(hasExpectedClass)
        tf = false;
        return;
    end

    % Unequal schemas mean the record batches (or tables) are unequal.
    otherSchemas = cellfun(@(other) other.Schema, varargin, UniformOutput=false);
    if ~isequal(tabularObj.Schema, otherSchemas{:})
        tf = false;
        return;
    end

    % Walk the columns in order. For each position, collect that column
    % from every object in varargin and compare the whole group against
    % the matching column of tabularObj. Stop at the first mismatch.
    for colIdx = 1:tabularObj.NumColumns
        otherColumns = cellfun(@(other) other.column(colIdx), varargin, UniformOutput=false);
        if ~isequal(tabularObj.column(colIdx), otherColumns{:})
            tf = false;
            return;
        end
    end

    tf = true;
end

38 changes: 1 addition & 37 deletions matlab/src/matlab/+arrow/+tabular/RecordBatch.m
Original file line number Diff line number Diff line change
Expand Up @@ -95,43 +95,7 @@
end

function tf = isequal(obj, varargin)
% Reports whether obj and every object in varargin are equal.
% NOTE(review): this is a rendered diff hunk (1 addition, 37 deletions
% per the file header). The inline comparison below is the removed
% implementation; the final assignment, which delegates to
% arrow.tabular.internal.isequal, is the single added line.
narginchk(2, inf);
tf = false;

schemasToCompare = cell([1 numel(varargin)]);
for ii = 1:numel(varargin)
rb = varargin{ii};
if ~isa(rb, "arrow.tabular.RecordBatch")
% If rb is not a RecordBatch, then it cannot be equal
% to obj. Return false early.
return;
end
schemasToCompare{ii} = rb.Schema;
end

if ~isequal(obj.Schema, schemasToCompare{:})
% If the schemas are not equal, the record batches are not
% equal. Return false early.
return;
end

% Function that extracts the column stored at colIndex from the
% record batch stored at rbIndex in varargin.
getColumnFcn = @(rbIndex, colIndex) varargin{rbIndex}.column(colIndex);

rbIndices = 1:numel(varargin);
for ii = 1:obj.NumColumns
colIndices = repmat(ii, [1 numel(rbIndices)]);
% Gather all columns at index ii across the record
% batches stored in varargin. Compare these columns with
% the corresponding column in obj. If they are not equal,
% then the record batches are not equal. Return false.
columnsToCompare = arrayfun(getColumnFcn, rbIndices, colIndices, UniformOutput=false);
if ~isequal(obj.column(ii), columnsToCompare{:})
return;
end
end
tf = true;
tf = arrow.tabular.internal.isequal(obj, varargin{:});
end
end

Expand Down
4 changes: 4 additions & 0 deletions matlab/src/matlab/+arrow/+tabular/Table.m
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@
T = obj.table();
end

function tf = isequal(obj, varargin)
% Compares obj against every additional argument by forwarding all of
% them to arrow.tabular.internal.isequal and returning its result.
tf = arrow.tabular.internal.isequal(obj, varargin{:});
end

end

methods (Access = private)
Expand Down
75 changes: 73 additions & 2 deletions matlab/test/arrow/tabular/tTable.m
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ function Table(testCase)
% Verify that the toMATLAB method converts
% an arrow.tabular.Table to a MATLAB table as expected.
TOriginal = table([1, 2, 3]');
arrowRecordBatch = arrow.recordBatch(TOriginal);
TConverted = table(arrowRecordBatch);
arrowTable = arrow.table(TOriginal);
TConverted = table(arrowTable);
testCase.verifyEqual(TOriginal, TConverted);
end

Expand Down Expand Up @@ -593,6 +593,77 @@ function ColumnNamesNoSetter(testCase)
"MATLAB:class:SetProhibited");
end

function TestIsEqualTrue(testCase)
    % Verify two (or more) tables are considered equal if:
    %   1. They have the same schema
    %   2. Their corresponding columns are equal
    import arrow.tabular.Table

    a1 = arrow.array([1 2 3]);
    a2 = arrow.array(["A" "B" "C"]);
    a3 = arrow.array([true true false]);

    t1 = Table.fromArrays(a1, a2, a3, ...
        ColumnNames=["A", "B", "C"]);
    t2 = Table.fromArrays(a1, a2, a3, ...
        ColumnNames=["A", "B", "C"]);
    testCase.verifyTrue(isequal(t1, t2));

    % Compare zero-column tables
    t3 = Table.fromArrays();
    t4 = Table.fromArrays();
    testCase.verifyTrue(isequal(t3, t4));

    % Compare zero-row tables
    a4 = arrow.array([]);
    a5 = arrow.array(strings(0, 0));
    t5 = Table.fromArrays(a4, a5, ColumnNames=["D" "E"]);
    t6 = Table.fromArrays(a4, a5, ColumnNames=["D" "E"]);
    testCase.verifyTrue(isequal(t5, t6));

    % Call isequal with more than two arguments
    testCase.verifyTrue(isequal(t3, t4, t3, t4));

    % Call isequal with more than two arguments on tables that have
    % columns, so the column-by-column comparison is exercised with
    % several inputs at once (zero-column tables never reach it).
    testCase.verifyTrue(isequal(t1, t2, t1));
end

function TestIsEqualFalse(testCase)
    % Verify isequal returns false when the tables differ in column
    % names, column values, number of rows, or number of columns, and
    % when one of the arguments is not an arrow.tabular.Table.
    import arrow.tabular.Table

    a1 = arrow.array([1 2 3]);
    a2 = arrow.array(["A" "B" "C"]);
    a3 = arrow.array([true true false]);
    a4 = arrow.array(["A" missing "C"]);
    a5 = arrow.array([1 2]);
    a6 = arrow.array(["A" "B"]);
    a7 = arrow.array([true true]);

    t1 = Table.fromArrays(a1, a2, a3, ...
        ColumnNames=["A", "B", "C"]);
    t2 = Table.fromArrays(a1, a2, a3, ...
        ColumnNames=["D", "E", "F"]);
    t3 = Table.fromArrays(a1, a4, a3, ...
        ColumnNames=["A", "B", "C"]);
    t4 = Table.fromArrays(a5, a6, a7, ...
        ColumnNames=["A", "B", "C"]);
    t5 = Table.fromArrays(a1, a2, a3, a1, ...
        ColumnNames=["A", "B", "C", "D"]);

    % The column names are not equal
    testCase.verifyFalse(isequal(t1, t2));

    % The columns are not equal
    testCase.verifyFalse(isequal(t1, t3));

    % The number of rows are not equal
    testCase.verifyFalse(isequal(t1, t4));

    % The number of columns are not equal
    testCase.verifyFalse(isequal(t1, t5));

    % Call isequal with more than two arguments
    testCase.verifyFalse(isequal(t1, t2, t3, t4));

    % More than two arguments where the schemas all match but one
    % table has a different column, so the mismatch is only detected
    % by the column-by-column comparison.
    testCase.verifyFalse(isequal(t1, t1, t3));

    % The second argument is not an arrow.tabular.Table
    testCase.verifyFalse(isequal(t1, 1));
end

end

methods
Expand Down

0 comments on commit f1d2fc9

Please sign in to comment.