From bfd4ad7d75ced334634a884a92fb8b4f198190a3 Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 8 Sep 2023 11:37:21 -0400 Subject: [PATCH] GH-37628: [MATLAB] Implement `isequal` for the `arrow.tabular.Table` MATLAB class (#37629) ### Rationale for this change Following on to #37474, #37446, #37525, and #37627, we should implement `isequal` for the `arrow.tabular.Table` MATLAB class. ### What changes are included in this PR? 1. Add new function `arrow.tabular.internal.isequal` that both `arrow.tabular.RecordBatch` and `arrow.tabular.Table` can use to implement their `isequal` methods. 2. Modified `arrow.tabular.RecordBatch` to use the new `isequal` package function to implement its `isequal` method. 3. Implemented the `isequal` method for `arrow.tabular.Table` using the new `isequal` package function. ### Are these changes tested? Yes, added `isequal` unit tests to `tTable.m`. ### Are there any user-facing changes? Yes. Users can now compare `arrow.tabular.Table`s using `isequal`: ```matlab >> t1 = table(1, "A", false, VariableNames=["Number", "String", "Logical"]); >> t2 = table([1; 2], ["A"; "B"], [false; false], VariableNames=["Number", "String", "Logical"]); >> tbl1 = arrow.table(t1); >> tbl2 = arrow.table(t2); >> tbl3 = arrow.table(t1); >> isequal(tbl1, tbl2) ans = logical 0 >> isequal(tbl1, tbl3) ans = logical 1 ``` * Closes: #37628 Authored-by: Sarah Gilmore Signed-off-by: Kevin Gurney --- .../+arrow/+tabular/+internal/isequal.m | 60 +++++++++++++++ .../src/matlab/+arrow/+tabular/RecordBatch.m | 38 +--------- matlab/src/matlab/+arrow/+tabular/Table.m | 4 + matlab/test/arrow/tabular/tTable.m | 75 ++++++++++++++++++- 4 files changed, 138 insertions(+), 39 deletions(-) create mode 100644 matlab/src/matlab/+arrow/+tabular/+internal/isequal.m diff --git a/matlab/src/matlab/+arrow/+tabular/+internal/isequal.m b/matlab/src/matlab/+arrow/+tabular/+internal/isequal.m new file mode 100644 index 
0000000000000..9457620e131f4 --- /dev/null +++ b/matlab/src/matlab/+arrow/+tabular/+internal/isequal.m @@ -0,0 +1,60 @@ +%ISEQUAL Utility function used by both arrow.tabular.RecordBatch and +%arrow.tabular.Table to implement the isequal method. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function tf = isequal(tabularObj, varargin) + narginchk(2, inf); + tf = false; + + classType = string(class(tabularObj)); + + schemasToCompare = cell([1 numel(varargin)]); + for ii = 1:numel(varargin) + element = varargin{ii}; + if ~isa(element, classType) + % If element is not an instance of classType, then it cannot + % be equal to tabularObj. Return false early. + return; + end + schemasToCompare{ii} = element.Schema; + end + + if ~isequal(tabularObj.Schema, schemasToCompare{:}) + % If the schemas are not equal, then the record batches (or tables) + % are not equal. Return false early. + return; + end + + % Function that extracts the column stored at colIndex from the + % record batch (or table) stored at tabularIndex in varargin. 
+ getColumnFcn = @(tabularIndex, colIndex) varargin{tabularIndex}.column(colIndex); + + tabularObjIndices = 1:numel(varargin); + for ii = 1:tabularObj.NumColumns + colIndices = repmat(ii, [1 numel(tabularObjIndices)]); + % Gather all columns at index ii across the record batches (or + % tables) stored in varargin. Compare these columns with the + % corresponding column in obj. If they are not equal, then the + % record batches (or tables) are not equal. Return false. + columnsToCompare = arrayfun(getColumnFcn, tabularObjIndices, colIndices, UniformOutput=false); + if ~isequal(tabularObj.column(ii), columnsToCompare{:}) + return; + end + end + tf = true; +end + diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m index 32269e91141ca..725039e87e426 100644 --- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m +++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m @@ -95,43 +95,7 @@ end function tf = isequal(obj, varargin) - narginchk(2, inf); - tf = false; - - schemasToCompare = cell([1 numel(varargin)]); - for ii = 1:numel(varargin) - rb = varargin{ii}; - if ~isa(rb, "arrow.tabular.RecordBatch") - % If rb is not a RecordBatch, then it cannot be equal - % to obj. Return false early. - return; - end - schemasToCompare{ii} = rb.Schema; - end - - if ~isequal(obj.Schema, schemasToCompare{:}) - % If the schemas are not equal, the record batches are not - % equal. Return false early. - return; - end - - % Function that extracts the column stored at colIndex from the - % record batch stored at rbIndex in varargin. - getColumnFcn = @(rbIndex, colIndex) varargin{rbIndex}.column(colIndex); - - rbIndices = 1:numel(varargin); - for ii = 1:obj.NumColumns - colIndices = repmat(ii, [1 numel(rbIndices)]); - % Gather all columns at index ii across the record - % batches stored in varargin. Compare these columns with - % the corresponding column in obj. If they are not equal, - % then the record batches are not equal. 
Return false. - columnsToCompare = arrayfun(getColumnFcn, rbIndices, colIndices, UniformOutput=false); - if ~isequal(obj.column(ii), columnsToCompare{:}) - return; - end - end - tf = true; + tf = arrow.tabular.internal.isequal(obj, varargin{:}); end end diff --git a/matlab/src/matlab/+arrow/+tabular/Table.m b/matlab/src/matlab/+arrow/+tabular/Table.m index d9eb4d8409733..c2f73450408ef 100644 --- a/matlab/src/matlab/+arrow/+tabular/Table.m +++ b/matlab/src/matlab/+arrow/+tabular/Table.m @@ -97,6 +97,10 @@ T = obj.table(); end + function tf = isequal(obj, varargin) + tf = arrow.tabular.internal.isequal(obj, varargin{:}); + end + end methods (Access = private) diff --git a/matlab/test/arrow/tabular/tTable.m b/matlab/test/arrow/tabular/tTable.m index 8c6b9aae73752..7bb366a6043aa 100644 --- a/matlab/test/arrow/tabular/tTable.m +++ b/matlab/test/arrow/tabular/tTable.m @@ -64,8 +64,8 @@ function Table(testCase) % Verify that the toMATLAB method converts % an arrow.tabular.Table to a MATLAB table as expected. TOriginal = table([1, 2, 3]'); - arrowRecordBatch = arrow.recordBatch(TOriginal); - TConverted = table(arrowRecordBatch); + arrowTable = arrow.table(TOriginal); + TConverted = table(arrowTable); testCase.verifyEqual(TOriginal, TConverted); end @@ -593,6 +593,77 @@ function ColumnNamesNoSetter(testCase) "MATLAB:class:SetProhibited"); end + function TestIsEqualTrue(testCase) + % Verify two tables are considered equal if: + % 1. They have the same schema + % 2. Their corresponding columns are equal + import arrow.tabular.Table + + a1 = arrow.array([1 2 3]); + a2 = arrow.array(["A" "B" "C"]); + a3 = arrow.array([true true false]); + + t1 = Table.fromArrays(a1, a2, a3, ... + ColumnNames=["A", "B", "C"]); + t2 = Table.fromArrays(a1, a2, a3, ... 
+ ColumnNames=["A", "B", "C"]); + testCase.verifyTrue(isequal(t1, t2)); + + % Compare zero-column tables + t3 = Table.fromArrays(); + t4 = Table.fromArrays(); + testCase.verifyTrue(isequal(t3, t4)); + + % Compare zero-row tables + a4 = arrow.array([]); + a5 = arrow.array(strings(0, 0)); + t5 = Table.fromArrays(a4, a5, ColumnNames=["D" "E"]); + t6 = Table.fromArrays(a4, a5, ColumnNames=["D" "E"]); + testCase.verifyTrue(isequal(t5, t6)); + + % Call isequal with more than two arguments + testCase.verifyTrue(isequal(t3, t4, t3, t4)); + end + + function TestIsEqualFalse(testCase) + % Verify isequal returns false when expected. + import arrow.tabular.Table + + a1 = arrow.array([1 2 3]); + a2 = arrow.array(["A" "B" "C"]); + a3 = arrow.array([true true false]); + a4 = arrow.array(["A" missing "C"]); + a5 = arrow.array([1 2]); + a6 = arrow.array(["A" "B"]); + a7 = arrow.array([true true]); + + t1 = Table.fromArrays(a1, a2, a3, ... + ColumnNames=["A", "B", "C"]); + t2 = Table.fromArrays(a1, a2, a3, ... + ColumnNames=["D", "E", "F"]); + t3 = Table.fromArrays(a1, a4, a3, ... + ColumnNames=["A", "B", "C"]); + t4 = Table.fromArrays(a5, a6, a7, ... + ColumnNames=["A", "B", "C"]); + t5 = Table.fromArrays(a1, a2, a3, a1, ... + ColumnNames=["A", "B", "C", "D"]); + + % The column names are not equal + testCase.verifyFalse(isequal(t1, t2)); + + % The columns are not equal + testCase.verifyFalse(isequal(t1, t3)); + + % The number of rows are not equal + testCase.verifyFalse(isequal(t1, t4)); + + % The number of columns are not equal + testCase.verifyFalse(isequal(t1, t5)); + + % Call isequal with more than two arguments + testCase.verifyFalse(isequal(t1, t2, t3, t4)); + end + end methods