Fix: Order-insensitive unit test equality assertion for expected/actu…

…al with multiple nulls (#10202) (cherry picked from commit 84456f5)
dbt-labs · May 24, 2024 · 168ddcd · 168ddcd
1 parent 11ea573
commit 168ddcd
Show file tree

Hide file tree

Showing 4 changed files with 217 additions and 10 deletions.
diff --git a/.changes/unreleased/Fixes-20240522-182855.yaml b/.changes/unreleased/Fixes-20240522-182855.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: 'Fix: Order-insensitive unit test equality assertion for expected/actual with
+  multiple nulls'
+time: 2024-05-22T18:28:55.91733-04:00
+custom:
+  Author: michelleark
+  Issue: "10167"
diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py
@@ -343,16 +343,17 @@ def _get_unit_test_agate_table(self, result_table, actual_or_expected: str):
     def _get_daff_diff(
         self, expected: "agate.Table", actual: "agate.Table", ordered: bool = False
     ) -> daff.TableDiff:
-
-        expected_daff_table = daff.PythonTableView(list_rows_from_table(expected))
-        actual_daff_table = daff.PythonTableView(list_rows_from_table(actual))
-
-        alignment = daff.Coopy.compareTables(expected_daff_table, actual_daff_table).align()
-        result = daff.PythonTableView([])
+        # Sort expected and actual inputs prior to creating daff diff to ensure order insensitivity
+        # https://github.com/paulfitz/daff/issues/200
+        expected_daff_table = daff.PythonTableView(list_rows_from_table(expected, sort=True))
+        actual_daff_table = daff.PythonTableView(list_rows_from_table(actual, sort=True))
 
         flags = daff.CompareFlags()
         flags.ordered = ordered
 
+        alignment = daff.Coopy.compareTables(expected_daff_table, actual_daff_table, flags).align()
+        result = daff.PythonTableView([])
+
         diff = daff.TableDiff(alignment, flags)
         diff.hilite(result)
         return diff
@@ -413,10 +414,25 @@ def json_rows_from_table(table: "agate.Table") -> List[Dict[str, Any]]:
 
 
 # This was originally in agate_helper, but that was moved out into dbt_common
-def list_rows_from_table(table: "agate.Table") -> List[Any]:
-    "Convert a table to a list of lists, where the first element represents the header"
-    rows = [[col.name for col in table.columns]]
+def list_rows_from_table(table: "agate.Table", sort: bool = False) -> List[Any]:
+    """
+    Convert given table to a list of lists, where the first element represents the header
+
+    By default, sort is False and no sort order is applied to the non-header rows of the given table.
+
+    If sort is True, sort the non-header rows hierarchically, treating None values as lower in order.
+    Examples:
+        * [['a','b','c'],[4,5,6],[1,2,3]] -> [['a','b','c'],[1,2,3],[4,5,6]]
+        * [['a','b','c'],[4,5,6],[1,null,3]] -> [['a','b','c'],[1,null,3],[4,5,6]]
+        * [['a','b','c'],[4,5,6],[null,2,3]] -> [['a','b','c'],[4,5,6],[null,2,3]]
+    """
+    header = [col.name for col in table.columns]
+
+    rows = []
     for row in table.rows:
         rows.append(list(row.values()))
 
-    return rows
+    if sort:
+        rows = sorted(rows, key=lambda x: [(elem is None, elem) for elem in x])
+
+    return [header] + rows
diff --git a/tests/functional/unit_testing/test_ut_diffing.py b/tests/functional/unit_testing/test_ut_diffing.py
@@ -0,0 +1,113 @@
+import pytest
+
+from dbt.tests.util import run_dbt
+
+my_input_model = """
+SELECT 1 as id, 'some string' as status
+"""
+
+my_model = """
+SELECT * FROM {{ ref("my_input_model") }}
+"""
+
+test_my_model_order_insensitive = """
+unit_tests:
+  - name: unordered_no_nulls
+    model: my_model
+    given:
+      - input: ref("my_input_model")
+        rows:
+          - {"id": 1, "status": 'B'}
+          - {"id": 2, "status": 'B'}
+          - {"id": 3, "status": 'A'}
+    expect:
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id": 2, "status": 'B'}
+          - {"id": 1, "status": 'B'}
+
+  - name: unordered_with_nulls
+    model: my_model
+    given:
+      - input: ref("my_input_model")
+        rows:
+          - {"id":  , "status": 'B'}
+          - {"id":  , "status": 'B'}
+          - {"id": 3, "status": 'A'}
+    expect:
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id":  , "status": 'B'}
+          - {"id":  , "status": 'B'}
+
+  - name: unordered_with_nulls_2
+    model: my_model
+    given:
+      - input: ref("my_input_model")
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id":  , "status": 'B'}
+          - {"id":  , "status": 'B'}
+    expect:
+        rows:
+          - {"id":  , "status": 'B'}
+          - {"id":  , "status": 'B'}
+          - {"id": 3, "status": 'A'}
+
+  - name: unordered_with_nulls_mixed_columns
+    model: my_model
+    given:
+      - input: ref("my_input_model")
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id":  , "status": 'B'}
+          - {"id": 1, "status": }
+    expect:
+        rows:
+          - {"id": 1, "status": }
+          - {"id":  , "status": 'B'}
+          - {"id": 3, "status": 'A'}
+
+  - name: unordered_with_null
+    model: my_model
+    given:
+      - input: ref("my_input_model")
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id":  , "status": 'B'}
+    expect:
+        rows:
+          - {"id":  , "status": 'B'}
+          - {"id": 3, "status": 'A'}
+
+  - name: ordered_with_nulls
+    model: my_model
+    given:
+      - input: ref("my_input_model")
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id":  , "status": 'B'}
+          - {"id":  , "status": 'B'}
+    expect:
+        rows:
+          - {"id": 3, "status": 'A'}
+          - {"id":  , "status": 'B'}
+          - {"id":  , "status": 'B'}
+"""
+
+
+class TestUnitTestingDiffIsOrderAgnostic:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_input_model.sql": my_input_model,
+            "my_model.sql": my_model,
+            "test_my_model.yml": test_my_model_order_insensitive,
+        }
+
+    def test_unit_testing_diff_is_order_insensitive(self, project):
+        run_dbt(["run"])
+
+        # Select by model name
+        results = run_dbt(["test", "--select", "my_model"], expect_pass=True)
+        assert len(results) == 6
diff --git a/tests/unit/task/test_test.py b/tests/unit/task/test_test.py
@@ -0,0 +1,71 @@
+import agate
+import pytest
+
+from dbt.task.test import list_rows_from_table
+
+
+class TestListRowsFromTable:
+    @pytest.mark.parametrize(
+        "agate_table_cols,agate_table_rows,expected_list_rows",
+        [
+            (["a", "b", "c"], [], [["a", "b", "c"]]),  # no rows
+            (["a", "b", "c"], [[1, 2, 3]], [["a", "b", "c"], [1, 2, 3]]),  # single row, no nulls
+            (
+                ["a", "b", "c"],
+                [[1, 2, 3], [2, 3, 4]],
+                [["a", "b", "c"], [1, 2, 3], [2, 3, 4]],
+            ),  # multiple rows
+            (
+                ["a", "b", "c"],
+                [[None, 2, 3], [2, None, 4]],
+                [["a", "b", "c"], [None, 2, 3], [2, None, 4]],
+            ),  # multiple rows, with nulls
+        ],
+    )
+    def test_list_rows_from_table_no_sort(
+        self, agate_table_cols, agate_table_rows, expected_list_rows
+    ):
+        table = agate.Table(rows=agate_table_rows, column_names=agate_table_cols)
+
+        list_rows = list_rows_from_table(table)
+        assert list_rows == expected_list_rows
+
+    @pytest.mark.parametrize(
+        "agate_table_cols,agate_table_rows,expected_list_rows",
+        [
+            (["a", "b", "c"], [], [["a", "b", "c"]]),  # no rows
+            (["a", "b", "c"], [[1, 2, 3]], [["a", "b", "c"], [1, 2, 3]]),  # single row, no nulls
+            (
+                ["a", "b", "c"],
+                [[1, 2, 3], [2, 3, 4]],
+                [["a", "b", "c"], [1, 2, 3], [2, 3, 4]],
+            ),  # multiple rows, in order
+            (
+                ["a", "b", "c"],
+                [[2, 3, 4], [1, 2, 3]],
+                [["a", "b", "c"], [1, 2, 3], [2, 3, 4]],
+            ),  # multiple rows, out of order
+            (
+                ["a", "b", "c"],
+                [[None, 2, 3], [2, 3, 4]],
+                [["a", "b", "c"], [2, 3, 4], [None, 2, 3]],
+            ),  # multiple rows, out of order with nulls in first position
+            (
+                ["a", "b", "c"],
+                [[4, 5, 6], [1, None, 3]],
+                [["a", "b", "c"], [1, None, 3], [4, 5, 6]],
+            ),  # multiple rows, out of order with null in non-first position
+            (
+                ["a", "b", "c"],
+                [[None, 5, 6], [1, None, 3]],
+                [["a", "b", "c"], [1, None, 3], [None, 5, 6]],
+            ),  # multiple rows, out of order with nulls in many positions
+        ],
+    )
+    def test_list_rows_from_table_with_sort(
+        self, agate_table_cols, agate_table_rows, expected_list_rows
+    ):
+        table = agate.Table(rows=agate_table_rows, column_names=agate_table_cols)
+
+        list_rows = list_rows_from_table(table, sort=True)
+        assert list_rows == expected_list_rows