chore: addressing minor inconsistencies

ryanseq-gyg · ryanseq-gyg · commit f9692bda69d9 · 2026-04-26T22:42:51.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -205,8 +205,17 @@ cython_debug/
 #   Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
 #   that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
 #   and can be added to the global gitignore or merged into this file. However, if you prefer,
-#   you could uncomment the following to ignore the entire vscode folder
-# .vscode/
+#   you can ignore the entire vscode folder, as is done below.
+
+# IDE / Editor settings
+.vscode/
+.idea/
+
+# OS files
+.DS_Store
+
+# Databricks local config
+databricks.yml
 
 # Ruff stuff:
 .ruff_cache/
diff --git a/tests/expectations/aggregation/any_value_expectations/test_expect_max_null_count.py b/tests/expectations/aggregation/any_value_expectations/test_expect_max_null_count.py
@@ -14,6 +14,18 @@
 )
 
 
+def test_expectation_name():
+    """Test that the expectation name is correctly returned."""
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationMaxNullCount",
+        column_name="col1",
+        max_count=5,  # not asserted on below; only the reported name is checked
+    )
+    assert expectation.get_expectation_name() == "ExpectationMaxNullCount", (
+        f"Expected 'ExpectationMaxNullCount' but got: {expectation.get_expectation_name()}"
+    )
+
+
 @pytest.mark.parametrize(
     "df_data, column_name, max_count, expected_result, expected_message",
     [
diff --git a/tests/expectations/aggregation/any_value_expectations/test_expect_max_null_percentage.py b/tests/expectations/aggregation/any_value_expectations/test_expect_max_null_percentage.py
@@ -14,6 +14,18 @@
 )
 
 
+def test_expectation_name():
+    """Test that the expectation name is correctly returned."""
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationMaxNullPercentage",
+        column_name="col1",
+        max_percentage=10.0,  # not asserted on below; only the reported name is checked
+    )
+    assert expectation.get_expectation_name() == "ExpectationMaxNullPercentage", (
+        f"Expected 'ExpectationMaxNullPercentage' but got: {expectation.get_expectation_name()}"
+    )
+
+
 @pytest.mark.parametrize(
     "df_data, column_name, max_percentage, expected_result, expected_message",
     [
diff --git a/tests/expectations/aggregation/any_value_expectations/test_expect_max_rows.py b/tests/expectations/aggregation/any_value_expectations/test_expect_max_rows.py
@@ -14,6 +14,17 @@
 )
 
 
+def test_expectation_name():
+    """Test that the expectation name is correctly returned."""
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationMaxRows",
+        max_rows=10,  # not asserted on below; only the reported name is checked
+    )
+    assert expectation.get_expectation_name() == "ExpectationMaxRows", (
+        f"Expected 'ExpectationMaxRows' but got: {expectation.get_expectation_name()}"
+    )
+
+
 @pytest.mark.parametrize(
     "df_data, max_rows, expected_result, expected_message",
     [
diff --git a/tests/expectations/aggregation/any_value_expectations/test_expect_min_rows.py b/tests/expectations/aggregation/any_value_expectations/test_expect_min_rows.py
@@ -14,6 +14,17 @@
 )
 
 
+def test_expectation_name():
+    """Test that the expectation name is correctly returned."""
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationMinRows",
+        min_rows=10,  # not asserted on below; only the reported name is checked
+    )
+    assert expectation.get_expectation_name() == "ExpectationMinRows", (
+        f"Expected 'ExpectationMinRows' but got: {expectation.get_expectation_name()}"
+    )
+
+
 @pytest.mark.parametrize(
     "df_data, min_rows, expected_result, expected_message",
     [
diff --git a/tests/expectations/aggregation/any_value_expectations/test_expect_unique_rows.py b/tests/expectations/aggregation/any_value_expectations/test_expect_unique_rows.py
@@ -218,3 +218,21 @@ def test_column_missing_error(dataframe_factory):
     )
     with pytest.raises(DataFrameExpectationsSuiteFailure):
         expectations_suite.build().run(data_frame=data_frame)
+
+
+def test_large_dataset_performance(dataframe_factory):
+    """
+    Test the expectation on 10,000 distinct col1 values; only success is asserted, not timing.
+    """
+    df_lib, make_df = dataframe_factory  # df_lib is unpacked but not used in this test
+
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationUniqueRows",
+        column_names=["col1"],
+    )
+    # col1 is built from the distinct integers 0..9999, so no value repeats
+    data_frame = make_df({"col1": (list(range(10000)), "long")})
+    result = expectation.validate(data_frame=data_frame)
+    assert isinstance(result, DataFrameExpectationSuccessMessage), (
+        f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+    )
diff --git a/tests/expectations/column/string_expectations/test_expect_string_not_contains.py b/tests/expectations/column/string_expectations/test_expect_string_not_contains.py
@@ -15,6 +15,7 @@
 
 
 def test_expectation_name():
+    """Test that the expectation name is correctly returned."""
     expectation = DataFrameExpectationRegistry.get_expectation(
         expectation_name="ExpectationStringNotContains",
         column_name="col1",
@@ -35,21 +36,21 @@ def test_expectation_name():
         # Success different substring
         (["hello", "world", "python"], "java", "success", None),
         # Basic violations
-        (["foobar", "bar", "foo"], "foo", "violations", ["foobar", "foo"]),
+        (["foobar", "bar", "foo"], "foo", "failure", ["foobar", "foo"]),
         # All violations
         (
             ["testing", "test", "attest"],
             "test",
-            "violations",
+            "failure",
             ["testing", "test", "attest"],
         ),
         # Mixed violations
-        (["good", "bad", "badge"], "bad", "violations", ["bad", "badge"]),
+        (["good", "bad", "badge"], "bad", "failure", ["bad", "badge"]),
         # Substring at beginning
         (
             ["prefix_test", "prefix_demo", "other"],
             "prefix",
-            "violations",
+            "failure",
             ["prefix_test", "prefix_demo"],
         ),
         # No substring at beginning
@@ -58,7 +59,7 @@ def test_expectation_name():
         (
             ["test_suffix", "demo_suffix", "other"],
             "suffix",
-            "violations",
+            "failure",
             ["test_suffix", "demo_suffix"],
         ),
         # No substring at end
@@ -67,40 +68,40 @@ def test_expectation_name():
         (
             ["pre_mid_post", "another_mid_test", "nomatch"],
             "mid",
-            "violations",
+            "failure",
             ["pre_mid_post", "another_mid_test"],
         ),
         # No substring in middle
         (["no_match", "also_no", "nope"], "mid", "success", None),
         # Case sensitive success
         (["FOO", "Foo", "fOo"], "foo", "success", None),
         # Case sensitive violations
-        (["foo", "FOO", "test"], "FOO", "violations", ["FOO"]),
+        (["foo", "FOO", "test"], "FOO", "failure", ["FOO"]),
         # Empty string success
         (["", "", ""], "foo", "success", None),
         # Empty string with violation
-        (["", "foo", ""], "foo", "violations", ["foo"]),
+        (["", "foo", ""], "foo", "failure", ["foo"]),
         # Whitespace only success
         (["   ", "  ", " "], "test", "success", None),
         # Whitespace in text violations
         (
             ["test with spaces", "test", "no match"],
             "test",
-            "violations",
+            "failure",
             ["test with spaces", "test"],
         ),
         # Whitespace around violations
         (
             ["   test   ", "test", "clean"],
             "test",
-            "violations",
+            "failure",
             ["   test   ", "test"],
         ),
         # Special char at violations
         (
             ["test@email", "user@domain", "plain"],
             "@",
-            "violations",
+            "failure",
             ["test@email", "user@domain"],
         ),
         # Special char at success
@@ -109,35 +110,35 @@ def test_expectation_name():
         (
             ["test#tag", "demo#hash", "plain"],
             "#",
-            "violations",
+            "failure",
             ["test#tag", "demo#hash"],
         ),
         # Numbers violations
         (
             ["version123", "test456", "plain"],
             "123",
-            "violations",
+            "failure",
             ["version123"],
         ),
         # Numbers no match
         (["v1.0", "v2.1", "v3.5"], "123", "success", None),
         # Numbers exact match
-        (["test", "demo", "123"], "123", "violations", ["123"]),
+        (["test", "demo", "123"], "123", "failure", ["123"]),
         # Single char success
         (["a", "b", "c"], "x", "success", None),
         # Single char violation
-        (["a", "b", "c"], "a", "violations", ["a"]),
+        (["a", "b", "c"], "a", "failure", ["a"]),
         # Long string success
         (["a" * 100, "b" * 100, "c" * 100], "x", "success", None),
         # Long string with substring
         (
             ["a" * 50 + "test" + "b" * 50, "clean" * 20, "other"],
             "test",
-            "violations",
+            "failure",
             ["a" * 50 + "test" + "b" * 50],
         ),
         # Exact match
-        (["test", "demo", "exam"], "test", "violations", ["test"]),
+        (["test", "demo", "exam"], "test", "failure", ["test"]),
         # No exact match
         (["test", "demo", "exam"], "testing", "success", None),
     ],
@@ -208,7 +209,7 @@ def test_expectation_basic_scenarios(
         assert suite_result.success, "Expected all expectations to pass"
         assert suite_result.total_passed == 1, "Expected 1 passed expectation"
         assert suite_result.total_failed == 0, "Expected 0 failed expectations"
-    else:  # violations
+    else:  # failure
         violations_df = make_df({"col1": (expected_violations, "string")})
         expected_message = (
             f"Found {len(expected_violations)} row(s) where 'col1' contains '{substring}'."
diff --git a/tests/expectations/column/string_expectations/test_expect_string_starts_with.py b/tests/expectations/column/string_expectations/test_expect_string_starts_with.py
@@ -15,6 +15,7 @@
 
 
 def test_expectation_name():
+    """Test that the expectation name is correctly returned."""
     expectation = DataFrameExpectationRegistry.get_expectation(
         expectation_name="ExpectationStringStartsWith",
         column_name="col1",
@@ -34,7 +35,7 @@ def test_expectation_name():
         (
             ["bar", "baz", "qux"],
             "foo",
-            "violations",
+            "failure",
             ["bar", "baz", "qux"],
         ),
         # Exact match success
@@ -43,21 +44,21 @@ def test_expectation_name():
         (
             ["foo", "bar", "baz"],
             "foo",
-            "violations",
+            "failure",
             ["bar", "baz"],
         ),
         # Empty strings
         (
             ["", "", ""],
             "foo",
-            "violations",
+            "failure",
             ["", "", ""],
         ),
         # Whitespace only violations
         (
             ["   ", "  ", " "],
             "foo",
-            "violations",
+            "failure",
             ["   ", "  ", " "],
         ),
         # Whitespace in text success
@@ -71,21 +72,21 @@ def test_expectation_name():
         (
             ["bar foo", "baz foo", "qux foo"],
             "foo",
-            "violations",
+            "failure",
             ["bar foo", "baz foo", "qux foo"],
         ),
         # Special char at violations
         (
             ["@foo", "#foo", "$foo"],
             "@",
-            "violations",
+            "failure",
             ["#foo", "$foo"],
         ),
         # Special char in prefix violations
         (
             ["foo@bar", "foo#baz", "foo$qux"],
             "foo@",
-            "violations",
+            "failure",
             ["foo#baz", "foo$qux"],
         ),
         # Numbers success
@@ -94,7 +95,7 @@ def test_expectation_name():
         (
             ["1foo", "2foo", "3foo"],
             "foo",
-            "violations",
+            "failure",
             ["1foo", "2foo", "3foo"],
         ),
         # Long string success
@@ -108,14 +109,14 @@ def test_expectation_name():
         (
             ["a" * 100, "b" * 100, "c" * 100],
             "foo",
-            "violations",
+            "failure",
             ["a" * 100, "b" * 100, "c" * 100],
         ),
         # Mixed violations
         (
             ["foobar", "bar", "foo123"],
             "foo",
-            "violations",
+            "failure",
             ["bar"],
         ),
     ],
@@ -170,7 +171,7 @@ def test_expectation_basic_scenarios(
         assert suite_result.success, "Expected all expectations to pass"
         assert suite_result.total_passed == 1, "Expected 1 passed expectation"
         assert suite_result.total_failed == 0, "Expected 0 failed expectations"
-    else:  # violations
+    else:  # failure
         violations_df = make_df({"col1": (expected_violations, "string")})
         expected_message = (
             f"Found {len(expected_violations)} row(s) where 'col1' does not start with '{prefix}'."