[HACKATHON] add expect_column_values_to_be_valid_new_hampshire_zip (#4775)

* [HACKATHON] add expect_column_values_to_be_valid_new_hampshire_zip
* adding space to doc string
great-expectations · Apr 8, 2022 · 0b36a6b · 0b36a6b
1 parent f641c1a
commit 0b36a6b
Showing 1 changed file with 146 additions and 0 deletions.
diff --git a/...ectations_experimental/expectations/expect_column_values_to_be_valid_new_hampshire_zip.py b/...ectations_experimental/expectations/expect_column_values_to_be_valid_new_hampshire_zip.py
@@ -0,0 +1,146 @@
+import json
+from typing import Optional
+
+import zipcodes
+
+from great_expectations.core.expectation_configuration import ExpectationConfiguration
+from great_expectations.exceptions import InvalidExpectationConfigurationError
+from great_expectations.execution_engine import (
+    PandasExecutionEngine,
+    SparkDFExecutionEngine,
+    SqlAlchemyExecutionEngine,
+)
+from great_expectations.expectations.expectation import ColumnMapExpectation
+from great_expectations.expectations.metrics import (
+    ColumnMapMetricProvider,
+    column_condition_partial,
+)
+
+
+# Lazily-built cache of New Hampshire zip codes; filled on the first lookup so
+# the zipcodes database is filtered once rather than once per column value.
+_NEW_HAMPSHIRE_ZIP_CODES = None
+
+
+def _new_hampshire_zip_codes():
+    """Return the set of "zip_code" values ``zipcodes`` lists for state NH."""
+    global _NEW_HAMPSHIRE_ZIP_CODES
+    if _NEW_HAMPSHIRE_ZIP_CODES is None:
+        _NEW_HAMPSHIRE_ZIP_CODES = frozenset(
+            entry["zip_code"] for entry in zipcodes.filter_by(state="NH")
+        )
+    return _NEW_HAMPSHIRE_ZIP_CODES
+
+
+def is_valid_new_hampshire_zip(zip: str) -> bool:
+    """Return True if ``zip`` is a New Hampshire zip code known to ``zipcodes``.
+
+    Args:
+        zip: Candidate value. The parameter name shadows the builtin but is
+            kept so existing keyword callers continue to work.
+
+    Returns:
+        True when ``zip`` is a string of at most 10 characters that appears
+        among the ``zip_code`` entries returned by
+        ``zipcodes.filter_by(state="NH")``; False otherwise, including for
+        any non-string input (ints, None, NaN, ...).
+    """
+    # The type check must come before len(): calling len() on an int, None or
+    # float (e.g. NaN from a pandas column) raises TypeError.
+    if not isinstance(zip, str):
+        return False
+    # Cheap guard that rejects over-long strings before the cache is consulted.
+    if len(zip) > 10:
+        return False
+    return zip in _new_hampshire_zip_codes()
+
+
+# Metric provider that supports the Expectation in this file; the per-value
+# validation logic is wired in here.
+class ColumnValuesToBeValidNewHampshireZip(ColumnMapMetricProvider):
+    """Column-map metric marking which values are New Hampshire zip codes.
+
+    Only a PandasExecutionEngine implementation is defined; there is no
+    SqlAlchemyExecutionEngine or SparkDFExecutionEngine implementation here.
+    """
+
+    # Id string used to reference this metric.
+    condition_metric_name = "column_values.valid_new_hampshire_zip"
+
+    @column_condition_partial(engine=PandasExecutionEngine)
+    def _pandas(cls, column, **kwargs):
+        """Apply ``is_valid_new_hampshire_zip`` to each value of ``column``."""
+        return column.apply(is_valid_new_hampshire_zip)
+
+
+# The Expectation itself; it is evaluated through the map metric named in
+# `map_metric` below.
+class ExpectColumnValuesToBeValidNewHampshireZip(ColumnMapExpectation):
+    """Expect values in this column to be valid New Hampshire zipcodes.
+
+    Validity is decided by the metric "column_values.valid_new_hampshire_zip".
+    See https://pypi.org/project/zipcodes/ for more information.
+    """
+
+    # These examples will be shown in the public gallery.
+    # They will also be executed as unit tests for your Expectation.
+    # One column holds values expected to pass, the other values expected to
+    # fail; each test below targets one of the two columns.
+    examples = [
+        {
+            "data": {
+                "valid_new_hampshire_zip": ["03031", "03752", "03884", "03896"],
+                "invalid_new_hampshire_zip": ["-10000", "1234", "99999", "25487"],
+            },
+            "tests": [
+                {
+                    "title": "basic_positive_test",
+                    "exact_match_out": False,
+                    "include_in_gallery": True,
+                    "in": {"column": "valid_new_hampshire_zip"},
+                    "out": {"success": True},
+                },
+                {
+                    "title": "basic_negative_test",
+                    "exact_match_out": False,
+                    "include_in_gallery": True,
+                    "in": {"column": "invalid_new_hampshire_zip"},
+                    "out": {"success": False},
+                },
+            ],
+        }
+    ]
+
+    # Id string of the Metric used by this Expectation; it matches the
+    # `condition_metric_name` of the metric class defined earlier in this file.
+    map_metric = "column_values.valid_new_hampshire_zip"
+
+    # Parameter names that can affect whether the Expectation evaluates to True or False.
+    success_keys = ("mostly",)
+
+    # Default values for parameters; none are defined for this Expectation.
+    default_kwarg_values = {}
+
+    def validate_configuration(
+        self, configuration: Optional[ExpectationConfiguration]
+    ) -> None:
+        """Run the base-class validation of ``configuration``.
+
+        No checks specific to this Expectation are performed. After delegating
+        to ``super().validate_configuration``, the method only rebinds the
+        local name to ``self.configuration`` when ``configuration`` is None;
+        the value is not used afterwards.
+
+        Args:
+            configuration: An optional ExpectationConfiguration to validate.
+
+        Returns:
+            None.
+
+        Raises:
+            Whatever the base-class validation raises propagates unchanged
+            (presumably InvalidExpectationConfigurationError -- the base class
+            is not shown here, confirm against it).
+        """
+
+        super().validate_configuration(configuration)
+        if configuration is None:
+            configuration = self.configuration
+
+        # NOTE: expectation-specific checks on configuration.kwargs would go
+        # here; none are implemented.
+
+    # This object contains metadata for display in the public Gallery
+    library_metadata = {
+        "maturity": "experimental",  # "experimental", "beta", or "production"
+        "tags": [
+            "hackathon",
+            "typed-entities",
+        ],  # Tags for this Expectation in the Gallery
+        "contributors": [  # Github handles for all contributors to this Expectation.
+            "@luismdiaz01",
+            "@derekma73",
+        ],
+        "requirements": ["zipcodes"],
+    }
+
+
+# When run as a script, print the diagnostic checklist for this Expectation.
+if __name__ == "__main__":
+    expectation = ExpectColumnValuesToBeValidNewHampshireZip()
+    expectation.print_diagnostic_checklist()