Skip to content

Commit

Permalink
[HACKATHON] add expect_column_values_to_be_valid_new_hampshire_zip (#…
Browse files Browse the repository at this point in the history
…4775)

* [HACKATHON] add expect_column_values_to_be_valid_new_hampshire_zip

* adding space to doc string
  • Loading branch information
Derekma73 committed Apr 8, 2022
1 parent f641c1a commit 0b36a6b
Showing 1 changed file with 146 additions and 0 deletions.
@@ -0,0 +1,146 @@
import json
from typing import Optional

import zipcodes

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.exceptions import InvalidExpectationConfigurationError
from great_expectations.execution_engine import (
PandasExecutionEngine,
SparkDFExecutionEngine,
SqlAlchemyExecutionEngine,
)
from great_expectations.expectations.expectation import ColumnMapExpectation
from great_expectations.expectations.metrics import (
ColumnMapMetricProvider,
column_condition_partial,
)


# Lazily-built cache of New Hampshire zip codes; populated on first lookup so
# the zipcodes database is filtered once rather than once per validated value.
_NEW_HAMPSHIRE_ZIPS = None


def _new_hampshire_zips():
    """Return the cached frozenset of New Hampshire zip code strings."""
    global _NEW_HAMPSHIRE_ZIPS
    if _NEW_HAMPSHIRE_ZIPS is None:
        _NEW_HAMPSHIRE_ZIPS = frozenset(
            d["zip_code"] for d in zipcodes.filter_by(state="NH")
        )
    return _NEW_HAMPSHIRE_ZIPS


def is_valid_new_hampshire_zip(zip: str):
    """Return True if *zip* is a New Hampshire zip code known to ``zipcodes``.

    Args:
        zip: Candidate value. Anything that is not a string (ints, floats,
            None, ...) is rejected rather than raising.

    Returns:
        bool: True only when *zip* is a string of at most 10 characters that
        matches a ``zip_code`` entry returned by
        ``zipcodes.filter_by(state="NH")``; False otherwise.
    """
    # Type guard must come first: len() on an int/float/None raises TypeError.
    if not isinstance(zip, str):
        return False
    if len(zip) > 10:
        return False
    return zip in _new_hampshire_zips()


# This class defines a Metric to support your Expectation.
# For most ColumnMapExpectations, the main business logic for calculation will live in this class.
class ColumnValuesToBeValidNewHampshireZip(ColumnMapMetricProvider):
    """Column-map metric that applies ``is_valid_new_hampshire_zip`` to each value."""

    # Id string used to reference this metric.
    condition_metric_name = "column_values.valid_new_hampshire_zip"

    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, **kwargs):
        # Pandas implementation: run the scalar validator over every element
        # of the column and return the resulting series.
        return column.apply(is_valid_new_hampshire_zip)

    # Only the PandasExecutionEngine is implemented. To support other engines,
    # add `_sqlalchemy(cls, column, _dialect, **kwargs)` and/or
    # `_spark(cls, column, **kwargs)` methods decorated with
    # `column_condition_partial(engine=SqlAlchemyExecutionEngine)` /
    # `column_condition_partial(engine=SparkDFExecutionEngine)`.


# This class defines the Expectation itself
class ExpectColumnValuesToBeValidNewHampshireZip(ColumnMapExpectation):
    """Expect values in this column to be valid New Hampshire zipcodes.
    See https://pypi.org/project/zipcodes/ for more information.
    """

    # Id string of the Metric backing this Expectation; it matches the
    # `condition_metric_name` of the metric class defined above.
    map_metric = "column_values.valid_new_hampshire_zip"

    # Parameter names that can change whether the Expectation succeeds.
    success_keys = ("mostly",)

    # No parameters have default values.
    default_kwarg_values = {}

    # Metadata displayed in the public Gallery.
    library_metadata = {
        # One of "experimental", "beta", or "production".
        "maturity": "experimental",
        # Gallery tags for this Expectation.
        "tags": ["hackathon", "typed-entities"],
        # Github handles of everyone who contributed to this Expectation.
        "contributors": ["@luismdiaz01", "@derekma73"],
        "requirements": ["zipcodes"],
    }

    # Examples shown in the public gallery; they are also executed as unit
    # tests for this Expectation. One all-valid column, one all-invalid column.
    examples = [
        {
            "data": {
                "valid_new_hampshire_zip": [
                    "03031",
                    "03752",
                    "03884",
                    "03896",
                ],
                "invalid_new_hampshire_zip": [
                    "-10000",
                    "1234",
                    "99999",
                    "25487",
                ],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "valid_new_hampshire_zip"},
                    "out": {"success": True},
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_new_hampshire_zip"},
                    "out": {"success": False},
                },
            ],
        }
    ]

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ) -> None:
        """Run the base-class configuration validation for this Expectation.

        Args:
            configuration: Optional Expectation Configuration to validate.
                When it is None, ``self.configuration`` is read instead.

        Returns:
            None. Any exception raised by the base-class validation (or by
            reading ``self.configuration``) propagates to the caller.
        """
        super().validate_configuration(configuration)

        # Fall back to the instance's own configuration when none was given.
        # No Expectation-specific checks on configuration.kwargs exist yet;
        # add them here and raise InvalidExpectationConfigurationError on
        # failure if they become necessary.
        configuration = (
            self.configuration if configuration is None else configuration
        )


if __name__ == "__main__":
    # When run as a script, print the diagnostic checklist for this Expectation.
    expectation = ExpectColumnValuesToBeValidNewHampshireZip()
    expectation.print_diagnostic_checklist()

0 comments on commit 0b36a6b

Please sign in to comment.