Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[HACKATHON] add expect_column_values_to_be_valid_wisconsin_zip #4803

Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,144 @@
import json
from typing import Optional

import zipcodes

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.exceptions import InvalidExpectationConfigurationError
from great_expectations.execution_engine import (
PandasExecutionEngine,
SparkDFExecutionEngine,
SqlAlchemyExecutionEngine,
)
from great_expectations.expectations.expectation import ColumnMapExpectation
from great_expectations.expectations.metrics import (
ColumnMapMetricProvider,
column_condition_partial,
)


# Lazily-built cache of Wisconsin zip codes. It is populated on the first
# lookup so the `zipcodes` database is filtered once rather than once per
# validated value.
_WISCONSIN_ZIP_CODES = None


def _wisconsin_zip_codes():
    """Return the set of zip codes that `zipcodes` lists for state "WI"."""
    global _WISCONSIN_ZIP_CODES
    if _WISCONSIN_ZIP_CODES is None:
        _WISCONSIN_ZIP_CODES = frozenset(
            entry["zip_code"] for entry in zipcodes.filter_by(state="WI")
        )
    return _WISCONSIN_ZIP_CODES


def is_valid_wisconsin_zip(zip: str) -> bool:
    """Return True if ``zip`` is a string naming a Wisconsin zip code.

    Args:
        zip: The candidate value. Anything that is not a ``str`` (for example
            an int, float or None coming from a mixed-type column) is treated
            as invalid rather than raising. The parameter name shadows the
            builtin ``zip``; it is kept so keyword callers keep working.

    Returns:
        True when ``zip`` is a string of at most 10 characters that appears
        among the zip codes the ``zipcodes`` package reports for Wisconsin,
        otherwise False.
    """
    # The type check must come first: len() on a non-sized value such as an
    # int or None raises TypeError, which previously crashed the whole check.
    if not isinstance(zip, str):
        return False
    # Cheap guard kept from the original: over-long strings cannot match.
    if len(zip) > 10:
        return False
    return zip in _wisconsin_zip_codes()


# This class defines a Metric to support your Expectation.
# For most ColumnMapExpectations, the main business logic for calculation will live in this class.
class ColumnValuesToBeValidWisconsinZip(ColumnMapMetricProvider):
    """Metric that evaluates each column value with ``is_valid_wisconsin_zip``."""

    # Id string used to reference this metric (the Expectation's `map_metric`
    # in this file uses the same value).
    condition_metric_name = "column_values.valid_wisconsin_zip"

    # Core logic for the PandasExecutionEngine.
    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, **kwargs):
        """Apply the Wisconsin zip check to every value of ``column``."""
        return column.apply(is_valid_wisconsin_zip)

    # Only the Pandas engine is implemented. The template stubs for the other
    # engines are kept below, disabled, for future contributors.

    # SqlAlchemyExecutionEngine:
    # @column_condition_partial(engine=SqlAlchemyExecutionEngine)
    # def _sqlalchemy(cls, column, _dialect, **kwargs):
    #     raise NotImplementedError

    # SparkDFExecutionEngine:
    # @column_condition_partial(engine=SparkDFExecutionEngine)
    # def _spark(cls, column, **kwargs):
    #     raise NotImplementedError


# This class defines the Expectation itself
class ExpectColumnValuesToBeValidWisconsinZip(ColumnMapExpectation):
    """Expect values in this column to be valid Wisconsin zipcodes.
    See https://pypi.org/project/zipcodes/ for more information.
    """

    # Id string of the Metric backing this Expectation; it matches the
    # `condition_metric_name` of the Metric class defined in this file.
    map_metric = "column_values.valid_wisconsin_zip"

    # Parameter names that can affect whether the Expectation succeeds.
    success_keys = ("mostly",)

    # No parameter of this Expectation has a default value.
    default_kwarg_values = {}

    # Examples shown in the public gallery; they are also executed as unit
    # tests for this Expectation.
    examples = [
        {
            "data": {
                "valid_wisconsin_zip": ["53001", "53541", "54466", "54990"],
                "invalid_wisconsin_zip": ["-10000", "1234", "99999", "25487"],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "valid_wisconsin_zip"},
                    "out": {"success": True},
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_wisconsin_zip"},
                    "out": {"success": False},
                },
            ],
        },
    ]

    # Metadata for display in the public Gallery.
    library_metadata = {
        # One of "experimental", "beta", or "production".
        "maturity": "experimental",
        # Tags for this Expectation in the Gallery.
        "tags": [
            "hackathon",
            "typed-entities",
        ],
        # Github handles of all contributors to this Expectation.
        "contributors": [
            "@luismdiaz01",
            "@derekma73",
        ],
        "requirements": ["zipcodes"],
    }

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ) -> None:
        """Validate the configuration by delegating to the parent class.

        No Expectation-specific checks are performed here; the method only
        calls the parent implementation and then resolves the configuration
        to use.

        Args:
            configuration: Optional Expectation Configuration to validate.
                When it is None, ``self.configuration`` is read after the
                parent check.

        Returns:
            None. Any validation error comes from the parent class or from
            reading ``self.configuration`` (presumably
            InvalidExpectationConfigurationError - neither is visible in this
            file).
        """
        super().validate_configuration(configuration)
        # Resolved here so Expectation-specific checks on `configuration.kwargs`
        # can be added below; none exist yet.
        configuration = self.configuration if configuration is None else configuration


if __name__ == "__main__":
    # When run as a script, print the diagnostic checklist for this Expectation.
    expectation = ExpectColumnValuesToBeValidWisconsinZip()
    expectation.print_diagnostic_checklist()