Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[HACKATHON] add expect_column_values_to_be_valid_new_hampshire_zip (#4775)
* [HACKATHON] add expect_column_values_to_be_valid_new_hampshire_zip
* adding space to doc string
- Loading branch information
Showing 1 changed file with 146 additions and 0 deletions.
There are no files selected for viewing
146 changes: 146 additions & 0 deletions
146
...ectations_experimental/expectations/expect_column_values_to_be_valid_new_hampshire_zip.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import json | ||
from typing import Optional | ||
|
||
import zipcodes | ||
|
||
from great_expectations.core.expectation_configuration import ExpectationConfiguration | ||
from great_expectations.exceptions import InvalidExpectationConfigurationError | ||
from great_expectations.execution_engine import ( | ||
PandasExecutionEngine, | ||
SparkDFExecutionEngine, | ||
SqlAlchemyExecutionEngine, | ||
) | ||
from great_expectations.expectations.expectation import ColumnMapExpectation | ||
from great_expectations.expectations.metrics import ( | ||
ColumnMapMetricProvider, | ||
column_condition_partial, | ||
) | ||
|
||
|
||
# Lazily-built cache of New Hampshire zip codes; populated on first lookup by
# _new_hampshire_zip_codes() so the zipcodes database is queried only once.
_NEW_HAMPSHIRE_ZIP_CODES = None


def _new_hampshire_zip_codes():
    """Return the set of New Hampshire zip codes, building it on first use."""
    global _NEW_HAMPSHIRE_ZIP_CODES
    if _NEW_HAMPSHIRE_ZIP_CODES is None:
        # zipcodes.filter_by returns one dict per zip code; only the
        # "zip_code" field is needed for membership tests.
        _NEW_HAMPSHIRE_ZIP_CODES = frozenset(
            d["zip_code"] for d in zipcodes.filter_by(state="NH")
        )
    return _NEW_HAMPSHIRE_ZIP_CODES


def is_valid_new_hampshire_zip(zip: str) -> bool:
    """Return True if ``zip`` is a New Hampshire zip code known to ``zipcodes``.

    Args:
        zip: The candidate value. Anything that is not a ``str`` (including
            ``None``, ints and floats) is treated as invalid rather than
            raising. ``str`` subclasses are accepted.

    Returns:
        True when ``zip`` is a string of at most 10 characters that matches
        the ``"zip_code"`` field of an entry returned by
        ``zipcodes.filter_by(state="NH")``; False otherwise.
    """
    # The type check must come first: len() on an int/float/None raises
    # TypeError, which would abort validation of the whole column.
    if not isinstance(zip, str):
        return False
    # Cheap rejection of over-long strings before touching the lookup table.
    if len(zip) > 10:
        return False
    return zip in _new_hampshire_zip_codes()
|
||
|
||
# This class defines a Metric to support your Expectation. | ||
# For most ColumnMapExpectations, the main business logic for calculation will live in this class. | ||
class ColumnValuesToBeValidNewHampshireZip(ColumnMapMetricProvider):
    # Metric backing the New Hampshire zip Expectation: it evaluates each
    # value of a column with is_valid_new_hampshire_zip.

    # Id string by which this metric is referenced.
    condition_metric_name = "column_values.valid_new_hampshire_zip"

    # Pandas implementation of the row-wise condition.
    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, **kwargs):
        # The validator is passed straight to apply(); it is called once per
        # element of the column.
        return column.apply(is_valid_new_hampshire_zip)

    # NOTE: no SqlAlchemyExecutionEngine or SparkDFExecutionEngine
    # implementations (_sqlalchemy / _spark) are defined for this metric.
|
||
|
||
# This class defines the Expectation itself | ||
class ExpectColumnValuesToBeValidNewHampshireZip(ColumnMapExpectation):
    """Expect values in this column to be valid New Hampshire zipcodes.
    See https://pypi.org/project/zipcodes/ for more information.
    """

    # Id string of the Metric this Expectation evaluates; it matches the
    # `condition_metric_name` of the metric class defined in this file.
    map_metric = "column_values.valid_new_hampshire_zip"

    # Parameter names that can change whether the Expectation succeeds.
    success_keys = ("mostly",)

    # No parameters are given default values.
    default_kwarg_values = {}

    # Example data and test cases: one column of values expected to pass and
    # one column of values expected to fail.
    examples = [
        {
            "data": {
                "valid_new_hampshire_zip": ["03031", "03752", "03884", "03896"],
                "invalid_new_hampshire_zip": ["-10000", "1234", "99999", "25487"],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "valid_new_hampshire_zip"},
                    "out": {"success": True},
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_new_hampshire_zip"},
                    "out": {"success": False},
                },
            ],
        }
    ]

    # Metadata describing this Expectation (maturity level, tags,
    # contributor GitHub handles and required packages).
    library_metadata = {
        "maturity": "experimental",  # one of "experimental", "beta", "production"
        "tags": [
            "hackathon",
            "typed-entities",
        ],
        "contributors": [
            "@luismdiaz01",
            "@derekma73",
        ],
        "requirements": ["zipcodes"],
    }

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ) -> None:
        """Run the base-class configuration validation.

        No Expectation-specific checks are added on top of what the parent
        class performs.

        Args:
            configuration: Optional Expectation Configuration to validate.
                It is passed unchanged to the parent implementation.

        Returns:
            None.
        """
        super().validate_configuration(configuration)
        # After the base-class check, a missing configuration is replaced
        # locally by the instance's own; nothing further reads it here.
        configuration = self.configuration if configuration is None else configuration
|
||
|
||
# Script entry point: when this file is executed directly (not imported),
# instantiate the Expectation and call its print_diagnostic_checklist() method.
if __name__ == "__main__":
    ExpectColumnValuesToBeValidNewHampshireZip().print_diagnostic_checklist()