Skip to content

Commit

Permalink
Add support for parse_strings_as_datetimes to expect_column_values_to_be_in_set (#422)
Browse files: browse the repository at this point in the history
  • Loading branch information
jcampbell committed Apr 12, 2019
1 parent 103b962 commit 0d11698
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 23 deletions.
5 changes: 4 additions & 1 deletion great_expectations/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ def expect_column_values_to_be_in_set(self,
column,
value_set,
mostly=None,
parse_strings_as_datetimes=None,
result_format=None, include_config=False, catch_exceptions=None, meta=None
):
"""Expect each column value to be in a given set.
Expand Down Expand Up @@ -552,6 +553,8 @@ def expect_column_values_to_be_in_set(self,
mostly (None or a float between 0 and 1): \
Return `"success": True` if at least mostly percent of values match the expectation. \
For more detail, see :ref:`mostly`.
parse_strings_as_datetimes (boolean or None): If True, values provided in value_set will be parsed as \
datetimes before making comparisons.
Other Parameters:
result_format (str or None): \
Expand Down Expand Up @@ -590,7 +593,7 @@ def expect_column_values_to_not_be_in_set(self,
::
# my_df.my_col = [1,2,2,3,3,3]
>>> my_df.expect_column_values_to_be_in_set(
>>> my_df.expect_column_values_to_not_be_in_set(
"my_col",
[1,2]
)
Expand Down
7 changes: 6 additions & 1 deletion great_expectations/dataset/pandas_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,13 @@ def expect_column_values_to_be_in_type_list(self, column, type_list,
@MetaPandasDataset.column_map_expectation
def expect_column_values_to_be_in_set(self, column, value_set,
mostly=None,
parse_strings_as_datetimes=None,
result_format=None, include_config=False, catch_exceptions=None, meta=None):
return column.map(lambda x: x in value_set)
if parse_strings_as_datetimes:
parsed_value_set = [parse(value) if isinstance(value, string_types) else value for value in value_set]
else:
parsed_value_set = value_set
return column.map(lambda x: x in parsed_value_set)

@DocInherit
@MetaPandasDataset.column_map_expectation
Expand Down
9 changes: 7 additions & 2 deletions great_expectations/dataset/sqlalchemy_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def inner_wrapper(self, column, mostly=None, result_format=None, *args, **kwargs
if "unexpected_count" not in count_results or count_results["unexpected_count"] is None:
count_results["unexpected_count"] = 0

# Retrieve unexpected values
# Retrieve unexpected values
unexpected_query_results = self.engine.execute(
sa.select([sa.column(column)]).select_from(self._table).where(
sa.and_(sa.not_(expected_condition),
Expand Down Expand Up @@ -468,9 +468,14 @@ def expect_column_values_to_be_in_set(self,
column,
value_set,
mostly=None,
parse_strings_as_datetimes=None,
result_format=None, include_config=False, catch_exceptions=None, meta=None
):
return sa.column(column).in_(tuple(value_set))
if parse_strings_as_datetimes:
parsed_value_set = [parse(value) if isinstance(value, string_types) else value for value in value_set]
else:
parsed_value_set = value_set
return sa.column(column).in_(tuple(parsed_value_set))

@DocInherit
@MetaSqlAlchemyDataset.column_map_expectation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,14 @@
},
"schemas": {
"pandas" : {
"x": "np.int_",
"y": "np.float_",
"x": "int",
"y": "float",
"z": "str",
"n": "null",
"n": "str",
"b": "bool",
"s": ["string", "int"],
"s1": ["string", "np.int64", "int"]
},
"sql": {},
"spark": {}
"s": "str",
"s1": "str"
}
},
"tests": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,14 @@
},
"schemas": {
"pandas" : {
"c1": "int",
"c2": "float",
"c3": "str",
"c4": "null",
"c5": "bool",
"c6": "np.float64",
"c7": "np.int64"
},
"sql": {},
"spark": {}
"x": "int",
"y": "float",
"z": "str",
"n": "str",
"b": "bool",
"s": "float64",
"s1": "int64"
}
},
"tests": [
{
Expand Down
6 changes: 5 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ def get_dataset(dataset_type, data, schemas=None, autoinspect_func=autoinspect.c
"""
if dataset_type == 'PandasDataset':
return PandasDataset(data, autoinspect_func=autoinspect_func)
df = pd.DataFrame(data)
if schemas and "pandas" in schemas:
pandas_schema = {key:np.dtype(value) for (key, value) in schemas["pandas"].items()}
df = df.astype(pandas_schema)
return PandasDataset(df, autoinspect_func=autoinspect_func)
elif dataset_type == 'SqlAlchemyDataset':
# Create a new database

Expand Down

0 comments on commit 0d11698

Please sign in to comment.