Skip to content

Commit

Permalink
Merge branch 'develop' into feature/type_profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
jcampbell committed Aug 1, 2019
2 parents 3fb0a74 + 8dd3f9e commit a1e035b
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
6 changes: 6 additions & 0 deletions great_expectations/data_asset/data_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,12 @@ def remove_expectation(self,
else:
return expectation

def set_config_value(self, key, value):
    """Store ``value`` under ``key`` in this object's ``_config`` mapping.

    Any value already stored under ``key`` is replaced.

    Args:
        key: Name of the configuration entry to write.
        value: Value to associate with ``key``.

    Returns:
        None. The ``_config`` mapping is modified in place.
    """
    self._config[key] = value

def get_config_value(self, key):
    """Return the value stored under ``key`` in this object's ``_config``.

    Args:
        key: Name of the configuration entry to read.

    Returns:
        The object currently stored under ``key``.

    Raises:
        Whatever the ``_config`` mapping raises for a missing key; no
        default is substituted here (``KeyError`` when ``_config`` is a
        plain ``dict``).
    """
    return self._config[key]

def get_batch_kwargs(self):
    """Return the object held in this instance's ``_batch_kwargs`` attribute.

    The stored object itself is returned, not a copy, so any mutation made
    by the caller is visible through this instance as well.
    """
    return self._batch_kwargs

Expand Down
15 changes: 11 additions & 4 deletions great_expectations/profile/basic_dataset_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class BasicDatasetProfiler(DatasetProfiler):
@classmethod
def _get_column_type(cls, df, column):
# list of types is used to support pandas and sqlalchemy
df.set_config_value("interactive_evaluation", True)
try:
if df.expect_column_values_to_be_in_type_list(column, type_list=sorted(list(cls.INT_TYPE_NAMES)))["success"]:
type_ = "int"
Expand All @@ -46,13 +47,14 @@ def _get_column_type(cls, df, column):
except NotImplementedError:
type_ = "unknown"

df.set_config_value('interactive_evaluation', False)
return type_

@classmethod
def _get_column_cardinality(cls, df, column):

num_unique = None
pct_unique = None
df.set_config_value("interactive_evaluation", True)

try:
num_unique = df.expect_column_unique_value_count_to_be_between(column, None, None)[
Expand Down Expand Up @@ -92,20 +94,24 @@ def _get_column_cardinality(cls, df, column):
cardinality = "many"
# print('col: {0:s}, num_unique: {1:s}, pct_unique: {2:s}, card: {3:s}'.format(column, str(num_unique), str(pct_unique), cardinality))

df.set_config_value('interactive_evaluation', False)

return cardinality

@classmethod
def _profile(cls, dataset):


df = dataset

df.set_default_expectation_argument("catch_exceptions", True)

df.expect_table_row_count_to_be_between(min_value=0, max_value=None)
df.expect_table_columns_to_match_ordered_list(None)
df.set_config_value('interactive_evaluation', False)

for column in df.get_table_columns():
columns = df.get_table_columns()
number_of_columns = len(columns)
for i, column in enumerate(columns):
logger.info(" Preparing column {} of {}: {}".format(i, number_of_columns, column))

# df.expect_column_to_exist(column)

Expand Down Expand Up @@ -198,4 +204,5 @@ def _profile(cls, dataset):
# print(column, type_, cardinality)
pass

df.set_config_value("interactive_evaluation", True)
return df.get_expectation_suite(suppress_warnings=True, discard_failed_expectations=False)

0 comments on commit a1e035b

Please sign in to comment.