Skip to content

Commit

Permalink
Merge pull request #331 from great-expectations/fix/warnings
Browse files — browse the repository at this point in the history
Fix/warnings
  • Loading branch information
jcampbell committed Jul 11, 2018
2 parents 7cc7099 + 8bc0341 commit 37ff26f
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 14 deletions.
4 changes: 2 additions & 2 deletions great_expectations/dataset/pandas_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,7 @@ def expect_column_chisquare_test_p_value_to_be_greater_than(self, column, partit
# Convert to Series object to allow joining on index values
expected_column = pd.Series(partition_object['weights'], index=partition_object['values'], name='expected') * len(column)
# Join along the indices to allow proper comparison of both types of possible missing values
test_df = pd.concat([expected_column, observed_frequencies], axis = 1)
test_df = pd.concat([expected_column, observed_frequencies], axis=1, sort=True)

na_counts = test_df.isnull().sum()

Expand Down Expand Up @@ -1218,7 +1218,7 @@ def expect_column_kl_divergence_to_be_less_than(self, column, partition_object=N
# Data are expected to be discrete, use value_counts
observed_weights = column.value_counts() / len(column)
expected_weights = pd.Series(partition_object['weights'], index=partition_object['values'], name='expected')
test_df = pd.concat([expected_weights, observed_weights], axis=1)
test_df = pd.concat([expected_weights, observed_weights], axis=1, sort=True)

na_counts = test_df.isnull().sum()

Expand Down
22 changes: 10 additions & 12 deletions great_expectations/dataset/sqlalchemy_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,32 +653,30 @@ def expect_column_median_to_be_between(self,
sa.func.sum(
sa.case([(sa.column(column) == None, 1)], else_=0)
).label('null_count')
]).
select_from(sa.table(self.table_name))
]).select_from(sa.table(self.table_name))
)

elements = count_query.fetchone()
# The number of non-null/non-ignored values
nonnull_count = elements['element_count'] - elements['null_count']

element_values = self.engine.execute(
sa.select(column).order_by(column).where(
sa.select([sa.column(column)]).order_by(sa.column(column)).where(
sa.column(column) != None
).select_from(sa.table(self.table_name))
).offset(nonnull_count // 2 - 1).limit(2).select_from(sa.table(self.table_name))
)

# Fetch the Element count, null count, and sorted/null dropped column values
elements = count_query.fetchone()
column_values = list(element_values.fetchall())

# The number of non-null/non-ignored values
nonnull_count = elements['element_count'] - elements['null_count']

if nonnull_count % 2 == 0:
# An even number of column values: take the average of the two center values
column_median = (
column_values[nonnull_count // 2 - 1][0] + # left center value
column_values[nonnull_count // 2][0] # right center value
column_values[0][0] + # left center value
column_values[1][0] # right center value
) / 2.0 # Average center values
else:
# An odd number of column values, we can just take the center value
column_median = column_values[nonnull_count // 2][0] # True center value
column_median = column_values[1][0] # True center value

return {
'success':
Expand Down

0 comments on commit 37ff26f

Please sign in to comment.