From a718f60704b26f18597353a0d1a0ed6178e97d28 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 2 Apr 2020 11:28:13 -0500 Subject: [PATCH] Fix unpacking of median aggregation --- eland/operations.py | 11 +++++++---- eland/tests/dataframe/test_aggs_pytest.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/eland/operations.py b/eland/operations.py index adc89ee9..48105370 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -480,7 +480,6 @@ def aggs(self, query_compiler, pd_aggs): field_names = query_compiler.get_field_names(include_scripted_fields=False) body = Query(query_params["query"]) - # convert pandas aggs to ES equivalent es_aggs = self._map_pd_aggs_to_es_aggs(pd_aggs) @@ -509,9 +508,13 @@ def aggs(self, query_compiler, pd_aggs): values = list() for es_agg in es_aggs: if isinstance(es_agg, tuple): - values.append( - response["aggregations"][es_agg[0] + "_" + field][es_agg[1]] - ) + agg_value = response["aggregations"][es_agg[0] + "_" + field] + + # Pull multiple values from 'percentiles' result. + if es_agg[0] == "percentiles": + agg_value = agg_value["values"] + + values.append(agg_value[es_agg[1]]) else: values.append( response["aggregations"][es_agg + "_" + field]["value"] diff --git a/eland/tests/dataframe/test_aggs_pytest.py b/eland/tests/dataframe/test_aggs_pytest.py index fd84a04d..3243f3fa 100644 --- a/eland/tests/dataframe/test_aggs_pytest.py +++ b/eland/tests/dataframe/test_aggs_pytest.py @@ -68,3 +68,22 @@ def test_terms_aggs(self): print(ed_sum_min_std.dtypes) assert_almost_equal(pd_sum_min_std, ed_sum_min_std, check_less_precise=True) + + def test_aggs_median_var(self): + pd_ecommerce = self.pd_ecommerce() + ed_ecommerce = self.ed_ecommerce() + + pd_aggs = pd_ecommerce[ + ["taxful_total_price", "taxless_total_price", "total_quantity"] + ].agg(["median", "var"]) + ed_aggs = ed_ecommerce[ + ["taxful_total_price", "taxless_total_price", "total_quantity"] + ].agg(["median", "var"]) + + print(pd_aggs, pd_aggs.dtypes) + print(ed_aggs, ed_aggs.dtypes) + + # Eland returns all float values for all metric aggs, pandas can return int + # TODO - investigate this more + pd_aggs = pd_aggs.astype("float64") + assert_almost_equal(pd_aggs, ed_aggs, check_less_precise=2)