From 987740aa8dfff4bf771b587a40f1e12811453660 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Fri, 18 Feb 2022 23:28:28 +0800 Subject: [PATCH] fix: contribution operator meets nan value (#18782) --- .../src/Timeseries/Area/controlPanel.tsx | 4 +-- .../Timeseries/Regular/Bar/controlPanel.tsx | 4 +-- .../src/Timeseries/Regular/controlPanel.tsx | 4 +-- .../src/Timeseries/Step/controlPanel.tsx | 4 +-- .../src/Timeseries/controlPanel.tsx | 4 +-- superset/common/query_object.py | 2 ++ .../pandas_postprocessing/contribution.py | 1 + .../test_contribution.py | 36 ++++++++++++------- 8 files changed, 36 insertions(+), 23 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx index 720eb9428723..bae735b692ba 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx @@ -72,10 +72,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx index 96cb0b6bda60..08d09a014727 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx @@ -69,10 +69,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx index 14edc341f2ba..701c3a57ff56 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx @@ -66,10 +66,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx index 8178e4929782..8b40330730af 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx @@ -72,10 +72,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx index 1012b2f6e7df..f7fd56b7fa57 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx @@ -73,10 +73,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 03ee9cb1d305..2a40155d1ca4 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -19,6 +19,7 @@ import logging from datetime import datetime, timedelta +from pprint import pformat from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING from flask_babel import gettext as _ @@ -395,6 +396,7 @@ def exec_post_processing(self, df: DataFrame) -> DataFrame: :raises QueryObjectValidationError: If the post processing operation is incorrect """ + logger.debug("post_processing: %s", pformat(self.post_processing)) for post_process in self.post_processing: operation = post_process.get("operation") if not operation: diff --git a/superset/utils/pandas_postprocessing/contribution.py b/superset/utils/pandas_postprocessing/contribution.py index d813961fb5f5..7097ea33355a 100644 --- a/superset/utils/pandas_postprocessing/contribution.py +++ b/superset/utils/pandas_postprocessing/contribution.py @@ -49,6 +49,7 @@ def contribution( """ contribution_df = df.copy() numeric_df = contribution_df.select_dtypes(include=["number", Decimal]) + numeric_df.fillna(0, inplace=True) # verify column selections if columns: numeric_columns = numeric_df.columns.tolist() diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py index 78212cbe5b85..9d2df7697111 100644 --- a/tests/unit_tests/pandas_postprocessing/test_contribution.py +++ b/tests/unit_tests/pandas_postprocessing/test_contribution.py @@ -18,6 +18,8 @@ from datetime import datetime import pytest +from numpy import nan +from numpy.testing import assert_array_equal from pandas import DataFrame from superset.exceptions import QueryObjectValidationError @@ -28,9 +30,14 @@ def test_contribution(): df = DataFrame( { - DTTM_ALIAS: [datetime(2020, 7, 16, 14, 49), datetime(2020, 7, 16, 14, 50),], - "a": [1, 3], - "b": [1, 9], + DTTM_ALIAS: [ + datetime(2020, 7, 16, 14, 49), + datetime(2020, 7, 16, 14, 50), + datetime(2020, 7, 16, 14, 51), + ], + "a": [1, 3, nan], + "b": [1, 9, nan], + "c": [nan, nan, nan], } ) with pytest.raises(QueryObjectValidationError, match="not numeric"): @@ -43,18 +50,20 @@ def test_contribution(): processed_df = contribution( df, orientation=PostProcessingContributionOrientation.ROW, ) - assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b"] - assert processed_df["a"].tolist() == [0.5, 0.25] - assert processed_df["b"].tolist() == [0.5, 0.75] + assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"] + assert_array_equal(processed_df["a"].tolist(), [0.5, 0.25, nan]) + assert_array_equal(processed_df["b"].tolist(), [0.5, 0.75, nan]) + assert_array_equal(processed_df["c"].tolist(), [0, 0, nan]) # cell contribution across column without temporal column df.pop(DTTM_ALIAS) processed_df = contribution( df, orientation=PostProcessingContributionOrientation.COLUMN ) - assert processed_df.columns.tolist() == ["a", "b"] - assert processed_df["a"].tolist() == [0.25, 0.75] - assert processed_df["b"].tolist() == [0.1, 0.9] + assert processed_df.columns.tolist() == ["a", "b", "c"] + assert_array_equal(processed_df["a"].tolist(), [0.25, 0.75, 0]) + assert_array_equal(processed_df["b"].tolist(), [0.1, 0.9, 0]) + assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan]) # contribution only on selected columns processed_df = contribution( @@ -63,7 +72,8 @@ def test_contribution(): columns=["a"], rename_columns=["pct_a"], ) - assert processed_df.columns.tolist() == ["a", "b", "pct_a"] - assert processed_df["a"].tolist() == [1, 3] - assert processed_df["b"].tolist() == [1, 9] - assert processed_df["pct_a"].tolist() == [0.25, 0.75] + assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"] + assert_array_equal(processed_df["a"].tolist(), [1, 3, nan]) + assert_array_equal(processed_df["b"].tolist(), [1, 9, nan]) + assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan]) + assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0]