Skip to content

Commit

Permalink
fix: contribution operator meets nan value (#18782)
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhaoyongjie committed Feb 18, 2022
1 parent 38cd696 commit 987740a
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
[EchartsTimeseriesContributionType.Row, 'Total'],
[EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
description: t('Calculate contribution per series or total'),
description: t('Calculate contribution per series or row'),
},
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
[EchartsTimeseriesContributionType.Row, 'Total'],
[EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
description: t('Calculate contribution per series or total'),
description: t('Calculate contribution per series or row'),
},
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
[EchartsTimeseriesContributionType.Row, 'Total'],
[EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
description: t('Calculate contribution per series or total'),
description: t('Calculate contribution per series or row'),
},
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
[EchartsTimeseriesContributionType.Row, 'Total'],
[EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
description: t('Calculate contribution per series or total'),
description: t('Calculate contribution per series or row'),
},
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
[EchartsTimeseriesContributionType.Row, 'Total'],
[EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
description: t('Calculate contribution per series or total'),
description: t('Calculate contribution per series or row'),
},
},
],
Expand Down
2 changes: 2 additions & 0 deletions superset/common/query_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import logging
from datetime import datetime, timedelta
from pprint import pformat
from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING

from flask_babel import gettext as _
Expand Down Expand Up @@ -395,6 +396,7 @@ def exec_post_processing(self, df: DataFrame) -> DataFrame:
:raises QueryObjectValidationError: If the post processing operation
is incorrect
"""
logger.debug("post_processing: %s", pformat(self.post_processing))
for post_process in self.post_processing:
operation = post_process.get("operation")
if not operation:
Expand Down
1 change: 1 addition & 0 deletions superset/utils/pandas_postprocessing/contribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def contribution(
"""
contribution_df = df.copy()
numeric_df = contribution_df.select_dtypes(include=["number", Decimal])
numeric_df.fillna(0, inplace=True)
# verify column selections
if columns:
numeric_columns = numeric_df.columns.tolist()
Expand Down
36 changes: 23 additions & 13 deletions tests/unit_tests/pandas_postprocessing/test_contribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from datetime import datetime

import pytest
from numpy import nan
from numpy.testing import assert_array_equal
from pandas import DataFrame

from superset.exceptions import QueryObjectValidationError
Expand All @@ -28,9 +30,14 @@
def test_contribution():
df = DataFrame(
{
DTTM_ALIAS: [datetime(2020, 7, 16, 14, 49), datetime(2020, 7, 16, 14, 50),],
"a": [1, 3],
"b": [1, 9],
DTTM_ALIAS: [
datetime(2020, 7, 16, 14, 49),
datetime(2020, 7, 16, 14, 50),
datetime(2020, 7, 16, 14, 51),
],
"a": [1, 3, nan],
"b": [1, 9, nan],
"c": [nan, nan, nan],
}
)
with pytest.raises(QueryObjectValidationError, match="not numeric"):
Expand All @@ -43,18 +50,20 @@ def test_contribution():
processed_df = contribution(
df, orientation=PostProcessingContributionOrientation.ROW,
)
assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b"]
assert processed_df["a"].tolist() == [0.5, 0.25]
assert processed_df["b"].tolist() == [0.5, 0.75]
assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"]
assert_array_equal(processed_df["a"].tolist(), [0.5, 0.25, nan])
assert_array_equal(processed_df["b"].tolist(), [0.5, 0.75, nan])
assert_array_equal(processed_df["c"].tolist(), [0, 0, nan])

# cell contribution across column without temporal column
df.pop(DTTM_ALIAS)
processed_df = contribution(
df, orientation=PostProcessingContributionOrientation.COLUMN
)
assert processed_df.columns.tolist() == ["a", "b"]
assert processed_df["a"].tolist() == [0.25, 0.75]
assert processed_df["b"].tolist() == [0.1, 0.9]
assert processed_df.columns.tolist() == ["a", "b", "c"]
assert_array_equal(processed_df["a"].tolist(), [0.25, 0.75, 0])
assert_array_equal(processed_df["b"].tolist(), [0.1, 0.9, 0])
assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])

# contribution only on selected columns
processed_df = contribution(
Expand All @@ -63,7 +72,8 @@ def test_contribution():
columns=["a"],
rename_columns=["pct_a"],
)
assert processed_df.columns.tolist() == ["a", "b", "pct_a"]
assert processed_df["a"].tolist() == [1, 3]
assert processed_df["b"].tolist() == [1, 9]
assert processed_df["pct_a"].tolist() == [0.25, 0.75]
assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"]
assert_array_equal(processed_df["a"].tolist(), [1, 3, nan])
assert_array_equal(processed_df["b"].tolist(), [1, 9, nan])
assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0]

0 comments on commit 987740a

Please sign in to comment.