diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 310adc6ad9079..b1677c04906bf 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -33,7 +33,8 @@ from superset.common.db_query_status import QueryStatus from superset.common.query_actions import get_query_results from superset.common.query_object import QueryObject -from superset.common.utils import QueryCacheManager +from superset.common.utils import dataframe_utils as df_utils +from superset.common.utils.query_cache_manager import QueryCacheManager from superset.constants import CacheRegion from superset.exceptions import QueryObjectValidationError, SupersetException from superset.extensions import cache_manager, security_manager @@ -106,14 +107,6 @@ def __init__( self.custom_cache_timeout = custom_cache_timeout self.cache_values = cache_values - @staticmethod - def left_join_df( - left_df: pd.DataFrame, right_df: pd.DataFrame, join_keys: List[str], - ) -> pd.DataFrame: - df = left_df.set_index(join_keys).join(right_df.set_index(join_keys)) - df.reset_index(inplace=True) - return df - def processing_time_offsets( # pylint: disable=too-many-locals self, df: pd.DataFrame, query_object: QueryObject, ) -> CachedTimeOffset: @@ -194,7 +187,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals ] - DateOffset(**normalize_time_delta(offset)) # df left join `offset_metrics_df` - offset_df = self.left_join_df( + offset_df = df_utils.left_join_df( left_df=df, right_df=offset_metrics_df, join_keys=join_keys, ) offset_slice = offset_df[metrics_mapping.values()] @@ -231,7 +224,7 @@ def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFr ) if self.enforce_numerical_metrics: - self.df_metrics_to_num(df, query_object) + df_utils.df_metrics_to_num(df, query_object) df.replace([np.inf, -np.inf], np.nan, inplace=True) @@ -271,15 +264,6 @@ def get_query_result(self, query_object: QueryObject) -> QueryResult: result.query = 
query return result - @staticmethod - def df_metrics_to_num(df: pd.DataFrame, query_object: QueryObject) -> None: - """Converting metrics to numeric when pandas.read_sql cannot""" - for col, dtype in df.dtypes.items(): - if dtype.type == np.object_ and col in query_object.metric_names: - # soft-convert a metric column to numeric - # will stay as strings if conversion fails - df[col] = df[col].infer_objects() - def get_data(self, df: pd.DataFrame,) -> Union[str, List[Dict[str, Any]]]: if self.result_format == ChartDataResultFormat.CSV: include_index = not isinstance(df.index, pd.RangeIndex) diff --git a/superset/common/utils/__init__.py b/superset/common/utils/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/superset/common/utils/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/superset/common/utils/dataframe_utils.py b/superset/common/utils/dataframe_utils.py new file mode 100644 index 0000000000000..55d03e6343410 --- /dev/null +++ b/superset/common/utils/dataframe_utils.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import List, TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+
+if TYPE_CHECKING:
+    from superset.common.query_object import QueryObject
+
+
+def left_join_df(
+    left_df: pd.DataFrame, right_df: pd.DataFrame, join_keys: List[str],
+) -> pd.DataFrame:
+    """Left-join ``right_df`` onto ``left_df`` on the ``join_keys`` columns."""
+    joined = left_df.set_index(join_keys).join(right_df.set_index(join_keys))
+    return joined.reset_index()
+
+
+def df_metrics_to_num(df: pd.DataFrame, query_object: QueryObject) -> None:
+    """Soft-convert object-dtype metric columns of ``df`` in place."""
+    for name, col_dtype in df.dtypes.items():
+        if col_dtype.type != np.object_ or name not in query_object.metric_names:
+            continue
+        # infer_objects keeps the column as-is when no better dtype can be inferred
+        df[name] = df[name].infer_objects()
diff --git a/superset/common/utils.py b/superset/common/utils/query_cache_manager.py
similarity index 99%
rename from superset/common/utils.py
rename to superset/common/utils/query_cache_manager.py
index d5cad68eb26d8..92fb3561234f4 100644
--- a/superset/common/utils.py
+++ b/superset/common/utils/query_cache_manager.py
@@ -14,6 +14,8 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from __future__ import annotations + import logging from typing import Any, Dict, List, Optional