Skip to content

Commit

Permalink
Fix the issue of tiny segments not showing up
Browse files Browse the repository at this point in the history
  • Loading branch information
dyang415 committed Oct 5, 2023
1 parent e7e652c commit 4fd4b5c
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions backend/app/insight/services/insight_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,7 @@ def gen_sub_df_for_columns(columns: List[str]):
) for columns in column_combinations_list]
wait(futures)

total_rows = self.overall_aggregated_df['count_baseline'].sum() + self.overall_aggregated_df['count'].sum()
multi_dimension_grouping_result = polars.concat([future.result() for future in futures]) \
.filter((polars.col("count") + polars.col("count_baseline")) / polars.lit(total_rows) > 0.01)
multi_dimension_grouping_result = polars.concat([future.result() for future in futures])

dimension_info_df = multi_dimension_grouping_result.filter(polars.col("dimension_name").list.lengths() == 1) \
.with_columns(polars.col("dimension_name").list.first()) \
Expand Down Expand Up @@ -317,7 +315,10 @@ def _build_serialized_key(row):
return "|".join([f"{column}:{value}" for column, value in zip(row['dimension_name'], row['dimension_value'])])

if len(self.key_dimensions) > 0:
total_rows = self.overall_aggregated_df['count_baseline'].sum() + self.overall_aggregated_df['count'].sum()

top_segments_df = df.with_columns(polars.concat_list([polars.lit(dimension) for dimension in self.key_dimensions]).alias("key_dimensions")) \
.filter((polars.col("count") + polars.col("count_baseline")) / polars.lit(total_rows) > 0.01) \
.filter(polars.col("dimension_name").list.set_intersection("key_dimensions").list.lengths() == polars.col("dimension_name").list.lengths()) \
.limit(1000)
top_segment_keys = [_build_serialized_key(row) for row in top_segments_df.rows(named=True)]
Expand Down

0 comments on commit 4fd4b5c

Please sign in to comment.