Skip to content

Commit

Permalink
pass datecolumn / groupby columns separately
Browse files: browse the repository at this point in the history
  • Loading branch information
liuyl committed Sep 3, 2023
1 parent 3ad3c2a commit 1c994e7
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 51 deletions.
42 changes: 16 additions & 26 deletions backend/app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
import pandas as pd
import polars
import sentry_sdk
from orjson import orjson
from sentry_sdk.integrations.flask import FlaskIntegration

from app.data_source import bp as data_source_bp
from app.file_upload.services.file_upload import FileUploadService
from app.insight.datasource.bqMetrics import BqMetrics
Expand All @@ -18,6 +15,8 @@
from flask import Flask, request
from flask_cors import CORS
from loguru import logger
from orjson import orjson
from sentry_sdk.integrations.flask import FlaskIntegration

flask_env_value = os.environ.get('FLASK_ENV', '')
if flask_env_value != 'development':
Expand Down Expand Up @@ -62,6 +61,10 @@ def getBqInsight():
baseDateRange = data['baseDateRange']
comparisonDateRange = data['comparisonDateRange']
selectedColumns = data['selectedColumns']
date_column = data['dateColumn']
# TODO(liuyl): Fix this; right now this value is not passed to the backend.
date_column_type = data['dateColumnType']
group_by_columns = data['groupByColumns']

baselineStart = datetime.strptime(
baseDateRange['from'], '%Y-%m-%dT%H:%M:%S.%fZ').date()
Expand All @@ -72,22 +75,15 @@ def getBqInsight():
comparisonEnd = datetime.strptime(
comparisonDateRange['to'], '%Y-%m-%dT%H:%M:%S.%fZ').date()

date_column = list(
filter(lambda x: x[1]['type'] == 'date', selectedColumns.items()))[0][0].strip()
date_column_type = list(filter(lambda x: x[1]['type'] == 'date', selectedColumns.items()))[0][1]['fieldType'].strip()

agg_method = list(filter(lambda x: x[1]['type'] == 'metric' or x[1]
['type'] == 'supporting_metric', selectedColumns.items()))
['type'] == 'supporting_metric', selectedColumns.items()))
expected_value = list(filter(lambda x: x[1]['type'] == 'metric', selectedColumns.items()))[
0][1]['expectedValue']

metrics_name = {k: k for k, v in agg_method}
metrics_name.update({date_column: 'count'})
agg_method = {k: agg_method_map[v['aggregationOption']]
for k, v in agg_method}
dimensions = list(
filter(lambda x: x[1]['type'] == 'dimension', selectedColumns.items()))
dimensions = [k for k, v in dimensions]

bq_metric = BqMetrics(
table_name=table_name,
Expand All @@ -97,7 +93,7 @@ def getBqInsight():
date_column_type=date_column_type,
agg_method=agg_method,
metrics_name=metrics_name,
columns=dimensions,
columns=group_by_columns,
expected_value=expected_value)
return bq_metric.get_metrics()

Expand All @@ -110,6 +106,7 @@ def get_time_series():
baseDateRange = data['baseDateRange']
comparisonDateRange = data['comparisonDateRange']
selectedColumns = data['selectedColumns']
date_column = data['dateColumn']

baselineStart = datetime.strptime(
baseDateRange['from'], '%Y-%m-%dT%H:%M:%S.%fZ').date()
Expand All @@ -119,12 +116,9 @@ def get_time_series():
comparisonDateRange['from'], '%Y-%m-%dT%H:%M:%S.%fZ').date()
comparisonEnd = datetime.strptime(
comparisonDateRange['to'], '%Y-%m-%dT%H:%M:%S.%fZ').date()
date_column = list(
filter(lambda x: x[1]['type'] == 'date', selectedColumns.items())
)[0][0].strip()

agg_method = list(filter(lambda x: x[1]['type'] == 'metric' or x[1]
['type'] == 'supporting_metric', selectedColumns.items()))
['type'] == 'supporting_metric', selectedColumns.items()))

metrics_name = {k: k for k, v in agg_method}
metrics_name.update({date_column: 'count'})
Expand All @@ -135,7 +129,8 @@ def get_time_series():
segment_key = data['segmentKey']
filtering_clause = polars.lit(True)
for sub_key in segment_key:
filtering_clause = filtering_clause & (polars.col(sub_key['dimension']).cast(str).eq(polars.lit(sub_key['value'])))
filtering_clause = filtering_clause & (polars.col(
sub_key['dimension']).cast(str).eq(polars.lit(sub_key['value'])))

df = polars.read_csv(f'/tmp/dsensei/{fileId}') \
.with_columns(polars.col(date_column).str.slice(0, 10).str.to_date().alias("date")) \
Expand All @@ -160,6 +155,8 @@ def getInsight():
base_date_range = data['baseDateRange']
comparison_date_range = data['comparisonDateRange']
selected_columns = data['selectedColumns']
date_column = data['dateColumn']
group_by_columns = data['groupByColumns']

baseline_start = datetime.strptime(
base_date_range['from'], '%Y-%m-%dT%H:%M:%S.%fZ').date()
Expand All @@ -170,11 +167,8 @@ def getInsight():
comparison_end = datetime.strptime(
comparison_date_range['to'], '%Y-%m-%dT%H:%M:%S.%fZ').date()

date_column = list(
filter(lambda x: x[1]['type'] == 'date', selected_columns.items()))[0][0].strip()

agg_method = list(filter(lambda x: x[1]['type'] == 'metric' or x[1]
['type'] == 'supporting_metric', selected_columns.items()))
['type'] == 'supporting_metric', selected_columns.items()))
expected_value = list(filter(lambda x: x[1]['type'] == 'metric', selected_columns.items()))[
0][1]['expectedValue']

Expand All @@ -184,10 +178,6 @@ def getInsight():
for k, v in agg_method}
agg_method.update({date_column: 'count'})

dimensions = list(
filter(lambda x: x[1]['type'] == 'dimension', selected_columns.items()))
dimensions = [k for k, v in dimensions]

logger.info('Reading file')
df = polars.read_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(polars.col(date_column).str.slice(0, 10).str.to_date().alias("date"))
Expand All @@ -198,7 +188,7 @@ def getInsight():
(baseline_start, baseline_end),
(comparison_start, comparison_end),
date_column,
dimensions,
group_by_columns,
agg_method,
metrics_name,
expected_value
Expand Down
4 changes: 4 additions & 0 deletions frontend/src/components/main-dashboard/MainDashboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export default function MainDashboard() {
baseDateRange,
comparisonDateRange,
selectedColumns,
dateColumn,
groupByColumns,
dataSourceType,
targetDirection,
} = routerState;
Expand Down Expand Up @@ -86,6 +88,8 @@ export default function MainDashboard() {
baseDateRange,
comparisonDateRange,
selectedColumns,
dateColumn,
groupByColumns,
}),
headers: {
"Content-Type": "application/json",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ export default function BigqueryBasedReportConfig({ schema }: Props) {
selectedColumns: {
[key: string]: ColumnConfig;
},
dateColumn: string,
groupByColumns: string[],
baseDateRange: DateRangeConfig,
comparisonDateRange: DateRangeConfig,
targetDirection: TargetDirection
Expand All @@ -30,6 +32,8 @@ export default function BigqueryBasedReportConfig({ schema }: Props) {
state: {
tableName: name,
dataSourceType: "bigquery",
dateColumn,
groupByColumns,
selectedColumns,
baseDateRange,
comparisonDateRange,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ export default function CSVBasedReportConfig({
selectedColumns: {
[key: string]: ColumnConfig;
},
dateColumn: string,
groupByColumns: string[],
baseDateRange: DateRangeConfig,
comparisonDateRange: DateRangeConfig,
targetDirection: TargetDirection
Expand All @@ -164,6 +166,8 @@ export default function CSVBasedReportConfig({
state: {
fileId: id,
dataSourceType: "csv",
dateColumn,
groupByColumns,
selectedColumns,
baseDateRange,
comparisonDateRange,
Expand Down
44 changes: 19 additions & 25 deletions frontend/src/components/uploader/report-config/ReportConfig.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
ColumnConfig,
ColumnType,
DateRangeConfig,
MetricColumn,
PrefillConfig,
RowCountByColumn,
RowCountByDateAndColumn,
Expand All @@ -41,6 +42,8 @@ type Props = {
selectedColumns: {
[key: string]: ColumnConfig;
},
dateColumn: string,
groupByColumns: string[],
baseDateRange: DateRangeConfig,
comparisonDateRange: DateRangeConfig,
targetDirection: TargetDirection
Expand All @@ -57,6 +60,13 @@ function ReportConfig({
onSubmit,
}: Props) {
const { trackEvent } = useTracking();

const [dateColumn, setDateColumn] = useState<string>("");
const [groupByColumns, setGroupByColumns] = useState<string[]>([]);
const [metricColumn, setMetricColumn] = useState<MetricColumn | undefined>(undefined);
const [relevantMetricColumns, setRelevantMetricColumns] = useState<string[]>([]);


const [selectedColumns, setSelectedColumns] = useState<{
[k: string]: ColumnConfig;
}>({});
Expand Down Expand Up @@ -156,31 +166,16 @@ function ReportConfig({
);
removedDimension.map((m) => delete selectedColumnsClone[m]);
setSelectedColumns(selectedColumnsClone);
setGroupByColumns(dimensions);
};

const onSelectDateColumn = (dateCol: string) => {
const selectedColumnsClone = Object.assign({}, selectedColumns);
const prevDateColumns = Object.keys(selectedColumnsClone).filter(
(m) => selectedColumnsClone[m]["type"] === "date"
);
if (prevDateColumns.length > 1) {
throw new Error("Found more than 1 date columns.");
}
prevDateColumns.map((d) => delete selectedColumnsClone[d]);
selectedColumnsClone[dateCol] = {
type: "date",
fieldType: schema.fields.find((f) => f.name === dateCol)!.type,
};
setSelectedColumns(selectedColumnsClone);
setDateColumn(dateCol);

setBaseDateRangeData({ range: {}, stats: {} });
setComparisonDateRangeData({ range: {}, stats: {} });
};

const selectedDateColumn = Object.keys(selectedColumns).find(
(c) => selectedColumns[c]["type"] === "date"
);

function getDateColumns() {
const dateColumnsByType = schema.fields.filter(
(h) =>
Expand Down Expand Up @@ -240,10 +235,7 @@ function ReportConfig({
(column) => column.type === "metric"
).length > 0;

const hasDimensionColumn =
Object.values(selectedColumns).filter(
(column) => column.type === "dimension"
).length > 0;
const hasDimensionColumn = groupByColumns.length > 0;

const hasRows =
!rowCountByDateColumn ||
Expand Down Expand Up @@ -301,13 +293,13 @@ function ReportConfig({
}

function renderDatePicker() {
if (rowCountByDateColumn && !selectedDateColumn) {
if (rowCountByDateColumn && !dateColumn) {
return null;
}

let countByDate;
if (rowCountByDateColumn && selectedDateColumn) {
countByDate = rowCountByDateColumn[selectedDateColumn];
if (rowCountByDateColumn && dateColumn) {
countByDate = rowCountByDateColumn[dateColumn];
if (!countByDate) {
return null;
}
Expand Down Expand Up @@ -364,7 +356,7 @@ function ReportConfig({
? schema.fields.map((h) => h.name)
: getDateColumns().map((h) => h.name)
}
selectedValue={selectedDateColumn ? selectedDateColumn : ""}
selectedValue={dateColumn ? dateColumn : ""}
onValueChange={onSelectDateColumn}
instruction={
<Text>
Expand Down Expand Up @@ -500,6 +492,8 @@ function ReportConfig({
trackSubmit();
await onSubmit(
selectedColumns,
dateColumn,
groupByColumns,
baseDateRangeData.range,
comparisonDateRangeData.range,
targetDirection
Expand Down
6 changes: 6 additions & 0 deletions frontend/src/types/report-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ export type ColumnType = "metric" | "supporting_metric" | "dimension" | "date";
export type AggregationType = "sum" | "count" | "distinct" | "ratio";
export type TargetDirection = "increasing" | "decreasing";

export interface MetricColumn {
columnName: string;
aggregationOption: AggregationType;
expectedValue?: number;
}

export interface ColumnConfig {
type: ColumnType;
fieldType: FieldType;
Expand Down

0 comments on commit 1c994e7

Please sign in to comment.