Skip to content

Commit

Permalink
Merge pull request #2356 from hawkfish/hawkfish-summarize
Browse files Browse the repository at this point in the history
Issue #2286: Numeric SUMMARIZE stats
  • Loading branch information
Mytherin committed Sep 30, 2021
2 parents eac951c + fb7995d commit 0edad5d
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 12 deletions.
33 changes: 31 additions & 2 deletions src/planner/binder/statement/bind_summarize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ static unique_ptr<ParsedExpression> SummarizeCreateAggregate(const string &aggre
return move(cast_function);
}

//! Builds the parsed expression CAST(<aggregate>(<column_name>, <modifier>) AS VARCHAR).
//! \param aggregate   name of the function to invoke
//! \param column_name column passed as the first argument (taken by value and moved into the column reference)
//! \param modifier    constant passed as the second argument (e.g. the quantile for "approx_quantile")
//! \return the cast expression wrapping the function call
static unique_ptr<ParsedExpression> SummarizeCreateAggregate(const string &aggregate, string column_name,
                                                             const Value &modifier) {
	// Create the two arguments in call order: the column reference first, then the constant modifier.
	auto column_argument = make_unique<ColumnRefExpression>(move(column_name));
	auto modifier_argument = make_unique<ConstantExpression>(modifier);

	vector<unique_ptr<ParsedExpression>> arguments;
	arguments.push_back(move(column_argument));
	arguments.push_back(move(modifier_argument));

	// Wrap the function call in a cast to VARCHAR.
	auto function_call = make_unique<FunctionExpression>(aggregate, move(arguments));
	unique_ptr<ParsedExpression> varchar_cast = make_unique<CastExpression>(LogicalType::VARCHAR, move(function_call));
	return varchar_cast;
}

static unique_ptr<ParsedExpression> SummarizeCreateCountStar() {
vector<unique_ptr<ParsedExpression>> children;
auto aggregate_function = make_unique<FunctionExpression>("count_star", move(children));
Expand Down Expand Up @@ -70,6 +80,10 @@ BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
vector<unique_ptr<ParsedExpression>> max_children;
vector<unique_ptr<ParsedExpression>> unique_children;
vector<unique_ptr<ParsedExpression>> avg_children;
vector<unique_ptr<ParsedExpression>> std_children;
vector<unique_ptr<ParsedExpression>> q25_children;
vector<unique_ptr<ParsedExpression>> q50_children;
vector<unique_ptr<ParsedExpression>> q75_children;
vector<unique_ptr<ParsedExpression>> count_children;
vector<unique_ptr<ParsedExpression>> null_percentage_children;
auto select = make_unique<SelectStatement>();
Expand All @@ -80,8 +94,19 @@ BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
min_children.push_back(SummarizeCreateAggregate("min", plan.names[i]));
max_children.push_back(SummarizeCreateAggregate("max", plan.names[i]));
unique_children.push_back(SummarizeCreateAggregate("approx_count_distinct", plan.names[i]));
avg_children.push_back(plan.types[i].IsNumeric() ? SummarizeCreateAggregate("avg", plan.names[i])
: make_unique<ConstantExpression>(Value()));
if (plan.types[i].IsNumeric()) {
avg_children.push_back(SummarizeCreateAggregate("avg", plan.names[i]));
std_children.push_back(SummarizeCreateAggregate("stddev", plan.names[i]));
q25_children.push_back(SummarizeCreateAggregate("approx_quantile", plan.names[i], Value::FLOAT(0.25)));
q50_children.push_back(SummarizeCreateAggregate("approx_quantile", plan.names[i], Value::FLOAT(0.50)));
q75_children.push_back(SummarizeCreateAggregate("approx_quantile", plan.names[i], Value::FLOAT(0.75)));
} else {
avg_children.push_back(make_unique<ConstantExpression>(Value()));
std_children.push_back(make_unique<ConstantExpression>(Value()));
q25_children.push_back(make_unique<ConstantExpression>(Value()));
q50_children.push_back(make_unique<ConstantExpression>(Value()));
q75_children.push_back(make_unique<ConstantExpression>(Value()));
}
count_children.push_back(SummarizeCreateCountStar());
null_percentage_children.push_back(SummarizeCreateNullPercentage(plan.names[i]));
}
Expand All @@ -95,6 +120,10 @@ BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
select_node->select_list.push_back(SummarizeWrapUnnest(max_children, "max"));
select_node->select_list.push_back(SummarizeWrapUnnest(unique_children, "approx_unique"));
select_node->select_list.push_back(SummarizeWrapUnnest(avg_children, "avg"));
select_node->select_list.push_back(SummarizeWrapUnnest(std_children, "std"));
select_node->select_list.push_back(SummarizeWrapUnnest(q25_children, "q25"));
select_node->select_list.push_back(SummarizeWrapUnnest(q50_children, "q50"));
select_node->select_list.push_back(SummarizeWrapUnnest(q75_children, "q75"));
select_node->select_list.push_back(SummarizeWrapUnnest(count_children, "count"));
select_node->select_list.push_back(SummarizeWrapUnnest(null_percentage_children, "null_percentage"));
select_node->from_table = move(subquery_ref);
Expand Down
20 changes: 10 additions & 10 deletions test/sql/show_select/test_summarize.test
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@ j VARCHAR hello world
k HUGEINT -12 12
d DOUBLE -0.5 0.5

query IIIIIIII
query IIIIIIIIIIII
SUMMARIZE types;
----
i INTEGER 1 3 3 2.0 3 0.0%
j VARCHAR hello world 2 NULL 3 33.33%
k HUGEINT -12 12 2 0.0 3 33.33%
d DOUBLE -0.5 0.5 2 0.0 3 33.33%
i INTEGER 1 3 3 2.0 1.0 1 2 2 3 0.0%
j VARCHAR hello world 2 NULL NULL NULL NULL NULL 3 33.33%
k HUGEINT -12 12 2 0.0 16.97056274847714 -12.0 0.0 12.0 3 33.33%
d DOUBLE -0.5 0.5 2 0.0 0.7071067811865476 -0.5 0.0 0.5 3 33.33%

query IIIIIIII
query IIIIIIIIIIII
SUMMARIZE SELECT * FROM types;
----
i INTEGER 1 3 3 2.0 3 0.0%
j VARCHAR hello world 2 NULL 3 33.33%
k HUGEINT -12 12 2 0.0 3 33.33%
d DOUBLE -0.5 0.5 2 0.0 3 33.33%
i INTEGER 1 3 3 2.0 1.0 1 2 2 3 0.0%
j VARCHAR hello world 2 NULL NULL NULL NULL NULL 3 33.33%
k HUGEINT -12 12 2 0.0 16.97056274847714 -12.0 0.0 12.0 3 33.33%
d DOUBLE -0.5 0.5 2 0.0 0.7071067811865476 -0.5 0.0 0.5 3 33.33%

0 comments on commit 0edad5d

Please sign in to comment.