Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Commit

Permalink
Add cardinality estimate to AbstractPlan object (#1475)
Browse files Browse the repository at this point in the history
* Clarified variable name

* Added cardinality and test

* Add optimizer testing class

* Remove debugging code

* Revert default estimate to fix broken test

* Formatting

* Removed unnecessary override in Cardinality test

* More comments and clean up tests
  • Loading branch information
GustavoAngulo authored and apavlo committed Sep 18, 2018
1 parent e738acb commit 1fc8b55
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 49 deletions.
2 changes: 1 addition & 1 deletion src/include/optimizer/optimizer_task.h
Expand Up @@ -211,7 +211,7 @@ class OptimizeInputs : public OptimizerTask {
GroupExpression *group_expr_;
double cur_total_cost_;
int cur_child_idx_ = -1;
int pre_child_idx_ = -1;
int prev_child_idx_ = -1;
int cur_prop_pair_idx_ = 0;
};

Expand Down
9 changes: 5 additions & 4 deletions src/include/optimizer/plan_generator.h
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// operator_to_plan_transformer.h
// plan_generator.h
//
// Identification: src/include/optimizer/operator_to_plan_transformer.h
// Identification: src/include/optimizer/plan_generator.h
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -46,7 +46,8 @@ class PlanGenerator : public OperatorVisitor {
std::vector<expression::AbstractExpression *> required_cols,
std::vector<expression::AbstractExpression *> output_cols,
std::vector<std::unique_ptr<planner::AbstractPlan>> &children_plans,
std::vector<ExprMap> children_expr_map);
std::vector<ExprMap> children_expr_map,
int estimated_cardinality);

void Visit(const DummyScan *) override;

Expand Down
8 changes: 4 additions & 4 deletions src/include/optimizer/stats_calculator.h
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// cost_and_stats_calculator.h
// stats_calculator.h
//
// Identification: src/include/optimizer/stats_calculator.h
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -56,13 +56,13 @@ class StatsCalculator : public OperatorVisitor {
std::unordered_map<std::string, std::shared_ptr<ColumnStats>> &stats,
bool copy);
/**
* @brief Update selectivity for predicate evaluation
* @brief Return estimated cardinality for a filter
*
* @param num_rows Number of rows of base table
* @param predicate_stats The stats for columns in the expression
* @param predicates conjunction predicates
*/
void UpdateStatsForFilter(
size_t EstimateCardinalityForFilter(
size_t num_rows,
std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
&predicate_stats,
Expand Down
8 changes: 3 additions & 5 deletions src/include/planner/abstract_plan.h
Expand Up @@ -85,8 +85,8 @@ class AbstractPlan : public Printable {
// Returns the cardinality estimate currently stored on this plan node.
int GetCardinality() const {
  return estimated_cardinality_;
}

// Overwrites the cardinality estimate stored on this plan node. Intended to
// be called only while a plan is being constructed (ConvertOpExpression) or
// from tests.
void SetCardinality(int cardinality) {
  estimated_cardinality_ = cardinality;
}

//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -152,9 +152,7 @@ class AbstractPlan : public Printable {
std::vector<std::unique_ptr<AbstractPlan>> children_;

AbstractPlan *parent_ = nullptr;

// TODO: This field is hard-coded for now. This needs to be changed when
// the optimizer has the cost model and cardinality estimation

int estimated_cardinality_ = 500000;

private:
Expand Down
8 changes: 4 additions & 4 deletions src/optimizer/child_stats_deriver.cpp
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// cost_and_stats_calculator.h
// child_stats_deriver.cpp
//
// Identification: src/optimizer/stats_calculator.cpp
// Identification: src/optimizer/child_stats_deriver.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -67,7 +67,7 @@ void ChildStatsDeriver::PassDownColumn(expression::AbstractExpression *col) {
auto child_group = memo_->GetGroupByID(gexpr_->GetChildGroupId(idx));
if (child_group->GetTableAliases().count(tv_expr->GetTableName()) &&
// If we have not derived the column stats yet
child_group->HasColumnStats(tv_expr->GetColFullName())) {
!child_group->HasColumnStats(tv_expr->GetColFullName())) {
output_[idx].insert(col);
break;
}
Expand Down
5 changes: 2 additions & 3 deletions src/optimizer/optimizer.cpp
Expand Up @@ -6,7 +6,7 @@
//
// Identification: src/optimizer/optimizer.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -366,8 +366,7 @@ unique_ptr<planner::AbstractPlan> Optimizer::ChooseBestPlan(
PlanGenerator generator;
auto plan = generator.ConvertOpExpression(op, required_props, required_cols,
output_cols, children_plans,
children_expr_map);

children_expr_map, group->GetNumRows());
LOG_TRACE("Finish Choosing best plan for group %d", id);
return plan;
}
Expand Down
13 changes: 6 additions & 7 deletions src/optimizer/optimizer_task.cpp
Expand Up @@ -2,7 +2,7 @@
//
// Peloton
//
// rule.h
// optimizer_task.cpp
//
// Identification: src/optimizer/optimizer_task.cpp
//
Expand Down Expand Up @@ -313,14 +313,13 @@ void OptimizeInputs::execute() {
cur_total_cost_ += child_best_expr->GetCost(i_prop);
// Pruning
if (cur_total_cost_ > context_->cost_upper_bound) break;
} else if (pre_child_idx_ !=
cur_child_idx_) { // First time to optimize child group
pre_child_idx_ = cur_child_idx_;
} else if (prev_child_idx_ !=
cur_child_idx_) { // We haven't optimized child group
prev_child_idx_ = cur_child_idx_;
PushTask(new OptimizeInputs(this));
PushTask(new OptimizeGroup(
child_group, std::make_shared<OptimizeContext>(
context_->metadata, i_prop,
context_->cost_upper_bound - cur_total_cost_)));
context_->metadata, i_prop, context_->cost_upper_bound - cur_total_cost_)));
return;
} else { // If we return from OptimizeGroup, then there is no expr for
// the context
Expand Down Expand Up @@ -401,7 +400,7 @@ void OptimizeInputs::execute() {
}

// Reset child idx and total cost
pre_child_idx_ = -1;
prev_child_idx_ = -1;
cur_child_idx_ = 0;
cur_total_cost_ = 0;
}
Expand Down
8 changes: 5 additions & 3 deletions src/optimizer/plan_generator.cpp
Expand Up @@ -2,9 +2,9 @@
//
// Peloton
//
// operator_to_plan_transformer.cpp
// plan_generator.cpp
//
// Identification: src/optimizer/operator_to_plan_transformer.cpp
// Identification: src/optimizer/plan_generator.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
Expand Down Expand Up @@ -59,14 +59,16 @@ unique_ptr<planner::AbstractPlan> PlanGenerator::ConvertOpExpression(
vector<expression::AbstractExpression *> required_cols,
vector<expression::AbstractExpression *> output_cols,
vector<unique_ptr<planner::AbstractPlan>> &children_plans,
vector<ExprMap> children_expr_map) {
vector<ExprMap> children_expr_map,
int estimated_cardinality) {
required_props_ = move(required_props);
required_cols_ = move(required_cols);
output_cols_ = move(output_cols);
children_plans_ = move(children_plans);
children_expr_map_ = move(children_expr_map);
op->Op().Accept(this);
BuildProjectionPlan();
output_plan_->SetCardinality(estimated_cardinality);
return move(output_plan_);
}

Expand Down
6 changes: 2 additions & 4 deletions src/optimizer/stats/table_stats.cpp
Expand Up @@ -120,12 +120,10 @@ double TableStats::GetCardinality(const std::string column_name) {
return column_stats->cardinality;
}

// Returns true if we have column stats for a specific column, i.e. if
// col_name is present as a key in col_name_to_stats_map_; false otherwise.
bool TableStats::HasColumnStats(const std::string col_name) {
  // A single membership check; the previous if/return-false/return-true
  // sequence left a second, unreachable return statement behind.
  return col_name_to_stats_map_.find(col_name) != col_name_to_stats_map_.end();
}

std::shared_ptr<ColumnStats> TableStats::GetColumnStats(
Expand Down
21 changes: 11 additions & 10 deletions src/optimizer/stats_calculator.cpp
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// cost_and_stats_calculator.h
// stats_calculator.cpp
//
// Identification: src/optimizer/stats_calculator.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -64,18 +64,19 @@ void StatsCalculator::Visit(const LogicalGet *op) {
AddBaseTableStats(col, table_stats, predicate_stats, false);
}
}
// Use predicates to update the stats accordingly
UpdateStatsForFilter(
table_stats->GetColumnCount() == 0 ? 0 : table_stats->num_rows,
predicate_stats, op->predicates);
// Use predicates to estimate cardinality. If we were unable to find any column stats from the catalog, default to 0
if (table_stats->GetColumnCount() == 0) {
root_group->SetNumRows(0);
} else {
root_group->SetNumRows(EstimateCardinalityForFilter(table_stats->num_rows, predicate_stats, op->predicates));
}
}
// Add the stats to the group
for (auto &column_name_stats_pair : required_stats) {
auto &column_name = column_name_stats_pair.first;
auto &column_stats = column_name_stats_pair.second;
column_stats->num_rows = root_group->GetNumRows();
memo_->GetGroupByID(gexpr_->GetGroupID())
->AddStats(column_name, column_stats);
root_group->AddStats(column_name, column_stats);
}
}

Expand Down Expand Up @@ -233,7 +234,7 @@ void StatsCalculator::AddBaseTableStats(
}
}

void StatsCalculator::UpdateStatsForFilter(
size_t StatsCalculator::EstimateCardinalityForFilter(
size_t num_rows,
std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
&predicate_stats,
Expand All @@ -255,7 +256,7 @@ void StatsCalculator::UpdateStatsForFilter(
annotated_expr.expr.get());
}
// Update selectivity
memo_->GetGroupByID(gexpr_->GetGroupID())->SetNumRows(num_rows * selectivity);
return num_rows * selectivity;
}

// Calculate the selectivity given the predicate and the stats of columns in the
Expand Down
5 changes: 3 additions & 2 deletions src/planner/abstract_plan.cpp
Expand Up @@ -6,7 +6,7 @@
//
// Identification: src/planner/abstract_plan.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -50,7 +50,8 @@ std::ostream &operator<<(std::ostream &os, const AbstractPlan &plan) {
// Builds a one-line textual summary of this plan node: the plan node type
// name, followed by the number of children and the estimated cardinality,
// each in square brackets.
const std::string AbstractPlan::GetInfo() const {
  std::ostringstream os;
  // One continuous insertion chain: the NumChildren field must appear exactly
  // once and the statement must not be terminated before the cardinality
  // field is appended.
  os << PlanNodeTypeToString(GetPlanNodeType())
     << " [NumChildren=" << children_.size() << "]"
     << " [Estimated Cardinality=" << GetCardinality() << "]";
  return os.str();
}

Expand Down
6 changes: 4 additions & 2 deletions test/CMakeLists.txt
Expand Up @@ -47,8 +47,9 @@ set(TESTING_UTIL_TXN ${PROJECT_SOURCE_DIR}/test/concurrency/testing_trans
set(TESTING_UTIL_STATS ${PROJECT_SOURCE_DIR}/test/statistics/testing_stats_util.cpp)
set(TESTING_UTIL_SQL ${PROJECT_SOURCE_DIR}/test/sql/testing_sql_util.cpp)
set(TESTING_UTIL_INDEX ${PROJECT_SOURCE_DIR}/test/index/testing_index_util.cpp)
set(TESTING_UTIL_CODEGEN ${PROJECT_SOURCE_DIR}/test/codegen/testing_codegen_util.cpp)
set(TESTING_UTIL_FORECAST ${PROJECT_SOURCE_DIR}/test/brain/testing_forecast_util.cpp)
set(TESTING_UTIL_CODEGEN ${PROJECT_SOURCE_DIR}/test/codegen/testing_codegen_util.cpp)
set(TESTING_UTIL_FORECAST ${PROJECT_SOURCE_DIR}/test/brain/testing_forecast_util.cpp)
set(TESTING_UTIL_OPTIMIZER ${PROJECT_SOURCE_DIR}/test/optimizer/optimizer_test_util.cpp)

add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS}
${TESTING_UTIL_EXECUTOR}
Expand All @@ -60,6 +61,7 @@ add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS}
${TESTING_UTIL_SQL}
${TESTING_UTIL_CODEGEN}
${TESTING_UTIL_FORECAST}
${TESTING_UTIL_OPTIMIZER}
)

# --[ Add "make check" target
Expand Down
45 changes: 45 additions & 0 deletions test/optimizer/cardinality_test.cpp
@@ -0,0 +1,45 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// cardinality_test.cpp
//
// Identification: test/optimizer/cardinality_test.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "optimizer_test_util.cpp"
#include <chrono>


namespace peloton {
namespace test {

class CardinalityTest : public OptimizerTestUtil {};

TEST_F(CardinalityTest, EstimatedCardinalityTest) {

const std::string test_table_name = "testtable";
const int num_rows = 10;
OptimizerTestUtil::CreateTable(test_table_name, num_rows);

auto plan = GeneratePlan("SELECT a from " + test_table_name + ";");

EXPECT_EQ(num_rows, plan->GetCardinality());
}

TEST_F(CardinalityTest, EstimatedCardinalityTestWithPredicate) {

const std::string test_table_name = "testtable";
const int num_rows = 10;
OptimizerTestUtil::CreateTable(test_table_name, num_rows);

auto plan = GeneratePlan("SELECT a from " + test_table_name + " WHERE " + "a < 10;");

EXPECT_GE(num_rows, plan->GetCardinality());
}

}
}

0 comments on commit 1fc8b55

Please sign in to comment.