Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Commit

Permalink
Add cardinality estimate to AbstractPlan object (#1475)
Browse files Browse the repository at this point in the history
* Clarified variable name

* Added cardinality and test

* Add optimizer testing class

* Remove debugging code

* Revert default estimate to fix broken test

* Formatting

* Removed unnecessary override in Cardinality test

* More comments and clean up tests
  • Loading branch information
GustavoAngulo authored and apavlo committed Sep 18, 2018
1 parent e738acb commit 1fc8b55
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 49 deletions.
2 changes: 1 addition & 1 deletion src/include/optimizer/optimizer_task.h
Expand Up @@ -211,7 +211,7 @@ class OptimizeInputs : public OptimizerTask {
GroupExpression *group_expr_; GroupExpression *group_expr_;
double cur_total_cost_; double cur_total_cost_;
int cur_child_idx_ = -1; int cur_child_idx_ = -1;
int pre_child_idx_ = -1; int prev_child_idx_ = -1;
int cur_prop_pair_idx_ = 0; int cur_prop_pair_idx_ = 0;
}; };


Expand Down
9 changes: 5 additions & 4 deletions src/include/optimizer/plan_generator.h
Expand Up @@ -2,11 +2,11 @@
// //
// Peloton // Peloton
// //
// operator_to_plan_transformer.h // plan_generator.h
// //
// Identification: src/include/optimizer/operator_to_plan_transformer.h // Identification: src/include/optimizer/plan_generator.h
// //
// Copyright (c) 2015-16, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//


Expand Down Expand Up @@ -46,7 +46,8 @@ class PlanGenerator : public OperatorVisitor {
std::vector<expression::AbstractExpression *> required_cols, std::vector<expression::AbstractExpression *> required_cols,
std::vector<expression::AbstractExpression *> output_cols, std::vector<expression::AbstractExpression *> output_cols,
std::vector<std::unique_ptr<planner::AbstractPlan>> &children_plans, std::vector<std::unique_ptr<planner::AbstractPlan>> &children_plans,
std::vector<ExprMap> children_expr_map); std::vector<ExprMap> children_expr_map,
int estimated_cardinality);


void Visit(const DummyScan *) override; void Visit(const DummyScan *) override;


Expand Down
8 changes: 4 additions & 4 deletions src/include/optimizer/stats_calculator.h
Expand Up @@ -2,11 +2,11 @@
// //
// Peloton // Peloton
// //
// cost_and_stats_calculator.h // stats_calculator.h
// //
// Identification: src/include/optimizer/stats_calculator.h // Identification: src/include/optimizer/stats_calculator.h
// //
// Copyright (c) 2015-16, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//


Expand Down Expand Up @@ -56,13 +56,13 @@ class StatsCalculator : public OperatorVisitor {
std::unordered_map<std::string, std::shared_ptr<ColumnStats>> &stats, std::unordered_map<std::string, std::shared_ptr<ColumnStats>> &stats,
bool copy); bool copy);
/** /**
* @brief Update selectivity for predicate evaluation * @brief Return estimated cardinality for a filter
* *
* @param num_rows Number of rows of base table * @param num_rows Number of rows of base table
* @param predicate_stats The stats for columns in the expression * @param predicate_stats The stats for columns in the expression
* @param predicates conjunction predicates * @param predicates conjunction predicates
*/ */
void UpdateStatsForFilter( size_t EstimateCardinalityForFilter(
size_t num_rows, size_t num_rows,
std::unordered_map<std::string, std::shared_ptr<ColumnStats>> std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
&predicate_stats, &predicate_stats,
Expand Down
8 changes: 3 additions & 5 deletions src/include/planner/abstract_plan.h
Expand Up @@ -85,8 +85,8 @@ class AbstractPlan : public Printable {
// Get the estimated cardinality of this plan // Get the estimated cardinality of this plan
int GetCardinality() const { return estimated_cardinality_; } int GetCardinality() const { return estimated_cardinality_; }


// TODO: This is only for testing now. When the optimizer is ready, we should // FOR TESTING ONLY. This function should only be called during construction of plan (ConvertOpExpression) or
// delete this function and pass this information to constructor // for tests.
void SetCardinality(int cardinality) { estimated_cardinality_ = cardinality; } void SetCardinality(int cardinality) { estimated_cardinality_ = cardinality; }


//===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -152,9 +152,7 @@ class AbstractPlan : public Printable {
std::vector<std::unique_ptr<AbstractPlan>> children_; std::vector<std::unique_ptr<AbstractPlan>> children_;


AbstractPlan *parent_ = nullptr; AbstractPlan *parent_ = nullptr;


// TODO: This field is harded coded now. This needs to be changed when
// optimizer has the cost model and cardinality estimation
int estimated_cardinality_ = 500000; int estimated_cardinality_ = 500000;


private: private:
Expand Down
8 changes: 4 additions & 4 deletions src/optimizer/child_stats_deriver.cpp
Expand Up @@ -2,11 +2,11 @@
// //
// Peloton // Peloton
// //
// cost_and_stats_calculator.h // child_stats_deriver.cpp
// //
// Identification: src/optimizer/stats_calculator.cpp // Identification: src/optimizer/child_stats_deriver.cpp
// //
// Copyright (c) 2015-16, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//


Expand Down Expand Up @@ -67,7 +67,7 @@ void ChildStatsDeriver::PassDownColumn(expression::AbstractExpression *col) {
auto child_group = memo_->GetGroupByID(gexpr_->GetChildGroupId(idx)); auto child_group = memo_->GetGroupByID(gexpr_->GetChildGroupId(idx));
if (child_group->GetTableAliases().count(tv_expr->GetTableName()) && if (child_group->GetTableAliases().count(tv_expr->GetTableName()) &&
// If we have not derived the column stats yet // If we have not derived the column stats yet
child_group->HasColumnStats(tv_expr->GetColFullName())) { !child_group->HasColumnStats(tv_expr->GetColFullName())) {
output_[idx].insert(col); output_[idx].insert(col);
break; break;
} }
Expand Down
5 changes: 2 additions & 3 deletions src/optimizer/optimizer.cpp
Expand Up @@ -6,7 +6,7 @@
// //
// Identification: src/optimizer/optimizer.cpp // Identification: src/optimizer/optimizer.cpp
// //
// Copyright (c) 2015-16, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//


Expand Down Expand Up @@ -366,8 +366,7 @@ unique_ptr<planner::AbstractPlan> Optimizer::ChooseBestPlan(
PlanGenerator generator; PlanGenerator generator;
auto plan = generator.ConvertOpExpression(op, required_props, required_cols, auto plan = generator.ConvertOpExpression(op, required_props, required_cols,
output_cols, children_plans, output_cols, children_plans,
children_expr_map); children_expr_map, group->GetNumRows());

LOG_TRACE("Finish Choosing best plan for group %d", id); LOG_TRACE("Finish Choosing best plan for group %d", id);
return plan; return plan;
} }
Expand Down
13 changes: 6 additions & 7 deletions src/optimizer/optimizer_task.cpp
Expand Up @@ -2,7 +2,7 @@
// //
// Peloton // Peloton
// //
// rule.h // optimizer_task.cpp
// //
// Identification: src/optimizer/optimizer_task.cpp // Identification: src/optimizer/optimizer_task.cpp
// //
Expand Down Expand Up @@ -313,14 +313,13 @@ void OptimizeInputs::execute() {
cur_total_cost_ += child_best_expr->GetCost(i_prop); cur_total_cost_ += child_best_expr->GetCost(i_prop);
// Pruning // Pruning
if (cur_total_cost_ > context_->cost_upper_bound) break; if (cur_total_cost_ > context_->cost_upper_bound) break;
} else if (pre_child_idx_ != } else if (prev_child_idx_ !=
cur_child_idx_) { // First time to optimize child group cur_child_idx_) { // We haven't optimized child group
pre_child_idx_ = cur_child_idx_; prev_child_idx_ = cur_child_idx_;
PushTask(new OptimizeInputs(this)); PushTask(new OptimizeInputs(this));
PushTask(new OptimizeGroup( PushTask(new OptimizeGroup(
child_group, std::make_shared<OptimizeContext>( child_group, std::make_shared<OptimizeContext>(
context_->metadata, i_prop, context_->metadata, i_prop, context_->cost_upper_bound - cur_total_cost_)));
context_->cost_upper_bound - cur_total_cost_)));
return; return;
} else { // If we return from OptimizeGroup, then there is no expr for } else { // If we return from OptimizeGroup, then there is no expr for
// the context // the context
Expand Down Expand Up @@ -401,7 +400,7 @@ void OptimizeInputs::execute() {
} }


// Reset child idx and total cost // Reset child idx and total cost
pre_child_idx_ = -1; prev_child_idx_ = -1;
cur_child_idx_ = 0; cur_child_idx_ = 0;
cur_total_cost_ = 0; cur_total_cost_ = 0;
} }
Expand Down
8 changes: 5 additions & 3 deletions src/optimizer/plan_generator.cpp
Expand Up @@ -2,9 +2,9 @@
// //
// Peloton // Peloton
// //
// operator_to_plan_transformer.cpp // plan_generator.cpp
// //
// Identification: src/optimizer/operator_to_plan_transformer.cpp // Identification: src/optimizer/plan_generator.cpp
// //
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
Expand Down Expand Up @@ -59,14 +59,16 @@ unique_ptr<planner::AbstractPlan> PlanGenerator::ConvertOpExpression(
vector<expression::AbstractExpression *> required_cols, vector<expression::AbstractExpression *> required_cols,
vector<expression::AbstractExpression *> output_cols, vector<expression::AbstractExpression *> output_cols,
vector<unique_ptr<planner::AbstractPlan>> &children_plans, vector<unique_ptr<planner::AbstractPlan>> &children_plans,
vector<ExprMap> children_expr_map) { vector<ExprMap> children_expr_map,
int estimated_cardinality) {
required_props_ = move(required_props); required_props_ = move(required_props);
required_cols_ = move(required_cols); required_cols_ = move(required_cols);
output_cols_ = move(output_cols); output_cols_ = move(output_cols);
children_plans_ = move(children_plans); children_plans_ = move(children_plans);
children_expr_map_ = move(children_expr_map); children_expr_map_ = move(children_expr_map);
op->Op().Accept(this); op->Op().Accept(this);
BuildProjectionPlan(); BuildProjectionPlan();
output_plan_->SetCardinality(estimated_cardinality);
return move(output_plan_); return move(output_plan_);
} }


Expand Down
6 changes: 2 additions & 4 deletions src/optimizer/stats/table_stats.cpp
Expand Up @@ -120,12 +120,10 @@ double TableStats::GetCardinality(const std::string column_name) {
return column_stats->cardinality; return column_stats->cardinality;
} }


// Returns true if we have column stats for a specific column
bool TableStats::HasColumnStats(const std::string col_name) { bool TableStats::HasColumnStats(const std::string col_name) {
auto it = col_name_to_stats_map_.find(col_name); auto it = col_name_to_stats_map_.find(col_name);
if (it == col_name_to_stats_map_.end()) { return it != col_name_to_stats_map_.end();
return false;
}
return true;
} }


std::shared_ptr<ColumnStats> TableStats::GetColumnStats( std::shared_ptr<ColumnStats> TableStats::GetColumnStats(
Expand Down
21 changes: 11 additions & 10 deletions src/optimizer/stats_calculator.cpp
Expand Up @@ -2,11 +2,11 @@
// //
// Peloton // Peloton
// //
// cost_and_stats_calculator.h // stats_calculator.cpp
// //
// Identification: src/optimizer/stats_calculator.cpp // Identification: src/optimizer/stats_calculator.cpp
// //
// Copyright (c) 2015-16, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//


Expand Down Expand Up @@ -64,18 +64,19 @@ void StatsCalculator::Visit(const LogicalGet *op) {
AddBaseTableStats(col, table_stats, predicate_stats, false); AddBaseTableStats(col, table_stats, predicate_stats, false);
} }
} }
// Use predicates to update the stats accordingly // Use predicates to estimate cardinality. If we were unable to find any column stats from the catalog, default to 0
UpdateStatsForFilter( if (table_stats->GetColumnCount() == 0) {
table_stats->GetColumnCount() == 0 ? 0 : table_stats->num_rows, root_group->SetNumRows(0);
predicate_stats, op->predicates); } else {
root_group->SetNumRows(EstimateCardinalityForFilter(table_stats->num_rows, predicate_stats, op->predicates));
}
} }
// Add the stats to the group // Add the stats to the group
for (auto &column_name_stats_pair : required_stats) { for (auto &column_name_stats_pair : required_stats) {
auto &column_name = column_name_stats_pair.first; auto &column_name = column_name_stats_pair.first;
auto &column_stats = column_name_stats_pair.second; auto &column_stats = column_name_stats_pair.second;
column_stats->num_rows = root_group->GetNumRows(); column_stats->num_rows = root_group->GetNumRows();
memo_->GetGroupByID(gexpr_->GetGroupID()) root_group->AddStats(column_name, column_stats);
->AddStats(column_name, column_stats);
} }
} }


Expand Down Expand Up @@ -233,7 +234,7 @@ void StatsCalculator::AddBaseTableStats(
} }
} }


void StatsCalculator::UpdateStatsForFilter( size_t StatsCalculator::EstimateCardinalityForFilter(
size_t num_rows, size_t num_rows,
std::unordered_map<std::string, std::shared_ptr<ColumnStats>> std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
&predicate_stats, &predicate_stats,
Expand All @@ -255,7 +256,7 @@ void StatsCalculator::UpdateStatsForFilter(
annotated_expr.expr.get()); annotated_expr.expr.get());
} }
// Update selectivity // Update selectivity
memo_->GetGroupByID(gexpr_->GetGroupID())->SetNumRows(num_rows * selectivity); return num_rows * selectivity;
} }


// Calculate the selectivity given the predicate and the stats of columns in the // Calculate the selectivity given the predicate and the stats of columns in the
Expand Down
5 changes: 3 additions & 2 deletions src/planner/abstract_plan.cpp
Expand Up @@ -6,7 +6,7 @@
// //
// Identification: src/planner/abstract_plan.cpp // Identification: src/planner/abstract_plan.cpp
// //
// Copyright (c) 2015-16, Carnegie Mellon University Database Group // Copyright (c) 2015-2018, Carnegie Mellon University Database Group
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//


Expand Down Expand Up @@ -50,7 +50,8 @@ std::ostream &operator<<(std::ostream &os, const AbstractPlan &plan) {
const std::string AbstractPlan::GetInfo() const { const std::string AbstractPlan::GetInfo() const {
std::ostringstream os; std::ostringstream os;
os << PlanNodeTypeToString(GetPlanNodeType()) os << PlanNodeTypeToString(GetPlanNodeType())
<< " [NumChildren=" << children_.size() << "]"; << " [NumChildren=" << children_.size() << "]"
<< " [Estimated Cardinality=" << GetCardinality() << "]";
return os.str(); return os.str();
} }


Expand Down
6 changes: 4 additions & 2 deletions test/CMakeLists.txt
Expand Up @@ -47,8 +47,9 @@ set(TESTING_UTIL_TXN ${PROJECT_SOURCE_DIR}/test/concurrency/testing_trans
set(TESTING_UTIL_STATS ${PROJECT_SOURCE_DIR}/test/statistics/testing_stats_util.cpp) set(TESTING_UTIL_STATS ${PROJECT_SOURCE_DIR}/test/statistics/testing_stats_util.cpp)
set(TESTING_UTIL_SQL ${PROJECT_SOURCE_DIR}/test/sql/testing_sql_util.cpp) set(TESTING_UTIL_SQL ${PROJECT_SOURCE_DIR}/test/sql/testing_sql_util.cpp)
set(TESTING_UTIL_INDEX ${PROJECT_SOURCE_DIR}/test/index/testing_index_util.cpp) set(TESTING_UTIL_INDEX ${PROJECT_SOURCE_DIR}/test/index/testing_index_util.cpp)
set(TESTING_UTIL_CODEGEN ${PROJECT_SOURCE_DIR}/test/codegen/testing_codegen_util.cpp) set(TESTING_UTIL_CODEGEN ${PROJECT_SOURCE_DIR}/test/codegen/testing_codegen_util.cpp)
set(TESTING_UTIL_FORECAST ${PROJECT_SOURCE_DIR}/test/brain/testing_forecast_util.cpp) set(TESTING_UTIL_FORECAST ${PROJECT_SOURCE_DIR}/test/brain/testing_forecast_util.cpp)
set(TESTING_UTIL_OPTIMIZER ${PROJECT_SOURCE_DIR}/test/optimizer/optimizer_test_util.cpp)


add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS} add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS}
${TESTING_UTIL_EXECUTOR} ${TESTING_UTIL_EXECUTOR}
Expand All @@ -60,6 +61,7 @@ add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS}
${TESTING_UTIL_SQL} ${TESTING_UTIL_SQL}
${TESTING_UTIL_CODEGEN} ${TESTING_UTIL_CODEGEN}
${TESTING_UTIL_FORECAST} ${TESTING_UTIL_FORECAST}
${TESTING_UTIL_OPTIMIZER}
) )


# --[ Add "make check" target # --[ Add "make check" target
Expand Down
45 changes: 45 additions & 0 deletions test/optimizer/cardinality_test.cpp
@@ -0,0 +1,45 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// cardinality_test.cpp
//
// Identification: test/optimizer/cardinality_test.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "optimizer_test_util.cpp"
#include <chrono>


namespace peloton {
namespace test {

/**
 * @brief Fixture for the cardinality-estimation tests.
 *
 * Adds nothing of its own; it only gives the tests in this file access to
 * the helpers they call on OptimizerTestUtil (CreateTable, GeneratePlan).
 */
class CardinalityTest : public OptimizerTestUtil {
};

/**
 * @brief Checks the cardinality estimate attached to the plan of an
 * unfiltered single-column SELECT.
 *
 * A table is set up through OptimizerTestUtil::CreateTable with a requested
 * size of num_rows (presumably the number of tuples inserted -- confirm
 * against the helper), and the generated plan must report exactly that value.
 */
TEST_F(CardinalityTest, EstimatedCardinalityTest) {
  const int num_rows = 10;
  const std::string table_name = "testtable";

  OptimizerTestUtil::CreateTable(table_name, num_rows);

  // No WHERE clause, so the expected estimate is the full requested size.
  auto plan = GeneratePlan("SELECT a from " + table_name + ";");
  EXPECT_EQ(num_rows, plan->GetCardinality());
}

/**
 * @brief Checks the cardinality estimate attached to the plan of a SELECT
 * that carries a WHERE predicate.
 *
 * The table is created the same way as in the unfiltered test. Here the test
 * only requires that the estimate does not exceed num_rows; it does not pin
 * an exact value for the filtered estimate.
 */
TEST_F(CardinalityTest, EstimatedCardinalityTestWithPredicate) {
  const int num_rows = 10;
  const std::string table_name = "testtable";

  OptimizerTestUtil::CreateTable(table_name, num_rows);

  // Upper bound only: num_rows >= estimated cardinality of the filtered plan.
  auto plan =
      GeneratePlan("SELECT a from " + table_name + " WHERE " + "a < 10;");
  EXPECT_GE(num_rows, plan->GetCardinality());
}

}
}

0 comments on commit 1fc8b55

Please sign in to comment.