Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Commit

Permalink
Merge f4d4e8f into 484d76d
Browse files Browse the repository at this point in the history
  • Loading branch information
newtoncx committed Apr 9, 2019
2 parents 484d76d + f4d4e8f commit c326654
Show file tree
Hide file tree
Showing 45 changed files with 1,666 additions and 619 deletions.
4 changes: 4 additions & 0 deletions src/include/common/internal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,10 @@ enum class RuleType : uint32_t {
PULL_FILTER_THROUGH_MARK_JOIN,
PULL_FILTER_THROUGH_AGGREGATION,

// AST rewrite rules (logical -> logical)
// Removes ConstantValueExpression = ConstantValueExpression
COMP_EQUALITY_ELIMINATION,

// Placeholder used to derive the number of rules at compile time
NUM_RULES

Expand Down
183 changes: 183 additions & 0 deletions src/include/optimizer/absexpr_expression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// absexpr_expression.h
//
// Identification: src/include/optimizer/absexpr_expression.h
//
//===----------------------------------------------------------------------===//

#pragma once

// AbstractExpression Definition
#include "expression/abstract_expression.h"
#include "expression/conjunction_expression.h"
#include "expression/comparison_expression.h"
#include "expression/constant_value_expression.h"

#include <memory>
#include <vector>

namespace peloton {
namespace optimizer {

// (TODO): rethink the AbsExpr_Container/Expression approach in comparison to abstract
// Most of the core rule/optimizer code relies on the concept of an Operator /
// OperatorExpression and the interface that the two functions respectively expose.
//
// The annoying part is that an AbstractExpression blends together an Operator
// and an OperatorExpression. Secondly, the AbstractExpression does not export the
// interface that the rest of the system depends on.
//
// As an extreme level of simplification (sort of hacky), an AbsExpr_Container is
// analogous to Operator and wraps a single AbstractExpression node. AbsExpr_Expression
// is analogous to OperatorExpression.
//
// AbsExpr_Container does *not* handle memory correctly w.r.t. internal instantiations
// from Rule transformation. This is because Peloton itself mixes unique_ptrs and
// hands out raw pointers, which makes adding a shared_ptr here extremely problematic.
// terrier uses only shared_ptr when dealing with AbstractExpression trees.

// Thin wrapper around a single AbstractExpression node that exposes the
// Operator-like interface (GetType/GetName/Hash/IsDefined/...) used by the
// templated optimizer code. The wrapped pointer is stored as-is; this class
// never deletes it.
class AbsExpr_Container {
 public:
  // Constructs a container without a wrapped node. Only declared in this
  // header; `node` defaults to nullptr through its member initializer.
  AbsExpr_Container();

  // Wraps `expr`. The pointer is stored, not copied or owned.
  AbsExpr_Container(const expression::AbstractExpression *expr) : node(expr) {}

  // Returns the expression type of the wrapped node, or
  // ExpressionType::INVALID when no node is wrapped.
  ExpressionType GetType() const {
    if (IsDefined()) {
      return node->GetExpressionType();
    }
    return ExpressionType::INVALID;
  }

  // Returns the wrapped node (nullptr when undefined).
  const expression::AbstractExpression *GetExpr() const {
    return node;
  }

  // Operator contains Logical node. Always true for this container.
  bool IsLogical() const {
    return true;
  }

  // Operator contains Physical node. Always false for this container.
  bool IsPhysical() const {
    return false;
  }

  // Returns the wrapped node's expression name, or "Undefined" when no node
  // is wrapped.
  std::string GetName() const {
    if (IsDefined()) {
      return node->GetExpressionName();
    }

    return "Undefined";
  }

  // Returns the wrapped node's hash, or 0 when no node is wrapped.
  hash_t Hash() const {
    if (IsDefined()) {
      return node->Hash();
    }
    return 0;
  }

  // Equality between containers.
  //
  // Two *undefined* containers compare equal. Every other combination
  // (including two defined containers) compares unequal.
  //
  // (TODO): need a better way to determine deep equality
  //
  // NOTE:
  // Without proper equality determinations, the groups will not be assigned
  // correctly. Arguably, terrier does this better because a blind
  // ExactlyEquals on different types of ConstantValueExpression under
  // Peloton will crash!
  //
  // For now, two defined containers are never considered equal. This is not
  // anticipated to affect correctness, just performance, since duplicate
  // trees will have to be evaluated over and over again, rather than being
  // able to "borrow" a previous tree's rewrite.
  //
  // Probably not worth creating a "validator" since this is being ported to
  // terrier anyways (?). == does not check Value so it's broken.
  // ExactlyEqual requires precondition checking.
  //
  // Declared const so that it can be invoked on const containers (e.g. the
  // reference returned by AbsExpr_Expression::Op()).
  bool operator==(const AbsExpr_Container &r) const {
    return !IsDefined() && !r.IsDefined();
  }

  // Operator contains physical or logical operator node
  bool IsDefined() const {
    return node != nullptr;
  }

  // Builds a new expression of the same type as the wrapped node on top of
  // the supplied `children`.
  //
  //  - comparison types:  expects exactly 2 children -> ComparisonExpression
  //  - AND / OR:          expects exactly 2 children -> ConjunctionExpression
  //  - VALUE_CONSTANT:    expects 0 children -> ConstantValueExpression built
  //                       from the wrapped node's value
  //  - anything else (including an undefined container, whose type is
  //    INVALID): logs an error and returns nullptr
  //
  // The child-count expectations are enforced with PELOTON_ASSERT only.
  //
  // The result is allocated with a raw `new` and is not tracked by this
  // class, so the caller is responsible for it.
  // NOTE(review): whether the created expression takes ownership of
  // `children` depends on the expression constructors -- confirm.
  //
  // Declared const: it does not modify the container, and the container is
  // typically reached through a const reference.
  //
  //(TODO): fix memory management once go to terrier
  expression::AbstractExpression *Rebuild(std::vector<expression::AbstractExpression*> children) const {
    switch (GetType()) {
      case ExpressionType::COMPARE_EQUAL:
      case ExpressionType::COMPARE_NOTEQUAL:
      case ExpressionType::COMPARE_LESSTHAN:
      case ExpressionType::COMPARE_GREATERTHAN:
      case ExpressionType::COMPARE_LESSTHANOREQUALTO:
      case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
      case ExpressionType::COMPARE_LIKE:
      case ExpressionType::COMPARE_NOTLIKE:
      case ExpressionType::COMPARE_IN:
      case ExpressionType::COMPARE_DISTINCT_FROM: {
        PELOTON_ASSERT(children.size() == 2);
        return new expression::ComparisonExpression(GetType(), children[0], children[1]);
      }
      case ExpressionType::CONJUNCTION_AND:
      case ExpressionType::CONJUNCTION_OR: {
        PELOTON_ASSERT(children.size() == 2);
        return new expression::ConjunctionExpression(GetType(), children[0], children[1]);
      }
      case ExpressionType::VALUE_CONSTANT: {
        PELOTON_ASSERT(children.size() == 0);
        // The type check above guarantees the node reports VALUE_CONSTANT,
        // which is what justifies the downcast.
        auto cve = static_cast<const expression::ConstantValueExpression*>(node);
        return new expression::ConstantValueExpression(cve->GetValue());
      }
      default: {
        int type = static_cast<int>(GetType());
        LOG_ERROR("Unimplemented Rebuild() for %d found", type);
        return nullptr;
      }
    }
  }

 private:
  // Wrapped expression node; nullptr means "undefined".
  const expression::AbstractExpression *node = nullptr;
};

// Tree node pairing one AbsExpr_Container (the "operator") with an ordered
// list of child AbsExpr_Expression nodes held through shared_ptr.
class AbsExpr_Expression {
 public:
  // Creates a node holding a copy of `op` and no children.
  AbsExpr_Expression(AbsExpr_Container op) : op(op) {}

  // Appends `op` as the last child of this node.
  void PushChild(std::shared_ptr<AbsExpr_Expression> op) {
    children.emplace_back(op);
  }

  // Removes the last child. The child list must not be empty.
  void PopChild() { children.pop_back(); }

  // Read-only access to the children, in insertion order.
  const std::vector<std::shared_ptr<AbsExpr_Expression>> &Children() const {
    return children;
  }

  // Read-only access to the container held by this node.
  const AbsExpr_Container &Op() const { return op; }

 private:
  // Operator-like wrapper for this node's expression.
  AbsExpr_Container op;
  // Child nodes of this expression.
  std::vector<std::shared_ptr<AbsExpr_Expression>> children;
};

} // namespace optimizer
} // namespace peloton

48 changes: 28 additions & 20 deletions src/include/optimizer/binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,63 +24,71 @@ namespace peloton {
namespace optimizer {

class Optimizer;

template <class Node, class OperatorType, class OperatorExpr>
class Memo;

//===--------------------------------------------------------------------===//
// Binding Iterator
//===--------------------------------------------------------------------===//
template <class Node, class OperatorType, class OperatorExpr>
class BindingIterator {
public:
BindingIterator(Memo& memo) : memo_(memo) {}
BindingIterator(Memo<Node,OperatorType,OperatorExpr>& memo) : memo_(memo) {}

virtual ~BindingIterator(){};

virtual bool HasNext() = 0;

virtual std::shared_ptr<OperatorExpression> Next() = 0;
virtual std::shared_ptr<OperatorExpr> Next() = 0;

protected:
Memo &memo_;
Memo<Node,OperatorType,OperatorExpr> &memo_;
};

class GroupBindingIterator : public BindingIterator {
template <class Node, class OperatorType, class OperatorExpr>
class GroupBindingIterator : public BindingIterator<Node,OperatorType,OperatorExpr> {
public:
GroupBindingIterator(Memo& memo, GroupID id,
std::shared_ptr<Pattern> pattern);
GroupBindingIterator(Memo<Node,OperatorType,OperatorExpr>& memo,
GroupID id,
std::shared_ptr<Pattern<OperatorType>> pattern);

bool HasNext() override;

std::shared_ptr<OperatorExpression> Next() override;
std::shared_ptr<OperatorExpr> Next() override;

private:
GroupID group_id_;
std::shared_ptr<Pattern> pattern_;
Group *target_group_;
std::shared_ptr<Pattern<OperatorType>> pattern_;
Group<Node,OperatorType,OperatorExpr> *target_group_;
size_t num_group_items_;

// Internal function for HasNext()
bool HasNextBinding();

size_t current_item_index_;
std::unique_ptr<BindingIterator> current_iterator_;
std::unique_ptr<BindingIterator<Node,OperatorType,OperatorExpr>> current_iterator_;
};

class GroupExprBindingIterator : public BindingIterator {
template <class Node, class OperatorType, class OperatorExpr>
class GroupExprBindingIterator : public BindingIterator<Node,OperatorType,OperatorExpr> {
public:
GroupExprBindingIterator(Memo& memo,
GroupExpression *gexpr,
std::shared_ptr<Pattern> pattern);
GroupExprBindingIterator(Memo<Node,OperatorType,OperatorExpr>& memo,
GroupExpression<Node,OperatorType,OperatorExpr> *gexpr,
std::shared_ptr<Pattern<OperatorType>> pattern);

bool HasNext() override;

std::shared_ptr<OperatorExpression> Next() override;
std::shared_ptr<OperatorExpr> Next() override;

private:
GroupExpression* gexpr_;
std::shared_ptr<Pattern> pattern_;
GroupExpression<Node,OperatorType,OperatorExpr>* gexpr_;
std::shared_ptr<Pattern<OperatorType>> pattern_;

bool first_;
bool has_next_;
std::shared_ptr<OperatorExpression> current_binding_;
std::vector<std::vector<std::shared_ptr<OperatorExpression>>>
children_bindings_;
std::shared_ptr<OperatorExpr> current_binding_;
std::vector<std::vector<std::shared_ptr<OperatorExpr>>> children_bindings_;
std::vector<size_t> children_bindings_pos_;
};

Expand Down
12 changes: 8 additions & 4 deletions src/include/optimizer/child_property_deriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
#pragma once
#include <memory>
#include "optimizer/operator_visitor.h"
#include "optimizer/operator_expression.h"

namespace peloton {

namespace optimizer {
template <class Node, class OpType, class OperatorExpr>
class Memo;
}

Expand All @@ -33,8 +35,10 @@ class ChildPropertyDeriver : public OperatorVisitor {
public:
std::vector<std::pair<std::shared_ptr<PropertySet>,
std::vector<std::shared_ptr<PropertySet>>>>
GetProperties(GroupExpression *gexpr,
std::shared_ptr<PropertySet> requirements, Memo *memo);

GetProperties(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
std::shared_ptr<PropertySet> requirements,
Memo<Operator,OpType,OperatorExpression> *memo);

void Visit(const DummyScan *) override;
void Visit(const PhysicalSeqScan *) override;
Expand Down Expand Up @@ -74,8 +78,8 @@ class ChildPropertyDeriver : public OperatorVisitor {
* @brief We need the memo and gexpr because some property may depend on
* child's schema
*/
Memo *memo_;
GroupExpression *gexpr_;
Memo<Operator,OpType,OperatorExpression> *memo_;
GroupExpression<Operator,OpType,OperatorExpression> *gexpr_;
};

} // namespace optimizer
Expand Down
5 changes: 4 additions & 1 deletion src/include/optimizer/cost_model/abstract_cost_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
#pragma once

#include "optimizer/operator_visitor.h"
#include "optimizer/operator_expression.h"

namespace peloton {
namespace optimizer {

template <class Node, class OperatorType, class OperatorExpr>
class Memo;

// Default cost when cost model cannot compute correct cost.
Expand All @@ -34,7 +36,8 @@ static constexpr double DEFAULT_OPERATOR_COST = 0.0025;

class AbstractCostModel : public OperatorVisitor {
public:
virtual double CalculateCost(GroupExpression *gexpr, Memo *memo,
virtual double CalculateCost(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
Memo<Operator,OpType,OperatorExpression> *memo,
concurrency::TransactionContext *txn) = 0;
};

Expand Down
11 changes: 7 additions & 4 deletions src/include/optimizer/cost_model/default_cost_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@
namespace peloton {
namespace optimizer {

template <class Node, class OperatorType, class OperatorExpr>
class Memo;

// Derive cost for a physical group expression
class DefaultCostModel : public AbstractCostModel {
public:
DefaultCostModel(){};

double CalculateCost(GroupExpression *gexpr, Memo *memo,
concurrency::TransactionContext *txn) {
double CalculateCost(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
Memo<Operator,OpType,OperatorExpression> *memo,
concurrency::TransactionContext *txn) {
gexpr_ = gexpr;
memo_ = memo;
txn_ = txn;
Expand Down Expand Up @@ -151,8 +154,8 @@ class DefaultCostModel : public AbstractCostModel {
return child_num_rows * DEFAULT_TUPLE_COST;
}

GroupExpression *gexpr_;
Memo *memo_;
GroupExpression<Operator,OpType,OperatorExpression> *gexpr_;
Memo<Operator,OpType,OperatorExpression> *memo_;
concurrency::TransactionContext *txn_;
double output_cost_ = 0;
};
Expand Down

0 comments on commit c326654

Please sign in to comment.