-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Minus #340
Minus #340
Changes from 15 commits
6064447
66274a5
fd630ae
5a51e14
303d321
8f33bd7
8b11c0e
fc34aaa
fe25ef6
aac286f
149c1d6
532d17b
9b34c48
db5c96f
387c563
6c6543f
4ca96a7
9337b9a
aa3100b
d6c7f36
c11f208
8f2d24e
c8a40d5
c94c622
b790137
d296ea1
d7dacb1
46fd433
642a6fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
// Copyright 2018, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de) | ||
|
||
#include "Minus.h" | ||
|
||
#include "../util/Exception.h" | ||
#include "CallFixedSize.h" | ||
|
||
using std::string; | ||
|
||
// _____________________________________________________________________________ | ||
/**
 * Creates a MINUS operation over two subtrees.
 *
 * @param qec Execution context that is forwarded to the Operation base class.
 * @param left Subtree whose rows are kept unless a row of `right` matches.
 * @param right Subtree whose rows are removed from `left`.
 * @param matchedColumns Pairs {column in left, column in right} on which the
 *        rows of the two inputs are compared.
 */
Minus::Minus(QueryExecutionContext* qec,
             std::shared_ptr<QueryExecutionTree> left,
             std::shared_ptr<QueryExecutionTree> right,
             const std::vector<array<size_t, 2>>& matchedColumns)
    : Operation(qec),
      _left(left),
      _right(right),
      // Previously left uninitialized; give it a defined value so that it can
      // never be read as an indeterminate value.
      _sizeEstimate(0),
      _multiplicitiesComputed(false),
      _matchedColumns(matchedColumns) {}
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// _____________________________________________________________________________ | ||
/**
 * Builds the textual representation of this operation: `indent` spaces, the
 * keyword "MINUS" and then the representations of the left and the right
 * subtree (each rendered with the same `indent`).
 */
string Minus::asString(size_t indent) const {
  std::ostringstream stream;
  // Emit the indentation one blank at a time.
  for (size_t remaining = indent; remaining > 0; --remaining) {
    stream << " ";
  }
  stream << "MINUS\n";
  stream << _left->asString(indent) << "\n";
  stream << _right->asString(indent) << " ";
  return stream.str();
}
|
||
// _____________________________________________________________________________ | ||
/// Short, human-readable name of this operation.
string Minus::getDescriptor() const {
  return "Minus";
}
|
||
// _____________________________________________________________________________ | ||
/**
 * Evaluates both subtrees and stores "left minus right" in `result`.
 *
 * @param result Output table; must not be null (enforced via AD_CHECK).
 */
void Minus::computeResult(ResultTable* result) {
  AD_CHECK(result);
  LOG(DEBUG) << "Minus result computation..." << endl;

  RuntimeInformation& runtimeInfo = getRuntimeInfo();
  // Sort order and width of the output are set before the inputs are
  // evaluated.
  result->_sortedBy = resultSortedOn();
  result->_data.setCols(getResultWidth());

  // Evaluate the left input first, then the right one.
  const auto lhs = _left->getResult();
  const auto rhs = _right->getResult();

  // Register the runtime information of both children with this operation.
  runtimeInfo.addChild(_left->getRootOperation()->getRuntimeInfo());
  runtimeInfo.addChild(_right->getRootOperation()->getRuntimeInfo());

  LOG(DEBUG) << "Minus subresult computation done." << std::endl;

  // The output has exactly the columns of the left input, so the column
  // types are simply taken over from it.
  result->_resultTypes = lhs->_resultTypes;

  LOG(DEBUG) << "Computing minus of results of size " << lhs->size()
             << " and " << rhs->size() << endl;

  // Dispatch to the computeMinus instantiation for the two input widths.
  int widthLeft = lhs->_data.cols();
  int widthRight = rhs->_data.cols();
  CALL_FIXED_SIZE_2(widthLeft, widthRight, computeMinus, lhs->_data,
                    rhs->_data, _matchedColumns, &result->_data);
  LOG(DEBUG) << "Minus result computation done." << endl;
}
|
||
// _____________________________________________________________________________ | ||
/// The variable-to-column mapping is taken over unchanged from the left
/// subtree.
ad_utility::HashMap<string, size_t> Minus::getVariableColumns() const {
  const QueryExecutionTree& leftTree = *_left;
  return leftTree.getVariableColumns();
}
|
||
// _____________________________________________________________________________ | ||
/// The result has as many columns as the result of the left subtree.
size_t Minus::getResultWidth() const {
  return _left->getResultWidth();
}
|
||
// _____________________________________________________________________________ | ||
/// Returns the left-hand column of every matched column pair, in the order in
/// which the pairs are stored; the result is sorted on these columns.
vector<size_t> Minus::resultSortedOn() const {
  std::vector<size_t> leftJoinColumns;
  leftJoinColumns.reserve(_matchedColumns.size());
  for (size_t pairIdx = 0; pairIdx < _matchedColumns.size(); ++pairIdx) {
    // Entry 0 of a pair is the column index in the left subtree.
    leftJoinColumns.push_back(_matchedColumns[pairIdx][0]);
  }
  return leftJoinColumns;
}
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// _____________________________________________________________________________ | ||
float Minus::getMultiplicity(size_t col) { | ||
// This is an upper bound on the multiplicity as an arbitrary number | ||
// of rows might be deleted in this operation. | ||
return _left->getMultiplicity(col); | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
size_t Minus::getSizeEstimate() { | ||
// This is an upper bound on the size as an arbitrary number | ||
// of rows might be deleted in this operation. | ||
return _left->getSizeEstimate(); | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
size_t Minus::getCostEstimate() { | ||
size_t costEstimate = _left->getSizeEstimate() + _right->getSizeEstimate(); | ||
return _left->getCostEstimate() + _right->getCostEstimate() + costEstimate; | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
template <int A_WIDTH, int B_WIDTH> | ||
void Minus::computeMinus(const IdTable& dynA, const IdTable& dynB, | ||
const vector<array<Id, 2>>& joinColumns, | ||
IdTable* dynResult) { | ||
// Substract dynB from dynA. The result should be all result mappings mu | ||
// for which all result mappings mu' in dynB are not compatible (one value | ||
// for a variable defined in both differs) or the domain of mu and mu' are | ||
// disjoint (mu' defines no solution for any variables for which mu defines a | ||
// solution). | ||
|
||
// The output is always the same size as the left input | ||
constexpr int OUT_WIDTH = A_WIDTH; | ||
|
||
// check for trivial cases | ||
if (dynA.size() == 0) { | ||
return; | ||
} | ||
|
||
if (dynB.size() == 0 || joinColumns.size() == 0) { | ||
// B is the empty set of solution mappings, so the result is A | ||
// Copy a into the result, allowing for optimizations for small width by | ||
// using the templated width types. | ||
// TODO<joka921> Check, if this direct assignment is efficient | ||
// (It should be, because it internally copies a std::vector<char> via | ||
// memcpy | ||
*dynResult = dynA; | ||
floriankramer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return; | ||
} | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
IdTableView<A_WIDTH> a = dynA.asStaticView<A_WIDTH>(); | ||
IdTableView<B_WIDTH> b = dynB.asStaticView<B_WIDTH>(); | ||
IdTableStatic<OUT_WIDTH> result = dynResult->moveToStatic<OUT_WIDTH>(); | ||
|
||
std::vector<size_t> rightToLeftCols(b.cols(), | ||
std::numeric_limits<size_t>::max()); | ||
for (const auto& jc : joinColumns) { | ||
rightToLeftCols[jc[1]] = jc[0]; | ||
} | ||
|
||
/** | ||
* @brief A function to copy a row from a to the end of result. | ||
* @param ia The index of the row in a. | ||
*/ | ||
auto writeResult = [&result, &a](size_t ia) { | ||
result.emplace_back(); | ||
size_t backIdx = result.size() - 1; | ||
for (size_t col = 0; col < a.cols(); col++) { | ||
result(backIdx, col) = a(ia, col); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't we have efficient result.push_back(a(ia)) in the |
||
} | ||
}; | ||
|
||
size_t ia = 0, ib = 0; | ||
while (ia < a.size() && ib < b.size()) { | ||
// Join columns 0 are the primary sort columns | ||
while (a(ia, joinColumns[0][0]) < b(ib, joinColumns[0][1])) { | ||
// Write a result | ||
writeResult(ia); | ||
ia++; | ||
if (ia >= a.size()) { | ||
goto finish; | ||
} | ||
} | ||
while (b(ib, joinColumns[0][1]) < a(ia, joinColumns[0][0])) { | ||
ib++; | ||
if (ib >= b.size()) { | ||
goto finish; | ||
} | ||
} | ||
|
||
while (b(ib, joinColumns[0][1]) == a(ia, joinColumns[0][0])) { | ||
// check if the rest of the join columns also match | ||
RowComparison rowEq = isRowEqSkipFirst(a, b, ia, ib, joinColumns); | ||
switch (rowEq) { | ||
case RowComparison::EQUAL: { | ||
ia++; | ||
if (ia >= a.size()) { | ||
goto finish; | ||
} | ||
} break; | ||
case RowComparison::LEFT_SMALLER: { | ||
// ib does not discard ia, and there can not be another ib that | ||
// would discard ia. | ||
writeResult(ia); | ||
ia++; | ||
if (ia >= a.size()) { | ||
goto finish; | ||
} | ||
} break; | ||
case RowComparison::RIGHT_SMALLER: { | ||
ib++; | ||
if (ib >= b.size()) { | ||
goto finish; | ||
} | ||
} break; | ||
default: | ||
AD_CHECK(false); | ||
} | ||
} | ||
} | ||
finish: | ||
result.reserve(result.size() + (a.size() - ia)); | ||
while (ia < a.size()) { | ||
writeResult(ia); | ||
ia++; | ||
} | ||
floriankramer marked this conversation as resolved.
Show resolved
Hide resolved
Comment on lines
+207
to
+211
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't the |
||
*dynResult = result.moveToDynamic(); | ||
} | ||
|
||
template <int A_WIDTH, int B_WIDTH> | ||
Minus::RowComparison Minus::isRowEqSkipFirst( | ||
const IdTableView<A_WIDTH>& a, const IdTableView<B_WIDTH>& b, size_t ia, | ||
size_t ib, const vector<array<size_t, 2>>& joinColumns) { | ||
for (size_t i = 1; i < joinColumns.size(); ++i) { | ||
Id va{a(ia, joinColumns[i][0])}; | ||
Id vb{b(ib, joinColumns[i][1])}; | ||
if (va < vb) { | ||
return RowComparison::LEFT_SMALLER; | ||
} | ||
if (va > vb) { | ||
return RowComparison::RIGHT_SMALLER; | ||
} | ||
} | ||
return RowComparison::EQUAL; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// Copyright 2018, University of Freiburg, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am sure that this should be 2020 or 2021:) |
||
// Chair of Algorithms and Data Structures. | ||
// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de) | ||
#pragma once | ||
|
||
#include <array> | ||
#include <vector> | ||
|
||
#include "./Operation.h" | ||
#include "./QueryExecutionTree.h" | ||
|
||
class Minus : public Operation { | ||
enum class RowComparison { EQUAL, LEFT_SMALLER, RIGHT_SMALLER }; | ||
|
||
public: | ||
Minus(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> left, | ||
std::shared_ptr<QueryExecutionTree> right, | ||
const std::vector<array<size_t, 2>>& matchedColumns); | ||
|
||
virtual string asString(size_t indent = 0) const override; | ||
|
||
virtual string getDescriptor() const override; | ||
|
||
virtual size_t getResultWidth() const override; | ||
|
||
virtual vector<size_t> resultSortedOn() const override; | ||
|
||
ad_utility::HashMap<string, size_t> getVariableColumns() const; | ||
|
||
virtual void setTextLimit(size_t limit) override { | ||
_left->setTextLimit(limit); | ||
_right->setTextLimit(limit); | ||
} | ||
|
||
virtual bool knownEmptyResult() override { return _left->knownEmptyResult(); } | ||
|
||
virtual float getMultiplicity(size_t col) override; | ||
|
||
virtual size_t getSizeEstimate() override; | ||
|
||
virtual size_t getCostEstimate() override; | ||
|
||
vector<QueryExecutionTree*> getChildren() override { | ||
return {_left.get(), _right.get()}; | ||
} | ||
|
||
/** | ||
* @brief Joins a and b using the column defined int joinColumns, storing the | ||
* result in result. R should have width resultWidth (or be a vector | ||
* that should have resultWidth entries). | ||
* This method is made public here for unit testing purposes. | ||
**/ | ||
template <int A_WIDTH, int B_WIDTH> | ||
static void computeMinus(const IdTable& a, const IdTable& b, | ||
const vector<array<size_t, 2>>& matchedColumns, | ||
IdTable* result); | ||
|
||
private: | ||
/** | ||
* @brief Compares the two rows under the assumption that the first | ||
* entries of the rows are equal. | ||
*/ | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
template <int A_WIDTH, int B_WIDTH> | ||
static RowComparison isRowEqSkipFirst( | ||
const IdTableView<A_WIDTH>& a, const IdTableView<B_WIDTH>& b, size_t ia, | ||
size_t ib, const vector<array<size_t, 2>>& matchedColumns); | ||
|
||
virtual void computeResult(ResultTable* result) override; | ||
|
||
std::shared_ptr<QueryExecutionTree> _left; | ||
std::shared_ptr<QueryExecutionTree> _right; | ||
|
||
vector<float> _multiplicities; | ||
size_t _sizeEstimate; | ||
bool _multiplicitiesComputed; | ||
|
||
std::vector<array<size_t, 2>> _matchedColumns; | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add some more MINUS tests.