Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix getVariableColumns() for TwoColumnJoin (fixes #256, #259)

Merged
merged 2 commits into from
Jun 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
77 changes: 47 additions & 30 deletions src/engine/TwoColumnJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,32 +99,24 @@ void TwoColumnJoin::computeResult(ResultTable* result) {
(_right->getResultWidth() == 2 && _jc1Right == 0 && _jc2Right == 1)) {
bool rightFilter =
(_right->getResultWidth() == 2 && _jc1Right == 0 && _jc2Right == 1);
const auto& v = rightFilter ? _left : _right;
const auto leftResult = _left->getResult();
const auto rightResult = _right->getResult();
const auto& toFilter = rightFilter ? leftResult : rightResult;
RuntimeInformation& runtimeInfo = getRuntimeInfo();
runtimeInfo.addChild(_left->getRootOperation()->getRuntimeInfo());
runtimeInfo.addChild(_right->getRootOperation()->getRuntimeInfo());
const IdTable& filter =
rightFilter ? rightResult->_data : leftResult->_data;
size_t jc1 = rightFilter ? _jc1Left : _jc1Right;
size_t jc2 = rightFilter ? _jc2Left : _jc2Right;
result->_sortedBy = {jc1};
result->_data.setCols(v->getResultWidth());
result->_sortedBy = {jc1, jc2};
result->_data.setCols(toFilter->_data.cols());
result->_resultTypes.reserve(result->_data.cols());
result->_resultTypes.insert(result->_resultTypes.end(),
leftResult->_resultTypes.begin(),
leftResult->_resultTypes.end());
for (size_t col = 0; col < rightResult->_data.cols(); col++) {
if (col != _jc1Right && col != _jc2Right) {
result->_resultTypes.push_back(rightResult->_resultTypes[col]);
}
}

toFilter->_resultTypes.begin(),
toFilter->_resultTypes.end());
AD_CHECK_GE(result->_data.cols(), 2);

const auto& toFilter = rightFilter ? leftResult : rightResult;

int inWidth = toFilter->_data.cols();
int filterWidth = filter.cols();
CALL_FIXED_SIZE_2(inWidth, filterWidth, getEngine().filter, toFilter->_data,
Expand All @@ -140,26 +132,41 @@ void TwoColumnJoin::computeResult(ResultTable* result) {

// _____________________________________________________________________________
ad_utility::HashMap<string, size_t> TwoColumnJoin::getVariableColumns() const {
ad_utility::HashMap<string, size_t> retVal(_left->getVariableColumns());
size_t leftSize = _left->getResultWidth();
const auto& rightVarCols = _right->getVariableColumns();
for (const auto& rightVarCol : rightVarCols) {
if (rightVarCol.second < _jc1Right) {
if (rightVarCol.second < _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second;
} else if (rightVarCol.second > _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second - 1;
if ((_left->getResultWidth() == 2 && _jc1Left == 0 && _jc2Left == 1) ||
(_right->getResultWidth() == 2 && _jc1Right == 0 && _jc2Right == 1)) {
// This is the filter case that is actually implemented in computeResult()
bool rightFilter =
(_right->getResultWidth() == 2 && _jc1Right == 0 && _jc2Right == 1);
const auto& toFilter = rightFilter ? _left : _right;
return toFilter->getVariableColumns();
} else {
// NOTE: The variable mapping for this else case can be computed even
// though the case itself is currently not implemented (see
// computeResult()). We still have to provide the mapping: without it,
// the query optimizer thinks that variables are missing from the query
// when it tries to use TwoColumnJoin in the unsupported (super
// expensive) variant, gives up, and we don't find a working alternative.
ad_utility::HashMap<string, size_t> retVal(_left->getVariableColumns());
size_t leftSize = _left->getResultWidth();
const auto& rightVarCols = _right->getVariableColumns();
for (const auto& rightVarCol : rightVarCols) {
if (rightVarCol.second < _jc1Right) {
if (rightVarCol.second < _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second;
} else if (rightVarCol.second > _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second - 1;
}
}
}
if (rightVarCol.second > _jc1Right) {
if (rightVarCol.second < _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second - 1;
} else if (rightVarCol.second > _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second - 2;
if (rightVarCol.second > _jc1Right) {
if (rightVarCol.second < _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second - 1;
} else if (rightVarCol.second > _jc2Right) {
retVal[rightVarCol.first] = leftSize + rightVarCol.second - 2;
}
}
}
return retVal;
}
return retVal;
}

// _____________________________________________________________________________
Expand All @@ -171,7 +178,17 @@ size_t TwoColumnJoin::getResultWidth() const {

// _____________________________________________________________________________
vector<size_t> TwoColumnJoin::resultSortedOn() const {
return {_jc1Left, _jc2Left};
bool rightFilter =
(_right->getResultWidth() == 2 && _jc1Right == 0 && _jc2Right == 1);
vector<size_t> sortedOn(2);
if (rightFilter) {
sortedOn[0] = _jc1Left;
sortedOn[1] = _jc2Left;
} else {
sortedOn[0] = _jc1Right;
sortedOn[1] = _jc2Right;
}
return sortedOn;
}

// _____________________________________________________________________________
Expand Down
5 changes: 4 additions & 1 deletion src/engine/TwoColumnJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ class TwoColumnJoin : public Operation {
return _left->getSizeEstimate() + _left->getCostEstimate() +
_right->getSizeEstimate() + _right->getCostEstimate();
}
// PUNISH IF NO DIRECT JOIN IS AVAILABLE FOR FILTER
// The case where the above condition does not hold is currently
// not implemented, so it really must not be used!
// Important: The division by 1000000 prevents overflow.
// TODO(schnelle): This is pretty fragile.
return std::numeric_limits<size_t>::max() / 1000000;
}

Expand Down