-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
First draft of a timeout for operations #346
Changes from 5 commits
4c1b6a4
8c35ef4
0f63f0d
302f755
e1d4ebf
4e4387d
b5f72be
9224e26
51b9b11
0965537
9dc43f2
010aaf0
909c896
9a6c3a7
553af99
9109715
daec136
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -174,19 +174,27 @@ struct resizeIfVec<vector<C>, C> { | |
* its already allocated storage. | ||
*/ | ||
template <int IN_WIDTH, int OUT_WIDTH> | ||
void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | ||
size_t blockEnd, const IdTableView<IN_WIDTH>& input, | ||
const vector<ResultTable::ResultType>& inputTypes, | ||
IdTableStatic<OUT_WIDTH>* result, size_t resultRow, | ||
const ResultTable* inTable, ResultTable* outTable, | ||
const Index& index, | ||
ad_utility::HashSet<size_t>& distinctHashSet) { | ||
|
||
void GroupBy::processGroup(const GroupBy::Aggregate& a, size_t blockStart, | ||
size_t blockEnd, const IdTableView<IN_WIDTH>& input, | ||
|
||
const vector<ResultTable::ResultType>& inputTypes, | ||
IdTableStatic<OUT_WIDTH>* result, size_t resultRow, | ||
const ResultTable* inTable, ResultTable* outTable, | ||
const Index& index, | ||
ad_utility::HashSet<size_t>& distinctHashSet) const { | ||
auto check = [this](size_t i) { | ||
if (i % 32768 == 0) { | ||
checkTimeout(); | ||
} | ||
}; | ||
switch (a._type) { | ||
case ParsedQuery::AggregateType::AVG: { | ||
float res = 0; | ||
if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) { | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -196,6 +204,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
res += input(i, a._inCol); | ||
} | ||
} | ||
|
@@ -204,6 +213,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
float tmpF; | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -214,6 +224,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
std::memcpy(&tmpF, &input(i, a._inCol), sizeof(float)); | ||
res += tmpF; | ||
} | ||
|
@@ -224,6 +235,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
} else { | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -242,6 +254,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
// load the string, parse it as an xsd::int or float | ||
// TODO(schnelle): What's the correct way to handle OPTIONAL here | ||
std::string entity = | ||
|
@@ -265,6 +278,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
if (a._distinct) { | ||
size_t count = 0; | ||
for (size_t i = blockStart; i <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
count++; | ||
|
@@ -283,6 +297,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) { | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -296,6 +311,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
out << input(i, a._inCol) << *delim; | ||
} | ||
out << input(blockEnd, a._inCol); | ||
|
@@ -304,6 +320,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
float f; | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -319,6 +336,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
std::memcpy(&f, &input(i, a._inCol), sizeof(float)); | ||
out << f << *delim; | ||
} | ||
|
@@ -328,6 +346,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
} else if (inputTypes[a._inCol] == ResultTable::ResultType::TEXT) { | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -341,13 +360,15 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
out << index.getTextExcerpt(input(i, a._inCol)) << *delim; | ||
} | ||
out << index.getTextExcerpt(input(blockEnd, a._inCol)); | ||
} | ||
} else if (inputTypes[a._inCol] == ResultTable::ResultType::LOCAL_VOCAB) { | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -366,6 +387,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
// TODO(schnelle): What's the correct way to handle OPTIONAL here | ||
out << inTable->idToOptionalString(input(i, a._inCol)).value_or("") | ||
<< *delim; | ||
|
@@ -376,6 +398,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
} else { | ||
if (a._distinct) { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
const auto it = distinctHashSet.find(input(i, a._inCol)); | ||
if (it == distinctHashSet.end()) { | ||
distinctHashSet.insert(input(i, a._inCol)); | ||
|
@@ -405,6 +428,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
distinctHashSet.clear(); | ||
} else { | ||
for (size_t i = blockStart; i + 1 <= blockEnd; i++) { | ||
check(i); | ||
// TODO(schnelle): What's the correct way to handle OPTIONAL here | ||
std::string entity = | ||
index.idToOptionalString(input(i, a._inCol)).value_or(""); | ||
|
@@ -583,12 +607,12 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | |
} | ||
|
||
template <int IN_WIDTH, int OUT_WIDTH> | ||
void doGroupBy(const IdTable& dynInput, | ||
const vector<ResultTable::ResultType>& inputTypes, | ||
const vector<size_t>& groupByCols, | ||
const vector<GroupBy::Aggregate>& aggregates, IdTable* dynResult, | ||
const ResultTable* inTable, ResultTable* outTable, | ||
const Index& index) { | ||
void GroupBy::doGroupBy(const IdTable& dynInput, | ||
const vector<ResultTable::ResultType>& inputTypes, | ||
const vector<size_t>& groupByCols, | ||
const vector<GroupBy::Aggregate>& aggregates, | ||
IdTable* dynResult, const ResultTable* inTable, | ||
ResultTable* outTable, const Index& index) const { | ||
LOG(DEBUG) << "Group by input size " << dynInput.size() << std::endl; | ||
if (dynInput.size() == 0) { | ||
return; | ||
|
@@ -621,6 +645,9 @@ void doGroupBy(const IdTable& dynInput, | |
size_t blockStart = 0; | ||
size_t blockEnd = 0; | ||
for (size_t pos = 1; pos < input.size(); pos++) { | ||
if (pos % 32768 == 0) { | ||
checkTimeout(); | ||
} | ||
bool rowMatchesCurrentBlock = true; | ||
for (size_t i = 0; i < currentGroupBlock.size(); i++) { | ||
if (input(pos, currentGroupBlock[i].first) != | ||
|
@@ -764,16 +791,23 @@ void GroupBy::computeResult(ResultTable* result) { | |
|
||
int inWidth = subresult->_data.cols(); | ||
int outWidth = result->_data.cols(); | ||
CALL_FIXED_SIZE_2(inWidth, outWidth, doGroupBy, subresult->_data, | ||
inputResultTypes, groupByCols, aggregates, &result->_data, | ||
subresult.get(), result, getIndex()); | ||
|
||
// Free the user data used by GROUP_CONCAT aggregates. | ||
for (Aggregate& a : aggregates) { | ||
if (a._type == ParsedQuery::AggregateType::GROUP_CONCAT) { | ||
delete static_cast<std::string*>(a._userData); | ||
|
||
auto cleanup = [&]() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Specify the captured variables explicitly when it's a short list and/or a short lambda? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In general you are right. In this case I would argue that the default capture by reference is not problematic, because we do not pass the lambda outside and only use it as a "copied code block". But by NEVER using default captures you agree with one of my favourite C++ authors, so I am totally convinced. |
||
// Free the user data used by GROUP_CONCAT aggregates. | ||
for (Aggregate& a : aggregates) { | ||
if (a._type == ParsedQuery::AggregateType::GROUP_CONCAT) { | ||
delete static_cast<std::string*>(a._userData); | ||
} | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
}; | ||
try { | ||
CALL_FIXED_SIZE_2(inWidth, outWidth, doGroupBy, subresult->_data, | ||
inputResultTypes, groupByCols, aggregates, &result->_data, | ||
subresult.get(), result, getIndex()); | ||
} catch (...) { | ||
cleanup(); | ||
throw; | ||
} | ||
|
||
cleanup(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you briefly explain which case this handles that was not properly handled before? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As soon as we had a groupConcat operation (which requires manual cleanup of the [rest of this reply was lost in extraction; presumably it refers to the aggregates' `_userData` strings, which the cleanup lambda now also frees when an exception is thrown — confirm against the original thread] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [start of this reply was lost in extraction] (which I would do in a separate, small PR) |
||
LOG(DEBUG) << "GroupBy result computation done." << std::endl; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,13 +98,23 @@ class GroupBy : public Operation { | |
ad_utility::HashMap<string, size_t> _varColMap; | ||
|
||
virtual void computeResult(ResultTable* result) override; | ||
}; | ||
|
||
// This method is declared here solely for unit testing purposes | ||
template <int IN_WIDTH, int OUT_WIDTH> | ||
void doGroupBy(const IdTable& dynInput, | ||
const vector<ResultTable::ResultType>& inputTypes, | ||
const vector<size_t>& groupByCols, | ||
const vector<GroupBy::Aggregate>& aggregates, IdTable* dynResult, | ||
const ResultTable* inTable, ResultTable* outTable, | ||
const Index& index); | ||
template <int IN_WIDTH, int OUT_WIDTH> | ||
void processGroup(const GroupBy::Aggregate& a, size_t blockStart, | ||
size_t blockEnd, const IdTableView<IN_WIDTH>& input, | ||
const vector<ResultTable::ResultType>& inputTypes, | ||
IdTableStatic<OUT_WIDTH>* result, size_t resultRow, | ||
const ResultTable* inTable, ResultTable* outTable, | ||
const Index& index, | ||
ad_utility::HashSet<size_t>& distinctHashSet) const; | ||
Comment on lines
+103
to
+109
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks new, but processGroup was used in GroupBy.cpp before? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
template <int IN_WIDTH, int OUT_WIDTH> | ||
void doGroupBy(const IdTable& dynInput, | ||
const vector<ResultTable::ResultType>& inputTypes, | ||
const vector<size_t>& groupByCols, | ||
const vector<GroupBy::Aggregate>& aggregates, | ||
IdTable* dynResult, const ResultTable* inTable, | ||
ResultTable* outTable, const Index& index) const; | ||
|
||
FRIEND_TEST(GroupByTest, doGroupBy); | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -280,7 +280,7 @@ void IndexScan::computePSOfreeS(ResultTable* result) const { | |
result->_resultTypes.push_back(ResultTable::ResultType::KB); | ||
result->_sortedBy = {0, 1}; | ||
const auto& idx = _executionContext->getIndex(); | ||
idx.scan(_predicate, &result->_data, idx._PSO); | ||
idx.scan(_predicate, &result->_data, idx._PSO, _timeoutTimer); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this needed as an argument here (and in the following)? Does idx.scan not have access to _timeoutTimer? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, the [text lost in extraction] The real question behind this is: [text lost in extraction] |
||
} | ||
|
||
// _____________________________________________________________________________ | ||
|
@@ -299,7 +299,7 @@ void IndexScan::computePOSfreeO(ResultTable* result) const { | |
result->_resultTypes.push_back(ResultTable::ResultType::KB); | ||
result->_sortedBy = {0, 1}; | ||
const auto& idx = _executionContext->getIndex(); | ||
idx.scan(_predicate, &result->_data, idx._POS); | ||
idx.scan(_predicate, &result->_data, idx._POS, _timeoutTimer); | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
|
@@ -338,7 +338,7 @@ void IndexScan::computeSPOfreeP(ResultTable* result) const { | |
result->_resultTypes.push_back(ResultTable::ResultType::KB); | ||
result->_sortedBy = {0, 1}; | ||
const auto& idx = _executionContext->getIndex(); | ||
idx.scan(_subject, &result->_data, idx._SPO); | ||
idx.scan(_subject, &result->_data, idx._SPO, _timeoutTimer); | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
|
@@ -357,7 +357,7 @@ void IndexScan::computeSOPfreeO(ResultTable* result) const { | |
result->_resultTypes.push_back(ResultTable::ResultType::KB); | ||
result->_sortedBy = {0, 1}; | ||
const auto& idx = _executionContext->getIndex(); | ||
idx.scan(_subject, &result->_data, idx._SOP); | ||
idx.scan(_subject, &result->_data, idx._SOP, _timeoutTimer); | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
|
@@ -377,7 +377,7 @@ void IndexScan::computeOSPfreeS(ResultTable* result) const { | |
result->_resultTypes.push_back(ResultTable::ResultType::KB); | ||
result->_sortedBy = {0, 1}; | ||
const auto& idx = _executionContext->getIndex(); | ||
idx.scan(_object, &result->_data, idx._OSP); | ||
idx.scan(_object, &result->_data, idx._OSP, _timeoutTimer); | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ Join::Join(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> t1, | |
size_t t2JoinCol, bool keepJoinColumn) | ||
: Operation(qec) { | ||
// Make sure subtrees are ordered so that identical queries can be identified. | ||
if (t1.get()->asString() < t2.get()->asString()) { | ||
if (t1 && t2 && t1.get()->asString() < t2.get()->asString()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Throw an exception if t1 or t2 is null. |
||
_left = t1; | ||
_leftJoinCol = t1JoinCol; | ||
_right = t2; | ||
|
@@ -336,6 +336,8 @@ void Join::doComputeJoinWithFullScanDummyLeft(const IdTable& ndr, | |
// Do a scan. | ||
LOG(TRACE) << "Inner scan with ID: " << currentJoinId << endl; | ||
IdTable jr(2, _executionContext->getAllocator()); | ||
checkTimeout(); // the scan is a disk operation, so we can check the timeout frequently | ||
|
||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
scan(currentJoinId, &jr); | ||
LOG(TRACE) << "Got #items: " << jr.size() << endl; | ||
// Build the cross product. | ||
|
@@ -376,6 +378,8 @@ void Join::doComputeJoinWithFullScanDummyRight(const IdTable& ndr, | |
} else { | ||
// Do a scan. | ||
LOG(TRACE) << "Inner scan with ID: " << currentJoinId << endl; | ||
checkTimeout(); // the scan is a disk operation, so we can check the | ||
// timeout frequently | ||
IdTable jr(2, _executionContext->getAllocator()); | ||
scan(currentJoinId, &jr); | ||
LOG(TRACE) << "Got #items: " << jr.size() << endl; | ||
|
@@ -525,13 +529,19 @@ void Join::join(const IdTable& dynA, size_t jc1, const IdTable& dynB, | |
while (i < a.size() && j < b.size()) { | ||
while (a(i, jc1) < b(j, jc2)) { | ||
++i; | ||
if (i % (1024 * 16) == 0) { | ||
checkTimeout(); | ||
} | ||
if (i >= a.size()) { | ||
goto finish; | ||
} | ||
} | ||
|
||
while (b(j, jc2) < a(i, jc1)) { | ||
++j; | ||
if (j % (1024 * 16) == 0) { | ||
checkTimeout(); | ||
} | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (j >= b.size()) { | ||
goto finish; | ||
} | ||
|
@@ -559,12 +569,18 @@ void Join::join(const IdTable& dynA, size_t jc1, const IdTable& dynB, | |
} | ||
|
||
++j; | ||
if (j % (1024 * 4) == 0) { | ||
checkTimeout(); | ||
} | ||
if (j >= b.size()) { | ||
// The next i might still match | ||
break; | ||
} | ||
} | ||
++i; | ||
if (i % (1024 * 4) == 0) { | ||
checkTimeout(); | ||
} | ||
if (i >= a.size()) { | ||
goto finish; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's with the two empty lines here?