Skip to content

Commit

Permalink
Fixed illegal optimizer rule on multiple consecutive SORTs
Browse files Browse the repository at this point in the history
  • Loading branch information
hkernbach committed Aug 8, 2022
1 parent 08f4f83 commit 65122ee
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 57 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG
@@ -1,6 +1,9 @@
devel
-----

* BTS-907: Fixed some rare SortNode related optimizer issues, when at least two
or more SortNodes appeared in the AQL execution plan.

* Updated arangosync to v2.12.0-preview-3.

* Added new AQL function `VALUE` capable of accessing object attribute by a
Expand Down
19 changes: 15 additions & 4 deletions arangod/Aql/OptimizerRules.cpp
Expand Up @@ -1346,6 +1346,7 @@ void arangodb::aql::removeRedundantSortsRule(
auto other =
ExecutionNode::castTo<SortNode*>(current)->getSortInformation();

bool canContinueSearch = true;
switch (sortInfo.isCoveredBy(other)) {
case SortInformation::unequal: {
// different sort criteria
Expand All @@ -1355,20 +1356,25 @@ void arangodb::aql::removeRedundantSortsRule(

if (!other.isDeterministic) {
// if the sort is non-deterministic, we must not remove it
canContinueSearch = false;
break;
}

if (sortNode->isStable()) {
// we should not optimize predecessors of a stable sort (used
// in a COLLECT node)
// we should not optimize predecessors of a stable sort
// (used in a COLLECT node)
// the stable sort is for a reason, and removing any
// predecessors sorts might
// change the result
// predecessors sorts might change the result.
// We're not allowed to continue our search for further
// redundant SORTS in this iteration.
canContinueSearch = false;
break;
}

// remove sort that is a direct predecessor of a sort
toUnlink.emplace(current);
} else {
canContinueSearch = false;
}
break;
}
Expand All @@ -1381,7 +1387,9 @@ void arangodb::aql::removeRedundantSortsRule(
case SortInformation::ourselvesLessAccurate: {
// the sort at the start of the pipeline makes the sort at the end
// superfluous, so we'll remove it
// Related to: BTS-937
toUnlink.emplace(n);
canContinueSearch = false;
break;
}

Expand All @@ -1392,6 +1400,9 @@ void arangodb::aql::removeRedundantSortsRule(
break;
}
}
if (!canContinueSearch) {
break;
}
} else if (current->getType() == EN::FILTER) {
// ok: a filter does not depend on sort order
} else if (current->getType() == EN::CALCULATION) {
Expand Down
116 changes: 115 additions & 1 deletion tests/js/server/aql/aql-optimizer-collect-count.js
@@ -1,5 +1,5 @@
/*jshint globalstrict:false, strict:false, maxlen: 500 */
/*global assertTrue, assertEqual, assertNotEqual, assertNull, AQL_EXECUTE, AQL_EXPLAIN */
/*global assertTrue, assertEqual, assertNotEqual, assertNull, assertFalse, AQL_EXECUTE, AQL_EXPLAIN */

////////////////////////////////////////////////////////////////////////////////
/// @brief tests for COLLECT w/ COUNT
Expand Down Expand Up @@ -548,10 +548,124 @@ function optimizerCountTestSuite () {
};
}

function optimizerDoubleCollectTestSuite() {
const generateData = () => {
// Static data we will use in our AQL Queries.
// We do not need collection/document access or dynamic data.
return [
{"friend": {"name": "piotr"}, id: 10},
{"friend": {"name": "heiko"}, id: 11},
{"friend": {"name": "micha"}, id: 12},
{"friend": {"name": "micha"}, id: 13},
{"friend": {"name": "micha"}, id: 14},
{"friend": {"name": "piotr"}, id: 10},
{"friend": {"name": "micha"}, id: 12},
{"friend": {"name": "heiko"}, id: 11},
{"friend": {"name": "piotr"}, id: 10},
{"friend": {"name": "heiko"}, id: 9}
];
};

const hasDuplicates = (values) => {
let seen = new Set();
let checkedName;

let duplicatesFound = values.some(function (currentObject) {
checkedName = currentObject.name;
return seen.size === seen.add(currentObject.name).size;
});

return {duplicatesFound, checkedName};
};

return {
testDoubleCollectSort: function () {
// Forces SORT NULL on first COLLECT statement

const query = `
LET friends = ${JSON.stringify(generateData())}
FOR f in friends
COLLECT friend_temp = f.friend, id = f.id WITH COUNT INTO messageCount
COLLECT friend = friend_temp INTO countryData = {id, messageCount}
SORT friend.name
RETURN {"name": friend.name, countryData}
`;

const results = AQL_EXECUTE(query);
// Our result must contain 3. Rows with collected data
const duplicatesChecker = hasDuplicates(results.json);
assertFalse(duplicatesChecker.duplicatesFound, `Found duplicate entry for: "${duplicatesChecker.checkedName}"`);
assertEqual(3, results.json.length);
},

testDoubleCollectSortNullRemoveSecondCalculationRule: function () {
const query = `
LET friends = ${JSON.stringify(generateData())}
FOR f in friends
COLLECT friend_temp = f.friend, id = f.id WITH COUNT INTO messageCount SORT NULL
COLLECT friend = friend_temp INTO countryData = {id, messageCount}
SORT friend.name
RETURN {"name": friend.name, countryData}
`;

const results = db._query(query).toArray();

// Our result must contain 3. Rows with collected data
const duplicatesChecker = hasDuplicates(results);
assertFalse(duplicatesChecker.duplicatesFound, `Found duplicate entry for: "${duplicatesChecker.checkedName}"`);
assertEqual(3, results.length);
},

testDoubleCollectSortNull: function () {
// Forces SORT NULL on first COLLECT statement

const query = `
LET friends = ${JSON.stringify(generateData())}
FOR f in friends
COLLECT friend_temp = f.friend, id = f.id WITH COUNT INTO messageCount SORT NULL
COLLECT friend = friend_temp INTO countryData = {id, messageCount}
SORT friend.name
RETURN {"name": friend.name, countryData}
`;

const results = AQL_EXECUTE(query);
// Our result must contain 3. Rows with collected data
const duplicatesChecker = hasDuplicates(results.json);
assertFalse(duplicatesChecker.duplicatesFound, `Found duplicate entry for: "${duplicatesChecker.checkedName}"`);
assertEqual(3, results.json.length);
},

testDoubleCollectSortNullWithForcedHashMethod: function () {
// Forces SORT NULL on first COLLECT statement
// Forces HASH method on second COLLECT

const query = `
LET friends = ${JSON.stringify(generateData())}
FOR f in friends
COLLECT friend_temp = f.friend, id = f.id WITH COUNT INTO messageCount SORT NULL
COLLECT friend = friend_temp INTO countryData = {id, messageCount} OPTIONS {method: "hash"}
SORT friend.name
RETURN {"name": friend.name, countryData}
`;

const results = AQL_EXECUTE(query);
// Our result must contain 3. Rows with collected data
const duplicatesChecker = hasDuplicates(results.json);
assertFalse(duplicatesChecker.duplicatesFound, `Found duplicate entry for: "${duplicatesChecker.checkedName}"`);
assertEqual(3, results.json.length);
},
};
}

////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suite
////////////////////////////////////////////////////////////////////////////////

jsunity.run(optimizerCountTestSuite);
jsunity.run(optimizerDoubleCollectTestSuite);

return jsunity.done();

0 comments on commit 65122ee

Please sign in to comment.