Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for HAVING. Fixes #104. #130

Merged
merged 2 commits into from
Sep 28, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 30 additions & 0 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -338,3 +338,33 @@ queries:
- contains_row: ["<Albert_Einstein>", "<Nobel_Prize_in_Physics>"]
- contains_row: ["<Albert_Fert>", "<Wolf_Prize_in_Physics>"]
- contains_row: ["<Albert_Overhauser>", "<National_Medal_of_Science_for_Physical_Science>"]
- query : having-predicate-religion
solutions:
- type: no-text
sparql: |
SELECT ?predicate (COUNT(?predicate) as ?count) WHERE {
?x <is-a> <Astronaut> .
?x ql:has-predicate ?predicate .
}
GROUP BY ?predicate
HAVING (?predicate < <Z) (?predicate = <Religion>)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So these are multiple HAVING clauses which are treated as AND right? I think we should document that somewhere besides the SPARQL standard. Also I think we should have a few more examples in the e2e Tests, especially also the very basic usage. I like using them as normal examples and to look up how some syntax works and thus having a few different cases is both a useful test and an example.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added another e2e test and expanded the GROUP BY paragraph in the readme.

checks:
- num_rows: 1
- num_cols: 2
- selected: ["?predicate", "?count"]
- contains_row: ["<Religion>", "5"]
- query : pattern-trick-automatic-having
solutions:
- type: no-text
sparql: |
SELECT ?predicate (COUNT(?predicate) as ?count) WHERE {
?x ql:has-predicate ?predicate .
FILTER (?predicate = <Gender>)
}
GROUP BY ?predicate
ORDER BY DESC(?count)
checks:
- num_rows: 1
- num_cols: 2
- selected: ["?predicate", "?count"]
- contains_row: ["<Gender>", "18589"]
141 changes: 86 additions & 55 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,19 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq) const {
final = groupByPlan;
}

// create filter operations for the having clauses
for (const SparqlFilter& filter : pq._havingClauses) {
SubtreePlan plan(_qec);
auto& tree = *plan._qet.get();
tree.setVariableColumns(final._qet.get()->getVariableColumnMap());
tree.setOperation(QueryExecutionTree::FILTER,
createFilterOperation(filter, final));
tree.setContextVars(final._qet.get()->getContextVars());
final = plan;
}

if (doGrouping) {
// Either the pattern trick or another form of grouping is in use
// Add the order by operation
if (pq._orderBy.size() > 0) {
SubtreePlan plan(_qec);
Expand Down Expand Up @@ -459,10 +471,13 @@ bool QueryPlanner::checkUsePatternTrick(
// trick is not going to be used.
if (usePatternTrick) {
// Check for filters on the ql:has-predicate triple's subject or
// object
// object.
// Filters that filter on the triple's object but have a static
// rhs will be transformed to a having clause later on.
for (const SparqlFilter& filter : pq->_rootGraphPattern._filters) {
if (filter._lhs == t._o || filter._lhs == t._s ||
filter._rhs == t._o || filter._rhs == t._s) {
if (!(filter._lhs == t._o && filter._rhs[0] != '?') &&
(filter._lhs == t._s || filter._rhs == t._o ||
filter._rhs == t._s)) {
usePatternTrick = false;
break;
}
Expand Down Expand Up @@ -501,6 +516,17 @@ bool QueryPlanner::checkUsePatternTrick(
// remove the triple from the graph
pq->_rootGraphPattern._whereClauseTriples.erase(
pq->_rootGraphPattern._whereClauseTriples.begin() + i);
// Transform filters on the ql:has-relation triple's object that
// have a static rhs to having clauses
for (int i = 0; i < pq->_rootGraphPattern._filters.size(); i++) {
const SparqlFilter& filter = pq->_rootGraphPattern._filters[i];
if (filter._lhs == t._o && filter._rhs[0] != '?') {
pq->_havingClauses.push_back(filter);
pq->_rootGraphPattern._filters.erase(
pq->_rootGraphPattern._filters.begin() + i);
i--;
}
}
}
}
}
Expand Down Expand Up @@ -1446,58 +1472,8 @@ void QueryPlanner::applyFiltersIfPossible(
newPlan._idsOfIncludedNodes = row[n]._idsOfIncludedNodes;
newPlan._isOptional = row[n]._isOptional;
auto& tree = *newPlan._qet.get();
if (isVariable(filters[i]._rhs)) {
std::shared_ptr<Operation> filter = std::make_shared<Filter>(
_qec, row[n]._qet, filters[i]._type,
row[n]._qet.get()->getVariableColumn(filters[i]._lhs),
row[n]._qet.get()->getVariableColumn(filters[i]._rhs));
tree.setOperation(QueryExecutionTree::FILTER, filter);
} else {
string compWith = filters[i]._rhs;
Id entityId = 0;
if (_qec) {
// TODO(schnelle): A proper SPARQL parser should have
// tagged/converted numeric values already. However our parser is
// currently far too crude for that
if (ad_utility::isXsdValue(compWith)) {
compWith = ad_utility::convertValueLiteralToIndexWord(compWith);
} else if (ad_utility::isNumeric(compWith)) {
compWith = ad_utility::convertNumericToIndexWord(compWith);
}
if (filters[i]._type == SparqlFilter::EQ ||
filters[i]._type == SparqlFilter::NE) {
if (!_qec->getIndex().getVocab().getId(compWith, &entityId)) {
entityId = std::numeric_limits<size_t>::max() - 1;
}
} else if (filters[i]._type == SparqlFilter::GE) {
entityId = _qec->getIndex().getVocab().getValueIdForGE(compWith);
} else if (filters[i]._type == SparqlFilter::GT) {
entityId = _qec->getIndex().getVocab().getValueIdForGT(compWith);
} else if (filters[i]._type == SparqlFilter::LT) {
entityId = _qec->getIndex().getVocab().getValueIdForLT(compWith);
} else if (filters[i]._type == SparqlFilter::LE) {
entityId = _qec->getIndex().getVocab().getValueIdForLE(compWith);
} else if (filters[i]._type == SparqlFilter::LANG_MATCHES ||
filters[i]._type == SparqlFilter::REGEX) {
entityId = std::numeric_limits<size_t>::max() - 1;
}
}
std::shared_ptr<Operation> filter(
new Filter(_qec, row[n]._qet, filters[i]._type,
row[n]._qet.get()->getVariableColumn(filters[i]._lhs),
std::numeric_limits<size_t>::max(), entityId));
if (_qec && (filters[i]._type == SparqlFilter::LANG_MATCHES ||
filters[i]._type == SparqlFilter::REGEX)) {
static_cast<Filter*>(filter.get())
->setRightHandSideString(filters[i]._rhs);
if (filters[i]._type == SparqlFilter::REGEX) {
static_cast<Filter*>(filter.get())
->setRegexIgnoreCase(filters[i]._regexIgnoreCase);
}
}
tree.setOperation(QueryExecutionTree::FILTER, filter);
}

tree.setOperation(QueryExecutionTree::FILTER,
createFilterOperation(filters[i], row[n]));
tree.setVariableColumns(row[n]._qet.get()->getVariableColumnMap());
tree.setContextVars(row[n]._qet.get()->getContextVars());
if (replace) {
Expand All @@ -1510,6 +1486,61 @@ void QueryPlanner::applyFiltersIfPossible(
}
}

// _____________________________________________________________________________
std::shared_ptr<Operation> QueryPlanner::createFilterOperation(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is much nicer as its own method, thanks!

const SparqlFilter& filter, const SubtreePlan& parent) const {
if (isVariable(filter._rhs)) {
std::shared_ptr<Operation> filterOp = std::make_shared<Filter>(
_qec, parent._qet, filter._type,
parent._qet.get()->getVariableColumn(filter._lhs),
parent._qet.get()->getVariableColumn(filter._rhs));
return filterOp;
} else {
string compWith = filter._rhs;
Id entityId = 0;
if (_qec) {
// TODO(schnelle): A proper SPARQL parser should have
// tagged/converted numeric values already. However our parser is
// currently far too crude for that
if (ad_utility::isXsdValue(compWith)) {
compWith = ad_utility::convertValueLiteralToIndexWord(compWith);
} else if (ad_utility::isNumeric(compWith)) {
compWith = ad_utility::convertNumericToIndexWord(compWith);
}
if (filter._type == SparqlFilter::EQ ||
filter._type == SparqlFilter::NE) {
if (!_qec->getIndex().getVocab().getId(compWith, &entityId)) {
entityId = std::numeric_limits<size_t>::max() - 1;
}
} else if (filter._type == SparqlFilter::GE) {
entityId = _qec->getIndex().getVocab().getValueIdForGE(compWith);
} else if (filter._type == SparqlFilter::GT) {
entityId = _qec->getIndex().getVocab().getValueIdForGT(compWith);
} else if (filter._type == SparqlFilter::LT) {
entityId = _qec->getIndex().getVocab().getValueIdForLT(compWith);
} else if (filter._type == SparqlFilter::LE) {
entityId = _qec->getIndex().getVocab().getValueIdForLE(compWith);
} else if (filter._type == SparqlFilter::LANG_MATCHES ||
filter._type == SparqlFilter::REGEX) {
entityId = std::numeric_limits<size_t>::max() - 1;
}
}
std::shared_ptr<Operation> filterOp(
new Filter(_qec, parent._qet, filter._type,
parent._qet.get()->getVariableColumn(filter._lhs),
std::numeric_limits<size_t>::max(), entityId));
if (_qec && (filter._type == SparqlFilter::LANG_MATCHES ||
filter._type == SparqlFilter::REGEX)) {
static_cast<Filter*>(filterOp.get())->setRightHandSideString(filter._rhs);
if (filter._type == SparqlFilter::REGEX) {
static_cast<Filter*>(filterOp.get())
->setRegexIgnoreCase(filter._regexIgnoreCase);
}
}
return filterOp;
}
}

// _____________________________________________________________________________
vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
const QueryPlanner::TripleGraph& tg, const vector<SparqlFilter>& filters,
Expand Down
3 changes: 3 additions & 0 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ class QueryPlanner {
const vector<SparqlFilter>& filters,
bool replaceInsteadOfAddPlans) const;

std::shared_ptr<Operation> createFilterOperation(
const SparqlFilter& filter, const SubtreePlan& parent) const;

vector<vector<SubtreePlan>> fillDpTab(
const TripleGraph& graph, const vector<SparqlFilter>& fs,
const vector<SubtreePlan*>& children) const;
Expand Down
1 change: 1 addition & 0 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class ParsedQuery {
vector<SparqlPrefix> _prefixes;
vector<string> _selectedVariables;
GraphPattern _rootGraphPattern;
vector<SparqlFilter> _havingClauses;
size_t _numGraphPatterns;
vector<OrderKey> _orderBy;
vector<string> _groupByVariables;
Expand Down
26 changes: 17 additions & 9 deletions src/parser/SparqlParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ void SparqlParser::parseWhere(const string& str, ParsedQuery& query,
}
}
for (const string& filter : filters) {
addFilter(filter, currentPattern);
addFilter(filter, &currentPattern->_filters);
}
}

Expand Down Expand Up @@ -382,7 +382,7 @@ void SparqlParser::addWhereTriple(const string& str,
// _____________________________________________________________________________
void SparqlParser::parseSolutionModifiers(const string& str,
ParsedQuery& query) {
// Split the string at any whitespace but ignoe whitespace inside brackets
// Split the string at any whitespace but ignore whitespace inside brackets
// to allow for alias parsing.
auto tokens = ad_utility::splitWsWithEscape(str, '(', ')');
for (size_t i = 0; i < tokens.size(); ++i) {
Expand All @@ -391,7 +391,7 @@ void SparqlParser::parseSolutionModifiers(const string& str,
i += 1;
while (i + 1 < tokens.size() && tokens[i + 1] != "LIMIT" &&
tokens[i + 1] != "OFFSET" && tokens[i + 1] != "TEXTLIMIT" &&
tokens[i + 1] != "GROUP") {
tokens[i + 1] != "GROUP" && tokens[i + 1] != "HAVING") {
query._orderBy.emplace_back(OrderKey(tokens[i + 1]));
++i;
}
Expand All @@ -416,12 +416,20 @@ void SparqlParser::parseSolutionModifiers(const string& str,
i++;
}
}
if (tokens[i] == "HAVING" && i < tokens.size() - 1) {
while (i + 1 < tokens.size() && tokens[i + 1] != "LIMIT" &&
tokens[i + 1] != "OFFSET" && tokens[i + 1] != "TEXTLIMIT" &&
tokens[i + 1] != "GROUP" && tokens[i + 1] != "ORDER") {
addFilter(tokens[i + 1], &query._havingClauses);
i++;
}
}
}
}

// _____________________________________________________________________________
void SparqlParser::addFilter(const string& str,
ParsedQuery::GraphPattern* pattern) {
vector<SparqlFilter>* _filters) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is actually quite a bit clearer than the old version

size_t i = str.find('(');
AD_CHECK(i != string::npos);
size_t j = str.rfind(')');
Expand Down Expand Up @@ -451,7 +459,7 @@ void SparqlParser::addFilter(const string& str,
f._type = SparqlFilter::LANG_MATCHES;
f._lhs = lvar;
f._rhs = rhs.substr(1, rhs.size() - 2);
pattern->_filters.emplace_back(f);
_filters->emplace_back(f);
return;
}
if (pred == "regex") {
Expand All @@ -461,7 +469,7 @@ void SparqlParser::addFilter(const string& str,
f._rhs = rhs.substr(1, rhs.size() - 2);
f._regexIgnoreCase =
(parts.size() == 3 && ad_utility::strip(parts[2], ' ') == "\"i\"");
pattern->_filters.emplace_back(f);
_filters->emplace_back(f);
return;
}
if (pred == "prefix") {
Expand All @@ -482,8 +490,8 @@ void SparqlParser::addFilter(const string& str,
upper += rhs[rhs.size() - 2] + 1;
upper += rhs.back();
f2._rhs = upper;
pattern->_filters.emplace_back(f1);
pattern->_filters.emplace_back(f2);
_filters->emplace_back(f1);
_filters->emplace_back(f2);
return;
}
}
Expand Down Expand Up @@ -545,7 +553,7 @@ void SparqlParser::addFilter(const string& str,
AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
"Filter not supported yet: " + filter);
}
pattern->_filters.emplace_back(f);
_filters->emplace_back(f);
}

// _____________________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion src/parser/SparqlParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class SparqlParser {
static void addPrefix(const string& str, ParsedQuery& query);
static void addWhereTriple(const string& str,
ParsedQuery::GraphPattern* pattern);
static void addFilter(const string& str, ParsedQuery::GraphPattern* pattern);
static void addFilter(const string& str, vector<SparqlFilter>* _filters);

static string stripAndLowercaseKeywordLiteral(const string& lit);
};