Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat subquery #155

Merged
merged 5 commits into from
Dec 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ e2e_data/*
.idea

# vim files
.swp
*.swp
39 changes: 39 additions & 0 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -477,3 +477,42 @@ queries:
- selected: ["?object", "?count"]
- contains_row: ['<Inventor>', '1616']
- contains_row: ['<Astrologer>', '43']
- query : simple-subquery
type: no-text
sparql: |
SELECT ?a ?h WHERE {
?a <is-a> <Scientist> .
{
SELECT ?a WHERE {
?a <Height> ?h .
}
ORDER BY DESC(?h)
}
}
checks:
- num_rows: 134
- num_cols: 2
- selected: ["?a", "?h"]
- contains_row: ['<Daryl_Hannah>', 1.78]
- contains_row: ['<Marissa_Mayer>', 1.73]
- query : subquery-profession-avg-height
type: no-text
sparql: |
SELECT ?a ?o ?h ?avg WHERE {
?a <Profession> ?o .
?a <Height> ?h .
{
SELECT ?o (AVG(?h) as ?avg) WHERE {
?a <Profession> ?o .
?a <Height> ?h .
}
GROUP BY ?o
}
}
checks:
- num_rows: 994
- num_cols: 4
- selected: ["?a", "?o", "?h", "?avg"]
- contains_row: ['<Steve_Backshall>', '<Actor>', 1.8, 1.76627]
- contains_row: ['<Carl_Sagan>', '<Astrobiologist>', 1.8, 1.8]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess there is nothing in scientists which would allow to test this with ql:has-predicate? I'm currently looking into creating a smallish Wikidata subset which should allow us to create much better test queries.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used bash (for p in $(cat ./scientists.nt | cut -d' ' -f 2 | sort | uniq); do cat ./scientists.nt | cut -d' ' -f 1 | uniq | grep $p; done) to check for predicates that occur as subjects int the scientists nt file, but didn't find any.

6 changes: 3 additions & 3 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ ad_utility::HashMap<string, size_t> GroupBy::getVariableColumns() const {
}

float GroupBy::getMultiplicity(size_t col) {
// group by should currently not be used in the optimizer
AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
"GroupBy does not yet compute multiplicities.");
// Group by should currently not be used in the optimizer, unless
// it is part of a subquery. In that case multiplicities may only be
// taken from the actual result
(void)col;
return 1;
}
Expand Down
2 changes: 1 addition & 1 deletion src/engine/OrderBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ string OrderBy::asString(size_t indent) const {
// _____________________________________________________________________________
vector<size_t> OrderBy::resultSortedOn() const {
std::vector<size_t> sortedOn;
sortedOn.resize(_sortIndices.size());
sortedOn.reserve(_sortIndices.size());
for (const pair<size_t, bool>& p : _sortIndices) {
if (!p.second) {
// Only ascending columns count as sorted.
Expand Down
20 changes: 15 additions & 5 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq) const {
childrenToAdd.pop_back();
patternsToProcess.push_back(pattern);
for (const ParsedQuery::GraphPatternOperation* op : pattern->_children) {
childrenToAdd.insert(childrenToAdd.end(), op->_childGraphPatterns.begin(),
op->_childGraphPatterns.end());
if (op->_type != ParsedQuery::GraphPatternOperation::Type::SUBQUERY) {
childrenToAdd.insert(childrenToAdd.end(),
op->_childGraphPatterns.begin(),
op->_childGraphPatterns.end());
}
}
}

Expand Down Expand Up @@ -87,15 +90,16 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq) const {
// later on (so the same as a simple triple).
childPlans.clear();
std::vector<SubtreePlan> unionPlans;
for (const ParsedQuery::GraphPatternOperation* child : pattern->_children) {
std::vector<SubtreePlan> subqueryPlans;
for (ParsedQuery::GraphPatternOperation* child : pattern->_children) {
niklas88 marked this conversation as resolved.
Show resolved Hide resolved
switch (child->_type) {
case ParsedQuery::GraphPatternOperation::Type::OPTIONAL:
for (const ParsedQuery::GraphPattern* p :
child->_childGraphPatterns) {
childPlans.push_back(&patternPlans[p->_id]);
}
break;
case ParsedQuery::GraphPatternOperation::Type::UNION:
case ParsedQuery::GraphPatternOperation::Type::UNION: {
// the efficiency of the union operation is not dependent on the
// sorting of the inputs and its position is fixed so it does not
// need to be part of the optimization of the child.
Expand All @@ -112,7 +116,13 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq) const {
static_cast<Union*>(unionOp.get())->getVariableColumns());
tree.setOperation(QueryExecutionTree::UNION, unionOp);
childPlans.push_back(&unionPlans.back());
break;
} break;
case ParsedQuery::GraphPatternOperation::Type::SUBQUERY: {
subqueryPlans.emplace_back(_qec);
QueryExecutionTree tree = createExecutionTree(*child->_subquery);
*subqueryPlans.back()._qet.get() = tree;
childPlans.push_back(&subqueryPlans.back());
} break;
}
}

Expand Down
94 changes: 76 additions & 18 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,15 @@ void ParsedQuery::expandPrefixes() {
GraphPattern* pattern = graphPatterns.back();
graphPatterns.pop_back();
for (GraphPatternOperation* p : pattern->_children) {
graphPatterns.insert(graphPatterns.end(), p->_childGraphPatterns.begin(),
p->_childGraphPatterns.end());
if (p->_type == GraphPatternOperation::Type::SUBQUERY) {
// Pass the prefixes to the subquery and expand them.
p->_subquery->_prefixes = _prefixes;
p->_subquery->expandPrefixes();
} else {
graphPatterns.insert(graphPatterns.end(),
p->_childGraphPatterns.begin(),
p->_childGraphPatterns.end());
}
}

for (auto& trip : pattern->_whereClauseTriples) {
Expand Down Expand Up @@ -391,7 +398,7 @@ void ParsedQuery::GraphPattern::toString(std::ostringstream& os,
ParsedQuery::GraphPatternOperation::GraphPatternOperation(
ParsedQuery::GraphPatternOperation::Type type,
std::initializer_list<ParsedQuery::GraphPattern*> children)
: _type(type) {
: _type(type), _childGraphPatterns() {
switch (_type) {
case Type::OPTIONAL:
if (children.size() != 1) {
Expand All @@ -405,48 +412,92 @@ ParsedQuery::GraphPatternOperation::GraphPatternOperation(
"Union expects two sub graph patterns.");
}
break;
default:
AD_THROW(ad_semsearch::Exception::CHECK_FAILED,
"This constructor should only be used for UNION and OPTIONAL "
"type operations.");
}
_childGraphPatterns.insert(_childGraphPatterns.end(), children.begin(),
children.end());
}

// _____________________________________________________________________________
ParsedQuery::GraphPatternOperation::GraphPatternOperation(
ParsedQuery::GraphPatternOperation::Type type)
: _type(type) {
switch (_type) {
case Type::SUBQUERY:
_subquery = nullptr;
break;
default:
AD_THROW(ad_semsearch::Exception::CHECK_FAILED,
"This constructor should only be used for SUBQUERY"
"type operations.");
}
}

// _____________________________________________________________________________
ParsedQuery::GraphPatternOperation::GraphPatternOperation(
GraphPatternOperation&& other)
: _type(other._type),
_childGraphPatterns(std::move(other._childGraphPatterns)) {
other._childGraphPatterns.clear();
: _type(other._type) {
if (_type == Type::SUBQUERY) {
_subquery = other._subquery;
other._subquery = nullptr;
} else {
new (&_childGraphPatterns) std::vector<GraphPattern*>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you really need placement new to initialize an object in a union can't one just call the constructor explicitly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is, that only the vector or the subquery may be initialized due to the union. As far as I know, any form of calling the constructor explicitly includes then using the assignment operator, which would then operate on uninitialized memory (as _childGraphPattern does not get default constructed in the union). I haven't done anything like this before though, so I'm happy for feedback on how to handle this in a better way.

Copy link
Member

@niklas88 niklas88 Dec 5, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the second example here the std::string in the union is constructed using operator=() wand the std::vector<int> is constructed with placement new just as in your code. So maybe both works and operator=() doesn't care if the memory is uninitialized but maybe that's only true for std::string? Looking at that code we may also need to add explicit destructor calls for the std::vector<GraphPattern*> though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like S s = "str"; only works because it also constructs. The following fails:

#include <iostream>
#include <string>
#include <vector>
 
union S
{
    std::string str;
    std::vector<int> vec;
};

int main()
{
    S s;
    s.str = {"Hello, world"};
    // at this point, reading from s.vec is undefined behavior
    std::cout << "s.str = " << s.str << '\n';
    s.str.~basic_string();
    new (&s.vec) std::vector<int>;
    // now, s.vec is the active member of the union
    s.vec.push_back(10);
    std::cout << s.vec.size() << '\n';
    s.vec.~vector();
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the missing destructor calls for the vector

_childGraphPatterns = std::move(other._childGraphPatterns);
other._childGraphPatterns.clear();
}
}

// _____________________________________________________________________________
ParsedQuery::GraphPatternOperation::GraphPatternOperation(
const GraphPatternOperation& other)
: _type(other._type) {
_childGraphPatterns.reserve(other._childGraphPatterns.size());
for (const GraphPattern* child : other._childGraphPatterns) {
_childGraphPatterns.push_back(new GraphPattern(*child));
if (_type == Type::SUBQUERY) {
_subquery = new ParsedQuery(*other._subquery);
} else {
new (&_childGraphPatterns) std::vector<GraphPattern*>();
_childGraphPatterns.reserve(other._childGraphPatterns.size());
for (const GraphPattern* child : other._childGraphPatterns) {
_childGraphPatterns.push_back(new GraphPattern(*child));
}
}
}

// _____________________________________________________________________________
ParsedQuery::GraphPatternOperation& ParsedQuery::GraphPatternOperation::
operator=(const ParsedQuery::GraphPatternOperation& other) {
_type = other._type;
for (GraphPattern* p : _childGraphPatterns) {
delete p;
if (_type == Type::SUBQUERY) {
delete _subquery;
} else {
for (GraphPattern* p : _childGraphPatterns) {
delete p;
}
_childGraphPatterns.~vector<GraphPattern*>();
}
_childGraphPatterns.clear();
_childGraphPatterns.reserve(other._childGraphPatterns.size());
for (const GraphPattern* p : other._childGraphPatterns) {
_childGraphPatterns.push_back(new GraphPattern(*p));
_type = other._type;
if (_type == Type::SUBQUERY) {
_subquery = new ParsedQuery(*other._subquery);
} else {
new (&_childGraphPatterns) std::vector<GraphPattern*>();
_childGraphPatterns.reserve(other._childGraphPatterns.size());
for (const GraphPattern* p : other._childGraphPatterns) {
_childGraphPatterns.push_back(new GraphPattern(*p));
}
}
return *this;
}

// _____________________________________________________________________________
ParsedQuery::GraphPatternOperation::~GraphPatternOperation() {
for (GraphPattern* child : _childGraphPatterns) {
delete child;
if (_type == Type::SUBQUERY) {
delete _subquery;
niklas88 marked this conversation as resolved.
Show resolved Hide resolved
} else {
for (GraphPattern* child : _childGraphPatterns) {
delete child;
}
_childGraphPatterns.~vector<GraphPattern*>();
}
}

Expand All @@ -464,5 +515,12 @@ void ParsedQuery::GraphPatternOperation::toString(std::ostringstream& os,
os << " UNION ";
_childGraphPatterns[1]->toString(os, indentation);
break;
case Type::SUBQUERY:
if (_subquery != nullptr) {
os << _subquery->asString();
} else {
os << "Missing Subquery\n";
}
break;
}
}
8 changes: 6 additions & 2 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,10 @@ class ParsedQuery {

class GraphPatternOperation {
public:
enum class Type { OPTIONAL, UNION };
enum class Type { OPTIONAL, UNION, SUBQUERY };
GraphPatternOperation(Type type,
std::initializer_list<GraphPattern*> children);
GraphPatternOperation(Type type);

// Move and copyconstructors to avoid double deletes on the trees children
GraphPatternOperation(GraphPatternOperation&& other);
Expand All @@ -152,7 +153,10 @@ class ParsedQuery {
void toString(std::ostringstream& os, int indentation = 0) const;

Type _type;
std::vector<GraphPattern*> _childGraphPatterns;
union {
std::vector<GraphPattern*> _childGraphPatterns;
ParsedQuery* _subquery;
};
};

// Groups triplets and filters. Represents a node in a tree (as graph patterns
Expand Down
52 changes: 52 additions & 0 deletions src/parser/SparqlParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)

#include "./SparqlParser.h"

#include <unordered_set>

#include "../global/Constants.h"
#include "../util/Conversions.h"
#include "../util/Exception.h"
Expand Down Expand Up @@ -207,6 +210,54 @@ void SparqlParser::parseWhere(const string& str, ParsedQuery& query,
continue;
}
} else if (inner[k] == '{') {
// look for a subquery
size_t h = k + 1;
while (std::isspace(inner[h])) {
h++;
}
if (ad_utility::getLowercase(inner.substr(h, 6)) == "select") {
size_t selectPos = h;
size_t endBracket = ad_utility::findClosingBracket(inner, k, '{', '}');
if (endBracket == size_t(-1)) {
throw ParseException(
"SELECT keyword found at " + std::to_string(selectPos) + " (" +
inner.substr(selectPos, 15) +
") without a closing bracket for the outer brackets.");
}
// look for a subquery
size_t openingBracket = inner.find('{', selectPos);
if (openingBracket == std::string::npos) {
throw ParseException("SELECT keyword found at " +
std::to_string(selectPos) + " (" +
inner.substr(selectPos, 15) +
") without an accompanying opening bracket.");
}
size_t closing_bracket =
ad_utility::findClosingBracket(inner, openingBracket, '{', '}');
if (closing_bracket == size_t(-1)) {
throw ParseException("The subquery at " + std::to_string(selectPos) +
"(" + inner.substr(selectPos, 15) +
") is missing a closing bracket.");
}
std::string subquery_string =
inner.substr(selectPos, endBracket - selectPos);
LOG(DEBUG) << "Found subquery: " << subquery_string << std::endl;

// create the subquery operation
ParsedQuery::GraphPatternOperation* u =
new ParsedQuery::GraphPatternOperation(
ParsedQuery::GraphPatternOperation::Type::SUBQUERY);
u->_subquery = new ParsedQuery(parse(subquery_string));
// Remove all manual ordering from the subquery as it would be changed
// by the parent query.
u->_subquery->_orderBy.clear();
currentPattern->_children.push_back(u);

// continue parsing after the union statement
start = endBracket + 1;
continue;
}

// look for a UNION
// find the opening and closing bracket of the left side
size_t leftOpeningBracket = k;
Expand Down Expand Up @@ -281,6 +332,7 @@ void SparqlParser::parseWhere(const string& str, ParsedQuery& query,
start = (posOfDelim == string::npos ? end + 1 : posOfDelim + 1);
continue;
}
} else if (inner[k] == 'S' || inner[k] == 's') {
}
while (k < inner.size()) {
if (!insideUri && !insideLiteral && !insideNsThing) {
Expand Down