Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat optional #43

Merged
merged 23 commits into from
Mar 15, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
28c6dc6
Added tree structure to ParsedQueries to allow for representation of …
floriankramer Jan 10, 2018
79ff462
added basic OptionalJoin
floriankramer Jan 18, 2018
7db469a
First partially working optional implementation
floriankramer Jan 19, 2018
d10fdb9
Added proper cross product computation to the optional join.
floriankramer Jan 19, 2018
467c2ee
Fixed handling of cases where one table is empty, added handling of v…
floriankramer Jan 26, 2018
461f68b
fixed wrong template arguments for test and reactivated optimized ord…
floriankramer Jan 26, 2018
661c95a
fixed corner case and added more optional join tests
floriankramer Jan 26, 2018
6112fda
added basic support for blank nodes in nt files
floriankramer Jan 28, 2018
8ba257e
fixed several bugs and cleaned up code
floriankramer Jan 31, 2018
8ce01a9
removed debugging code
floriankramer Jan 31, 2018
c7f7d8c
improved size and multiplicity estimation
floriankramer Feb 22, 2018
f184662
Implemented basic usage of the optimizer for optional queries
floriankramer Feb 26, 2018
7e24a9f
Fixed reference style
floriankramer Feb 28, 2018
8cfdc13
added basic cost estimation
floriankramer Feb 28, 2018
932c5ff
Fixed several bugs and changed multiplicity computation
floriankramer Feb 28, 2018
3db0667
Added sentinels to the optional join
floriankramer Mar 1, 2018
25d68c5
Made bools for empty sides template parameters for createOptionalResult
floriankramer Mar 1, 2018
6eda370
Fixed bug ignoring first join column, added more output
floriankramer Mar 2, 2018
4165b78
resolved todos
floriankramer Mar 2, 2018
625d827
Added flag to disable optional optimization and readded comments
floriankramer Mar 7, 2018
39e2425
Added unoptimized optionals option to SparqlEngineMain
floriankramer Mar 7, 2018
17fd126
Code cleanup
floriankramer Mar 14, 2018
b5610f0
fixed typos
floriankramer Mar 15, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/SparqlEngineMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void printUsage(char *execName) {
cout << " " << std::setw(20) << "t, text" << std::setw(1) << " "
<< "Enables the usage of text." << endl;
cout << " " << std::setw(20) << "u, unopt-optional" << std::setw(1) << " "
<< "Always place optional joins at the root of the query execution tree."
<< "Always execute optional joins last."
<< endl;
cout.copyfmt(coutState);
}
Expand Down
113 changes: 58 additions & 55 deletions src/engine/Engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,25 +389,25 @@ class Engine {
static void createOptionalResult(const typename A::value_type* a,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do a and b have to be pointers, i.e., can they be null? Otherwise, references may be nicer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They can be null.

const typename B::value_type* b,
size_t sizeA,
int jcls_a, int jcls_b,
const std::vector<size_t>& jclAToB,
int joinColumBitmap_a, int joinColumBitmap_b,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's spelled "column", so use either "...Col" or "...Column".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

const std::vector<size_t>& joinColumAToB,
unsigned int resultSize,
R& res) {
assert(!(aEmpty && bEmpty));
if (aEmpty) {
// Fill the columns of a with ID_NO_VALUE and the rest with b.
size_t i = 0;
for (size_t col = 0; col < sizeA; col++) {
if ((jcls_a & (1 << col)) == 0) {
if ((joinColumBitmap_a & (1 << col)) == 0) {
res[col] = ID_NO_VALUE;
} else {
// if this is one of the join columns use the value in b
res[col] = (*b)[jclAToB[col]];
res[col] = (*b)[joinColumAToB[col]];
}
i++;
}
for (size_t col = 0; col < b->size(); col++) {
if ((jcls_b & (1 << col)) == 0) {
if ((joinColumBitmap_b & (1 << col)) == 0) {
// only write the value if it is not one of the join columns in b
res[i] = (*b)[col];
i++;
Expand All @@ -429,7 +429,7 @@ class Engine {
i++;
}
for (size_t col = 0; col < b->size(); col++) {
if ((jcls_b & (1 << col)) == 0) {
if ((joinColumBitmap_b & (1 << col)) == 0) {
res[i] = (*b)[col];
i++;
}
Expand All @@ -444,13 +444,13 @@ class Engine {
* @param b
* @param aOptional
* @param bOptional
* @param jcls
* @param joinColumns
* @param result
*/
template<typename A, typename B, typename R, int K>
static void optionalJoin(const A& a, const B& b,
bool aOptional, bool bOptional,
const vector<array<size_t, 2>>& jcls,
const vector<array<size_t, 2>>& joinColumns,
vector<R> *result,
unsigned int resultSize) {
// check for trivial cases
Expand All @@ -460,39 +460,39 @@ class Engine {
return;
}

int jcls_a = 0;
int jcls_b = 0;
for (const array<size_t, 2>& jc : jcls) {
jcls_a |= (1 << jc[0]);
jcls_b |= (1 << jc[1]);
int joinColumBitmap_a = 0;
int joinColumBitmap_b = 0;
for (const array<size_t, 2>& jc : joinColumns) {
joinColumBitmap_a |= (1 << jc[0]);
joinColumBitmap_b |= (1 << jc[1]);
}

// When a is optional this is used to quickly determine
// in which column of b the value of a joined column can be found.
std::vector<size_t> jclAToB;
std::vector<size_t> joinColumAToB;
if (aOptional) {
uint32_t maxJoinColA = 0;
for (const array<size_t, 2>& jc : jcls) {
for (const array<size_t, 2>& jc : joinColumns) {
if (jc[0] > maxJoinColA) {
maxJoinColA = jc[0];
}
}
jclAToB.resize(maxJoinColA + 1);
for (const array<size_t, 2>& jc : jcls) {
jclAToB[jc[0]] = jc[1];
joinColumAToB.resize(maxJoinColA + 1);
for (const array<size_t, 2>& jc : joinColumns) {
joinColumAToB[jc[0]] = jc[1];
}
}

// Deal with one of the two tables being both empty and optional
if (a.size() == 0 && aOptional) {
size_t sizeA = resultSize - b[0].size() + jcls.size();
size_t sizeA = resultSize - b[0].size() + joinColumns.size();
for (size_t ib = 0; ib < b.size(); ib++) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, true, false>(
static_cast<typename A::value_type*>(0), &b[ib],
nullptr, &b[ib],
sizeA,
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
}
return;
Expand All @@ -501,16 +501,17 @@ class Engine {
for (size_t ia = 0; ia < a.size(); ia++) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, false, true>(
&a[ia], static_cast<typename B::value_type*>(0),
&a[ia], nullptr,
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
}
return;
}

// add sentinels
// Cast away constness so we can add sentinels that will be removed
// in the end and create and add those sentinels.
A& l1 = const_cast<A&>(a);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment should explain why it's OK to cast away the const here.

B& l2 = const_cast<B&>(b);
Id sentVal = std::numeric_limits<Id>::max() - 1;
Expand All @@ -529,57 +530,59 @@ class Engine {
size_t ia = 0, ib = 0;
while (ia < a.size() - 1 && ib < b.size() - 1) {
// Join columns 0 are the primary sort columns
while (a[ia][jcls[0][0]] < b[ib][jcls[0][1]]) {
while (a[ia][joinColumns[0][0]] < b[ib][joinColumns[0][1]]) {
if (bOptional) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, false, true>(
&a[ia], static_cast<typename B::value_type*>(0),
&a[ia], nullptr,
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
}
ia++;
}
while (b[ib][jcls[0][1]] < a[ia][jcls[0][0]]) {
while (b[ib][joinColumns[0][1]] < a[ia][joinColumns[0][0]]) {
if (aOptional) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, true, false>(
static_cast<typename A::value_type*>(0), &b[ib],
nullptr, &b[ib],
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
}
ib++;
}

// check if the rest of the join columns also match
matched = true;
for (size_t jclIndex = 0; jclIndex < jcls.size(); jclIndex++) {
const array<size_t, 2>& jc = jcls[jclIndex];
if (a[ia][jc[0]] < b[ib][jc[1]]) {
for (size_t joinColIndex = 0;
joinColIndex < joinColumns.size();
joinColIndex++) {
const array<size_t, 2>& joinColumn = joinColumns[joinColIndex];
if (a[ia][joinColumn[0]] < b[ib][joinColumn[1]]) {
if (bOptional) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, false, true>(
&a[ia], static_cast<typename B::value_type*>(0),
&a[ia], nullptr,
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
}
ia++;
matched = false;
break;
}
if (b[ib][jc[1]] < a[ia][jc[0]]) {
if (b[ib][joinColumn[1]] < a[ia][joinColumn[0]]) {
if (aOptional) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, true, false>(
static_cast<typename A::value_type*>(0), &b[ib],
nullptr, &b[ib],
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
}
ib++;
Expand All @@ -599,13 +602,13 @@ class Engine {
createOptionalResult<A, B, R, false, false>(
&a[ia], &b[ib],
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
ib++;

// do the rows still match?
for (const array<size_t, 2>& jc : jcls) {
for (const array<size_t, 2>& jc : joinColumns) {
if (ib == b.size() || a[ia][jc[0]] != b[ib][jc[1]]) {
matched = false;
break;
Expand All @@ -615,7 +618,7 @@ class Engine {
ia++;
// Check if the next row in a also matches the initial row in b
matched = true;
for (const array<size_t, 2>& jc : jcls) {
for (const array<size_t, 2>& jc : joinColumns) {
if (ia == a.size() || a[ia][jc[0]] != b[initIb][jc[1]]) {
matched = false;
break;
Expand All @@ -631,7 +634,7 @@ class Engine {
// remove the sentinels
l1.pop_back();
l2.pop_back();
if (result->back()[jcls[0][0]] == sentVal) {
if (result->back()[joinColumns[0][0]] == sentVal) {
result->pop_back();
}

Expand All @@ -641,10 +644,10 @@ class Engine {
while (ib < b.size()) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, true, false>(
static_cast<typename A::value_type*>(0), &b[ib],
nullptr, &b[ib],
a[0].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
++ib;
}
Expand All @@ -653,10 +656,10 @@ class Engine {
while (ia < a.size()) {
R res = newOptionalResult<R, K>()(resultSize);
createOptionalResult<A, B, R, false, true>(
&a[ia], static_cast<typename B::value_type*>(0),
&a[ia], nullptr,
a[ia].size(),
jcls_a, jcls_b,
jclAToB, resultSize, res);
joinColumBitmap_a, joinColumBitmap_b,
joinColumAToB, resultSize, res);
result->push_back(res);
++ia;
}
Expand Down
1 change: 1 addition & 0 deletions src/engine/OptionalJoin.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright 2016, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is entirely your work, right? If so, add your name.

// Florian Kramer (florian.kramer@netpun.uni-freiburg.de)

#include "./OptionalJoin.h"

Expand Down
1 change: 1 addition & 0 deletions src/engine/OptionalJoin.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright 2016, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also your code.

// Florian Kramer (florian.kramer@netpun.uni-freiburg.de)
#pragma once

#include <list>
Expand Down
43 changes: 30 additions & 13 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ string ParsedQuery::asString() const {
os << "\n}";

// WHERE
os << "\nWHERE: ";
_rootGraphPattern.toString(os);
os << "\nWHERE: \n";
_rootGraphPattern.toString(os, 1);

os << "\nLIMIT: " << (_limit.size() > 0 ? _limit : "no limit specified");
os << "\nTEXTLIMIT: "
Expand Down Expand Up @@ -209,21 +209,38 @@ ParsedQuery::GraphPattern::operator=(const ParsedQuery::GraphPattern& other) {
}

// _____________________________________________________________________________
void ParsedQuery::GraphPattern::toString(std::ostringstream& os) const {
void ParsedQuery::GraphPattern::toString(std::ostringstream& os,
int indentation) const {
for (int j = 1; j < indentation; ++j) os << " ";
if (_optional) {
os << "\nOPTIONAL ";
os << "OPTIONAL ";
}
os << "{\n";
for (size_t i = 0; i < _whereClauseTriples.size(); ++i) {
os << "\n\t" << _whereClauseTriples[i].asString();
if (i + 1 < _whereClauseTriples.size()) { os << ','; }
os << "{";
for (size_t i = 0; i + 1 < _whereClauseTriples.size(); ++i) {
os << "\n";
for (int j = 0; j < indentation; ++j) os << " ";
os << _whereClauseTriples[i].asString() << ',';
}
for (size_t i = 0; i < _filters.size(); ++i) {
os << "\n\t" << _filters[i].asString();
if (i + 1 < _filters.size()) { os << ','; }
if (_whereClauseTriples.size() > 0) {
os << "\n";
for (int j = 0; j < indentation; ++j) os << " ";
os << _whereClauseTriples.back().asString();
}
for (size_t i = 0; i + 1 < _filters.size(); ++i) {
os << "\n";
for (int j = 0; j < indentation; ++j) os << " ";
os << _filters[i].asString() << ',';
}
if (_filters.size() > 0) {
os << "\n";
for (int j = 0; j < indentation; ++j) os << " ";
os << _filters.back().asString();
}
for (GraphPattern *child : _children) {
child->toString(os);
os << "\n";
child->toString(os, indentation + 1);
}
os << "\n}";
os << "\n";
for (int j = 1; j < indentation; ++j) os << " ";
os <<"}";
}
2 changes: 1 addition & 1 deletion src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class ParsedQuery {
GraphPattern(const GraphPattern& other);
GraphPattern& operator=(const GraphPattern& other);
virtual ~GraphPattern();
void toString(std::ostringstream& os) const;
void toString(std::ostringstream& os, int indentation = 0) const;

vector<SparqlTriple> _whereClauseTriples;
vector<SparqlFilter> _filters;
Expand Down