Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat optional #43

Merged
merged 23 commits into from
Mar 15, 2018
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
28c6dc6
Added tree structure to ParsedQueries to allow for representation of …
floriankramer Jan 10, 2018
79ff462
added basic OptionalJoin
floriankramer Jan 18, 2018
7db469a
First partially working optional implementation
floriankramer Jan 19, 2018
d10fdb9
Added proper cross product computation to the optional join.
floriankramer Jan 19, 2018
467c2ee
Fixed handling of cases where one table is empty, added handling of v…
floriankramer Jan 26, 2018
461f68b
fixed wrong template arguments for test and reactivated optimized ord…
floriankramer Jan 26, 2018
661c95a
fixed corner case and added more optional join tests
floriankramer Jan 26, 2018
6112fda
added basic support for blank nodes in nt files
floriankramer Jan 28, 2018
8ba257e
fixed several bugs and cleaned up code
floriankramer Jan 31, 2018
8ce01a9
removed debugging code
floriankramer Jan 31, 2018
c7f7d8c
improved size and multiplicity estimation
floriankramer Feb 22, 2018
f184662
Implemented basic usage of the optimizer for optional queries
floriankramer Feb 26, 2018
7e24a9f
Fixed reference style
floriankramer Feb 28, 2018
8cfdc13
added basic cost estimation
floriankramer Feb 28, 2018
932c5ff
Fixed several bugs and changed multiplicity computation
floriankramer Feb 28, 2018
3db0667
Added sentinels to the optional join
floriankramer Mar 1, 2018
25d68c5
Made bools for empty sides template parameters for createOptionalResult
floriankramer Mar 1, 2018
6eda370
Fixed bug ignoring first join column, added more output
floriankramer Mar 2, 2018
4165b78
resolved todos
floriankramer Mar 2, 2018
625d827
Added flag to disable optional optimization and readded comments
floriankramer Mar 7, 2018
39e2425
Added unoptimized optionals option to SparqlEngineMain
floriankramer Mar 7, 2018
17fd126
Code cleanup
floriankramer Mar 14, 2018
b5610f0
fixed typos
floriankramer Mar 15, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ add_subdirectory(third_party/stxxl)
# apply STXXL CXXFLAGS
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STXXL_CXX_FLAGS}")
# add STXXL includes path
include_directories(${STXXL_INCLUDE_DIRS})
include_directories(SYSTEM ${STXXL_INCLUDE_DIRS})

message(STATUS ---)
message(STATUS "CXX_FLAGS are : " ${CMAKE_CXX_FLAGS})
Expand Down
73 changes: 58 additions & 15 deletions src/ServerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,48 @@ using std::cerr;

// Available options.
struct option options[] = {
{"index", required_argument, NULL, 'i'},
{"port", required_argument, NULL, 'p'},
{"text", no_argument, NULL, 't'},
{"on-disk-literals", no_argument, NULL, 'l'},
{"all-permutations", no_argument, NULL, 'a'},
{NULL, 0, NULL, 0}
{"index", required_argument, NULL, 'i'},
{"port", required_argument, NULL, 'p'},
{"text", no_argument, NULL, 't'},
{"on-disk-literals", no_argument, NULL, 'l'},
{"all-permutations", no_argument, NULL, 'a'},
{"unopt-optional", no_argument, NULL, 'u'},
{"help", no_argument, NULL, 'h'},
{NULL, 0, NULL, 0}
};

void printUsage() {
cout << "Usage: ./ServerMain -p <PORT> -i <index> (-t)" << endl;
// Prints the command line help of the server binary to stdout.
//
// execName is the name the program was invoked with (usually argv[0]) and is
// echoed in the "Usage:" line. If it is null, "ServerMain" is printed instead.
// The formatting state of cout is saved on entry and restored before
// returning, so the fill and alignment settings used here do not leak into
// later output.
void printUsage(char *execName) {
  // Streaming a null char pointer is undefined behaviour, so fall back to a
  // fixed name when no executable name is available.
  const char *shownName = execName != nullptr ? execName : "ServerMain";

  // Snapshot the current formatting state of cout.
  std::ios coutState(nullptr);
  coutState.copyfmt(std::cout);
  std::cout << std::setfill(' ') << std::left;

  // Writes one option row: the option names left-aligned in a column of 20
  // characters, a single separating space and then the description.
  const auto printOption = [](const char *names, const char *description) {
    std::cout << " " << std::setw(20) << names << std::setw(1) << " "
              << description << std::endl;
  };

  std::cout << "Usage: " << shownName << " -p <PORT> -i <index> [OPTIONS]"
            << std::endl << std::endl;
  std::cout << "Options" << std::endl;
  printOption("a, all-permutations",
              "Load all six permutations of the index instead of only two.");
  printOption("h, help", "Show this help and exit.");
  printOption("i, index", "The location of the index files.");
  printOption("l, on-disk-literals",
              "Indicates that the literals can be found on disk with the "
              "index.");
  printOption("p, port", "The port on which to run the web interface.");
  printOption("t, text", "Enables the usage of text.");
  printOption("u, unopt-optional",
              "Always place optional joins at the root of the query execution "
              "tree.");

  // Restore the formatting state captured above.
  std::cout.copyfmt(coutState);
}

// Main function.
int main(int argc, char** argv) {
cout << endl << EMPH_ON << "ServerMain, version " << __DATE__
<< " " << __TIME__<< EMPH_OFF << endl << endl;

char* locale = setlocale(LC_CTYPE, "en_US.utf8");
cout << "Set locale LC_CTYPE to: " << locale << endl;

std::locale loc;
ad_utility::ReadableNumberFacet facet(1);
Expand All @@ -57,12 +80,13 @@ int main(int argc, char** argv) {
bool text = false;
bool onDiskLiterals = false;
bool allPermutations = false;
bool optimizeOptionals = true;
int port = -1;

optind = 1;
// Process command line arguments.
while (true) {
int c = getopt_long(argc, argv, "i:p:tla", options, NULL);
int c = getopt_long(argc, argv, "i:p:tlauh", options, NULL);
if (c == -1) break;
switch (c) {
case 'i':
Expand All @@ -80,23 +104,42 @@ int main(int argc, char** argv) {
case 'a':
allPermutations = true;
break;
case 'u':
optimizeOptionals = false;
break;
case 'h':
printUsage(argv[0]);
exit(0);
break;
default:
cout << endl
<< "! ERROR in processing options (getopt returned '" << c
<< "' = 0x" << std::setbase(16) << static_cast<int> (c) << ")"
<< endl << endl;
printUsage(argv[0]);
exit(1);
}
}

if (index.size() == 0 || port == -1) {
printUsage();
if (index.size() == 0) {
cerr << "ERROR: No index specified, but an index is required." << endl;
}
if (port == -1) {
cerr << "ERROR: No port specified, but the port is required." << endl;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

}
printUsage(argv[0]);
exit(1);
}

cout << endl << EMPH_ON << "ServerMain, version " << __DATE__
<< " " << __TIME__<< EMPH_OFF << endl << endl;
cout << "Set locale LC_CTYPE to: " << locale << endl;

try {
Server server(port);
server.initialize(index, text, allPermutations, onDiskLiterals);
server.initialize(index, text, allPermutations, onDiskLiterals,
optimizeOptionals);
server.run();
} catch(const ad_semsearch::Exception& e) {
LOG(ERROR) << e.getFullErrorMessage() << '\n';
Expand Down
92 changes: 71 additions & 21 deletions src/SparqlEngineMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,62 @@ using std::cerr;

// Available options.
struct option options[] = {
{"queryfile", required_argument, NULL, 'q'},
{"interactive", no_argument, NULL, 'I'},
{"index", required_argument, NULL, 'i'},
{"text", no_argument, NULL, 't'},
{"cost-factors", required_argument, NULL, 'c'},
{"on-disk-literals", no_argument, NULL, 'l'},
{"all-permutations", no_argument, NULL, 'a'},
{NULL, 0, NULL, 0}
{"index", required_argument, NULL, 'i'},
{"text", no_argument, NULL, 't'},
{"queryfile", required_argument, NULL, 'q'},
{"interactive", no_argument, NULL, 'I'},
{"cost-factors", required_argument, NULL, 'c'},
{"on-disk-literals", no_argument, NULL, 'l'},
{"all-permutations", no_argument, NULL, 'a'},
{"unopt-optional", no_argument, NULL, 'u'},
{"help", no_argument, NULL, 'h'},
{NULL, 0, NULL, 0}
};

void processQuery(QueryExecutionContext& qec, const string& query);
void processQuery(QueryExecutionContext& qec, const string& query,
bool optimizeOptionals);
void printUsage(char *execName);

// Prints the command line help of the SPARQL engine binary to stdout.
//
// execName is the name the program was invoked with (usually argv[0]) and is
// echoed in the "Usage:" line. If it is null, "SparqlEngineMain" is printed
// instead. The formatting state of cout is saved on entry and restored before
// returning, so the fill and alignment settings used here do not leak into
// later output.
void printUsage(char *execName) {
  // Streaming a null char pointer is undefined behaviour, so fall back to a
  // fixed name when no executable name is available.
  const char *shownName = execName != nullptr ? execName : "SparqlEngineMain";

  // Snapshot the current formatting state of cout.
  std::ios coutState(nullptr);
  coutState.copyfmt(std::cout);
  std::cout << std::setfill(' ') << std::left;

  // Writes one option row: the option names left-aligned in a column of 20
  // characters, a single separating space and then the description.
  const auto printOption = [](const char *names, const char *description) {
    std::cout << " " << std::setw(20) << names << std::setw(1) << " "
              << description << std::endl;
  };

  std::cout << "Usage: " << shownName << " -i <index> [OPTIONS]"
            << std::endl << std::endl;
  std::cout << "Options" << std::endl;
  printOption("a, all-permutations",
              "Load all six permutations of the index instead of only two.");
  printOption("c, cost-factors", "Path to a file containing cost factors.");
  printOption("h, help", "Show this help and exit.");
  printOption("i, index", "The location of the index files.");
  printOption("I, interactive", "Use stdin to read the queries.");
  printOption("l, on-disk-literals",
              "Indicates that the literals can be found on disk with the "
              "index.");
  printOption("q, queryfile", "Path to a file containing one query per line.");
  printOption("t, text", "Enables the usage of text.");
  printOption("u, unopt-optional", "Always execute optional joins last.");

  // Restore the formatting state captured above.
  std::cout.copyfmt(coutState);
}


// Main function.
int main(int argc, char** argv) {
cout.sync_with_stdio(false);
std::cout << std::endl << EMPH_ON
<< "SparqlEngineMain, version " << __DATE__
<< " " << __TIME__ << EMPH_OFF << std::endl << std::endl;

char* locale = setlocale(LC_CTYPE, "en_US.utf8");
cout << "Set locale LC_CTYPE to: " << locale << endl;


//std::locale loc;
//ad_utility::ReadableNumberFacet facet(1);
Expand All @@ -60,11 +94,12 @@ int main(int argc, char** argv) {
bool interactive = false;
bool onDiskLiterals = false;
bool allPermutations = false;
bool optimizeOptionals = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hannah said this currently slows down queries? I assume this only happens if there is an OPTIONAL? Is this the right default at the moment?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The slowdown happens because the query optimizer effectively has to optimize a query with one additional triple, which represents the optional part. For queries with many triples (around 9 or more) the slowdown starts to become noticeable. For queries with fewer triples the change in speed is minimal, and using the optimizer can of course lead to significantly faster execution times. If the default use case includes queries which, on average, have many triples, changing the setting to false by default would be better.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, this case is actually more non-trivial than I thought.


optind = 1;
// Process command line arguments.
while (true) {
int c = getopt_long(argc, argv, "q:Ii:tc:la", options, NULL);
int c = getopt_long(argc, argv, "q:Ii:tc:lahu", options, NULL);
if (c == -1) break;
switch (c) {
case 'q':
Expand All @@ -88,20 +123,34 @@ int main(int argc, char** argv) {
case 'a':
allPermutations = true;
break;
case 'h':
printUsage(argv[0]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for cleaning this up!

exit(0);
break;
case 'u':
optimizeOptionals = false;
break;
default:
cout << endl
<< "! ERROR in processing options (getopt returned '" << c
<< "' = 0x" << std::setbase(16) << c << ")"
<< endl << endl;
printUsage(argv[0]);
exit(1);
}
}

if (indexName.size() == 0) {
cout << "Missing required argument --index (-i)..." << endl;
cerr << "Missing required argument --index (-i)..." << endl;
printUsage(argv[0]);
exit(1);
}

std::cout << std::endl << EMPH_ON
<< "SparqlEngineMain, version " << __DATE__
<< " " << __TIME__ << EMPH_OFF << std::endl << std::endl;
cout << "Set locale LC_CTYPE to: " << locale << endl;

try {
Engine engine;
Index index;
Expand Down Expand Up @@ -134,13 +183,13 @@ int main(int argc, char** argv) {
os << line << '\n';
}
if (os.str() == "") { return 0; }
processQuery(qec, os.str());
processQuery(qec, os.str(), optimizeOptionals);
}
} else {
std::ifstream qf(queryfile);
string line;
while (std::getline(qf, line)) {
processQuery(qec, line);
processQuery(qec, line, optimizeOptionals);
}
}
} catch (const std::exception& e) {
Expand All @@ -153,13 +202,14 @@ int main(int argc, char** argv) {
return 0;
}

void processQuery(QueryExecutionContext& qec, const string& query) {
void processQuery(QueryExecutionContext& qec, const string& query,
bool optimizeOptionals) {
ad_utility::Timer t;
t.start();
SparqlParser sp;
ParsedQuery pq = sp.parse(query);
pq.expandPrefixes();
QueryPlanner qp(&qec);
QueryPlanner qp(&qec, optimizeOptionals);
ad_utility::Timer timer;
timer.start();
auto qet = qp.createExecutionTree(pq);
Expand Down
3 changes: 2 additions & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_library(engine
ScanningJoin.cpp ScanningJoin.h
QueryPlanningCostFactors.cpp QueryPlanningCostFactors.h
TwoColumnJoin.cpp TwoColumnJoin.h
OptionalJoin.cpp OptionalJoin.h
)

target_link_libraries(engine index parser)
target_link_libraries(engine index parser)
2 changes: 1 addition & 1 deletion src/engine/Engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,4 +245,4 @@ template void Engine::join(
size_t joinColumn1,
const vector<array<Id, 2>>& b,
size_t joinColumn2,
vector<array<Id, 5>>* result);
vector<array<Id, 5>>* result);