Skip to content

Commit

Permalink
Completely Refactored the scan methods and permutations
Browse files Browse the repository at this point in the history
- The templated Permutation class now owns the permutation's metadata and file
- The scan method is now templated on the permutation and takes it as an argument
- This removes a lot of duplicated code in the Index class.
- Idea for further improvements: the scan methods could also live inside the Permutation class,
  which would further reduce the complexity per file
  • Loading branch information
joka921 committed Mar 28, 2019
1 parent 3ed791d commit 863e7aa
Show file tree
Hide file tree
Showing 16 changed files with 437 additions and 832 deletions.
8 changes: 5 additions & 3 deletions e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ PROJECT_DIR=$(readlink -f -- "$(dirname ${BASH_SOURCE[0]})/..")
# Change to the project directory so we can use simple relative paths
echo "Changing to project directory: $PROJECT_DIR"
pushd $PROJECT_DIR
BINARY_DIR=$(readlink -f -- ./build)
BINARY_DIR=$(readlink -f -- ./cmake-build-debug/)

if [ ! -e $BINARY_DIR ]; then
BINARY_DIR=$(readlink -f -- .)
fi
echo "Binary dir is $BINARY_DIR"
function bail {
echo "$*"
exit 1
Expand Down Expand Up @@ -57,7 +59,7 @@ if [ "$1" != "no-index" ]; then
rm -f "$INDEX.*"
pushd "$BINARY_DIR"
echo "Building index $INDEX"
./IndexBuilderMain -a -l -i "$INDEX" \
./IndexBuilderMain -l -i "$INDEX" \
-n "$INPUT.nt" \
-w "$INPUT.wordsfile.tsv" \
-d "$INPUT.docsfile.tsv" || bail "Building Index failed"
Expand All @@ -68,7 +70,7 @@ fi
# then we can't easily get the SERVER_PID out of that subshell
pushd "$BINARY_DIR"
echo "Launching server from path $(pwd)"
./ServerMain -i "$INDEX" -p 9099 -t -a &> server_log.txt &
./ServerMain -i "$INDEX" -p 9099 -t &> server_log.txt &
SERVER_PID=$!
popd

Expand Down
12 changes: 2 additions & 10 deletions src/ServerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ using std::vector;
#define EMPH_OFF "\033[22m"

// Available options.
struct option options[] = {{"all-permutations", no_argument, NULL, 'a'},
{"help", no_argument, NULL, 'h'},
struct option options[] = {{"help", no_argument, NULL, 'h'},
{"index", required_argument, NULL, 'i'},
{"worker-threads", required_argument, NULL, 'j'},
{"on-disk-literals", no_argument, NULL, 'l'},
Expand All @@ -42,9 +41,6 @@ void printUsage(char* execName) {
cout << "Usage: " << execName << " -p <PORT> -i <index> [OPTIONS]" << endl
<< endl;
cout << "Options" << endl;
cout << " " << std::setw(20) << "a, all-permutations" << std::setw(1)
<< " "
<< "Load all six permuations of the index instead of only two." << endl;
cout << " " << std::setw(20) << "h, help" << std::setw(1) << " "
<< "Show this help and exit." << endl;
cout << " " << std::setw(20) << "i, index" << std::setw(1) << " "
Expand Down Expand Up @@ -74,7 +70,6 @@ int main(int argc, char** argv) {
// filled / set depending on the options.
string index = "";
bool text = false;
bool allPermutations = false;
int port = -1;
int numThreads = 1;
bool usePatterns = true;
Expand All @@ -97,9 +92,6 @@ int main(int argc, char** argv) {
case 't':
text = true;
break;
case 'a':
allPermutations = true;
break;
case 'j':
numThreads = atoi(optarg);
break;
Expand Down Expand Up @@ -142,7 +134,7 @@ int main(int argc, char** argv) {

try {
Server server(port, numThreads);
server.initialize(index, text, allPermutations, usePatterns);
server.initialize(index, text, usePatterns);
server.run();
} catch (const std::exception& e) {
// This code should never be reached as all exceptions should be handled
Expand Down
12 changes: 2 additions & 10 deletions src/SparqlEngineMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ using std::string;
#define EMPH_OFF "\033[22m"

// Available options.
struct option options[] = {{"all-permutations", no_argument, NULL, 'a'},
{"cost-factors", required_argument, NULL, 'c'},
struct option options[] = {{"cost-factors", required_argument, NULL, 'c'},
{"help", no_argument, NULL, 'h'},
{"index", required_argument, NULL, 'i'},
{"interactive", no_argument, NULL, 'I'},
Expand All @@ -45,9 +44,6 @@ void printUsage(char* execName) {

cout << "Usage: " << execName << " -i <index> [OPTIONS]" << endl << endl;
cout << "Options" << endl;
cout << " " << std::setw(20) << "a, all-permutations" << std::setw(1)
<< " "
<< "Load all six permuations of the index instead of only two." << endl;
cout << " " << std::setw(20) << "c, cost-factors" << std::setw(1) << " "
<< "Path to a file containing cost factors." << endl;
cout << " " << std::setw(20) << "h, help" << std::setw(1) << " "
Expand Down Expand Up @@ -86,7 +82,6 @@ int main(int argc, char** argv) {
bool text = false;
bool interactive = false;
bool onDiskLiterals = false;
bool allPermutations = false;
bool usePatterns = false;

optind = 1;
Expand All @@ -113,9 +108,6 @@ int main(int argc, char** argv) {
case 'l':
onDiskLiterals = true;
break;
case 'a':
allPermutations = true;
break;
case 'h':
printUsage(argv[0]);
exit(0);
Expand Down Expand Up @@ -150,7 +142,7 @@ int main(int argc, char** argv) {
Index index;
index.setUsePatterns(usePatterns);
index.setOnDiskLiterals(onDiskLiterals);
index.createFromOnDiskIndex(indexName, allPermutations);
index.createFromOnDiskIndex(indexName);
if (text) {
index.addTextFromOnDiskIndex();
}
Expand Down
52 changes: 31 additions & 21 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ void IndexScan::computePSOboundS(ResultTable* result) const {
result->_data.setCols(1);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0};
_executionContext->getIndex().scanPSO(_predicate, _subject, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_predicate, _subject, &result->_data, idx._PSO);
}

// _____________________________________________________________________________
Expand All @@ -181,15 +182,17 @@ void IndexScan::computePSOfreeS(ResultTable* result) const {
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0, 1};
_executionContext->getIndex().scanPSO(_predicate, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_predicate, &result->_data, idx._PSO);
}

// _____________________________________________________________________________
// Computes the POS scan for this operation's _predicate and _object: `result`
// is set up as a single KB-typed column sorted on column 0, and the scan
// output is written into result->_data.
void IndexScan::computePOSboundO(ResultTable* result) const {
result->_data.setCols(1);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0};
// NOTE(review): this call and the idx.scan(...) below look like the removed
// and added sides of the same diff hunk shown together; presumably only the
// idx.scan(...) form exists after the commit -- confirm against the repo.
_executionContext->getIndex().scanPOS(_predicate, _object, &result->_data);
const auto& idx = _executionContext->getIndex();
// Scan with the permutation (idx._POS) passed explicitly as an argument.
idx.scan(_predicate, _object, &result->_data, idx._POS);
}

// _____________________________________________________________________________
Expand All @@ -198,7 +201,8 @@ void IndexScan::computePOSfreeO(ResultTable* result) const {
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0, 1};
_executionContext->getIndex().scanPOS(_predicate, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_predicate, &result->_data, idx._POS);
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -240,15 +244,17 @@ void IndexScan::computeSPOfreeP(ResultTable* result) const {
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0, 1};
_executionContext->getIndex().scanSPO(_subject, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_subject, &result->_data, idx._SPO);
}

// _____________________________________________________________________________
// Computes the SOP scan for this operation's _subject and _object: `result`
// is set up as a single KB-typed column sorted on column 0, and the scan
// output is written into result->_data.
void IndexScan::computeSOPboundO(ResultTable* result) const {
result->_data.setCols(1);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0};
// NOTE(review): this call and the idx.scan(...) below look like the removed
// and added sides of the same diff hunk shown together; presumably only the
// idx.scan(...) form exists after the commit -- confirm against the repo.
_executionContext->getIndex().scanSOP(_subject, _object, &result->_data);
const auto& idx = _executionContext->getIndex();
// Scan with the permutation (idx._SOP) passed explicitly as an argument.
idx.scan(_subject, _object, &result->_data, idx._SOP);
}

// _____________________________________________________________________________
Expand All @@ -257,7 +263,8 @@ void IndexScan::computeSOPfreeO(ResultTable* result) const {
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0, 1};
_executionContext->getIndex().scanSOP(_subject, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_subject, &result->_data, idx._SOP);
}

// _____________________________________________________________________________
Expand All @@ -266,7 +273,8 @@ void IndexScan::computeOPSfreeP(ResultTable* result) const {
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0, 1};
_executionContext->getIndex().scanOPS(_object, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_object, &result->_data, idx._OPS);
}

// _____________________________________________________________________________
Expand All @@ -275,7 +283,8 @@ void IndexScan::computeOSPfreeS(ResultTable* result) const {
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_resultTypes.push_back(ResultTable::ResultType::KB);
result->_sortedBy = {0, 1};
_executionContext->getIndex().scanOSP(_object, &result->_data);
const auto& idx = _executionContext->getIndex();
idx.scan(_object, &result->_data, idx._OSP);
}

// _____________________________________________________________________________
Expand All @@ -285,42 +294,43 @@ void IndexScan::determineMultiplicities() {
if (getResultWidth() == 1) {
_multiplicity.emplace_back(1);
} else {
const auto& idx = getIndex();
switch (_type) {
case PSO_FREE_S:
_multiplicity = getIndex().getPSOMultiplicities(_predicate);
_multiplicity = idx.getMultiplicities(_predicate, idx._PSO);
break;
case POS_FREE_O:
_multiplicity = getIndex().getPOSMultiplicities(_predicate);
_multiplicity = idx.getMultiplicities(_predicate, idx._POS);
break;
case SPO_FREE_P:
_multiplicity = getIndex().getSPOMultiplicities(_subject);
_multiplicity = idx.getMultiplicities(_subject, idx._SPO);
break;
case SOP_FREE_O:
_multiplicity = getIndex().getSOPMultiplicities(_subject);
_multiplicity = idx.getMultiplicities(_subject, idx._SOP);
break;
case OSP_FREE_S:
_multiplicity = getIndex().getOSPMultiplicities(_object);
_multiplicity = idx.getMultiplicities(_object, idx._OSP);
break;
case OPS_FREE_P:
_multiplicity = getIndex().getOPSMultiplicities(_object);
_multiplicity = idx.getMultiplicities(_object, idx._OPS);
break;
case FULL_INDEX_SCAN_SPO:
_multiplicity = getIndex().getSPOMultiplicities();
_multiplicity = idx.getMultiplicities(idx._SPO);
break;
case FULL_INDEX_SCAN_SOP:
_multiplicity = getIndex().getSOPMultiplicities();
_multiplicity = idx.getMultiplicities(idx._SOP);
break;
case FULL_INDEX_SCAN_PSO:
_multiplicity = getIndex().getPSOMultiplicities();
_multiplicity = idx.getMultiplicities(idx._PSO);
break;
case FULL_INDEX_SCAN_POS:
_multiplicity = getIndex().getPOSMultiplicities();
_multiplicity = idx.getMultiplicities(idx._POS);
break;
case FULL_INDEX_SCAN_OSP:
_multiplicity = getIndex().getOSPMultiplicities();
_multiplicity = idx.getMultiplicities(idx._OSP);
break;
case FULL_INDEX_SCAN_OPS:
_multiplicity = getIndex().getOPSMultiplicities();
_multiplicity = idx.getMultiplicities(idx._OPS);
break;
default:
AD_THROW(ad_semsearch::Exception::ASSERT_FAILED,
Expand Down
32 changes: 20 additions & 12 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)

#include "./Join.h"
#include <functional>
#include <sstream>
#include <unordered_set>
#include "./QueryExecutionTree.h"
Expand Down Expand Up @@ -274,24 +275,33 @@ Join::ScanMethodType Join::getScanMethod(
ScanMethodType scanMethod;
IndexScan& scan =
*static_cast<IndexScan*>(fullScanDummyTree->getRootOperation().get());

// Capturing the index by reference works because the Join operation's
// execution context never changes during its lifetime.
const auto& idx = _executionContext->getIndex();
const auto scanLambda = [&idx](const auto& perm) {
return
[&idx, &perm](Id id, IdTable* idTable) { idx.scan(id, idTable, perm); };
};

switch (scan.getType()) {
case IndexScan::FULL_INDEX_SCAN_SPO:
scanMethod = &Index::scanSPO;
scanMethod = scanLambda(idx._SPO);
break;
case IndexScan::FULL_INDEX_SCAN_SOP:
scanMethod = &Index::scanSOP;
scanMethod = scanLambda(idx._SOP);
break;
case IndexScan::FULL_INDEX_SCAN_PSO:
scanMethod = &Index::scanPSO;
scanMethod = scanLambda(idx._PSO);
break;
case IndexScan::FULL_INDEX_SCAN_POS:
scanMethod = &Index::scanPOS;
scanMethod = scanLambda(idx._POS);
break;
case IndexScan::FULL_INDEX_SCAN_OSP:
scanMethod = &Index::scanOSP;
scanMethod = scanLambda(idx._OSP);
break;
case IndexScan::FULL_INDEX_SCAN_OPS:
scanMethod = &Index::scanOPS;
scanMethod = scanLambda(idx._OPS);
break;
default:
AD_THROW(ad_semsearch::Exception::CHECK_FAILED,
Expand All @@ -308,7 +318,6 @@ void Join::doComputeJoinWithFullScanDummyLeft(const IdTable& ndr,
if (ndr.size() == 0) {
return;
}
const auto* index = &getIndex();
const ScanMethodType scan = getScanMethod(_left);
// Iterate through non-dummy.
Id currentJoinId = ndr(0, _rightJoinCol);
Expand All @@ -322,7 +331,7 @@ void Join::doComputeJoinWithFullScanDummyLeft(const IdTable& ndr,
// Do a scan.
LOG(TRACE) << "Inner scan with ID: " << currentJoinId << endl;
IdTable jr(2);
(index->*scan)(currentJoinId, &jr);
scan(currentJoinId, &jr);
LOG(TRACE) << "Got #items: " << jr.size() << endl;
// Build the cross product.
appendCrossProduct(jr.begin(), jr.end(), joinItemFrom, joinItemEnd, res);
Expand All @@ -335,7 +344,7 @@ void Join::doComputeJoinWithFullScanDummyLeft(const IdTable& ndr,
// Do the scan for the final element.
LOG(TRACE) << "Inner scan with ID: " << currentJoinId << endl;
IdTable jr(2);
(index->*scan)(currentJoinId, &jr);
scan(currentJoinId, &jr);
LOG(TRACE) << "Got #items: " << jr.size() << endl;
// Build the cross product.
appendCrossProduct(jr.begin(), jr.end(), joinItemFrom, joinItemEnd, res);
Expand All @@ -351,7 +360,6 @@ void Join::doComputeJoinWithFullScanDummyRight(const IdTable& ndr,
}
// Get the scan method (depends on type of dummy tree) as a std::function.
const ScanMethodType scan = getScanMethod(_right);
const auto* index = &getIndex();
// Iterate through non-dummy.
Id currentJoinId = ndr(0, _leftJoinCol);
auto joinItemFrom = ndr.begin();
Expand All @@ -364,7 +372,7 @@ void Join::doComputeJoinWithFullScanDummyRight(const IdTable& ndr,
// Do a scan.
LOG(TRACE) << "Inner scan with ID: " << currentJoinId << endl;
IdTable jr(2);
(index->*scan)(currentJoinId, &jr);
scan(currentJoinId, &jr);
LOG(TRACE) << "Got #items: " << jr.size() << endl;
// Build the cross product.
appendCrossProduct(joinItemFrom, joinItemEnd, jr.begin(), jr.end(), res);
Expand All @@ -377,7 +385,7 @@ void Join::doComputeJoinWithFullScanDummyRight(const IdTable& ndr,
// Do the scan for the final element.
LOG(TRACE) << "Inner scan with ID: " << currentJoinId << endl;
IdTable jr(2);
(index->*scan)(currentJoinId, &jr);
scan(currentJoinId, &jr);
LOG(TRACE) << "Got #items: " << jr.size() << endl;
// Build the cross product.
appendCrossProduct(joinItemFrom, joinItemEnd, jr.begin(), jr.end(), res);
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Join.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class Join : public Operation {

void computeResultForJoinWithFullScanDummy(ResultTable* result) const;

typedef void (Index::*ScanMethodType)(Id, IdTable*) const;
using ScanMethodType = std::function<void(Id, IdTable*)>;

ScanMethodType getScanMethod(
std::shared_ptr<QueryExecutionTree> fullScanDummyTree) const;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@

// _____________________________________________________________________________
void Server::initialize(const string& ontologyBaseName, bool useText,
bool allPermutations, bool usePatterns) {
bool usePatterns) {
LOG(INFO) << "Initializing server..." << std::endl;

_index.setUsePatterns(usePatterns);

// Init the index.
_index.createFromOnDiskIndex(ontologyBaseName, allPermutations);
_index.createFromOnDiskIndex(ontologyBaseName);
if (useText) {
_index.addTextFromOnDiskIndex();
}
Expand Down

0 comments on commit 863e7aa

Please sign in to comment.