Skip to content

Commit

Permalink
Updated the engine to explicitly use subject pattern data.
Browse files Browse the repository at this point in the history
  • Loading branch information
floriankramer committed Jan 22, 2021
1 parent f063a59 commit ab7b49f
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 65 deletions.
45 changes: 26 additions & 19 deletions src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,21 @@ size_t CountAvailablePredicates::getSizeEstimate() {
// for the type of optimizations the optimizer can currently do.
size_t num_distinct = _subtree->getSizeEstimate() /
_subtree->getMultiplicity(_subjectColumnIndex);
return num_distinct /
getIndex().getPatternIndex().getHasPredicateMultiplicityPredicates();
return num_distinct / getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
} else {
// Predicates are counted for all entities. In this case the size estimate
// should be accurate.
return getIndex().getPatternIndex().getHasPredicateFullSize() /
getIndex().getPatternIndex().getHasPredicateMultiplicityPredicates();
return getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateSize /
getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
}
}

Expand Down Expand Up @@ -223,8 +231,10 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
const std::vector<Id>& predicateGlobalIds) {
IdTableStatic<2> result = dynResult->moveToStatic<2>();
LOG(DEBUG) << "For all entities." << std::endl;
ad_utility::HashMap<PredicateId, size_t> predicateCounts;
ad_utility::HashMap<size_t, size_t> patternCounts;
std::vector<size_t> predicateCounts(predicateGlobalIds.size(), 0);

// Every pattern will be counted at least once
std::vector<size_t> patternCounts(patterns.size(), 0);

size_t maxId = std::max(hasPattern.size(), hasPredicate.size());
for (size_t i = 0; i < maxId; i++) {
Expand All @@ -236,34 +246,31 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
std::tie(predicateData, numPredicates) = hasPredicate[i];
if (numPredicates > 0) {
for (size_t i = 0; i < numPredicates; i++) {
Id predicate = predicateGlobalIds[predicateData[i]];
auto it = predicateCounts.find(predicate);
if (it == predicateCounts.end()) {
predicateCounts[predicate] = 1;
} else {
it->second++;
}
Id predicate = predicateData[i];
predicateCounts[predicate]++;
}
}
}
}

LOG(DEBUG) << "Using " << patternCounts.size()
<< " patterns for computing the result." << std::endl;
for (const auto& it : patternCounts) {
std::pair<PredicateId*, size_t> pattern = patterns[it.first];
for (size_t pattern_id = 0; pattern_id < patternCounts.size(); ++pattern_id) {
std::pair<PredicateId*, size_t> pattern = patterns[pattern_id];
for (size_t i = 0; i < pattern.second; i++) {
predicateCounts[predicateGlobalIds[pattern.first[i]]] += it.second;
predicateCounts[pattern.first[i]] += patternCounts[pattern_id];
}
}
result.reserve(predicateCounts.size());
for (const auto& it : predicateCounts) {
result.push_back({it.first, static_cast<Id>(it.second)});
for (size_t predicate_local_id = 0;
predicate_local_id < predicateCounts.size(); ++predicate_local_id) {
result.push_back({predicateGlobalIds[predicate_local_id],
predicateCounts[predicate_local_id]});
}
*dynResult = result.moveToDynamic();
}

template <int WIDTH>
template <int WIDTH, typename PredicateId>
void CountAvailablePredicates::computePatternTrick(
const IdTable& dynInput, IdTable* dynResult,
const vector<PatternID>& hasPattern,
Expand Down
74 changes: 48 additions & 26 deletions src/engine/HasPredicateScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,38 +128,44 @@ float HasPredicateScan::getMultiplicity(size_t col) {
if (col == 0) {
return getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityEntities();
.getSubjectMetaData()
.fullHasPredicateMultiplicityEntities;
}
break;
case ScanType::FREE_O:
if (col == 0) {
return getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityPredicates();
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
}
break;
case ScanType::FULL_SCAN:
if (col == 0) {
return getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityEntities();
.getSubjectMetaData()
.fullHasPredicateMultiplicityEntities;
} else if (col == 1) {
return getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityPredicates();
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
}
break;
case ScanType::SUBQUERY_S:
if (col < getResultWidth() - 1) {
return _subtree->getMultiplicity(col) *
getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityPredicates();
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
} else {
return _subtree->getMultiplicity(_subtreeColIndex) *
getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityPredicates();
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
}
break;
}
Expand All @@ -169,31 +175,44 @@ float HasPredicateScan::getMultiplicity(size_t col) {
size_t HasPredicateScan::getSizeEstimate() {
switch (_type) {
case ScanType::FREE_S:
return static_cast<size_t>(
getIndex().getPatternIndex().getHasPredicateMultiplicityEntities());
return static_cast<size_t>(getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityEntities);
case ScanType::FREE_O:
return static_cast<size_t>(
getIndex().getPatternIndex().getHasPredicateMultiplicityPredicates());
return static_cast<size_t>(getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates);
case ScanType::FULL_SCAN:
return getIndex().getPatternIndex().getHasPredicateFullSize();
return getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateSize;
case ScanType::SUBQUERY_S:

size_t nofDistinctLeft = std::max(
size_t(1),
static_cast<size_t>(_subtree->getSizeEstimate() /
_subtree->getMultiplicity(_subtreeColIndex)));
size_t nofDistinctRight =
std::max(size_t(1),
static_cast<size_t>(
getIndex().getPatternIndex().getHasPredicateFullSize() /
getIndex()
.getPatternIndex()
.getHasPredicateMultiplicityPredicates()));
size_t nofDistinctRight = std::max(
size_t(1),
static_cast<size_t>(getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateSize /
getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates));
size_t nofDistinctInResult = std::min(nofDistinctLeft, nofDistinctRight);

double jcMultiplicityInResult =
_subtree->getMultiplicity(_subtreeColIndex) *
getIndex().getPatternIndex().getHasPredicateMultiplicityPredicates();
getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates;
return std::max(size_t(1), static_cast<size_t>(jcMultiplicityInResult *
nofDistinctInResult));
}
Expand Down Expand Up @@ -283,13 +302,16 @@ void HasPredicateScan::computeResult(
} break;
case ScanType::FULL_SCAN:
runtimeInfo.setDescriptor("HasPredicateScan full scan");
HasPredicateScan::computeFullScan(
result, pattern_data->hasPattern(), pattern_data->hasPredicate(),
pattern_data->patterns(),
_executionContext->getIndex()
.getPatternIndex()
.getPredicateGlobalIds(),
getIndex().getPatternIndex().getHasPredicateFullSize());
HasPredicateScan::computeFullScan(result, pattern_data->hasPattern(),
pattern_data->hasPredicate(),
pattern_data->patterns(),
_executionContext->getIndex()
.getPatternIndex()
.getPredicateGlobalIds(),
getIndex()
.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateSize);
break;
case ScanType::SUBQUERY_S:

Expand Down
31 changes: 21 additions & 10 deletions test/HasPredicateScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,16 @@ TEST(HasPredicateScan, freeS) {
// Maps pattern ids to patterns
vector<vector<Id>> patternsSrc = {{0, 2, 3}, {1, 3, 4, 2, 0}};

std::vector<Id> predicateGlobalIds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

// These are used to store the relations and patterns in contiguous blocks
// of memory.
CompactStringVector<Id, Id> hasRelation(hasRelationSrc);
CompactStringVector<size_t, Id> patterns(patternsSrc);

// Find all entities that are in a triple with predicate 3
HasPredicateScan::computeFreeS(&resultTable, 3, hasPattern, hasRelation,
patterns);
HasPredicateScan::computeFreeS<Id>(&resultTable, 3, hasPattern, hasRelation,
patterns, predicateGlobalIds);
IdTable& result = resultTable._data;

// the result set does not guarantee any sorting so we have to sort manually
Expand Down Expand Up @@ -112,14 +114,16 @@ TEST(HasPredicateScan, freeO) {
// Maps pattern ids to patterns
vector<vector<Id>> patternsSrc = {{0, 2, 3}, {1, 3, 4, 2, 0}};

std::vector<Id> predicateGlobalIds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

// These are used to store the relations and patterns in contiguous blocks
// of memory.
CompactStringVector<Id, Id> hasRelation(hasRelationSrc);
CompactStringVector<size_t, Id> patterns(patternsSrc);

// Find all predicates for entity 3 (pattern 1)
HasPredicateScan::computeFreeO(&resultTable, 3, hasPattern, hasRelation,
patterns);
patterns, predicateGlobalIds);
IdTable& result = resultTable._data;

ASSERT_EQ(5u, result.size());
Expand All @@ -133,7 +137,7 @@ TEST(HasPredicateScan, freeO) {

// Find all predicates for entity 6 (has-relation entry 6)
HasPredicateScan::computeFreeO(&resultTable, 6, hasPattern, hasRelation,
patterns);
patterns, predicateGlobalIds);

ASSERT_EQ(2u, result.size());
ASSERT_EQ(3u, result[0][0]);
Expand All @@ -153,14 +157,16 @@ TEST(HasPredicateScan, fullScan) {
// Maps pattern ids to patterns
vector<vector<Id>> patternsSrc = {{0, 2, 3}, {1, 3, 4, 2, 0}};

std::vector<Id> predicateGlobalIds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

// These are used to store the relations and patterns in contiguous blocks
// of memory.
CompactStringVector<Id, Id> hasRelation(hasRelationSrc);
CompactStringVector<size_t, Id> patterns(patternsSrc);

// Query for all relations
HasPredicateScan::computeFullScan(&resultTable, hasPattern, hasRelation,
patterns, 16);
patterns, predicateGlobalIds, 16);
IdTable& result = resultTable._data;

ASSERT_EQ(16u, result.size());
Expand Down Expand Up @@ -216,6 +222,8 @@ TEST(HasPredicateScan, subtreeS) {
// Maps pattern ids to patterns
vector<vector<Id>> patternsSrc = {{0, 2, 3}, {1, 3, 4, 2, 0}};

std::vector<Id> predicateGlobalIds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

// These are used to store the relations and patterns in contiguous blocks
// of memory.
CompactStringVector<Id, Id> hasRelation(hasRelationSrc);
Expand All @@ -240,7 +248,7 @@ TEST(HasPredicateScan, subtreeS) {
int out_width = 3;
CALL_FIXED_SIZE_2(in_width, out_width, HasPredicateScan::computeSubqueryS,
&resultTable._data, subresult->_data, 1, hasPattern,
hasRelation, patterns);
hasRelation, patterns, predicateGlobalIds);

IdTable& result = resultTable._data;

Expand Down Expand Up @@ -304,16 +312,19 @@ TEST(CountAvailablePredicates, patternTrickTest) {
// Maps pattern ids to patterns
vector<vector<Id>> patternsSrc = {{0, 2, 3}, {1, 3, 4, 2, 0}};

std::vector<Id> predicateGlobalIds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

// These are used to store the relations and patterns in contiguous blocks
// of memory.
CompactStringVector<Id, Id> hasRelation(hasRelationSrc);
CompactStringVector<size_t, Id> patterns(patternsSrc);

RuntimeInformation runtimeInfo;
try {
CALL_FIXED_SIZE_1(
input.cols(), CountAvailablePredicates::computePatternTrick, input,
&result, hasPattern, hasRelation, patterns, 0, &runtimeInfo);
CALL_FIXED_SIZE_1(input.cols(),
CountAvailablePredicates::computePatternTrick, input,
&result, hasPattern, hasRelation, patterns,
predicateGlobalIds, 0, &runtimeInfo);
} catch (const std::runtime_error& e) {
// More verbose output in the case of an exception occurring.
std::cout << e.what() << std::endl;
Expand Down Expand Up @@ -359,7 +370,7 @@ TEST(CountAvailablePredicates, patternTrickTest) {
result.clear();
try {
CountAvailablePredicates::computePatternTrickAllEntities(
&result, hasPattern, hasRelation, patterns);
&result, hasPattern, hasRelation, patterns, predicateGlobalIds);
} catch (const std::runtime_error& e) {
// More verbose output in the case of an exception occurring.
std::cout << e.what() << std::endl;
Expand Down
22 changes: 12 additions & 10 deletions test/IndexTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,12 @@ TEST_F(CreatePatternsFixture, createPatterns) {
ASSERT_EQ(0u, pattern_data->hasPattern()[2]);
ASSERT_EQ(NO_PATTERN, pattern_data->hasPattern()[1]);

ASSERT_FLOAT_EQ(
4.0 / 2, index.getPatternIndex().getHasPredicateMultiplicityEntities());
ASSERT_FLOAT_EQ(
4.0 / 3,
index.getPatternIndex().getHasPredicateMultiplicityPredicates());
ASSERT_FLOAT_EQ(4.0 / 2, index.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityEntities);
ASSERT_FLOAT_EQ(4.0 / 3, index.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates);
}
{
LOG(DEBUG) << "Testing createPatterns with existing index..." << std::endl;
Expand All @@ -436,11 +437,12 @@ TEST_F(CreatePatternsFixture, createPatterns) {
ASSERT_EQ(0u, pattern_data->hasPattern()[2]);
ASSERT_EQ(NO_PATTERN, pattern_data->hasPattern()[1]);

ASSERT_FLOAT_EQ(
4.0 / 2, index.getPatternIndex().getHasPredicateMultiplicityEntities());
ASSERT_FLOAT_EQ(
4.0 / 3,
index.getPatternIndex().getHasPredicateMultiplicityPredicates());
ASSERT_FLOAT_EQ(4.0 / 2, index.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityEntities);
ASSERT_FLOAT_EQ(4.0 / 3, index.getPatternIndex()
.getSubjectMetaData()
.fullHasPredicateMultiplicityPredicates);
}
}

Expand Down

0 comments on commit ab7b49f

Please sign in to comment.