Skip to content

Commit

Permalink
Refactoring preliminaries for lazy operations (Part 1) (#1352)
Browse files Browse the repository at this point in the history
This PR contains all the changes from the infrastructure for lazy operation evaluation (#1350)  that are simple and repetitive, but touch many files. In particular:

* Rename the `ResultTable` class to `Result` (a TODO suggested by @hannahbast some time ago).
* Add a new parameter `bool requestLaziness` to `Operation::computeResult`. This parameter is currently unused.
  • Loading branch information
RobinTF committed May 23, 2024
1 parent d63028c commit f9c3132
Show file tree
Hide file tree
Showing 72 changed files with 294 additions and 284 deletions.
6 changes: 3 additions & 3 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ std::vector<QueryExecutionTree*> Bind::getChildren() {
}

// _____________________________________________________________________________
ResultTable Bind::computeResult() {
Result Bind::computeResult([[maybe_unused]] bool requestLaziness) {
using std::endl;
LOG(DEBUG) << "Get input to BIND operation..." << endl;
shared_ptr<const ResultTable> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult();
LOG(DEBUG) << "Got input to Bind operation." << endl;
IdTable idTable{getExecutionContext()->getAllocator()};

Expand Down Expand Up @@ -114,7 +114,7 @@ ResultTable Bind::computeResult() {
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void Bind::computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const ResultTable& inputResultTable,
const Result& inputResultTable,
sparqlExpression::SparqlExpression* expression) const {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Bind.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ class Bind : public Operation {
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

private:
ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

// Implementation for the binding of arbitrary expressions.
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const ResultTable& inputResultTable,
const Result& inputResultTable,
sparqlExpression::SparqlExpression* expression) const;

[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ add_subdirectory(sparqlExpressions)
add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp)
qlever_target_link_libraries(SortPerformanceEstimator)
add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp ResultTable.cpp LocalVocab.cpp
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Distinct.cpp OrderBy.cpp Filter.cpp
Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp
Expand Down
16 changes: 9 additions & 7 deletions src/engine/CartesianProductJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,11 @@ void CartesianProductJoin::writeResultColumn(std::span<Id> targetColumn,
}
}
// ____________________________________________________________________________
ResultTable CartesianProductJoin::computeResult() {
Result CartesianProductJoin::computeResult(
[[maybe_unused]] bool requestLaziness) {
IdTable result{getExecutionContext()->getAllocator()};
result.setNumColumns(getResultWidth());
std::vector<std::shared_ptr<const ResultTable>> subResults;
std::vector<std::shared_ptr<const Result>> subResults;

// We don't need to fully materialize the child results if we have a LIMIT
// specified and an OFFSET of 0.
Expand All @@ -154,21 +155,22 @@ ResultTable CartesianProductJoin::computeResult() {
}
subResults.push_back(child.getResult());
// Early stopping: If one of the results is empty, we can stop early.
if (subResults.back()->size() == 0) {
if (subResults.back()->idTable().size() == 0) {
break;
}
// Example for the following calculation: If we have a LIMIT of 1000 and
// the first child already has a result of size 100, then the second child
// needs to evaluate only its first 10 results. The +1 is because integer
// divisions are rounded down by default.
if (limitIfPresent.has_value()) {
limitIfPresent.value()._limit =
limitIfPresent.value()._limit.value() / subResults.back()->size() + 1;
limitIfPresent.value()._limit = limitIfPresent.value()._limit.value() /
subResults.back()->idTable().size() +
1;
}
}

auto sizesView = std::views::transform(
subResults, [](const auto& child) { return child->size(); });
subResults, [](const auto& child) { return child->idTable().size(); });
auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(),
1UL, std::multiplies{});

Expand Down Expand Up @@ -210,7 +212,7 @@ ResultTable CartesianProductJoin::computeResult() {
auto subResultsDeref = std::views::transform(
subResults, [](auto& x) -> decltype(auto) { return *x; });
return {std::move(result), resultSortedOn(),
ResultTable::getMergedLocalVocab(subResultsDeref)};
Result::getMergedLocalVocab(subResultsDeref)};
}

// ____________________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CartesianProductJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class CartesianProductJoin : public Operation {

private:
//! Compute the result of the query-subtree rooted at this element..
ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

// Copy each element from the `inputColumn` `groupSize` times to the
// `targetColumn`. Repeat until the `targetColumn` is copletely filled. Skip
Expand Down
5 changes: 3 additions & 2 deletions src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ size_t CountAvailablePredicates::getCostEstimate() {
}

// _____________________________________________________________________________
ResultTable CountAvailablePredicates::computeResult() {
Result CountAvailablePredicates::computeResult(
[[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "CountAvailablePredicates result computation..." << std::endl;
IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(2);
Expand Down Expand Up @@ -137,7 +138,7 @@ ResultTable CountAvailablePredicates::computeResult() {
patterns);
return {std::move(idTable), resultSortedOn(), LocalVocab{}};
} else {
std::shared_ptr<const ResultTable> subresult = subtree_->getResult();
std::shared_ptr<const Result> subresult = subtree_->getResult();
LOG(DEBUG) << "CountAvailablePredicates subresult computation done."
<< std::endl;

Expand Down
6 changes: 3 additions & 3 deletions src/engine/CountAvailablePredicates.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
using std::string;
using std::vector;

// This Operation takes a ResultTable with at least one column containing ids,
// and a column index referring to such a column. It then creates a ResultTable
// This Operation takes a Result with at least one column containing ids,
// and a column index referring to such a column. It then creates a Result
// containing two columns, the first one filled with the ids of all predicates
// for which there is an entry in the index with one of the entities in the
// specified input column as its subject. The second output column contains a
Expand Down Expand Up @@ -103,6 +103,6 @@ class CountAvailablePredicates : public Operation {
void computePatternTrickAllEntities(
IdTable* result, const CompactVectorOfStrings<Id>& patterns) const;

ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;
[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
};
5 changes: 3 additions & 2 deletions src/engine/Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <sstream>

#include "engine/CallFixedSize.h"
#include "engine/Engine.h"
#include "engine/QueryExecutionTree.h"

using std::endl;
Expand Down Expand Up @@ -37,10 +38,10 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const {
}

// _____________________________________________________________________________
ResultTable Distinct::computeResult() {
Result Distinct::computeResult([[maybe_unused]] bool requestLaziness) {
IdTable idTable{getExecutionContext()->getAllocator()};
LOG(DEBUG) << "Getting sub-result for distinct result computation..." << endl;
shared_ptr<const ResultTable> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult();

LOG(DEBUG) << "Distinct result computation..." << endl;
idTable.setNumColumns(subRes->idTable().numColumns());
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Distinct : public Operation {
[[nodiscard]] string getCacheKeyImpl() const override;

private:
virtual ResultTable computeResult() override;
virtual Result computeResult([[maybe_unused]] bool requestLaziness) override;

VariableToColumnMap computeVariableToColumnMap() const override;
};
26 changes: 12 additions & 14 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ cppcoro::generator<QueryExecutionTree::StringTriple>
ExportQueryExecutionTrees::constructQueryResultToTriples(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset, std::shared_ptr<const ResultTable> res,
LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> res,
CancellationHandle cancellationHandle) {
for (size_t i : getRowIndices(limitAndOffset, res->idTable())) {
ConstructQueryExportContext context{i, *res, qet.getVariableColumns(),
Expand Down Expand Up @@ -57,7 +57,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
resultTable->logResultSize();
auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples(
Expand Down Expand Up @@ -91,8 +91,7 @@ nlohmann::json
ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
const LimitOffsetClause& limitAndOffset,
std::shared_ptr<const ResultTable> res,
const LimitOffsetClause& limitAndOffset, std::shared_ptr<const Result> res,
CancellationHandle cancellationHandle) {
auto generator = constructQueryResultToTriples(qet, constructTriples,
limitAndOffset, std::move(res),
Expand All @@ -110,7 +109,7 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON(
nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray(
const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset,
const QueryExecutionTree::ColumnIndicesAndTypes& columns,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
AD_CORRECTNESS_CHECK(resultTable != nullptr);
const IdTable& data = resultTable->idTable();
Expand Down Expand Up @@ -268,7 +267,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON(
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
using nlohmann::json;

Expand Down Expand Up @@ -388,7 +387,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON(
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
AD_CORRECTNESS_CHECK(resultTable != nullptr);
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
Expand Down Expand Up @@ -418,7 +417,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream(

// This call triggers the possibly expensive computation of the query result
// unless the result is already cached.
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();
resultTable->logResultSize();
LOG(DEBUG) << "Converting result IDs to their corresponding strings ..."
<< std::endl;
Expand Down Expand Up @@ -563,7 +562,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
selectClause.getSelectedVariablesAsStrings();
// This call triggers the possibly expensive computation of the query result
// unless the result is already cached.
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();

// In the XML format, the variables don't include the question mark.
auto varsWithoutQuestionMark = std::views::transform(
Expand Down Expand Up @@ -605,8 +604,7 @@ ad_utility::streams::stream_generator
ExportQueryExecutionTrees::constructQueryResultToStream(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset,
std::shared_ptr<const ResultTable> resultTable,
LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
static_assert(format == MediaType::octetStream || format == MediaType::csv ||
format == MediaType::tsv || format == MediaType::sparqlXml);
Expand Down Expand Up @@ -638,11 +636,11 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON(
const ParsedQuery& query, const QueryExecutionTree& qet,
const ad_utility::Timer& requestTimer, uint64_t maxSend,
CancellationHandle cancellationHandle) {
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();
resultTable->logResultSize();
auto timeResultComputation = requestTimer.msecs();

size_t resultSize = resultTable->size();
size_t resultSize = resultTable->idTable().size();

nlohmann::json j;

Expand Down Expand Up @@ -725,7 +723,7 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON(
AD_THROW(
"SPARQL-compliant JSON format is only supported for SELECT queries");
}
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();
resultTable->logResultSize();
nlohmann::json j;
auto limitAndOffset = query._limitOffset;
Expand Down
13 changes: 6 additions & 7 deletions src/engine/ExportQueryExecutionTrees.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class ExportQueryExecutionTrees {
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

/**
Expand All @@ -132,31 +132,30 @@ class ExportQueryExecutionTrees {
static nlohmann::json idTableToQLeverJSONArray(
const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset,
const QueryExecutionTree::ColumnIndicesAndTypes& columns,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

// ___________________________________________________________________________
static nlohmann::json constructQueryResultBindingsToQLeverJSON(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
const LimitOffsetClause& limitAndOffset,
std::shared_ptr<const ResultTable> res,
CancellationHandle cancellationHandle);
std::shared_ptr<const Result> res, CancellationHandle cancellationHandle);

// Generate an RDF graph for a CONSTRUCT query.
static cppcoro::generator<QueryExecutionTree::StringTriple>
constructQueryResultToTriples(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset, std::shared_ptr<const ResultTable> res,
LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> res,
CancellationHandle cancellationHandle);

// ___________________________________________________________________________
static nlohmann::json selectQueryResultToSparqlJSON(
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

// ___________________________________________________________________________
Expand All @@ -165,7 +164,7 @@ class ExportQueryExecutionTrees {
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

// _____________________________________________________________________________
Expand Down
6 changes: 3 additions & 3 deletions src/engine/Filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ string Filter::getDescriptor() const {
}

// _____________________________________________________________________________
ResultTable Filter::computeResult() {
Result Filter::computeResult([[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl;
shared_ptr<const ResultTable> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult();
LOG(DEBUG) << "Filter result computation..." << endl;
checkCancellation();

Expand All @@ -63,7 +63,7 @@ ResultTable Filter::computeResult() {
// _____________________________________________________________________________
template <size_t WIDTH>
void Filter::computeFilterImpl(IdTable* outputIdTable,
const ResultTable& inputResultTable) {
const Result& inputResultTable) {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
inputResultTable.idTable(), getExecutionContext()->getAllocator(),
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ class Filter : public Operation {
return _subtree->getVariableColumns();
}

ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

template <size_t WIDTH>
void computeFilterImpl(IdTable* outputIdTable,
const ResultTable& inputResultTable);
const Result& inputResultTable);
};
8 changes: 4 additions & 4 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ void GroupBy::processGroup(
* @param blockEnd Where the group ends.
* @param input The input Table.
* @param result
* @param inTable The input ResultTable, which is required for its local
* @param inTable The input Result, which is required for its local
* vocabulary
* @param outTable The output ResultTable, the vocabulary of which needs to be
* @param outTable The output Result, the vocabulary of which needs to be
* expanded for GROUP_CONCAT aggregates
* @param distinctHashSet An empty hash set. This is only passed in as an
* argument to allow for efficient reusage of its
Expand Down Expand Up @@ -309,7 +309,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput,
*dynResult = std::move(result).toDynamic();
}

ResultTable GroupBy::computeResult() {
Result GroupBy::computeResult([[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "GroupBy result computation..." << std::endl;

IdTable idTable{getExecutionContext()->getAllocator()};
Expand All @@ -335,7 +335,7 @@ ResultTable GroupBy::computeResult() {
auto hashMapOptimizationParams =
checkIfHashMapOptimizationPossible(aggregates);

std::shared_ptr<const ResultTable> subresult;
std::shared_ptr<const Result> subresult;
if (hashMapOptimizationParams.has_value()) {
const auto* child = _subtree->getRootOperation()->getChildren().at(0);
// Skip sorting
Expand Down
Loading

0 comments on commit f9c3132

Please sign in to comment.