Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The actual logic for expression evaluation. #489

Merged
merged 17 commits into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 17 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,23 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Coroutine support is enabled by a compiler flag whose spelling differs
# between g++ (`-fcoroutines`) and clang (`-fcoroutines-ts`). Probe for the
# g++ spelling and fall back to the clang one if it is not accepted.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag(-fcoroutines HAS_COROUTINES)
if (NOT HAS_COROUTINES)
    add_compile_options(-fcoroutines-ts)
else()
    add_compile_options(-fcoroutines)
endif()

# Check compiler versions:
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# Check version. If empty, warn. If too old, error out:
if ("${CMAKE_CXX_COMPILER_VERSION}" STREQUAL "")
message(WARNING "GCC Compiler version is unknown, proceed at your own risk!")
elseif (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
message(FATAL_ERROR "GCC compiler version must be at least 4.8!")
endif ()
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lc++abi")
endif ()


## Build targets for address sanitizer
# AddressSanitize
set(CMAKE_C_FLAGS_ASAN
Expand Down Expand Up @@ -121,6 +128,8 @@ set(BUILD_TESTING OFF CACHE BOOL "Don't build tests for abseil" FORCE)
add_subdirectory(third_party/abseil-cpp EXCLUDE_FROM_ALL)
include_directories(third_party/abseil-cpp/)



if (USE_PARALLEL)
include(FindOpenMP)
if (OPENMP_FOUND)
Expand Down Expand Up @@ -209,7 +218,7 @@ configure_file(src/web/script.js script.js)
add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp)
target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT})

add_executable(CreatePatternsMain src/index/CreatePatternsMain.cpp src/util/jthread.h)
add_executable(CreatePatternsMain src/index/CreatePatternsMain.cpp)
target_link_libraries(CreatePatternsMain index ${CMAKE_THREAD_LIBS_INIT})

add_executable(SparqlEngineMain src/SparqlEngineMain.cpp)
Expand Down
1 change: 1 addition & 0 deletions src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
add_subdirectory(sparqlExpressions)
add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp SortPerformanceEstimator.h)
add_library(engine
Engine.h
Expand Down
190 changes: 190 additions & 0 deletions src/engine/sparqlExpressions/AggregateExpression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Copyright 2021, University of Freiburg, Chair of Algorithms and Data
// Structures. Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>

//
// Created by johannes on 28.09.21.
//

#ifndef QLEVER_AGGREGATEEXPRESSION_H
#define QLEVER_AGGREGATEEXPRESSION_H

#include "SparqlExpression.h"
namespace sparqlExpression {

// Identity `FinalOperation` for an `AggregateExpression`: hands back the
// aggregated value unchanged (returned by value) and ignores the element
// count that is passed as the second argument.
inline auto noop = [](auto&& result, size_t) {
  return std::forward<decltype(result)>(result);
};

// An expression that aggregates its input using the `AggregateOperation` and
// then executes the `FinalOperation` (possibly the `noop` lambda from above) on
// the result.
namespace detail {
// Generic aggregate over a single child expression.
// `AggregateOperation` supplies the binary fold (`_function`), the value
// getter for the single operand (`_valueGetters`) and, where allowed, a
// shortcut for `SetOfInterval`s (`_functionForSetOfIntervals`).
// `FinalOperation` is default-constructed and applied once to the folded
// result together with the number of aggregated elements.
template <typename AggregateOperation, typename FinalOperation = decltype(noop)>
class AggregateExpression : public SparqlExpression {
public:
// __________________________________________________________________________
// Takes ownership of `child`. `distinct` selects whether duplicates among the
// child's values are aggregated only once.
AggregateExpression(bool distinct, Ptr&& child,
AggregateOperation aggregateOp = AggregateOperation{})
: _distinct(distinct),
_child{std::move(child)},
_aggregateOp{std::move(aggregateOp)} {}

// __________________________________________________________________________
// Evaluate the child and dispatch on the concrete alternative of its result
// to `evaluateOnChildOperand`.
ExpressionResult evaluate(EvaluationContext* context) const override {
auto childResult = _child->evaluate(context);

return ad_utility::visitWithVariantsAndParameters(
evaluateOnChildOperand, _aggregateOp, FinalOperation{}, context,
_distinct, std::move(childResult));
}

// _________________________________________________________________________
// The single child, exposed as a span of length 1.
std::span<SparqlExpression::Ptr> children() override { return {&_child, 1}; }

// _________________________________________________________________________
vector<std::string> getUnaggregatedVariables() override {
// This is an aggregate, so it never leaves any unaggregated variables.
return {};
}

// __________________________________________________________________________
// The key consists of the (implementation-defined) type name of the concrete
// expression, the distinct flag, and the cache key of the child.
[[nodiscard]] string getCacheKey(
const VariableToColumnMap& varColMap) const override {
return std::string(typeid(*this).name()) + std::to_string(_distinct) + "(" +
_child->getCacheKey(varColMap) + ")";
}

// __________________________________________________________________________
[[nodiscard]] std::optional<string> getVariableForNonDistinctCountOrNullopt()
const override {
// This behavior is not correct for the `COUNT` aggregate. The count is
// therefore implemented in a separate `CountExpression` class, which
// overrides this function.
return std::nullopt;
}

// This is the visitor that `evaluate` above passes to
// `visitWithVariantsAndParameters`. It works on a `SingleExpressionResult`
// rather than on the `ExpressionResult` variant.
// NOTE(review): both branches below dereference the begin iterator before
// comparing it to `end()`; presumably the input is never empty here --
// confirm against the callers.
inline static const auto evaluateOnChildOperand =
[]<SingleExpressionResult Operand>(
const AggregateOperation& aggregateOperation,
const FinalOperation& finalOperation, EvaluationContext* context,
bool distinct, Operand&& operand) -> ExpressionResult {
// Perform the more efficient calculation on `SetOfInterval`s if it is
// possible.
if constexpr (detail::isCalculationWithSetOfIntervalsAllowed<
AggregateOperation, Operand>) {
return aggregateOperation._functionForSetOfIntervals(
std::forward<Operand>(operand));
}

// The number of inputs we aggregate over.
auto inputSize = getResultSize(*context, operand);

// Aggregates are unary expressions, therefore we have only one value getter
// for the single operand.
static_assert(
std::tuple_size_v<typename AggregateOperation::ValueGetters> == 1);
const auto& valueGetter = std::get<0>(aggregateOperation._valueGetters);

if (!distinct) {
// Fold all values (after applying the `valueGetter`) from left to right,
// starting with the first value as the initial result.
auto values = valueGetterGenerator(
valueGetter, std::forward<Operand>(operand), inputSize, context);
auto it = values.begin();
auto result = *it;
for (++it; it != values.end(); ++it) {
result =
aggregateOperation._function(std::move(result), std::move(*it));
}
// The final operation sees the total number of inputs.
result = finalOperation(std::move(result), inputSize);
return result;
} else {
// The operands *without* applying the `valueGetter`.
auto operands =
makeGenerator(std::forward<Operand>(operand), inputSize, context);

// For distinct we must put the operands into the hash set before
// applying the `valueGetter`. For example, COUNT(?x), where ?x matches
// three different strings, the value getter always returns `1`, but
// we still have three distinct inputs.
auto it = operands.begin();
auto result = valueGetter(*it, context);
// The set is seeded with the first operand, which is already part of
// `result`.
ad_utility::HashSetWithMemoryLimit<typename decltype(
operands)::value_type>
uniqueHashSet({*it}, inputSize, context->_allocator);
for (++it; it != operands.end(); ++it) {
// Only operands that were newly inserted contribute to the result.
if (uniqueHashSet.insert(*it).second) {
result = aggregateOperation._function(
std::move(result), valueGetter(std::move(*it), context));
}
}
// The final operation sees the number of *distinct* inputs.
result = finalOperation(std::move(result), uniqueHashSet.size());
return result;
}
};

protected:
// Whether duplicates are aggregated only once.
bool _distinct;
// The expression whose result is aggregated.
Ptr _child;
// The fold and value getter used by `evaluateOnChildOperand`.
AggregateOperation _aggregateOp;
};

// Shorthands for declaring the concrete aggregate expressions.

// The operation type of an aggregate: `Ts...` are forwarded to
// `FunctionAndValueGetters`.
template <typename... Ts>
using AGG_OP = Operation<2, FunctionAndValueGetters<Ts...>>;

// An `AggregateExpression` over `AGG_OP<Ts...>` with the default
// `FinalOperation`.
template <typename... Ts>
using AGG_EXP = AggregateExpression<AGG_OP<Ts...>>;

// COUNT
/// For the count expression, we have to manually overwrite one member function
/// for the pattern trick.
// Binary fold for COUNT: adds the two operands and yields the sum as
// `int64_t`.
inline auto count = [](const auto& lhs, const auto& rhs) -> int64_t {
  const int64_t total = lhs + rhs;
  return total;
};
using CountExpressionBase = AGG_EXP<decltype(count), IsValidValueGetter>;
class CountExpression : public CountExpressionBase {
using CountExpressionBase::CountExpressionBase;
[[nodiscard]] std::optional<string> getVariableForNonDistinctCountOrNullopt()
const override {
if (this->_distinct) {
return std::nullopt;
}
return _child->getVariableOrNullopt();
}
};

// SUM
// Binary fold for sums: returns `lhs + rhs` by value.
inline auto add = [](const auto& lhs, const auto& rhs) {
  auto sum = lhs + rhs;
  return sum;
};
// Aggregate that folds the values produced by the `NumericValueGetter` with
// `add`; the default `FinalOperation` is used.
using SumExpression = AGG_EXP<decltype(add), NumericValueGetter>;

// AVG
// Final step of the average: divide the accumulated sum (`aggregation`) by
// the number of aggregated elements. Yields a quiet NaN if there were no
// elements, so that no division by zero is performed.
inline auto averageFinalOp = [](const auto& aggregation, size_t numElements) {
  return numElements ? static_cast<double>(aggregation) /
                           static_cast<double>(numElements)
                     : std::numeric_limits<double>::quiet_NaN();
};
// The sum is accumulated with `add`. `averageFinalOp` has to be passed
// explicitly as the `FinalOperation`: with the default (`noop`) the expression
// would return the plain sum instead of the average.
using AvgExpression =
    detail::AggregateExpression<AGG_OP<decltype(add), NumericValueGetter>,
                                decltype(averageFinalOp)>;

// MIN
// Binary fold for minima: yields `lhs` if it compares less than `rhs`,
// otherwise `rhs`.
inline auto minLambda = [](const auto& lhs, const auto& rhs) {
  const bool lhsIsSmaller = lhs < rhs;
  return lhsIsSmaller ? lhs : rhs;
};
// Aggregate that folds the values produced by the `NumericValueGetter` with
// `minLambda`; the default `FinalOperation` is used.
using MinExpression = AGG_EXP<decltype(minLambda), NumericValueGetter>;

// MAX
// Binary fold for maxima: yields `lhs` if it compares greater than `rhs`,
// otherwise `rhs`.
inline auto maxLambda = [](const auto& lhs, const auto& rhs) {
  const bool lhsIsLarger = lhs > rhs;
  return lhsIsLarger ? lhs : rhs;
};
// Aggregate that folds the values produced by the `NumericValueGetter` with
// `maxLambda`; the default `FinalOperation` is used.
using MaxExpression = AGG_EXP<decltype(maxLambda), NumericValueGetter>;

} // namespace detail
} // namespace sparqlExpression

#endif // QLEVER_AGGREGATEEXPRESSION_H
10 changes: 10 additions & 0 deletions src/engine/sparqlExpressions/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# The library for the evaluation of SPARQL expressions. Every file is listed
# exactly once.
add_library(sparqlExpressions
        SparqlExpressionTypes.h
        SparqlExpression.h
        AggregateExpression.h
        GroupConcatExpression.h
        SparqlExpressionGenerators.h
        SparqlExpressionValueGetters.h SparqlExpressionValueGetters.cpp
        NaryExpression.h
        SetOfIntervals.h SetOfIntervals.cpp
        LiteralExpression.h)
61 changes: 61 additions & 0 deletions src/engine/sparqlExpressions/GroupConcatExpression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2021, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach<joka921> (johannes.kalmbach@gmail.com)

#ifndef QLEVER_GROUPCONCATEXPRESSION_H
#define QLEVER_GROUPCONCATEXPRESSION_H

#include "AggregateExpression.h"

namespace sparqlExpression {
/// The GROUP_CONCAT Expression
class GroupConcatExpression : public SparqlExpression {
public:
GroupConcatExpression(bool distinct, Ptr&& child, std::string separator)
: _separator{std::move(separator)} {
auto performConcat = [separator = _separator](string&& a,
const string& b) -> string {
if (a.empty()) [[unlikely]] {
return b;
} else [[likely]] {
a.append(separator);
a.append(b);
return std::move(a);
}
};

using OP = AGOP<decltype(performConcat), StringValueGetter>;
auto groupConcatOp = OP{performConcat};
using AGG_EXP = AggregateExpression<OP>;
_actualExpression = std::make_unique<AGG_EXP>(distinct, std::move(child),
std::move(groupConcatOp));
}

// __________________________________________________________________________
ExpressionResult evaluate(EvaluationContext* context) const override {
// The child is already set up to perform all the work.
return _actualExpression->evaluate(context);
}

// _________________________________________________________________________
std::span<SparqlExpression::Ptr> children() override {
return {&_actualExpression, 1};
}

vector<std::string> getUnaggregatedVariables() override {
// This is an aggregation, so it never leaves any unaggregated variables.
return {};
}

[[nodiscard]] string getCacheKey(
const VariableToColumnMap& varColMap) const override {
return "["s + _separator + "]" + _actualExpression->getCacheKey(varColMap);
}

private:
Ptr _actualExpression;
std::string _separator;
};
} // namespace sparqlExpression

#endif // QLEVER_GROUPCONCATEXPRESSION_H