Skip to content

Commit

Permalink
Integrated the SparqlExpressions into QLever.
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 committed Oct 27, 2021
1 parent 7a47878 commit a743fc4
Show file tree
Hide file tree
Showing 14 changed files with 242 additions and 1,004 deletions.
228 changes: 76 additions & 152 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "Bind.h"

#include "../util/Exception.h"
#include "./sparqlExpressions/SparqlExpression.h"
#include "./sparqlExpressions/SparqlExpressionGenerators.h"
#include "CallFixedSize.h"
#include "QueryExecutionTree.h"

Expand All @@ -25,25 +27,9 @@ size_t Bind::getCostEstimate() {
float Bind::getMultiplicity(size_t col) {
// this is the newly added column
if (col == getResultWidth() - 1) {
// if we rename a column, we also preserve the multiplicity of this column
if (auto ptr = std::get_if<GraphPatternOperation::Bind::Rename>(
&(_bind._expressionVariant))) {
auto incol = _subtree->getVariableColumn(ptr->_var);
return _subtree->getMultiplicity(incol);
}
// only one value in the new column, high multiplicity
if (std::get_if<GraphPatternOperation::Bind::Constant>(
&(_bind._expressionVariant))) {
return _subtree->getSizeEstimate();
}

// For a binary operation, we make the simplifying assumption that all result
// values are different (which will indeed often be the case).
if (std::get_if<GraphPatternOperation::Bind::BinaryOperation>(
&(_bind._expressionVariant))) {
return 1;
}
throw std::runtime_error("Unknown type of BIND in getMultiplicity");
// TODO<joka921> get a better multiplicity estimate for BINDs which are
// variable renames or Constants
return 1;
}

// one of the columns that was only copied from the input.
Expand Down Expand Up @@ -73,7 +59,12 @@ string Bind::asString(size_t indent) const {

auto m = getVariableColumns();
auto strings = _bind.strings();
os << "BIND (" << _bind.operationName() << ") on";
// TODO<joka921> Proper asString() method for the Expressions
os << "BIND ("
<< "Complex expression"
<< ") on";
// the random string to prevent false caching
os << _bind._expressionVariant.asString(m);

for (const auto& ptr : strings) {
auto s = *ptr;
Expand Down Expand Up @@ -129,148 +120,41 @@ void Bind::computeResult(ResultTable* result) {
int inwidth = subRes->_data.cols();
int outwidth = getResultWidth();

if (auto ptr = std::get_if<GraphPatternOperation::Bind::BinaryOperation>(
&_bind._expressionVariant)) {
std::array<size_t, 2> columns{_subtree->getVariableColumn(ptr->_var1),
_subtree->getVariableColumn(ptr->_var2)};
array<ResultTable::ResultType, 2> inTypes{subRes->_resultTypes[columns[0]],
subRes->_resultTypes[columns[1]]};
// Currently the result type for a Binary Operation is always float, this
// will be changed with proper datatype support.
result->_resultTypes.push_back(ResultTable::ResultType::FLOAT);
CALL_FIXED_SIZE_2(inwidth, outwidth, Bind::computeBinaryOperationBind,
&result->_data, subRes->_data, columns, inTypes,
ptr->_binaryOperator[0], _subtree->getQec()->getIndex());
} else if (auto ptr = std::get_if<GraphPatternOperation::Bind::Rename>(
&_bind._expressionVariant)) {
size_t inColumn{_subtree->getVariableColumn(ptr->_var)};
// copying a column also copies the result type
result->_resultTypes.push_back(subRes->_resultTypes[inColumn]);
CALL_FIXED_SIZE_2(inwidth, outwidth, Bind::computeRenameBind,
&result->_data, subRes->_data, inColumn);
} else if (auto ptr = std::get_if<GraphPatternOperation::Bind::Constant>(
&_bind._expressionVariant)) {
result->_resultTypes.push_back(ptr->_type);
Id value;
if (ptr->_type == ResultTable::ResultType::VERBATIM) {
value = ptr->_intValue;
} else if (ptr->_type == ResultTable::ResultType::KB) {
if (!_executionContext->getIndex().getVocab().getId(ptr->_kbValue,
&value)) {
throw std::runtime_error("BIND constant " + ptr->_kbValue +
" is not part of the knowledge base. This is "
"currently unsupported");
}
} else {
throw std::runtime_error(
"BIND currently only supported for integer constant and entities "
"from the KB."
"This should never happen, please report this");
}
CALL_FIXED_SIZE_2(inwidth, outwidth, Bind::computeConstantBind,
&result->_data, subRes->_data, value);
} else {
AD_THROW(ad_semsearch::Exception::BAD_QUERY,
"Currently only three types of BIND are implemented: Integer "
"constant, rename, and binary operation.");
}
result->_resultTypes.emplace_back();
CALL_FIXED_SIZE_2(inwidth, outwidth, computeExpressionBind, result,
&(result->_resultTypes.back()), *subRes,
_bind._expressionVariant.getImpl());

result->_sortedBy = resultSortedOn();

LOG(DEBUG) << "BIND result computation done." << endl;
}

// _____________________________________________________________________________
template <int IN_WIDTH, int OUT_WIDTH>
void Bind::computeBinaryOperationBind(
IdTable* dynRes, const IdTable& inputDyn, std::array<size_t, 2> columns,
array<ResultTable::ResultType, 2> inputTypes, char binaryOperator,
const Index& index) {
const auto input = inputDyn.asStaticView<IN_WIDTH>();
auto result = dynRes->moveToStatic<OUT_WIDTH>();

const auto inSize = input.size();
result.reserve(inSize);
const auto inCols = input.cols();

// Lambda for the binary operation.
const float NO_VALUE = std::numeric_limits<float>::quiet_NaN();
std::function<float(float, float)> binaryOperations[4] = {
[](float v1, float v2) { return v1 + v2; },
[](float v1, float v2) { return v1 - v2; },
[](float v1, float v2) { return v1 * v2; },
[](float v1, float v2) { return v1 / v2; }};
size_t i = "+-*/"s.find(binaryOperator);
AD_CHECK(i != std::string::npos);
auto binaryOperation = binaryOperations[i];

// Iterate over all rows.
for (size_t i = 0; i < inSize; ++i) {
result.emplace_back();
for (size_t j = 0; j < inCols; ++j) {
result(i, j) = input(i, j);
void Bind::computeExpressionBind(
ResultTable* outResult, ResultTable::ResultType* resultType,
const ResultTable& inResult,
sparqlExpression::SparqlExpression* expression) const {
sparqlExpression::VariableToColumnAndResultTypeMap columnMap;
for (const auto& [variable, columnIndex] : getVariableColumns()) {
if (columnIndex < inResult.width()) {
columnMap[variable] =
std::pair(columnIndex, inResult.getResultType(columnIndex));
}
// Iterate over the two values of the binary operation.
float value1 = NO_VALUE;
float value2 = NO_VALUE;
for (size_t colIdx = 0; colIdx < columns.size(); ++colIdx) {
float& value = colIdx == 0 ? value1 : value2;
// CASE 1: Verbatim value (like value from a COUNT).
if (inputTypes[colIdx] == ResultTable::ResultType::VERBATIM) {
value = input(i, columns[colIdx]);
// CASE 2: Value stored as float.
} else if (inputTypes[colIdx] == ResultTable::ResultType::FLOAT) {
std::memcpy(&value, &input(i, columns[colIdx]), sizeof(float));
// CASE 3: Not a value.
} else if (inputTypes[colIdx] == ResultTable::ResultType::TEXT ||
inputTypes[colIdx] == ResultTable::ResultType::LOCAL_VOCAB) {
// CASE 4: RDF value which needs to be parsed first.
} else {
std::string entity =
index.idToOptionalString(input(i, columns[colIdx])).value_or("");
if (!ad_utility::startsWith(entity, VALUE_FLOAT_PREFIX)) {
break;
} else {
value = ad_utility::convertIndexWordToFloat(entity);
}
}
}
// Perform the operation. Result is NO_VALUE if one of the operands is
// NO_VALUE or if division by zero.
bool invalid = value1 == NO_VALUE || value2 == NO_VALUE ||
(binaryOperator == '/' && value2 == .0f);
float opResult = invalid ? NO_VALUE : binaryOperation(value1, value2);
std::memcpy(&result(i, inCols), &opResult, sizeof(float));
}
*dynRes = result.moveToDynamic();
}

template <int IN_WIDTH, int OUT_WIDTH>
void Bind::computeRenameBind(IdTable* dynRes, const IdTable& inputDyn,
size_t column) {
const auto input = inputDyn.asStaticView<IN_WIDTH>();
auto res = dynRes->moveToStatic<OUT_WIDTH>();
sparqlExpression::EvaluationContext evaluationInput(
*getExecutionContext(), columnMap, inResult._data,
getExecutionContext()->getAllocator(), *inResult._localVocab);

const auto inSize = input.size();
res.reserve(inSize);
const auto inCols = input.cols();
// copy the input to the first cols;
for (size_t i = 0; i < inSize; ++i) {
res.emplace_back();
for (size_t j = 0; j < inCols; ++j) {
res(i, j) = input(i, j);
}
// simply copy
res(i, inCols) = input(i, column);
}
*dynRes = res.moveToDynamic();
}
sparqlExpression::ExpressionResult expressionResult =
expression->evaluate(&evaluationInput);

template <int IN_WIDTH, int OUT_WIDTH>
void Bind::computeConstantBind(IdTable* dynRes, const IdTable& inputDyn,
size_t targetVal) {
const auto input = inputDyn.asStaticView<IN_WIDTH>();
auto res = dynRes->moveToStatic<OUT_WIDTH>();
const auto input = inResult._data.asStaticView<IN_WIDTH>();
auto res = outResult->_data.moveToStatic<OUT_WIDTH>();

// first initialize the first columns (they remain identical)
const auto inSize = input.size();
res.reserve(inSize);
const auto inCols = input.cols();
Expand All @@ -280,8 +164,48 @@ void Bind::computeConstantBind(IdTable* dynRes, const IdTable& inputDyn,
for (size_t j = 0; j < inCols; ++j) {
res(i, j) = input(i, j);
}
// simply copy
res(i, inCols) = targetVal;
}
*dynRes = res.moveToDynamic();

auto visitor = [&]<sparqlExpression::SingleExpressionResult T>(
T&& singleResult) mutable {
constexpr static bool isVariable =
std::is_same_v<T, sparqlExpression::Variable>;
constexpr static bool isStrongId =
std::is_same_v<T, sparqlExpression::StrongIdWithResultType>;
if constexpr (isVariable) {
auto column = getVariableColumns().at(singleResult._variable);
for (size_t i = 0; i < inSize; ++i) {
res(i, inCols) = res(i, column);
}
*resultType = evaluationInput._variableToColumnAndResultTypeMap
.at(singleResult._variable)
.second;
} else if constexpr (isStrongId) {
for (size_t i = 0; i < inSize; ++i) {
res(i, inCols) = singleResult._id._value;
}
*resultType = singleResult._type;
} else {
bool isConstant = sparqlExpression::isConstantResult<T>;

auto expanded = sparqlExpression::detail::makeGenerator(
std::forward<T>(singleResult), inSize, &evaluationInput);
*resultType =
sparqlExpression::detail::expressionResultTypeToQleverResultType<T>();
bool isFirst = true;

size_t i = 0;
for (auto&& singleResultValue : expanded) {
res(i, inCols) = sparqlExpression::detail::constantExpressionResultToId(
singleResultValue, *(outResult->_localVocab),
isConstant && !isFirst);
isFirst = false;
i++;
}
}
};

std::visit(visitor, std::move(expressionResult));

outResult->_data = res.moveToDynamic();
}
22 changes: 6 additions & 16 deletions src/engine/Bind.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "../parser/ParsedQuery.h"
#include "Operation.h"
#include "sparqlExpressions/SparqlExpressionPimpl.h"

/// BIND operation, currently only supports a very limited subset of expressions
class Bind : public Operation {
Expand Down Expand Up @@ -40,23 +41,12 @@ class Bind : public Operation {

void computeResult(ResultTable* result) override;

// Implementation for binding the sum of two columns: BIND (?x + ?y As ?z)
// Implementation for the binding of arbitrary expressions
template <int IN_WIDTH, int OUT_WIDTH>
static void computeBinaryOperationBind(
IdTable* dynRes, const IdTable& inputDyn, std::array<size_t, 2> columns,
array<ResultTable::ResultType, 2> inputTypes, char binaryOperator,
const Index& index);

// Implementation for renaming a column: BIND (?x As ?y)
template <int IN_WIDTH, int OUT_WIDTH>
static void computeRenameBind(IdTable* dynRes, const IdTable& inputDyn,
size_t column);

// Bind a constant value from the knowledge base to the new column: BIND (42
// As ?x)
template <int IN_WIDTH, int OUT_WIDTH>
static void computeConstantBind(IdTable* dynRes, const IdTable& inputDyn,
size_t targetVal);
void computeExpressionBind(
ResultTable* outRes, ResultTable::ResultType* resultType,
const ResultTable& inputDyn,
sparqlExpression::SparqlExpression* expression) const;
};

#endif // QLEVER_BIND_H
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ add_library(engine
Minus.h Minus.cpp
ResultType.h)

target_link_libraries(engine index parser SortPerformanceEstimator absl::flat_hash_set ${ICU_LIBRARIES})
target_link_libraries(engine index parser sparqlExpressions SortPerformanceEstimator absl::flat_hash_set ${ICU_LIBRARIES})

0 comments on commit a743fc4

Please sign in to comment.