Skip to content

Commit

Permalink
Added a new values operation.
Browse files Browse the repository at this point in the history
  • Loading branch information
floriankramer committed Jun 18, 2019
1 parent 649b434 commit 363544e
Show file tree
Hide file tree
Showing 9 changed files with 338 additions and 26 deletions.
41 changes: 41 additions & 0 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -650,3 +650,44 @@ queries:
- contains_row: ["<Aachen>", 8]
- contains_row: ["<Aarhus>", 6]
- order_numeric: {"dir" : "ASC", "var": "?count"}
- query: simple-values
type: no-text
sparql: |
SELECT ?a WHERE {
VALUES ?a { <Albert_Einstein>}
}
checks:
- num_cols: 1
- num_rows: 1
- selected: ["?a"]
- contains_row: ["<Albert_Einstein>"]
- query: values-empty-join
type: no-text
sparql: |
SELECT ?a ?b ?c WHERE {
VALUES ?a { <Albert_Einstein>}
VALUES (?a ?b) {
(<Marie_Curie> <Joseph_Jacobson>) (<Freiherr> <Lord_of_the_Isles>)
}
}
checks:
- num_cols: 3
- num_rows: 0
- selected: ["?a", "?b", "?c"]
- query: values-join
type: no-text
sparql: |
SELECT ?a ?b WHERE {
VALUES (?a ?b) {
(<Gerard_De_Geer> <Freiherr>)
(<Charles,_Prince_of_Wales> <Lord_of_the_Isles>)
}
?a <Title> ?b .
?a <Country_of_nationality> <United_Kingdom>
}
checks:
- num_cols: 2
- num_rows: 1
- selected: ["?a", "?b"]
- contains_row: ["<Charles,_Prince_of_Wales>", "<Lord_of_the_Isles>"]

1 change: 1 addition & 0 deletions src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ add_library(engine
Union.cpp Union.h
MultiColumnJoin.cpp MultiColumnJoin.h
TransitivePath.cpp TransitivePath.h
Values.cpp Values.h
IdTable.h
)

Expand Down
3 changes: 2 additions & 1 deletion src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class QueryExecutionTree {
HAS_RELATION_SCAN = 14,
UNION = 15,
MULTICOLUMN_JOIN = 16,
TRANSITIVE_PATH = 17
TRANSITIVE_PATH = 17,
VALUES = 18
};

void setOperation(OperationType type, std::shared_ptr<Operation> op);
Expand Down
43 changes: 28 additions & 15 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "TransitivePath.h"
#include "TwoColumnJoin.h"
#include "Union.h"
#include "Values.h"

// _____________________________________________________________________________
QueryPlanner::QueryPlanner(QueryExecutionContext* qec)
Expand Down Expand Up @@ -308,7 +309,8 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::optimize(
// Cycles have to be avoided (by previously removing a triple and using it
// as a filter later on).
std::vector<SubtreePlan> lastRow =
fillDpTab(tg, pattern->_filters, childPlans).back();
fillDpTab(tg, pattern->_filters, childPlans, pattern->_inlineValues)
.back();

if (pattern == rootPattern) {
return lastRow;
Expand Down Expand Up @@ -840,16 +842,28 @@ QueryPlanner::TripleGraph QueryPlanner::createTripleGraph(
// _____________________________________________________________________________
vector<QueryPlanner::SubtreePlan> QueryPlanner::seedWithScansAndText(
const QueryPlanner::TripleGraph& tg,
const vector<const QueryPlanner::SubtreePlan*>& children) {
const vector<const QueryPlanner::SubtreePlan*>& children,
const vector<SparqlValues>& values) {
vector<SubtreePlan> seeds;
// add all child plans as seeds
uint32_t idShift = tg._nodeMap.size();
uint64_t idShift = tg._nodeMap.size();
for (const SubtreePlan* plan : children) {
SubtreePlan newIdPlan = *plan;
// give the plan a unique id bit
newIdPlan._idsOfIncludedNodes = size_t(1) << idShift;
newIdPlan._idsOfIncludedNodes = uint64_t(1) << idShift;
newIdPlan._idsOfIncludedFilters = 0;
seeds.push_back(newIdPlan);
seeds.emplace_back(newIdPlan);
idShift++;
}
for (const SparqlValues& val : values) {
SubtreePlan valuesPlan(_qec);
std::shared_ptr<Values> op = std::make_shared<Values>(_qec, val);
valuesPlan._qet->setOperation(QueryExecutionTree::OperationType::VALUES,
op);
valuesPlan._qet->setVariableColumns(op->getVariableColumns());
valuesPlan._idsOfIncludedNodes = uint64_t(1) << idShift;
valuesPlan._idsOfIncludedFilters = 0;
seeds.emplace_back(valuesPlan);
idShift++;
}
for (size_t i = 0; i < tg._nodeMap.size(); ++i) {
Expand Down Expand Up @@ -2118,16 +2132,16 @@ std::shared_ptr<Operation> QueryPlanner::createFilterOperation(
// _____________________________________________________________________________
vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
const QueryPlanner::TripleGraph& tg, const vector<SparqlFilter>& filters,
const vector<const QueryPlanner::SubtreePlan*>& children) {
LOG(TRACE) << "Fill DP table... (there are "
<< tg._nodeMap.size() + children.size() << " triples to join)"
<< std::endl;
const vector<const QueryPlanner::SubtreePlan*>& children,
const vector<SparqlValues>& values) {
size_t numSeeds = tg._nodeMap.size() + children.size() + values.size();
LOG(TRACE) << "Fill DP table... (there are " << numSeeds
<< " operations to join)" << std::endl;
vector<vector<SubtreePlan>> dpTab;
dpTab.emplace_back(seedWithScansAndText(tg, children));
applyFiltersIfPossible(dpTab.back(), filters,
tg._nodeMap.size() + children.size() == 1);
dpTab.emplace_back(seedWithScansAndText(tg, children, values));
applyFiltersIfPossible(dpTab.back(), filters, numSeeds == 1);

for (size_t k = 2; k <= tg._nodeMap.size() + children.size(); ++k) {
for (size_t k = 2; k <= numSeeds; ++k) {
LOG(TRACE) << "Producing plans that unite " << k << " triples."
<< std::endl;
dpTab.emplace_back(vector<SubtreePlan>());
Expand All @@ -2137,8 +2151,7 @@ vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
continue;
}
dpTab[k - 1].insert(dpTab[k - 1].end(), newPlans.begin(), newPlans.end());
applyFiltersIfPossible(dpTab.back(), filters,
tg._nodeMap.size() + children.size() == k);
applyFiltersIfPossible(dpTab.back(), filters, numSeeds == k);
}
if (dpTab[k - 1].size() == 0) {
AD_THROW(ad_semsearch::Exception::BAD_QUERY,
Expand Down
6 changes: 4 additions & 2 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ class QueryPlanner {
*/
vector<SubtreePlan> seedWithScansAndText(
const TripleGraph& tg,
const vector<const QueryPlanner::SubtreePlan*>& children);
const vector<const QueryPlanner::SubtreePlan*>& children,
const vector<SparqlValues>& values);

/**
* @brief Returns a subtree plan that will compute the values for the
Expand Down Expand Up @@ -276,7 +277,8 @@ class QueryPlanner {

vector<vector<SubtreePlan>> fillDpTab(
const TripleGraph& graph, const vector<SparqlFilter>& fs,
const vector<const SubtreePlan*>& children);
const vector<const SubtreePlan*>& children,
const vector<SparqlValues>& values);

size_t getTextLimit(const string& textLimitString) const;

Expand Down
153 changes: 153 additions & 0 deletions src/engine/Values.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright 2019, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de)

#include "Values.h"

#include <sstream>
#include "../util/Exception.h"
#include "../util/HashSet.h"
#include "CallFixedSize.h"

// Creates a VALUES operation. The parsed `values` clause is copied into the
// `_values` member and `qec` is passed on to the Operation base class.
Values::Values(QueryExecutionContext* qec, const SparqlValues& values)
: Operation(qec), _values(values) {}

// Returns a one-line textual representation of this operation of the form
//   VALUES (<var> <var> ...) {(<val> <val> ...) (<val> <val> ...) ...}
// preceded by `indent` space characters. Variables, the entries of a row and
// the rows themselves are each separated by exactly one space.
string Values::asString(size_t indent) const {
  std::ostringstream os;
  for (size_t i = 0; i < indent; ++i) {
    os << " ";
  }

  const size_t numVariables = _values._variables.size();
  os << "VALUES (";
  for (size_t i = 0; i < numVariables; i++) {
    os << _values._variables[i];
    if (i + 1 < numVariables) {
      os << " ";
    }
  }

  const size_t numRows = _values._values.size();
  os << ") {";
  for (size_t i = 0; i < numRows; i++) {
    const vector<string>& row = _values._values[i];
    os << "(";
    for (size_t j = 0; j < row.size(); j++) {
      os << row[j];
      if (j + 1 < row.size()) {
        os << " ";
      }
    }
    os << ")";
    // The separator between rows has to be bounded by the number of rows
    // (not by the number of variables), otherwise rows are only separated
    // when there happen to be more variables than preceding rows.
    if (i + 1 < numRows) {
      os << " ";
    }
  }
  os << "}";
  return os.str();
}

// Returns a human readable description of this operation of the form
//   Values with variables <var> <var> ... and values (<row>) (<row>) ...
// Variables, the entries of a row and the rows themselves are each separated
// by exactly one space.
string Values::getDescriptor() const {
  std::ostringstream os;

  const size_t numVariables = _values._variables.size();
  os << "Values with variables ";
  for (size_t i = 0; i < numVariables; i++) {
    os << _values._variables[i];
    if (i + 1 < numVariables) {
      os << " ";
    }
  }

  const size_t numRows = _values._values.size();
  os << " and values ";
  for (size_t i = 0; i < numRows; i++) {
    const vector<string>& row = _values._values[i];
    os << "(";
    for (size_t j = 0; j < row.size(); j++) {
      os << row[j];
      if (j + 1 < row.size()) {
        os << " ";
      }
    }
    os << ")";
    // Separate rows based on the number of rows, not on the number of
    // variables.
    if (i + 1 < numRows) {
      os << " ";
    }
  }
  return os.str();
}

// The result has one column per variable of the VALUES clause.
size_t Values::getResultWidth() const {
  const size_t numVariables = _values._variables.size();
  return numVariables;
}

// Returns an empty list: the result is not declared to be sorted on any
// column.
vector<size_t> Values::resultSortedOn() const {
  return vector<size_t>();
}

// Maps every variable of the VALUES clause to the index of its result column,
// which is the variable's position in the clause. If a variable is listed
// more than once, its last position is the one that is kept.
ad_utility::HashMap<string, size_t> Values::getVariableColumns() const {
  ad_utility::HashMap<string, size_t> variableToColumn;
  size_t column = 0;
  for (const auto& variable : _values._variables) {
    variableToColumn[variable] = column;
    ++column;
  }
  return variableToColumn;
}

// Returns the multiplicity of column `col`. The multiplicities are computed
// on the first call and reused afterwards. For a column index without a
// stored multiplicity, 1 is returned.
float Values::getMultiplicity(size_t col) {
  const bool notYetComputed = _multiplicities.empty();
  if (notYetComputed) {
    computeMultiplicities();
  }
  if (col >= _multiplicities.size()) {
    return 1;
  }
  return _multiplicities[col];
}

// The size estimate is the number of rows listed in the VALUES clause.
size_t Values::getSizeEstimate() {
  const size_t numRows = _values._values.size();
  return numRows;
}

// The cost estimate is the number of rows listed in the VALUES clause.
size_t Values::getCostEstimate() {
  const size_t numRows = _values._values.size();
  return numRows;
}

void Values::computeMultiplicities() {
if (_values._variables.empty()) {
// If the result is empty we still add a column to the multiplicities to
// mark them as computed.
_multiplicities.resize(1, 1);
return;
}
_multiplicities.resize(_values._variables.size());
ad_utility::HashSet<string> values;
for (size_t col = 0; col < _values._variables.size(); col++) {
values.clear();
size_t count = 0;
size_t distinct = 0;
for (size_t j = 0; j < _values._values.size(); j++) {
const std::string& v = _values._values[j][col];
count++;
if (values.count(v) == 0) {
distinct++;
values.insert(v);
}
}
_multiplicities[col] = double(count) / distinct;
}
}

// Sets up the metadata of `result` (sort order, number of columns, one KB
// result type per variable) and then writes the rows of the VALUES clause
// into the result's table, dispatching on the result width.
void Values::computeResult(ResultTable* result) {
  size_t resWidth = getResultWidth();

  result->_sortedBy = resultSortedOn();
  result->_data.setCols(resWidth);
  result->_resultTypes.resize(_values._variables.size(),
                              ResultTable::ResultType::KB);

  const Index& index = getIndex();
  CALL_FIXED_SIZE_1(resWidth, writeValues, &result->_data, index, _values);
}

// Writes the rows of `values` into `res`, translating every entry into its
// vocabulary id via `index`. `I` is the compile-time width used for the
// static view of the table.
// Throws a BAD_INPUT exception if a row has fewer entries than the table has
// columns, or if an entry is not part of the vocabulary.
template <size_t I>
void Values::writeValues(IdTable* res, const Index& index,
                         const SparqlValues& values) {
  IdTableStatic<I> result = res->moveToStatic<I>();
  result.resize(values._values.size());
  for (size_t rowIdx = 0; rowIdx < values._values.size(); rowIdx++) {
    // Bind by reference: copying the row would copy all of its strings.
    const vector<string>& row = values._values[rowIdx];
    if (row.size() < result.cols()) {
      AD_THROW(ad_semsearch::Exception::BAD_INPUT,
               "A row of the VALUES clause has fewer entries than there are "
               "variables.");
    }
    for (size_t colIdx = 0; colIdx < result.cols(); colIdx++) {
      size_t id;
      if (!index.getVocab().getId(row[colIdx], &id)) {
        AD_THROW(ad_semsearch::Exception::BAD_INPUT,
                 "The word " + row[colIdx] +
                     " is not part of the vocabulary.");
      }
      result(rowIdx, colIdx) = id;
    }
  }
  *res = result.moveToDynamic();
}
47 changes: 47 additions & 0 deletions src/engine/Values.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2019, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de)

#pragma once

#include "../parser/ParsedQuery.h"
#include "Operation.h"

// Operation that produces the rows of a SPARQL VALUES clause as its result.
// The result has one column per variable of the clause and one row per row
// of the clause.
class Values : public Operation {
 public:
  // Stores a copy of `values`; `qec` is passed on to the Operation base.
  Values(QueryExecutionContext* qec, const SparqlValues& values);

  // Textual representation of the operation, preceded by `indent` spaces.
  virtual string asString(size_t indent = 0) const override;

  // Human readable description of the operation.
  virtual string getDescriptor() const override;

  // Number of result columns (one per variable).
  virtual size_t getResultWidth() const override;

  // Columns the result is sorted on.
  virtual vector<size_t> resultSortedOn() const override;

  // Maps each variable of the clause to the index of its result column.
  ad_utility::HashMap<string, size_t> getVariableColumns() const;

  // This operation ignores the text limit.
  virtual void setTextLimit(size_t limit) override { (void)limit; }

  // The result is known to be empty if the clause has no variables or no
  // rows.
  virtual bool knownEmptyResult() override {
    return _values._variables.empty() || _values._values.empty();
  }

  // Multiplicity of column `col`; computed lazily on first use.
  virtual float getMultiplicity(size_t col) override;

  virtual size_t getSizeEstimate() override;

  virtual size_t getCostEstimate() override;

 private:
  // Computes the multiplicities of all columns and stores them in
  // `_multiplicities`.
  void computeMultiplicities();

  // Multiplicities are ratios (number of rows / number of distinct values)
  // and are handed out as float by getMultiplicity, so they are stored as
  // float; an integer type would truncate fractional values such as 1.5.
  std::vector<float> _multiplicities;

  // The parsed VALUES clause this operation evaluates.
  SparqlValues _values;

  virtual void computeResult(ResultTable* result) override;

  // Writes the rows of `values` into `res`, using `index` to translate the
  // entries into ids. `I` is the compile-time width of the table.
  template <size_t I>
  static void writeValues(IdTable* res, const Index& index,
                          const SparqlValues& values);
};

0 comments on commit 363544e

Please sign in to comment.