-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
649b434
commit 363544e
Showing
9 changed files
with
338 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
// Copyright 2019, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de) | ||
|
||
#include "Values.h" | ||
|
||
#include <sstream> | ||
#include "../util/Exception.h" | ||
#include "../util/HashSet.h" | ||
#include "CallFixedSize.h" | ||
|
||
// Creates a VALUES operation that runs in the execution context `qec`.
// The given `values` clause is copied into the `_values` member.
Values::Values(QueryExecutionContext* qec, const SparqlValues& values)
    : Operation(qec),
      _values(values) {
}
|
||
// Returns a textual representation of this operation of the form
// `VALUES (?a ?b) {(x y) (z w)}`, preceded by `indent` spaces.
//
// Variables inside the header and entries inside a row are separated by a
// single space; consecutive rows are separated by a single space as well.
string Values::asString(size_t indent) const {
  std::ostringstream os;
  for (size_t i = 0; i < indent; ++i) {
    os << " ";
  }
  os << "VALUES (";
  for (size_t i = 0; i < _values._variables.size(); i++) {
    os << _values._variables[i];
    if (i + 1 < _values._variables.size()) {
      os << " ";
    }
  }
  os << ") {";
  for (size_t i = 0; i < _values._values.size(); i++) {
    const vector<string>& v = _values._values[i];
    os << "(";
    for (size_t j = 0; j < v.size(); j++) {
      os << v[j];
      if (j + 1 < v.size()) {
        os << " ";
      }
    }
    os << ")";
    // The separator goes between rows, so the bound has to be the number of
    // rows (not the number of variables); otherwise rows run together or a
    // trailing space is emitted whenever the two counts differ.
    if (i + 1 < _values._values.size()) {
      os << " ";
    }
  }
  os << "}";
  return os.str();
}
|
||
// Returns a human-readable description of this operation of the form
// `Values with variables ?a ?b and values (x y) (z w)`.
//
// Variables, the entries of a row, and consecutive rows are each separated by
// a single space.
string Values::getDescriptor() const {
  std::ostringstream os;
  os << "Values with variables ";
  for (size_t i = 0; i < _values._variables.size(); i++) {
    os << _values._variables[i];
    if (i + 1 < _values._variables.size()) {
      os << " ";
    }
  }
  os << " and values ";
  for (size_t i = 0; i < _values._values.size(); i++) {
    const vector<string>& v = _values._values[i];
    os << "(";
    for (size_t j = 0; j < v.size(); j++) {
      os << v[j];
      if (j + 1 < v.size()) {
        os << " ";
      }
    }
    os << ")";
    // The separator goes between rows, so the bound has to be the number of
    // rows (not the number of variables); otherwise rows run together or a
    // trailing space is emitted whenever the two counts differ.
    if (i + 1 < _values._values.size()) {
      os << " ";
    }
  }
  return os.str();
}
|
||
// The result has exactly one column per variable of the VALUES clause.
size_t Values::getResultWidth() const {
  const size_t numVariables = _values._variables.size();
  return numVariables;
}
|
||
// Reports no sort columns for the result: always returns an empty vector.
vector<size_t> Values::resultSortedOn() const {
  vector<size_t> sortedColumns;
  return sortedColumns;
}
|
||
// Maps every variable of the VALUES clause to the index of its result column.
// Column indices follow the order of the variables in the clause; if a
// variable name occurs more than once, its last position wins.
ad_utility::HashMap<string, size_t> Values::getVariableColumns() const {
  ad_utility::HashMap<string, size_t> variableToColumn;
  size_t column = 0;
  for (const auto& variable : _values._variables) {
    variableToColumn[variable] = column;
    ++column;
  }
  return variableToColumn;
}
|
||
float Values::getMultiplicity(size_t col) { | ||
if (_multiplicities.empty()) { | ||
computeMultiplicities(); | ||
} | ||
if (col < _multiplicities.size()) { | ||
return _multiplicities[col]; | ||
} | ||
return 1; | ||
} | ||
|
||
// The size estimate is the number of rows listed in the VALUES clause.
size_t Values::getSizeEstimate() {
  const size_t numRows = _values._values.size();
  return numRows;
}
|
||
// The cost estimate is the number of rows listed in the VALUES clause.
size_t Values::getCostEstimate() {
  const size_t numRows = _values._values.size();
  return numRows;
}
|
||
void Values::computeMultiplicities() { | ||
if (_values._variables.empty()) { | ||
// If the result is empty we still add a column to the multiplicities to | ||
// mark them as computed. | ||
_multiplicities.resize(1, 1); | ||
return; | ||
} | ||
_multiplicities.resize(_values._variables.size()); | ||
ad_utility::HashSet<string> values; | ||
for (size_t col = 0; col < _values._variables.size(); col++) { | ||
values.clear(); | ||
size_t count = 0; | ||
size_t distinct = 0; | ||
for (size_t j = 0; j < _values._values.size(); j++) { | ||
const std::string& v = _values._values[j][col]; | ||
count++; | ||
if (values.count(v) == 0) { | ||
distinct++; | ||
values.insert(v); | ||
} | ||
} | ||
_multiplicities[col] = double(count) / distinct; | ||
} | ||
} | ||
|
||
// Fills `result` with the rows of the VALUES clause.
//
// Sets the sort columns, the number of columns and the per-column result type
// (KB for every variable) of `result`, then dispatches to `writeValues` via
// CALL_FIXED_SIZE_1 with the result width.
void Values::computeResult(ResultTable* result) {
  const Index& index = getIndex();
  size_t resWidth = getResultWidth();

  // Metadata of the result table.
  result->_sortedBy = resultSortedOn();
  result->_data.setCols(resWidth);
  result->_resultTypes.resize(_values._variables.size(),
                              ResultTable::ResultType::KB);

  // Write the actual ids.
  CALL_FIXED_SIZE_1(resWidth, writeValues, &result->_data, index, _values);
}
|
||
template <size_t I> | ||
void Values::writeValues(IdTable* res, const Index& index, | ||
const SparqlValues& values) { | ||
IdTableStatic<I> result = res->moveToStatic<I>(); | ||
result.resize(values._values.size()); | ||
for (size_t rowIdx = 0; rowIdx < values._values.size(); rowIdx++) { | ||
const vector<string> row = values._values[rowIdx]; | ||
for (size_t colIdx = 0; colIdx < result.cols(); colIdx++) { | ||
size_t id; | ||
if (!index.getVocab().getId(row[colIdx], &id)) { | ||
AD_THROW(ad_semsearch::Exception::BAD_INPUT, | ||
"The word " + row[colIdx] + "is not part of the vocabulary.") | ||
} | ||
result(rowIdx, colIdx) = id; | ||
} | ||
} | ||
*res = result.moveToDynamic(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright 2019, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de) | ||
|
||
#pragma once | ||
|
||
#include "../parser/ParsedQuery.h" | ||
#include "Operation.h" | ||
|
||
class Values : public Operation { | ||
public: | ||
Values(QueryExecutionContext* qec, const SparqlValues& values); | ||
|
||
virtual string asString(size_t indent = 0) const override; | ||
|
||
virtual string getDescriptor() const override; | ||
|
||
virtual size_t getResultWidth() const override; | ||
|
||
virtual vector<size_t> resultSortedOn() const override; | ||
|
||
ad_utility::HashMap<string, size_t> getVariableColumns() const; | ||
|
||
virtual void setTextLimit(size_t limit) override { (void)limit; } | ||
|
||
virtual bool knownEmptyResult() override { | ||
return _values._variables.empty() || _values._values.empty(); | ||
} | ||
|
||
virtual float getMultiplicity(size_t col) override; | ||
|
||
virtual size_t getSizeEstimate() override; | ||
|
||
virtual size_t getCostEstimate() override; | ||
|
||
private: | ||
void computeMultiplicities(); | ||
std::vector<size_t> _multiplicities; | ||
|
||
SparqlValues _values; | ||
|
||
virtual void computeResult(ResultTable* result) override; | ||
|
||
template <size_t I> | ||
static void writeValues(IdTable* res, const Index& index, | ||
const SparqlValues& values); | ||
}; |
Oops, something went wrong.