Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Starting 'Select *' implementation #546

Merged
merged 14 commits into from
Jan 27, 2022
Merged
8 changes: 4 additions & 4 deletions e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ PROJECT_DIR=$(readlink -f -- "$(dirname ${BASH_SOURCE[0]})/..")

# Change to the project directory so we can use simple relative paths
echo "Changing to project directory: $PROJECT_DIR"
pushd $PROJECT_DIR
pushd "$PROJECT_DIR"
BINARY_DIR=$(readlink -f -- ./build)
if [ ! -e $BINARY_DIR ]; then
if [ ! -e "$BINARY_DIR" ]; then
BINARY_DIR=$(readlink -f -- .)
fi
echo "Binary dir is $BINARY_DIR"
Expand All @@ -88,7 +88,7 @@ mkdir -p "$INDEX_DIR"
# Travis' caching creates it
if [ ! -e "$INPUT.nt" ]; then
# Why the hell is this a ZIP that can't easily be decompressed from stdin?!?
unzip -j $ZIPPED_INPUT -d "$INPUT_DIR/"
unzip -j "$ZIPPED_INPUT" -d "$INPUT_DIR/"
fi;


Expand Down Expand Up @@ -123,5 +123,5 @@ echo "Waiting for ServerMain to launch and open port"
while ! curl --max-time 1 --output /dev/null --silent http://localhost:9099/; do
sleep 1
done
$PYTHON_BINARY "$PROJECT_DIR/e2e/queryit.py" "$PROJECT_DIR/e2e/scientists_queries.yaml" "http://localhost:9099" &> $BINARY_DIR/query_log.txt || bail "Querying Server failed"
$PYTHON_BINARY "$PROJECT_DIR/e2e/queryit.py" "$PROJECT_DIR/e2e/scientists_queries.yaml" "http://localhost:9099" &> "$BINARY_DIR/query_log.txt" || bail "Querying Server failed"
popd
84 changes: 55 additions & 29 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <sstream>
#include <string>
#include <utility>
#include <variant>

#include "../parser/RdfEscaping.h"
#include "./Distinct.h"
Expand Down Expand Up @@ -91,22 +92,29 @@ void QueryExecutionTree::setVariableColumns(
// _____________________________________________________________________________
template <QueryExecutionTree::ExportSubFormat format>
ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults(const vector<string>& selectVars,
QueryExecutionTree::generateResults(const SelectedVarsOrAsterisk & selectVarsOrAsterisk,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the type is now called SelectedVarsOrAsterisk
the corresponding variables should also be called selectedVarsOrAsterisk
(vs. selectVarsOrAsterisk)

size_t limit, size_t offset) const {
// They may trigger computation (but does not have to).
shared_ptr<const ResultTable> resultTable = getResult();
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
vector<std::optional<pair<size_t, ResultTable::ResultType>>> validIndices;
for (auto var : selectVars) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
auto it = getVariableColumns().find(var);
if (it != getVariableColumns().end()) {
validIndices.push_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
} else {
validIndices.push_back(std::nullopt);
if(selectVarsOrAsterisk.isAsterisk()) {
for(const auto& elem : getVariableColumns())
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>(
joka921 marked this conversation as resolved.
Show resolved Hide resolved
elem.second, resultTable->getResultType(elem.second)));
}
else {
for (auto var : selectVarsOrAsterisk.getSelectVariables()) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
auto it = getVariableColumns().find(var);
if (it != getVariableColumns().end()) {
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
} else {
validIndices.emplace_back(std::nullopt);
}
}
}
if (validIndices.empty()) {
Expand All @@ -123,48 +131,56 @@ QueryExecutionTree::generateResults(const vector<string>& selectVars,

template ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::CSV>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset) const;

template ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::TSV>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset) const;

template ad_utility::stream_generator::stream_generator QueryExecutionTree::
generateResults<QueryExecutionTree::ExportSubFormat::BINARY>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset) const;

// ___________________________________________________________________________
QueryExecutionTree::ColumnIndicesAndTypes
QueryExecutionTree::selectedVariablesToColumnIndices(
const std::vector<string>& selectVariables,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk,
const ResultTable& resultTable) const {
ColumnIndicesAndTypes exportColumns;
for (auto var : selectVariables) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
if(selectVarsOrAsterisk.isAsterisk()) {
for(const auto& elem: getVariableColumns()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a further improvement:
the type of elem here is a pair of (std::string, Index) (the index is an unsigned integer).
The string is the Variable (the keys of the hash map).

So you can further Improve this to
for (const auto& [variable, columnIndex]) {.... }

here variable is what previously was elem.first and columnIndex is variable.second.

This is a C++17 feature called "structured binding" in case you want to research it.

exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
elem.first, elem.second, resultTable.getResultType(elem.second)});
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you sort here by the variable (currently "elem.first", then the output order becomes
deterministic for the asterisk (easiest way for the e2e tests).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course it should then be commented, that these deterministic orders are not required by the standard.

}
}
else {
for (auto var : selectVarsOrAsterisk.getSelectVariables()) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
}
}
}
return exportColumns;
}

// _____________________________________________________________________________
nlohmann::json QueryExecutionTree::writeResultAsQLeverJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable) const {
// They may trigger computation (but does not have to).
if (!resultTable) {
resultTable = getResult();
}
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
ColumnIndicesAndTypes validIndices =
selectedVariablesToColumnIndices(selectVars, *resultTable);
selectedVariablesToColumnIndices(selectVarsOrAsterisk, *resultTable);
if (validIndices.empty()) {
return {std::vector<std::string>()};
}
Expand All @@ -175,7 +191,7 @@ nlohmann::json QueryExecutionTree::writeResultAsQLeverJson(

// _____________________________________________________________________________
nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable) const {
using nlohmann::json;

Expand All @@ -186,7 +202,7 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
LOG(DEBUG) << "Finished computing the query result in the ID space. "
"Resolving strings in result...\n";
ColumnIndicesAndTypes columns =
selectedVariablesToColumnIndices(selectVars, *resultTable);
selectedVariablesToColumnIndices(selectVarsOrAsterisk, *resultTable);

std::erase(columns, std::nullopt);

Expand All @@ -197,7 +213,17 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
const IdTable& idTable = resultTable->_idTable;

json result;
result["head"]["vars"] = selectVars;

if(selectVarsOrAsterisk.isAsterisk()) {
vector<string> vars_names;
for(auto const& varName_index: getVariableColumns()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Also consider the structured binding here.

  2. This only works, if getVariableColumns() (a hash map) alsways gives us the results in the same order. This should "typically" work, but in general you should either sort them consistently, or write exactly the variables which you have extracted here to the selectedVarsOrAsterisk

vars_names.push_back(varName_index.first);
}
result["head"]["vars"] = vars_names;
}
else {
result["head"]["vars"] = selectVarsOrAsterisk.getSelectVariables();
}

json bindings = json::array();

Expand Down
11 changes: 7 additions & 4 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "../util/streamable_generator.h"
#include "./Operation.h"
#include "./QueryExecutionContext.h"
#include "../parser/ParsedQuery.h"

using std::shared_ptr;
using std::string;
Expand Down Expand Up @@ -101,17 +102,19 @@ class QueryExecutionTree {
ResultTable::ResultType _resultType;
};

using SelectedVarsOrAsterisk = ParsedQuery::SelectedVarsOrAsterisk;

using ColumnIndicesAndTypes = vector<std::optional<VariableAndColumnIndex>>;

// Returns a vector where the i-th element contains the column index and
// `ResultType` of the i-th `selectVariable` in the `resultTable`
ColumnIndicesAndTypes selectedVariablesToColumnIndices(
const std::vector<string>& selectVariables,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk,
const ResultTable& resultTable) const;

template <ExportSubFormat format>
ad_utility::stream_generator::stream_generator generateResults(
const vector<string>& selectVars, size_t limit = MAX_NOF_ROWS_IN_RESULT,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit = MAX_NOF_ROWS_IN_RESULT,
size_t offset = 0) const;

// Generate an RDF graph in turtle format for a CONSTRUCT query.
Expand All @@ -131,11 +134,11 @@ class QueryExecutionTree {
size_t offset, std::shared_ptr<const ResultTable> res) const;

nlohmann::json writeResultAsQLeverJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable = nullptr) const;

nlohmann::json writeResultAsSparqlJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> preComputedResult = nullptr) const;

const std::vector<size_t>& resultSortedOn() const {
Expand Down
70 changes: 42 additions & 28 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,16 +499,24 @@ bool QueryPlanner::checkUsePatternTrick(

// check that all selected variables are outputs of
// CountAvailablePredicates
joka921 marked this conversation as resolved.
Show resolved Hide resolved
for (const std::string& s : selectClause._selectedVariables) {
if (s != t._o && s != count_var_name) {
usePatternTrick = false;
break;
}
/*
if(selectClause._varsOrAsterisk.isAsterisk()) {
return false;
}
if (!usePatternTrick) {
continue;
*/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this commented out?
it should work as expected.
You can then immediately assign
the selectedVariables to a variable
const auto& selectedVariables = ....getSelectVariables() and use this below to make the code
much more readable.

if(selectClause._varsOrAsterisk.isVariables()) {
for (const std::string& s : selectClause._varsOrAsterisk.getSelectVariables()) {
if (s != t._o && s != count_var_name) {
usePatternTrick = false;
break;
}
}
if (!usePatternTrick) {
continue;
}
}


// Check for triples containing the ql:has-predicate triple's
// object.
for (auto& otherChild : pq->children()) {
Expand Down Expand Up @@ -568,10 +576,12 @@ bool QueryPlanner::checkUsePatternTrick(
return;
}
const auto& selectClause = arg._subquery.selectClause();
for (const auto& v : selectClause._selectedVariables) {
if (v == t._o) {
usePatternTrick = false;
break;
if(selectClause._varsOrAsterisk.isVariables()) {
for (const auto& v : selectClause._varsOrAsterisk.getSelectVariables()) {
if (v == t._o) {
usePatternTrick = false;
break;
}
}
}
} else if constexpr (std::is_same_v<T, GraphPatternOperation::Bind>) {
Expand Down Expand Up @@ -620,10 +630,12 @@ bool QueryPlanner::checkUsePatternTrick(
return;
}
const auto& selectClause = arg._subquery.selectClause();
for (const auto& v : selectClause._selectedVariables) {
if (v == t._o) {
usePatternTrick = false;
break;
if(selectClause._varsOrAsterisk.isVariables()) {
for (const auto& v : selectClause._varsOrAsterisk.getSelectVariables()) {
if (v == t._o) {
usePatternTrick = false;
break;
}
}
}
} else if constexpr (std::is_same_v<T, GraphPatternOperation::
Expand Down Expand Up @@ -828,25 +840,27 @@ vector<QueryPlanner::SubtreePlan> QueryPlanner::getDistinctRow(
vector<size_t> keepIndices;
ad_utility::HashSet<size_t> indDone;
const auto& colMap = parent._qet->getVariableColumns();
for (const auto& var : selectClause._selectedVariables) {
const auto it = colMap.find(var);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
} else if (ad_utility::startsWith(var, "SCORE(") ||
ad_utility::startsWith(var, "TEXT(")) {
auto varInd = var.find('?');
auto cVar = var.substr(varInd, var.rfind(')') - varInd);
const auto it = colMap.find(cVar);
if(selectClause._varsOrAsterisk.isVariables()){
for (const auto& var : selectClause._varsOrAsterisk.getSelectVariables()) {
const auto it = colMap.find(var);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
} else if (ad_utility::startsWith(var, "SCORE(") ||
ad_utility::startsWith(var, "TEXT(")) {
auto varInd = var.find('?');
auto cVar = var.substr(varInd, var.rfind(')') - varInd);
const auto it = colMap.find(cVar);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
}
}
}
}
Expand Down
24 changes: 17 additions & 7 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,19 @@ Awaitable<json> Server::composeResponseQleverJson(
j["query"] = query._originalString;
j["status"] = "OK";
j["warnings"] = qet.collectWarnings();
j["selected"] =
query.hasSelectClause()
? query.selectClause()._selectedVariables
: std::vector<std::string>{"?subject", "?predicate", "?object"};
if(query.hasSelectClause()){
if(query.selectClause()._varsOrAsterisk.isAsterisk()) {
string str;
str+=(query.selectClause()._varsOrAsterisk.getAsterisk());
j["selected"] = std::vector<std::string>{str};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you also need the variable names in the (correct!) order,

and not only the asterisk.

}
else {
j["selected"] = query.selectClause()._varsOrAsterisk.getSelectVariables();
}
}
else {
j["selected"] = std::vector<std::string>{"?subject", "?predicate", "?object"};
}

j["runtimeInformation"] = RuntimeInformation::ordered_json(
qet.getRootOperation()->getRuntimeInfo());
Expand All @@ -184,7 +193,7 @@ Awaitable<json> Server::composeResponseQleverJson(
requestTimer.cont();
j["res"] = query.hasSelectClause()
? qet.writeResultAsQLeverJson(
query.selectClause()._selectedVariables, limit,
query.selectClause()._varsOrAsterisk, limit,
offset, std::move(resultTable))
: qet.writeRdfGraphJson(query.constructClause(), limit,
offset, std::move(resultTable));
Expand Down Expand Up @@ -220,8 +229,9 @@ Awaitable<json> Server::composeResponseSparqlJson(
std::min(query._limit.value_or(MAX_NOF_ROWS_IN_RESULT), maxSend);
size_t offset = query._offset.value_or(0);
requestTimer.cont();
j = qet.writeResultAsSparqlJson(query.selectClause()._selectedVariables,
j = qet.writeResultAsSparqlJson(query.selectClause()._varsOrAsterisk,
limit, offset, std::move(resultTable));

requestTimer.stop();
return j;
};
Expand All @@ -238,7 +248,7 @@ Server::composeResponseSepValues(const ParsedQuery& query,
size_t offset = query._offset.value_or(0);
return query.hasSelectClause()
? qet.generateResults<format>(
query.selectClause()._selectedVariables, limit, offset)
query.selectClause()._varsOrAsterisk, limit, offset)
: qet.writeRdfGraphSeparatedValues<format>(
query.constructClause(), limit, offset, qet.getResult());
};
Expand Down