Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Starting 'Select *' implementation #546

Merged
merged 14 commits into from
Jan 27, 2022
Merged
8 changes: 4 additions & 4 deletions e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ PROJECT_DIR=$(readlink -f -- "$(dirname ${BASH_SOURCE[0]})/..")

# Change to the project directory so we can use simple relative paths
echo "Changing to project directory: $PROJECT_DIR"
pushd $PROJECT_DIR
pushd "$PROJECT_DIR"
BINARY_DIR=$(readlink -f -- ./build)
if [ ! -e $BINARY_DIR ]; then
if [ ! -e "$BINARY_DIR" ]; then
BINARY_DIR=$(readlink -f -- .)
fi
echo "Binary dir is $BINARY_DIR"
Expand All @@ -88,7 +88,7 @@ mkdir -p "$INDEX_DIR"
# Travis' caching creates it
if [ ! -e "$INPUT.nt" ]; then
# Why the hell is this a ZIP that can't easily be decompressed from stdin?!?
unzip -j $ZIPPED_INPUT -d "$INPUT_DIR/"
unzip -j "$ZIPPED_INPUT" -d "$INPUT_DIR/"
fi;


Expand Down Expand Up @@ -123,5 +123,5 @@ echo "Waiting for ServerMain to launch and open port"
while ! curl --max-time 1 --output /dev/null --silent http://localhost:9099/; do
sleep 1
done
$PYTHON_BINARY "$PROJECT_DIR/e2e/queryit.py" "$PROJECT_DIR/e2e/scientists_queries.yaml" "http://localhost:9099" &> $BINARY_DIR/query_log.txt || bail "Querying Server failed"
$PYTHON_BINARY "$PROJECT_DIR/e2e/queryit.py" "$PROJECT_DIR/e2e/scientists_queries.yaml" "http://localhost:9099" &> "$BINARY_DIR/query_log.txt" || bail "Querying Server failed"
popd
100 changes: 72 additions & 28 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <sstream>
#include <string>
#include <utility>
#include <variant>

#include "../parser/RdfEscaping.h"
#include "./Distinct.h"
Expand Down Expand Up @@ -91,22 +92,41 @@ void QueryExecutionTree::setVariableColumns(
// _____________________________________________________________________________
template <QueryExecutionTree::ExportSubFormat format>
ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults(const vector<string>& selectVars,
QueryExecutionTree::generateResults(const SelectedVarsOrAsterisk & selectedVarsOrAsterisk,
size_t limit, size_t offset) const {
// This may trigger computation (but does not have to).
shared_ptr<const ResultTable> resultTable = getResult();
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
vector<std::optional<pair<size_t, ResultTable::ResultType>>> validIndices;
for (auto var : selectVars) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
if(selectedVarsOrAsterisk.isAsterisk()) {
list<string> orderedVariables = selectedVarsOrAsterisk.retrieveOrder();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

call them orderedVariablesFromQuery instead of orderedVariables

auto allVars = getVariableColumns();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

allVars -> variablesFromExecutionTree

for (const auto& var : orderedVariables) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

var -> variableFromQuery

auto it = allVars.find(var);
if (it != allVars.end()) {
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
allVars.erase(it);
} else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else branch should also warn:
LOG(WARN) << "The variable \"" << variableFromQuery <<
"\" was found in the original query, but not in the execution tree. "
"This is likely a bug\n";

validIndices.emplace_back(std::nullopt);
}
}
auto it = getVariableColumns().find(var);
if (it != getVariableColumns().end()) {
validIndices.push_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
} else {
validIndices.push_back(std::nullopt);
for(const auto& var : allVars){
LOG(DEBUG) << "Variable " << var.first << " was not parsed!! \n";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LOG(WARN) << "The variable \"" << var.first << "\" was found in the execution tree, but not in the original query. This is likely a bug\n";

}
}
else {
for (auto var : selectedVarsOrAsterisk.getSelectVariables()) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
auto it = getVariableColumns().find(var);
if (it != getVariableColumns().end()) {
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
} else {
validIndices.emplace_back(std::nullopt);
}
}
}
if (validIndices.empty()) {
Expand All @@ -123,48 +143,61 @@ QueryExecutionTree::generateResults(const vector<string>& selectVars,

template ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::CSV>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset) const;

template ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::TSV>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset) const;

template ad_utility::stream_generator::stream_generator QueryExecutionTree::
generateResults<QueryExecutionTree::ExportSubFormat::BINARY>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset) const;

// ___________________________________________________________________________
QueryExecutionTree::ColumnIndicesAndTypes
QueryExecutionTree::selectedVariablesToColumnIndices(
const std::vector<string>& selectVariables,
SelectedVarsOrAsterisk selectedVarsOrAsterisk,
const ResultTable& resultTable) const {
ColumnIndicesAndTypes exportColumns;
for (auto var : selectVariables) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
if(selectedVarsOrAsterisk.isAsterisk()) {
for(auto var: selectedVarsOrAsterisk.retrieveOrder()){
if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also add the complementary warning here.

}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also use your code from above here (including the warning etc.)
We have another PR by someone else upcoming, that will soon remove this duplication.

if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
}
else {
for (auto var : selectedVarsOrAsterisk.getSelectVariables()) {
if (ad_utility::startsWith(var, "TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
}
}
}
return exportColumns;
}

// _____________________________________________________________________________
nlohmann::json QueryExecutionTree::writeResultAsQLeverJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable) const {
// This may trigger computation (but does not have to).
if (!resultTable) {
resultTable = getResult();
}
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
ColumnIndicesAndTypes validIndices =
selectedVariablesToColumnIndices(selectVars, *resultTable);
selectedVariablesToColumnIndices(selectedVarsOrAsterisk, *resultTable);
if (validIndices.empty()) {
return {std::vector<std::string>()};
}
Expand All @@ -175,7 +208,7 @@ nlohmann::json QueryExecutionTree::writeResultAsQLeverJson(

// _____________________________________________________________________________
nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable) const {
using nlohmann::json;

Expand All @@ -186,7 +219,7 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
LOG(DEBUG) << "Finished computing the query result in the ID space. "
"Resolving strings in result...\n";
ColumnIndicesAndTypes columns =
selectedVariablesToColumnIndices(selectVars, *resultTable);
selectedVariablesToColumnIndices(selectedVarsOrAsterisk, *resultTable);

std::erase(columns, std::nullopt);

Expand All @@ -197,7 +230,17 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
const IdTable& idTable = resultTable->_idTable;

json result;
result["head"]["vars"] = selectVars;

if(selectedVarsOrAsterisk.isAsterisk()) {
vector<string> vars_names;
for(auto const & variable: selectedVarsOrAsterisk.retrieveOrder()) {
vars_names.push_back(variable);
}
result["head"]["vars"] = vars_names;
}
else {
result["head"]["vars"] = selectedVarsOrAsterisk.getSelectVariables();
}

json bindings = json::array();

Expand Down Expand Up @@ -242,6 +285,7 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
};

for (size_t rowIndex = offset; rowIndex < upperBound; ++rowIndex) {
// Because this is an 'nlohmann' object, its keys are sorted alphabetically!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace by: // TODO: `ordered_json` entries are ordered alphabetically, but insertion order would be preferable.

nlohmann::ordered_json binding;
for (const auto& column : columns) {
const auto& currentId = idTable(rowIndex, column->_columnIndex);
Expand Down
11 changes: 7 additions & 4 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "../util/streamable_generator.h"
#include "./Operation.h"
#include "./QueryExecutionContext.h"
#include "../parser/ParsedQuery.h"

using std::shared_ptr;
using std::string;
Expand Down Expand Up @@ -101,17 +102,19 @@ class QueryExecutionTree {
ResultTable::ResultType _resultType;
};

using SelectedVarsOrAsterisk = ParsedQuery::SelectedVarsOrAsterisk;

using ColumnIndicesAndTypes = vector<std::optional<VariableAndColumnIndex>>;

// Returns a vector where the i-th element contains the column index and
// `ResultType` of the i-th `selectVariable` in the `resultTable`
ColumnIndicesAndTypes selectedVariablesToColumnIndices(
const std::vector<string>& selectVariables,
SelectedVarsOrAsterisk selectedVarsOrAsterisk,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this not a const & anymore?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you modify it in place.

const ResultTable& resultTable) const;

template <ExportSubFormat format>
ad_utility::stream_generator::stream_generator generateResults(
const vector<string>& selectVars, size_t limit = MAX_NOF_ROWS_IN_RESULT,
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit = MAX_NOF_ROWS_IN_RESULT,
size_t offset = 0) const;

// Generate an RDF graph in turtle format for a CONSTRUCT query.
Expand All @@ -131,11 +134,11 @@ class QueryExecutionTree {
size_t offset, std::shared_ptr<const ResultTable> res) const;

nlohmann::json writeResultAsQLeverJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable = nullptr) const;

nlohmann::json writeResultAsSparqlJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk & selectedVarsOrAsterisk, size_t limit, size_t offset,
shared_ptr<const ResultTable> preComputedResult = nullptr) const;

const std::vector<size_t>& resultSortedOn() const {
Expand Down
57 changes: 34 additions & 23 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,9 +497,14 @@ bool QueryPlanner::checkUsePatternTrick(
continue;
}

// check that all selected variables are outputs of
// Check that all selected variables are outputs of
// CountAvailablePredicates
joka921 marked this conversation as resolved.
Show resolved Hide resolved
for (const std::string& s : selectClause._selectedVariables) {
if(selectClause._varsOrAsterisk.isAsterisk()) {
return false;
}

const auto& selectedVariables = selectClause._varsOrAsterisk.getSelectVariables();
for (const std::string& s : selectedVariables) {
if (s != t._o && s != count_var_name) {
usePatternTrick = false;
break;
Expand Down Expand Up @@ -568,10 +573,12 @@ bool QueryPlanner::checkUsePatternTrick(
return;
}
const auto& selectClause = arg._subquery.selectClause();
for (const auto& v : selectClause._selectedVariables) {
if (v == t._o) {
usePatternTrick = false;
break;
if(selectClause._varsOrAsterisk.isVariables()) {
for (const auto& v : selectClause._varsOrAsterisk.getSelectVariables()) {
if (v == t._o) {
usePatternTrick = false;
break;
}
}
}
} else if constexpr (std::is_same_v<T, GraphPatternOperation::Bind>) {
Expand Down Expand Up @@ -620,10 +627,12 @@ bool QueryPlanner::checkUsePatternTrick(
return;
}
const auto& selectClause = arg._subquery.selectClause();
for (const auto& v : selectClause._selectedVariables) {
if (v == t._o) {
usePatternTrick = false;
break;
if(selectClause._varsOrAsterisk.isVariables()) {
for (const auto& v : selectClause._varsOrAsterisk.getSelectVariables()) {
if (v == t._o) {
usePatternTrick = false;
break;
}
}
}
} else if constexpr (std::is_same_v<T, GraphPatternOperation::
Expand Down Expand Up @@ -828,25 +837,27 @@ vector<QueryPlanner::SubtreePlan> QueryPlanner::getDistinctRow(
vector<size_t> keepIndices;
ad_utility::HashSet<size_t> indDone;
const auto& colMap = parent._qet->getVariableColumns();
for (const auto& var : selectClause._selectedVariables) {
const auto it = colMap.find(var);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
} else if (ad_utility::startsWith(var, "SCORE(") ||
ad_utility::startsWith(var, "TEXT(")) {
auto varInd = var.find('?');
auto cVar = var.substr(varInd, var.rfind(')') - varInd);
const auto it = colMap.find(cVar);
if(selectClause._varsOrAsterisk.isVariables()){
for (const auto& var : selectClause._varsOrAsterisk.getSelectVariables()) {
const auto it = colMap.find(var);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
} else if (ad_utility::startsWith(var, "SCORE(") ||
ad_utility::startsWith(var, "TEXT(")) {
auto varInd = var.find('?');
auto cVar = var.substr(varInd, var.rfind(')') - varInd);
const auto it = colMap.find(cVar);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
}
}
}
}
Expand Down
24 changes: 17 additions & 7 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,19 @@ Awaitable<json> Server::composeResponseQleverJson(
j["query"] = query._originalString;
j["status"] = "OK";
j["warnings"] = qet.collectWarnings();
j["selected"] =
query.hasSelectClause()
? query.selectClause()._selectedVariables
: std::vector<std::string>{"?subject", "?predicate", "?object"};
if(query.hasSelectClause()){
if(query.selectClause()._varsOrAsterisk.isAsterisk()) {
auto list = query.selectClause()._varsOrAsterisk.retrieveOrder();
std::vector<string> result{ list.begin(),list.end() };
j["selected"] = result;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will make retrieveOrder return a std::vector directly, then you can write
j["selected"] = ...... retrieveOrder() and save the two lines above.

}
else {
j["selected"] = query.selectClause()._varsOrAsterisk.getSelectVariables();
}
}
else {
j["selected"] = std::vector<std::string>{"?subject", "?predicate", "?object"};
}

j["runtimeInformation"] = RuntimeInformation::ordered_json(
qet.getRootOperation()->getRuntimeInfo());
Expand All @@ -184,7 +193,7 @@ Awaitable<json> Server::composeResponseQleverJson(
requestTimer.cont();
j["res"] = query.hasSelectClause()
? qet.writeResultAsQLeverJson(
query.selectClause()._selectedVariables, limit,
query.selectClause()._varsOrAsterisk, limit,
offset, std::move(resultTable))
: qet.writeRdfGraphJson(query.constructClause(), limit,
offset, std::move(resultTable));
Expand Down Expand Up @@ -220,8 +229,9 @@ Awaitable<json> Server::composeResponseSparqlJson(
std::min(query._limit.value_or(MAX_NOF_ROWS_IN_RESULT), maxSend);
size_t offset = query._offset.value_or(0);
requestTimer.cont();
j = qet.writeResultAsSparqlJson(query.selectClause()._selectedVariables,
j = qet.writeResultAsSparqlJson(query.selectClause()._varsOrAsterisk,
limit, offset, std::move(resultTable));

requestTimer.stop();
return j;
};
Expand All @@ -238,7 +248,7 @@ Server::composeResponseSepValues(const ParsedQuery& query,
size_t offset = query._offset.value_or(0);
return query.hasSelectClause()
? qet.generateResults<format>(
query.selectClause()._selectedVariables, limit, offset)
query.selectClause()._varsOrAsterisk, limit, offset)
: qet.writeRdfGraphSeparatedValues<format>(
query.constructClause(), limit, offset, qet.getResult());
};
Expand Down