Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Starting 'Select *' implementation #546

Merged
merged 14 commits into from
Jan 27, 2022
Merged
8 changes: 4 additions & 4 deletions e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ PROJECT_DIR=$(readlink -f -- "$(dirname ${BASH_SOURCE[0]})/..")

# Change to the project directory so we can use simple relative paths
echo "Changing to project directory: $PROJECT_DIR"
pushd $PROJECT_DIR
pushd "$PROJECT_DIR"
BINARY_DIR=$(readlink -f -- ./build)
if [ ! -e $BINARY_DIR ]; then
if [ ! -e "$BINARY_DIR" ]; then
BINARY_DIR=$(readlink -f -- .)
fi
echo "Binary dir is $BINARY_DIR"
Expand All @@ -88,7 +88,7 @@ mkdir -p "$INDEX_DIR"
# Travis' caching creates it
if [ ! -e "$INPUT.nt" ]; then
# Why the hell is this a ZIP that can't easily be decompressed from stdin?!?
unzip -j $ZIPPED_INPUT -d "$INPUT_DIR/"
unzip -j "$ZIPPED_INPUT" -d "$INPUT_DIR/"
fi;


Expand Down Expand Up @@ -132,5 +132,5 @@ if [ $i -ge 60 ]; then
fi

echo "ServerMain was succesfully started, running queries ..."
$PYTHON_BINARY "$PROJECT_DIR/e2e/queryit.py" "$PROJECT_DIR/e2e/scientists_queries.yaml" "http://localhost:9099" &> $BINARY_DIR/query_log.txt || bail "Querying Server failed"
$PYTHON_BINARY "$PROJECT_DIR/e2e/queryit.py" "$PROJECT_DIR/e2e/scientists_queries.yaml" "http://localhost:9099" &> "$BINARY_DIR/query_log.txt" || bail "Querying Server failed"
popd
132 changes: 101 additions & 31 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <sstream>
#include <string>
#include <utility>
#include <variant>

#include "../parser/RdfEscaping.h"
#include "./Distinct.h"
Expand Down Expand Up @@ -91,22 +92,46 @@ void QueryExecutionTree::setVariableColumns(
// _____________________________________________________________________________
template <QueryExecutionTree::ExportSubFormat format>
ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults(const vector<string>& selectVars,
size_t limit, size_t offset) const {
QueryExecutionTree::generateResults(
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset) const {
// This may trigger computation (but does not have to).
shared_ptr<const ResultTable> resultTable = getResult();
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
vector<std::optional<pair<size_t, ResultTable::ResultType>>> validIndices;
for (auto var : selectVars) {
if (var.starts_with("TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
if (selectedVarsOrAsterisk.isAsterisk()) {
auto orderedVariablesFromQuery =
selectedVarsOrAsterisk.orderedVariablesFromQueryBody();
auto variablesFromExecutionTree = getVariableColumns();
for (const auto& variableFromQuery : orderedVariablesFromQuery) {
auto it = variablesFromExecutionTree.find(variableFromQuery);
if (it != variablesFromExecutionTree.end()) {
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
variablesFromExecutionTree.erase(it);
} else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else branch should also warn:
LOG(WARN) << "The variable \"" << variableFromQuery <<
"\" was found in the original query, but not in the execution tree. "
"This is likely a bug\n";

validIndices.emplace_back(std::nullopt);
}
}
auto it = getVariableColumns().find(var);
if (it != getVariableColumns().end()) {
validIndices.push_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
} else {
validIndices.push_back(std::nullopt);
for (const auto& variableFromQuery : variablesFromExecutionTree) {
LOG(WARN) << "The variable \"" << variableFromQuery.first
<< "\" was found in the execution tree, but not in the "
"original query. "
"This is likely a bug\n";
}
} else {
for (auto variableFromQuery : selectedVarsOrAsterisk.getSelectVariables()) {
if (variableFromQuery.starts_with("TEXT(")) {
variableFromQuery =
variableFromQuery.substr(5, variableFromQuery.rfind(')') - 5);
}
auto it = getVariableColumns().find(variableFromQuery);
if (it != getVariableColumns().end()) {
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>(
it->second, resultTable->getResultType(it->second)));
} else {
validIndices.emplace_back(std::nullopt);
}
}
}
if (validIndices.empty()) {
Expand All @@ -123,48 +148,81 @@ QueryExecutionTree::generateResults(const vector<string>& selectVars,

template ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::CSV>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset) const;

template ad_utility::stream_generator::stream_generator
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::TSV>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset) const;

template ad_utility::stream_generator::stream_generator QueryExecutionTree::
generateResults<QueryExecutionTree::ExportSubFormat::BINARY>(
const vector<string>& selectVars, size_t limit, size_t offset) const;
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset) const;

// ___________________________________________________________________________
QueryExecutionTree::ColumnIndicesAndTypes
QueryExecutionTree::selectedVariablesToColumnIndices(
const std::vector<string>& selectVariables,
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk,
const ResultTable& resultTable) const {
ColumnIndicesAndTypes exportColumns;
for (auto var : selectVariables) {
if (var.starts_with("TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
if (selectedVarsOrAsterisk.isAsterisk()) {
auto variablesFromExecutionTree = getVariableColumns();
for (auto variableFromQuery :
selectedVarsOrAsterisk.orderedVariablesFromQueryBody()) {
if (getVariableColumns().contains(variableFromQuery)) {
auto columnIndex = getVariableColumns().at(variableFromQuery);
exportColumns.push_back(
VariableAndColumnIndex{variableFromQuery, columnIndex,
resultTable.getResultType(columnIndex)});
variablesFromExecutionTree.erase(variableFromQuery);
} else {
exportColumns.emplace_back(std::nullopt);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also add the complementary warning here.

LOG(WARN) << "The variable \"" << variableFromQuery
<< "\" was found in the original query, but not in the "
"execution tree. "
"This is likely a bug\n";
}
}
if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
for (const auto& variableFromQuery : variablesFromExecutionTree) {
LOG(WARN) << "The variable \"" << variableFromQuery.first
<< "\" was found in the execution tree, but not in the "
"original query. "
"This is likely a bug\n";
}
} else {
for (auto var : selectedVarsOrAsterisk.getSelectVariables()) {
if (var.starts_with("TEXT(")) {
var = var.substr(5, var.rfind(')') - 5);
}
if (getVariableColumns().contains(var)) {
auto columnIndex = getVariableColumns().at(var);
exportColumns.push_back(VariableAndColumnIndex{
var, columnIndex, resultTable.getResultType(columnIndex)});
} else {
exportColumns.emplace_back(std::nullopt);
LOG(WARN) << "The variable \"" << var
<< "\" was found in the original query, but not in the "
"execution tree. "
"This is likely a bug\n";
}
}
}
return exportColumns;
}

// _____________________________________________________________________________
nlohmann::json QueryExecutionTree::writeResultAsQLeverJson(
const vector<string>& selectVars, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable) const {
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset, shared_ptr<const ResultTable> resultTable) const {
// This may trigger computation (but does not have to).
if (!resultTable) {
resultTable = getResult();
}
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
ColumnIndicesAndTypes validIndices =
selectedVariablesToColumnIndices(selectVars, *resultTable);
selectedVariablesToColumnIndices(selectedVarsOrAsterisk, *resultTable);
if (validIndices.empty()) {
return {std::vector<std::string>()};
}
Expand All @@ -175,8 +233,8 @@ nlohmann::json QueryExecutionTree::writeResultAsQLeverJson(

// _____________________________________________________________________________
nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
const vector<string>& selectVars, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable) const {
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset, shared_ptr<const ResultTable> resultTable) const {
using nlohmann::json;

// This might trigger the actual query processing.
Expand All @@ -186,7 +244,7 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
LOG(DEBUG) << "Finished computing the query result in the ID space. "
"Resolving strings in result...\n";
ColumnIndicesAndTypes columns =
selectedVariablesToColumnIndices(selectVars, *resultTable);
selectedVariablesToColumnIndices(selectedVarsOrAsterisk, *resultTable);

std::erase(columns, std::nullopt);

Expand All @@ -197,7 +255,17 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
const IdTable& idTable = resultTable->_idTable;

json result;
result["head"]["vars"] = selectVars;

if (selectedVarsOrAsterisk.isAsterisk()) {
vector<string> vars_names;
for (auto const& variable :
selectedVarsOrAsterisk.orderedVariablesFromQueryBody()) {
vars_names.push_back(variable);
}
result["head"]["vars"] = vars_names;
} else {
result["head"]["vars"] = selectedVarsOrAsterisk.getSelectVariables();
}

json bindings = json::array();

Expand Down Expand Up @@ -242,6 +310,8 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson(
};

for (size_t rowIndex = offset; rowIndex < upperBound; ++rowIndex) {
// TODO: `ordered_json` entries are ordered alphabetically, but insertion
// order would be preferable.
nlohmann::ordered_json binding;
for (const auto& column : columns) {
const auto& currentId = idTable(rowIndex, column->_columnIndex);
Expand Down
16 changes: 10 additions & 6 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <unordered_map>
#include <unordered_set>

#include "../parser/ParsedQuery.h"
#include "../parser/data/Context.h"
#include "../parser/data/Types.h"
#include "../parser/data/VarOrTerm.h"
Expand Down Expand Up @@ -101,18 +102,20 @@ class QueryExecutionTree {
ResultTable::ResultType _resultType;
};

using SelectedVarsOrAsterisk = ParsedQuery::SelectedVarsOrAsterisk;

using ColumnIndicesAndTypes = vector<std::optional<VariableAndColumnIndex>>;

// Returns a vector where the i-th element contains the column index and
// `ResultType` of the i-th `selectVariable` in the `resultTable`
ColumnIndicesAndTypes selectedVariablesToColumnIndices(
const std::vector<string>& selectVariables,
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk,
const ResultTable& resultTable) const;

template <ExportSubFormat format>
ad_utility::stream_generator::stream_generator generateResults(
const vector<string>& selectVars, size_t limit = MAX_NOF_ROWS_IN_RESULT,
size_t offset = 0) const;
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk,
size_t limit = MAX_NOF_ROWS_IN_RESULT, size_t offset = 0) const;

// Generate an RDF graph in turtle format for a CONSTRUCT query.
ad_utility::stream_generator::stream_generator writeRdfGraphTurtle(
Expand All @@ -131,11 +134,12 @@ class QueryExecutionTree {
size_t offset, std::shared_ptr<const ResultTable> res) const;

nlohmann::json writeResultAsQLeverJson(
const vector<string>& selectVars, size_t limit, size_t offset,
shared_ptr<const ResultTable> resultTable = nullptr) const;
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset, shared_ptr<const ResultTable> resultTable = nullptr) const;

nlohmann::json writeResultAsSparqlJson(
const vector<string>& selectVars, size_t limit, size_t offset,
const SelectedVarsOrAsterisk& selectedVarsOrAsterisk, size_t limit,
size_t offset,
shared_ptr<const ResultTable> preComputedResult = nullptr) const;

const std::vector<size_t>& resultSortedOn() const {
Expand Down
59 changes: 37 additions & 22 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,9 +497,15 @@ bool QueryPlanner::checkUsePatternTrick(
continue;
}

// check that all selected variables are outputs of
// Check that all selected variables are outputs of
// CountAvailablePredicates
joka921 marked this conversation as resolved.
Show resolved Hide resolved
for (const std::string& s : selectClause._selectedVariables) {
if (selectClause._varsOrAsterisk.isAsterisk()) {
return false;
}

const auto& selectedVariables =
selectClause._varsOrAsterisk.getSelectVariables();
for (const std::string& s : selectedVariables) {
if (s != t._o && s != count_var_name) {
usePatternTrick = false;
break;
Expand Down Expand Up @@ -568,10 +574,13 @@ bool QueryPlanner::checkUsePatternTrick(
return;
}
const auto& selectClause = arg._subquery.selectClause();
for (const auto& v : selectClause._selectedVariables) {
if (v == t._o) {
usePatternTrick = false;
break;
if (selectClause._varsOrAsterisk.isVariables()) {
for (const auto& v :
selectClause._varsOrAsterisk.getSelectVariables()) {
if (v == t._o) {
usePatternTrick = false;
break;
}
}
}
} else if constexpr (std::is_same_v<T, GraphPatternOperation::Bind>) {
Expand Down Expand Up @@ -620,10 +629,13 @@ bool QueryPlanner::checkUsePatternTrick(
return;
}
const auto& selectClause = arg._subquery.selectClause();
for (const auto& v : selectClause._selectedVariables) {
if (v == t._o) {
usePatternTrick = false;
break;
if (selectClause._varsOrAsterisk.isVariables()) {
for (const auto& v :
selectClause._varsOrAsterisk.getSelectVariables()) {
if (v == t._o) {
usePatternTrick = false;
break;
}
}
}
} else if constexpr (std::is_same_v<T, GraphPatternOperation::
Expand Down Expand Up @@ -828,24 +840,27 @@ vector<QueryPlanner::SubtreePlan> QueryPlanner::getDistinctRow(
vector<size_t> keepIndices;
ad_utility::HashSet<size_t> indDone;
const auto& colMap = parent._qet->getVariableColumns();
for (const auto& var : selectClause._selectedVariables) {
const auto it = colMap.find(var);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
} else if (var.starts_with("SCORE(") || var.starts_with("TEXT(")) {
auto varInd = var.find('?');
auto cVar = var.substr(varInd, var.rfind(')') - varInd);
const auto it = colMap.find(cVar);
if (selectClause._varsOrAsterisk.isVariables()) {
for (const auto& var :
selectClause._varsOrAsterisk.getSelectVariables()) {
const auto it = colMap.find(var);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
} else if (var.starts_with("SCORE(") || var.starts_with("TEXT(")) {
auto varInd = var.find('?');
auto cVar = var.substr(varInd, var.rfind(')') - varInd);
const auto it = colMap.find(cVar);
if (it != colMap.end()) {
auto ind = it->second;
if (indDone.count(ind) == 0) {
keepIndices.push_back(ind);
indDone.insert(ind);
}
}
}
}
}
Expand Down