-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Starting 'Select *' implementation #546
Changes from 5 commits
3ada215
fe6577e
7b8c3d8
c45ad7a
c176e1f
8cb8570
3bb4aaa
085f23d
4054695
43d7f9c
ed8ea69
c331fc4
afdbb93
c40cb3f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
#include <sstream> | ||
#include <string> | ||
#include <utility> | ||
#include <variant> | ||
|
||
#include "../parser/RdfEscaping.h" | ||
#include "./Distinct.h" | ||
|
@@ -91,22 +92,29 @@ void QueryExecutionTree::setVariableColumns( | |
// _____________________________________________________________________________ | ||
template <QueryExecutionTree::ExportSubFormat format> | ||
ad_utility::stream_generator::stream_generator | ||
QueryExecutionTree::generateResults(const vector<string>& selectVars, | ||
QueryExecutionTree::generateResults(const SelectedVarsOrAsterisk & selectVarsOrAsterisk, | ||
size_t limit, size_t offset) const { | ||
// They may trigger computation (but does not have to). | ||
shared_ptr<const ResultTable> resultTable = getResult(); | ||
LOG(DEBUG) << "Resolving strings for finished binary result...\n"; | ||
vector<std::optional<pair<size_t, ResultTable::ResultType>>> validIndices; | ||
for (auto var : selectVars) { | ||
if (ad_utility::startsWith(var, "TEXT(")) { | ||
var = var.substr(5, var.rfind(')') - 5); | ||
} | ||
auto it = getVariableColumns().find(var); | ||
if (it != getVariableColumns().end()) { | ||
validIndices.push_back(pair<size_t, ResultTable::ResultType>( | ||
it->second, resultTable->getResultType(it->second))); | ||
} else { | ||
validIndices.push_back(std::nullopt); | ||
if(selectVarsOrAsterisk.isAsterisk()) { | ||
for(const auto& elem : getVariableColumns()) | ||
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>( | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
elem.second, resultTable->getResultType(elem.second))); | ||
} | ||
else { | ||
for (auto var : selectVarsOrAsterisk.getSelectVariables()) { | ||
if (ad_utility::startsWith(var, "TEXT(")) { | ||
var = var.substr(5, var.rfind(')') - 5); | ||
} | ||
auto it = getVariableColumns().find(var); | ||
if (it != getVariableColumns().end()) { | ||
validIndices.emplace_back(pair<size_t, ResultTable::ResultType>( | ||
it->second, resultTable->getResultType(it->second))); | ||
} else { | ||
validIndices.emplace_back(std::nullopt); | ||
} | ||
} | ||
} | ||
if (validIndices.empty()) { | ||
|
@@ -123,48 +131,56 @@ QueryExecutionTree::generateResults(const vector<string>& selectVars, | |
|
||
template ad_utility::stream_generator::stream_generator | ||
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::CSV>( | ||
const vector<string>& selectVars, size_t limit, size_t offset) const; | ||
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset) const; | ||
|
||
template ad_utility::stream_generator::stream_generator | ||
QueryExecutionTree::generateResults<QueryExecutionTree::ExportSubFormat::TSV>( | ||
const vector<string>& selectVars, size_t limit, size_t offset) const; | ||
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset) const; | ||
|
||
template ad_utility::stream_generator::stream_generator QueryExecutionTree:: | ||
generateResults<QueryExecutionTree::ExportSubFormat::BINARY>( | ||
const vector<string>& selectVars, size_t limit, size_t offset) const; | ||
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset) const; | ||
|
||
// ___________________________________________________________________________ | ||
QueryExecutionTree::ColumnIndicesAndTypes | ||
QueryExecutionTree::selectedVariablesToColumnIndices( | ||
const std::vector<string>& selectVariables, | ||
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, | ||
const ResultTable& resultTable) const { | ||
ColumnIndicesAndTypes exportColumns; | ||
for (auto var : selectVariables) { | ||
if (ad_utility::startsWith(var, "TEXT(")) { | ||
var = var.substr(5, var.rfind(')') - 5); | ||
} | ||
if (getVariableColumns().contains(var)) { | ||
auto columnIndex = getVariableColumns().at(var); | ||
if(selectVarsOrAsterisk.isAsterisk()) { | ||
for(const auto& elem: getVariableColumns()) { | ||
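There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As a further improvement: `elem` is a key-value pair, so you can bind its two members to descriptive names directly in the loop header, e.g. `for (const auto& [variable, columnIndex] : getVariableColumns())`, and then use `variable` and `columnIndex` here instead of `elem.first` and `elem.second`. This is a C++17 feature called "structured binding" in case you want to research it. |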
||
exportColumns.push_back(VariableAndColumnIndex{ | ||
var, columnIndex, resultTable.getResultType(columnIndex)}); | ||
} else { | ||
exportColumns.emplace_back(std::nullopt); | ||
elem.first, elem.second, resultTable.getResultType(elem.second)}); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you sort here by the variable (currently `elem.first`), then the output order becomes deterministic. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Of course, it should then be noted in a code comment that these deterministic orders are not required by the standard. |
||
} | ||
} | ||
else { | ||
for (auto var : selectVarsOrAsterisk.getSelectVariables()) { | ||
if (ad_utility::startsWith(var, "TEXT(")) { | ||
var = var.substr(5, var.rfind(')') - 5); | ||
} | ||
if (getVariableColumns().contains(var)) { | ||
auto columnIndex = getVariableColumns().at(var); | ||
exportColumns.push_back(VariableAndColumnIndex{ | ||
var, columnIndex, resultTable.getResultType(columnIndex)}); | ||
} else { | ||
exportColumns.emplace_back(std::nullopt); | ||
} | ||
} | ||
} | ||
return exportColumns; | ||
} | ||
|
||
// _____________________________________________________________________________ | ||
nlohmann::json QueryExecutionTree::writeResultAsQLeverJson( | ||
const vector<string>& selectVars, size_t limit, size_t offset, | ||
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset, | ||
shared_ptr<const ResultTable> resultTable) const { | ||
// They may trigger computation (but does not have to). | ||
if (!resultTable) { | ||
resultTable = getResult(); | ||
} | ||
LOG(DEBUG) << "Resolving strings for finished binary result...\n"; | ||
ColumnIndicesAndTypes validIndices = | ||
selectedVariablesToColumnIndices(selectVars, *resultTable); | ||
selectedVariablesToColumnIndices(selectVarsOrAsterisk, *resultTable); | ||
if (validIndices.empty()) { | ||
return {std::vector<std::string>()}; | ||
} | ||
|
@@ -175,7 +191,7 @@ nlohmann::json QueryExecutionTree::writeResultAsQLeverJson( | |
|
||
// _____________________________________________________________________________ | ||
nlohmann::json QueryExecutionTree::writeResultAsSparqlJson( | ||
const vector<string>& selectVars, size_t limit, size_t offset, | ||
const SelectedVarsOrAsterisk & selectVarsOrAsterisk, size_t limit, size_t offset, | ||
shared_ptr<const ResultTable> resultTable) const { | ||
using nlohmann::json; | ||
|
||
|
@@ -186,7 +202,7 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson( | |
LOG(DEBUG) << "Finished computing the query result in the ID space. " | ||
"Resolving strings in result...\n"; | ||
ColumnIndicesAndTypes columns = | ||
selectedVariablesToColumnIndices(selectVars, *resultTable); | ||
selectedVariablesToColumnIndices(selectVarsOrAsterisk, *resultTable); | ||
|
||
std::erase(columns, std::nullopt); | ||
|
||
|
@@ -197,7 +213,17 @@ nlohmann::json QueryExecutionTree::writeResultAsSparqlJson( | |
const IdTable& idTable = resultTable->_idTable; | ||
|
||
json result; | ||
result["head"]["vars"] = selectVars; | ||
|
||
if(selectVarsOrAsterisk.isAsterisk()) { | ||
vector<string> vars_names; | ||
for(auto const& varName_index: getVariableColumns()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
vars_names.push_back(varName_index.first); | ||
} | ||
result["head"]["vars"] = vars_names; | ||
} | ||
else { | ||
result["head"]["vars"] = selectVarsOrAsterisk.getSelectVariables(); | ||
} | ||
|
||
json bindings = json::array(); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -499,16 +499,24 @@ bool QueryPlanner::checkUsePatternTrick( | |
|
||
// check that all selected variables are outputs of | ||
// CountAvailablePredicates | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for (const std::string& s : selectClause._selectedVariables) { | ||
if (s != t._o && s != count_var_name) { | ||
usePatternTrick = false; | ||
break; | ||
} | ||
/* | ||
if(selectClause._varsOrAsterisk.isAsterisk()) { | ||
return false; | ||
} | ||
if (!usePatternTrick) { | ||
continue; | ||
*/ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this commented out? |
||
if(selectClause._varsOrAsterisk.isVariables()) { | ||
for (const std::string& s : selectClause._varsOrAsterisk.getSelectVariables()) { | ||
if (s != t._o && s != count_var_name) { | ||
usePatternTrick = false; | ||
break; | ||
} | ||
} | ||
if (!usePatternTrick) { | ||
continue; | ||
} | ||
} | ||
|
||
|
||
// Check for triples containing the ql:has-predicate triple's | ||
// object. | ||
for (auto& otherChild : pq->children()) { | ||
|
@@ -568,10 +576,12 @@ bool QueryPlanner::checkUsePatternTrick( | |
return; | ||
} | ||
const auto& selectClause = arg._subquery.selectClause(); | ||
for (const auto& v : selectClause._selectedVariables) { | ||
if (v == t._o) { | ||
usePatternTrick = false; | ||
break; | ||
if(selectClause._varsOrAsterisk.isVariables()) { | ||
for (const auto& v : selectClause._varsOrAsterisk.getSelectVariables()) { | ||
if (v == t._o) { | ||
usePatternTrick = false; | ||
break; | ||
} | ||
} | ||
} | ||
} else if constexpr (std::is_same_v<T, GraphPatternOperation::Bind>) { | ||
|
@@ -620,10 +630,12 @@ bool QueryPlanner::checkUsePatternTrick( | |
return; | ||
} | ||
const auto& selectClause = arg._subquery.selectClause(); | ||
for (const auto& v : selectClause._selectedVariables) { | ||
if (v == t._o) { | ||
usePatternTrick = false; | ||
break; | ||
if(selectClause._varsOrAsterisk.isVariables()) { | ||
for (const auto& v : selectClause._varsOrAsterisk.getSelectVariables()) { | ||
if (v == t._o) { | ||
usePatternTrick = false; | ||
break; | ||
} | ||
} | ||
} | ||
} else if constexpr (std::is_same_v<T, GraphPatternOperation:: | ||
|
@@ -828,25 +840,27 @@ vector<QueryPlanner::SubtreePlan> QueryPlanner::getDistinctRow( | |
vector<size_t> keepIndices; | ||
ad_utility::HashSet<size_t> indDone; | ||
const auto& colMap = parent._qet->getVariableColumns(); | ||
for (const auto& var : selectClause._selectedVariables) { | ||
const auto it = colMap.find(var); | ||
if (it != colMap.end()) { | ||
auto ind = it->second; | ||
if (indDone.count(ind) == 0) { | ||
keepIndices.push_back(ind); | ||
indDone.insert(ind); | ||
} | ||
} else if (ad_utility::startsWith(var, "SCORE(") || | ||
ad_utility::startsWith(var, "TEXT(")) { | ||
auto varInd = var.find('?'); | ||
auto cVar = var.substr(varInd, var.rfind(')') - varInd); | ||
const auto it = colMap.find(cVar); | ||
if(selectClause._varsOrAsterisk.isVariables()){ | ||
for (const auto& var : selectClause._varsOrAsterisk.getSelectVariables()) { | ||
const auto it = colMap.find(var); | ||
if (it != colMap.end()) { | ||
auto ind = it->second; | ||
if (indDone.count(ind) == 0) { | ||
keepIndices.push_back(ind); | ||
indDone.insert(ind); | ||
} | ||
} else if (ad_utility::startsWith(var, "SCORE(") || | ||
ad_utility::startsWith(var, "TEXT(")) { | ||
auto varInd = var.find('?'); | ||
auto cVar = var.substr(varInd, var.rfind(')') - varInd); | ||
const auto it = colMap.find(cVar); | ||
if (it != colMap.end()) { | ||
auto ind = it->second; | ||
if (indDone.count(ind) == 0) { | ||
keepIndices.push_back(ind); | ||
indDone.insert(ind); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -169,10 +169,19 @@ Awaitable<json> Server::composeResponseQleverJson( | |
j["query"] = query._originalString; | ||
j["status"] = "OK"; | ||
j["warnings"] = qet.collectWarnings(); | ||
j["selected"] = | ||
query.hasSelectClause() | ||
? query.selectClause()._selectedVariables | ||
: std::vector<std::string>{"?subject", "?predicate", "?object"}; | ||
if(query.hasSelectClause()){ | ||
if(query.selectClause()._varsOrAsterisk.isAsterisk()) { | ||
string str; | ||
str+=(query.selectClause()._varsOrAsterisk.getAsterisk()); | ||
j["selected"] = std::vector<std::string>{str}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you also need the variable names in the (correct!) order, and not only the asterisk. |
||
} | ||
else { | ||
j["selected"] = query.selectClause()._varsOrAsterisk.getSelectVariables(); | ||
} | ||
} | ||
else { | ||
j["selected"] = std::vector<std::string>{"?subject", "?predicate", "?object"}; | ||
} | ||
|
||
j["runtimeInformation"] = RuntimeInformation::ordered_json( | ||
qet.getRootOperation()->getRuntimeInfo()); | ||
|
@@ -184,7 +193,7 @@ Awaitable<json> Server::composeResponseQleverJson( | |
requestTimer.cont(); | ||
j["res"] = query.hasSelectClause() | ||
? qet.writeResultAsQLeverJson( | ||
query.selectClause()._selectedVariables, limit, | ||
query.selectClause()._varsOrAsterisk, limit, | ||
offset, std::move(resultTable)) | ||
: qet.writeRdfGraphJson(query.constructClause(), limit, | ||
offset, std::move(resultTable)); | ||
|
@@ -220,8 +229,9 @@ Awaitable<json> Server::composeResponseSparqlJson( | |
std::min(query._limit.value_or(MAX_NOF_ROWS_IN_RESULT), maxSend); | ||
size_t offset = query._offset.value_or(0); | ||
requestTimer.cont(); | ||
j = qet.writeResultAsSparqlJson(query.selectClause()._selectedVariables, | ||
j = qet.writeResultAsSparqlJson(query.selectClause()._varsOrAsterisk, | ||
limit, offset, std::move(resultTable)); | ||
|
||
requestTimer.stop(); | ||
return j; | ||
}; | ||
|
@@ -238,7 +248,7 @@ Server::composeResponseSepValues(const ParsedQuery& query, | |
size_t offset = query._offset.value_or(0); | ||
return query.hasSelectClause() | ||
? qet.generateResults<format>( | ||
query.selectClause()._selectedVariables, limit, offset) | ||
query.selectClause()._varsOrAsterisk, limit, offset) | ||
: qet.writeRdfGraphSeparatedValues<format>( | ||
query.constructClause(), limit, offset, qet.getResult()); | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the type is now called `SelectedVarsOrAsterisk`, the corresponding variables should also be called `selectedVarsOrAsterisk` (vs. `selectVarsOrAsterisk`).