Skip to content
Permalink
Browse files

fix!(gravsearch): Handle UNION scopes with FILTER correctly (DSP-1240) (

  • Loading branch information
benjamingeer committed Jan 25, 2021
1 parent b1e1b9e commit 48d77cde965ce861c9b611f0a246509c70ea9ad0
@@ -51,6 +51,10 @@ It is certainly possible to write Gravsearch queries by hand, but we expect
that in general, they will be automatically generated by client
software, e.g. by a client user interface.

For a more detailed overview of Gravsearch, see
[Gravsearch: Transforming SPARQL to query humanities data](https://doi.org/10.3233/SW-200386).


## Submitting Gravsearch Queries

The recommended way to submit a Gravsearch query is via HTTP POST:
@@ -1087,3 +1091,123 @@ CONSTRUCT {
FILTER(?pubdate = "JULIAN:1497-03-01"^^knora-api:Date) .
}
```

## Scoping Issues

SPARQL is evaluated [from the bottom up](https://sourceforge.net/p/bigdata/news/2015/09/understanding-sparqls-bottom-up-semantics/).
A `UNION` block therefore opens a new scope, in which variables bound at
higher levels are not necessarily in scope. This can cause unexpected results if queries
are not carefully designed. Gravsearch tries to prevent this by rejecting queries in the
following cases.

### FILTER in UNION

A `FILTER` in a `UNION` block can only use variables that are bound in the same block, otherwise the query will be rejected. This query is invalid because `?text` is not bound in the `UNION` block containing the `FILTER` where the variable is used:

```sparql
PREFIX knora-api: <http://api.knora.org/ontology/knora-api/simple/v2#>
PREFIX mls: <http://0.0.0.0:3333/ontology/0807/mls/simple/v2#>
CONSTRUCT {
?lemma knora-api:isMainResource true .
?lemma mls:hasLemmaText ?text .
} WHERE {
?lemma a mls:Lemma .
?lemma mls:hasLemmaText ?text .
{
?lemma mls:hasPseudonym ?pseudo .
FILTER regex(?pseudo, "Abel", "i") .
} UNION {
FILTER regex(?text, "Abel", "i") .
}
}
ORDER BY ASC(?text)
OFFSET 0
```

It can be corrected like this:

```sparql
PREFIX knora-api: <http://api.knora.org/ontology/knora-api/simple/v2#>
PREFIX mls: <http://0.0.0.0:3333/ontology/0807/mls/simple/v2#>
CONSTRUCT {
?lemma knora-api:isMainResource true .
?lemma mls:hasLemmaText ?text .
} WHERE {
?lemma a mls:Lemma .
?lemma mls:hasLemmaText ?text .
{
?lemma mls:hasPseudonym ?pseudo .
FILTER regex(?pseudo, "Abel", "i") .
} UNION {
?lemma mls:hasLemmaText ?text .
FILTER regex(?text, "Abel", "i") .
}
}
ORDER BY ASC(?text)
OFFSET 0
```

### ORDER BY

A variable used in `ORDER BY` must be bound at the top level of the `WHERE` clause. This query is invalid, because `?int` is not bound at the top level of the `WHERE` clause:

```sparql
PREFIX knora-api: <http://api.knora.org/ontology/knora-api/v2#>
PREFIX anything: <http://0.0.0.0:3333/ontology/0001/anything/v2#>
CONSTRUCT {
?thing knora-api:isMainResource true .
?thing anything:hasInteger ?int .
?thing anything:hasRichtext ?richtext .
?thing anything:hasText ?text .
} WHERE {
?thing a knora-api:Resource .
?thing a anything:Thing .
{
?thing anything:hasRichtext ?richtext .
FILTER knora-api:matchText(?richtext, "test")
?thing anything:hasInteger ?int .
}
UNION
{
?thing anything:hasText ?text .
FILTER knora-api:matchText(?text, "test")
?thing anything:hasInteger ?int .
}
}
ORDER BY (?int)
```

It can be corrected like this:

```sparql
PREFIX knora-api: <http://api.knora.org/ontology/knora-api/v2#>
PREFIX anything: <http://0.0.0.0:3333/ontology/0001/anything/v2#>
CONSTRUCT {
?thing knora-api:isMainResource true .
?thing anything:hasInteger ?int .
?thing anything:hasRichtext ?richtext .
?thing anything:hasText ?text .
} WHERE {
?thing a knora-api:Resource .
?thing a anything:Thing .
?thing anything:hasInteger ?int .
{
?thing anything:hasRichtext ?richtext .
FILTER knora-api:matchText(?richtext, "test")
}
UNION
{
?thing anything:hasText ?text .
FILTER knora-api:matchText(?text, "test")
}
}
ORDER BY (?int)
```
@@ -19,6 +19,9 @@ License along with Knora. If not, see <http://www.gnu.org/licenses/>.

# Gravsearch Design

For a detailed overview of Gravsearch, see
[Gravsearch: Transforming SPARQL to query humanities data](https://doi.org/10.3233/SW-200386).

## Gravsearch Package

The classes that process Gravsearch queries and results can be found in `org.knora.webapi.messages.util.search.gravsearch`.
@@ -59,6 +59,16 @@ trait WhereTransformer {
*/
def optimiseQueryPatterns(patterns: Seq[QueryPattern]): Seq[QueryPattern]

/**
* Called before entering a UNION block.
*/
def enteringUnionBlock(): Unit

/**
* Called before leaving a UNION block.
*/
def leavingUnionBlock(): Unit

/**
* Transforms a [[StatementPattern]] in a WHERE clause into zero or more query patterns.
*
@@ -240,10 +250,13 @@ object QueryTraverser {
Seq(OptionalPattern(patterns = transformedPatterns))

case unionPattern: UnionPattern =>
val transformedBlocks: Seq[Seq[QueryPattern]] = unionPattern.blocks.map { blockPatterns =>
transformWherePatterns(patterns = blockPatterns,
whereTransformer = whereTransformer,
inputOrderBy = inputOrderBy)
val transformedBlocks: Seq[Seq[QueryPattern]] = unionPattern.blocks.map { blockPatterns: Seq[QueryPattern] =>
whereTransformer.enteringUnionBlock()
val transformedPatterns: Seq[QueryPattern] = transformWherePatterns(patterns = blockPatterns,
whereTransformer = whereTransformer,
inputOrderBy = inputOrderBy)
whereTransformer.leavingUnionBlock()
transformedPatterns
}

Seq(UnionPattern(blocks = transformedBlocks))
@@ -66,6 +66,8 @@ trait SelectQueryColumn extends Entity
*/
case class QueryVariable(variableName: String) extends SelectQueryColumn {
override def toSparql: String = s"?$variableName"

override def getVariables: Set[QueryVariable] = Set(this)
}

/**
@@ -83,6 +85,8 @@ case class GroupConcat(inputVariable: QueryVariable, separator: Char, outputVari
override def toSparql: String = {
s"""(GROUP_CONCAT(DISTINCT(IF(BOUND(${inputVariable.toSparql}), STR(${inputVariable.toSparql}), "")); SEPARATOR='$separator') AS ${outputVariable.toSparql})"""
}

override def getVariables: Set[QueryVariable] = Set(inputVariable)
}

/**
@@ -108,6 +112,7 @@ case class Count(inputVariable: QueryVariable, distinct: Boolean, outputVariable
s"(COUNT($distinctAsStr ${inputVariable.toSparql}) AS ${outputVariable.toSparql})"
}

override def getVariables: Set[QueryVariable] = Set(inputVariable, outputVariable)
}

/**
@@ -135,6 +140,8 @@ case class IriRef(iri: SmartIri, propertyPathOperator: Option[Char] = None) exte
def toOntologySchema(targetSchema: OntologySchema): IriRef = {
copy(iri = iri.toOntologySchema(targetSchema))
}

override def getVariables: Set[QueryVariable] = Set.empty
}

/**
@@ -156,6 +163,8 @@ case class XsdLiteral(value: String, datatype: SmartIri) extends Entity {

"\"" + value + "\"^^" + transformedDatatype.toSparql
}

override def getVariables: Set[QueryVariable] = Set.empty
}

/**
@@ -198,7 +207,7 @@ case class StatementPattern(subj: Entity, pred: Entity, obj: Entity, namedGraph:

private def entityToOntologySchema(entity: Entity, targetSchema: OntologySchema): Entity = {
entity match {
case iriRef: IriRef => iriRef.toOntologySchema(InternalSchema)
case iriRef: IriRef => iriRef.toOntologySchema(targetSchema)
case other => other
}
}
@@ -314,7 +323,13 @@ object CompareExpressionOperator extends Enumeration {
/**
* Represents an expression that can be used in a FILTER.
*/
sealed trait Expression extends SparqlGenerator
sealed trait Expression extends SparqlGenerator {

/**
* Returns the set of query variables used in this expression.
*/
def getVariables: Set[QueryVariable]
}

/**
* Represents a comparison expression in a FILTER.
@@ -326,6 +341,8 @@ sealed trait Expression extends SparqlGenerator
case class CompareExpression(leftArg: Expression, operator: CompareExpressionOperator.Value, rightArg: Expression)
extends Expression {
override def toSparql: String = s"(${leftArg.toSparql} $operator ${rightArg.toSparql})"

override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables
}

/**
@@ -336,6 +353,8 @@ case class CompareExpression(leftArg: Expression, operator: CompareExpressionOpe
*/
case class AndExpression(leftArg: Expression, rightArg: Expression) extends Expression {
override def toSparql: String = s"(${leftArg.toSparql} && ${rightArg.toSparql})"

override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables
}

/**
@@ -346,6 +365,8 @@ case class AndExpression(leftArg: Expression, rightArg: Expression) extends Expr
*/
case class OrExpression(leftArg: Expression, rightArg: Expression) extends Expression {
override def toSparql: String = s"(${leftArg.toSparql} || ${rightArg.toSparql})"

override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables
}

/**
@@ -372,6 +393,8 @@ case object MinusOperator extends ArithmeticOperator {
*/
case class IntegerLiteral(value: Int) extends Expression {
override def toSparql: String = value.toString

override def getVariables: Set[QueryVariable] = Set.empty
}

/**
@@ -380,6 +403,8 @@ case class IntegerLiteral(value: Int) extends Expression {
case class ArithmeticExpression(leftArg: Expression, operator: ArithmeticOperator, rightArg: Expression)
extends Expression {
override def toSparql: String = s"""${leftArg.toSparql} $operator ${rightArg.toSparql}"""

override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables
}

/**
@@ -397,6 +422,8 @@ case class RegexFunction(textExpr: Expression, pattern: String, modifier: Option
case None =>
s"""regex(${textExpr.toSparql}, "$pattern")"""
}

override def getVariables: Set[QueryVariable] = textExpr.getVariables
}

/**
@@ -406,6 +433,8 @@ case class RegexFunction(textExpr: Expression, pattern: String, modifier: Option
*/
case class LangFunction(textValueVar: QueryVariable) extends Expression {
override def toSparql: String = s"""lang(${textValueVar.toSparql})"""

override def getVariables: Set[QueryVariable] = Set(textValueVar)
}

/**
@@ -419,6 +448,8 @@ case class SubStrFunction(textLiteralVar: QueryVariable, startExpression: Expres
extends Expression {
override def toSparql: String =
s"""substr(${textLiteralVar.toSparql}, ${startExpression.toSparql}, ${lengthExpression.toSparql})"""

override def getVariables: Set[QueryVariable] = Set(textLiteralVar)
}

/**
@@ -428,6 +459,8 @@ case class SubStrFunction(textLiteralVar: QueryVariable, startExpression: Expres
*/
case class StrFunction(textLiteralVar: QueryVariable) extends Expression {
override def toSparql: String = s"""str(${textLiteralVar.toSparql})"""

override def getVariables: Set[QueryVariable] = Set(textLiteralVar)
}

/**
@@ -482,6 +515,8 @@ case class FunctionCallExpression(functionIri: IriRef, args: Seq[Entity]) extend
}

}

override def getVariables: Set[QueryVariable] = args.toSet.flatMap((arg: Entity) => arg.getVariables)
}

/**
@@ -63,6 +63,10 @@ object SparqlTransformer {
Some(FromClause(IriRef(OntologyConstants.NamedGraphs.GraphDBExplicitNamedGraph.toSmartIri)))
}
}

override def enteringUnionBlock(): Unit = {}

override def leavingUnionBlock(): Unit = {}
}

/**
@@ -91,6 +95,10 @@ object SparqlTransformer {
transformLuceneQueryPatternForFuseki(luceneQueryPattern)

override def getFromClause: Option[FromClause] = None

override def enteringUnionBlock(): Unit = {}

override def leavingUnionBlock(): Unit = {}
}

/**
@@ -115,6 +123,10 @@ object SparqlTransformer {

override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Seq[QueryPattern] =
transformLuceneQueryPatternForGraphDB(luceneQueryPattern)

override def enteringUnionBlock(): Unit = {}

override def leavingUnionBlock(): Unit = {}
}

/**
@@ -138,6 +150,10 @@ object SparqlTransformer {

override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Seq[QueryPattern] =
transformLuceneQueryPatternForFuseki(luceneQueryPattern)

override def enteringUnionBlock(): Unit = {}

override def leavingUnionBlock(): Unit = {}
}

/**

0 comments on commit 48d77cd

Please sign in to comment.