Skip to content

Commit

Permalink
refactor: improve lucene handling in gravsearch (DEV-2148) (#2667)
Browse files Browse the repository at this point in the history
  • Loading branch information
BalduinLandolt committed May 17, 2023
1 parent 873eb46 commit bf5d4be
Show file tree
Hide file tree
Showing 13 changed files with 68 additions and 176 deletions.
Expand Up @@ -10,9 +10,8 @@ import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.messages.util.search._
import org.knora.webapi.routing.UnsafeZioRun
import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString
import org.knora.webapi.messages.util.search.gravsearch.transformers._
import org.knora.webapi.routing.UnsafeZioRun

/**
* Tests [[SparqlTransformer]].
Expand Down Expand Up @@ -139,11 +138,13 @@ class SparqlTransformerSpec extends CoreSpec {
pred = IriRef(OntologyConstants.KnoraBase.ValueHasString.toSmartIri),
QueryVariable("text__valueHasString")
)
val luceneQueryPattern = LuceneQueryPattern(
val luceneQueryPattern = StatementPattern(
subj = QueryVariable("text"),
obj = QueryVariable("text__valueHasString"),
queryString = LuceneQueryString("Zeitglöcklein"),
literalStatement = Some(valueHasStringStatement)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "Zeitglöcklein",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
val patterns: Seq[QueryPattern] = Seq(
hasValueStatement,
Expand Down
@@ -1,12 +1,14 @@
package org.knora.webapi.messages.util.search.gravsearch.prequery

import scala.collection.mutable.ArrayBuffer
import dsp.errors.AssertionException
import zio.ZIO

import scala.collection.mutable.ArrayBuffer

import dsp.errors.AssertionException
import org.knora.webapi.CoreSpec
import org.knora.webapi.core.MessageRelay
import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.messages.util.search._
import org.knora.webapi.messages.util.search.gravsearch.GravsearchParser
Expand All @@ -15,7 +17,6 @@ import org.knora.webapi.messages.util.search.gravsearch.types.GravsearchTypeInsp
import org.knora.webapi.messages.util.search.gravsearch.types.GravsearchTypeInspectionUtil
import org.knora.webapi.routing.UnsafeZioRun
import org.knora.webapi.sharedtestdata.SharedTestDataADM.anythingAdminUser
import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString

class GravsearchToPrequeryTransformerSpec extends CoreSpec {

Expand Down Expand Up @@ -1366,19 +1367,12 @@ class GravsearchToPrequeryTransformerSpec extends CoreSpec {
),
obj = QueryVariable(variableName = "recipient")
),
LuceneQueryPattern(
StatementPattern(
subj = QueryVariable(variableName = "familyName"),
obj = QueryVariable(variableName = "familyName__valueHasString"),
queryString = LuceneQueryString(queryString = "Bernoulli"),
literalStatement = Some(
StatementPattern(
subj = QueryVariable(variableName = "familyName"),
pred = IriRef(
iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri,
propertyPathOperator = None
),
obj = QueryVariable(variableName = "familyName__valueHasString")
)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "Bernoulli",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
)
Expand Down Expand Up @@ -2377,19 +2371,12 @@ class GravsearchToPrequeryTransformerSpec extends CoreSpec {
datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri
)
),
LuceneQueryPattern(
StatementPattern(
subj = QueryVariable(variableName = "richtext"),
obj = QueryVariable(variableName = "richtext__valueHasString"),
queryString = LuceneQueryString(queryString = "test"),
literalStatement = Some(
StatementPattern(
subj = QueryVariable(variableName = "richtext"),
pred = IriRef(
iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri,
propertyPathOperator = None
),
obj = QueryVariable(variableName = "richtext__valueHasString")
)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "test",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
),
Expand Down Expand Up @@ -2451,19 +2438,12 @@ class GravsearchToPrequeryTransformerSpec extends CoreSpec {
datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri
)
),
LuceneQueryPattern(
StatementPattern(
subj = QueryVariable(variableName = "text"),
obj = QueryVariable(variableName = "text__valueHasString"),
queryString = LuceneQueryString(queryString = "test"),
literalStatement = Some(
StatementPattern(
subj = QueryVariable(variableName = "text"),
pred = IriRef(
iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri,
propertyPathOperator = None
),
obj = QueryVariable(variableName = "text__valueHasString")
)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "test",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
)
Expand Down
Expand Up @@ -588,10 +588,6 @@ object OntologyConstants {
val StandoffStyleElementTag: IRI = StandoffPrefixExpansion + "StandoffStyleTag"
}

/** Constants whose IRIs live in the Ontotext namespace. */
object Ontotext {
/** IRI in the Ontotext OWLIM Lucene namespace that identifies the full-text search index. */
val LuceneFulltext = "http://www.ontotext.com/owlim/lucene#fullTextSearchIndex"
}

object XPathFunctions {
val XPathPrefixExpansion: IRI = "http://www.w3.org/2005/xpath-functions#"

Expand Down Expand Up @@ -1122,4 +1118,8 @@ object OntologyConstants {
object NamedGraphs {
val DataNamedGraphStart: IRI = "http://www.knora.org/data"
}

/** Constants whose IRIs live in the Apache Jena namespace used by Fuseki. */
object Fuseki {
/** IRI of the Jena text-index `query` property; used as the predicate of Lucene full-text search statements. */
val luceneQueryPredicate = "http://jena.apache.org/text#query"
}
}
Expand Up @@ -139,9 +139,6 @@ final case class QueryTraverser(
}
ZIO.collectAll(transformedBlocks).map(blocks => Seq(UnionPattern(blocks)))

case luceneQueryPattern: LuceneQueryPattern =>
whereTransformer.transformLuceneQueryPattern(luceneQueryPattern)

case valuesPattern: ValuesPattern => ZIO.succeed(Seq(valuesPattern))

case bindPattern: BindPattern => ZIO.succeed(Seq(bindPattern))
Expand Down
Expand Up @@ -8,14 +8,12 @@ package org.knora.webapi.messages.util.search
import akka.http.scaladsl.model.HttpCharsets
import akka.http.scaladsl.model.MediaType

import dsp.errors.AssertionException
import dsp.errors.GravsearchException
import org.knora.webapi._
import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.SmartIri
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString

/**
* Constants used in processing SPARQL queries.
Expand Down Expand Up @@ -182,26 +180,6 @@ case class StatementPattern(subj: Entity, pred: Entity, obj: Entity) extends Que
}
}

/**
 * Placeholder pattern for a Lucene full-text index search. It has no SPARQL form of its own and
 * is expected to be rewritten into triplestore-specific statements while Gravsearch is processed.
 *
 * @param subj             the variable standing for the subject to be found.
 * @param obj              the variable standing for the indexed literal.
 * @param queryString      the Lucene query string to match.
 * @param literalStatement a statement linking `subj` to `obj`; some triplestores need it, others do not.
 *                         Defined only when it has not already been added to the generated SPARQL.
 */
case class LuceneQueryPattern(
  subj: QueryVariable,
  obj: QueryVariable,
  queryString: LuceneQueryString,
  literalStatement: Option[StatementPattern]
) extends QueryPattern {

  // Serialising this pattern directly is always an error: it should have been rewritten first.
  override def toSparql: String = {
    val message = "LuceneQueryPattern should have been transformed into statements"
    throw AssertionException(message)
  }
}

/**
* Represents a BIND command in a query.
*
Expand Down
Expand Up @@ -155,9 +155,6 @@ abstract class AbstractPrequeryGenerator(
.optimiseQueryPatterns(patterns)
)

/**
 * Passes a [[LuceneQueryPattern]] through untouched.
 *
 * @param luceneQueryPattern the pattern to transform.
 * @return an effect yielding a one-element sequence that holds the same pattern.
 */
override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Task[Seq[QueryPattern]] =
  ZIO.succeed {
    Seq(luceneQueryPattern)
  }

/**
* Transforms a [[org.knora.webapi.messages.util.search.FilterPattern]] in a WHERE clause into zero or more statement patterns.
*
Expand Down Expand Up @@ -1628,17 +1625,10 @@ abstract class AbstractPrequeryGenerator(

val searchTerms: LuceneQueryString = LuceneQueryString(searchTerm.value)

// Replace the filter with a LuceneQueryPattern.
// Replace the filter with a Lucene statement.
TransformedFilterPattern(
None, // FILTER has been replaced by statements
Seq(
LuceneQueryPattern(
subj = textValueVar,
obj = textValHasString,
queryString = searchTerms,
literalStatement = valueHasStringStatement
)
)
lucenePattern(textValueVar, searchTerm.value)
)
}

Expand Down Expand Up @@ -1713,17 +1703,10 @@ abstract class AbstractPrequeryGenerator(

val searchTerms: LuceneQueryString = LuceneQueryString(searchTerm.value)

// Replace the filter with a LuceneQueryPattern.
// Replace the filter with a Lucene statement.
TransformedFilterPattern(
None, // FILTER has been replaced by statements
Seq(
LuceneQueryPattern(
subj = textValueVar,
obj = textValHasString,
queryString = searchTerms,
literalStatement = valueHasStringStatement
)
)
lucenePattern(textValueVar, searchTerm.value)
)
}

Expand Down Expand Up @@ -1799,16 +1782,8 @@ abstract class AbstractPrequeryGenerator(

val searchTerms: LuceneQueryString = LuceneQueryString(searchTermStr.value)

// Generate a LuceneQueryPattern to search the full-text search index, to assert that text value contains
// the search terms.
val luceneQueryPattern: Seq[LuceneQueryPattern] = Seq(
LuceneQueryPattern(
subj = textValueVar,
obj = textValHasString,
queryString = searchTerms,
literalStatement = None // We have to add this statement ourselves, so LuceneQueryPattern doesn't need to.
)
)
// Generate a Lucene statement, to assert that text value contains the search terms.
val luceneStatement = lucenePattern(textValueVar, searchTermStr.value)

// Generate query patterns to assign the text in the standoff tag to a variable, if we
// haven't done so already.
Expand Down Expand Up @@ -1868,7 +1843,7 @@ abstract class AbstractPrequeryGenerator(

TransformedFilterPattern(
expression = None, // The expression has been replaced by additional patterns.
additionalPatterns = valueHasStringStatement.toSeq ++ luceneQueryPattern ++ markedUpPatternsToAdd ++ regexFilters
additionalPatterns = valueHasStringStatement.toSeq ++ luceneStatement ++ markedUpPatternsToAdd ++ regexFilters
)
}

Expand Down Expand Up @@ -1983,19 +1958,24 @@ abstract class AbstractPrequeryGenerator(
functionCallExpression.getArgAsLiteral(1, xsdDatatype = OntologyConstants.Xsd.String.toSmartIri)
val luceneQueryString: LuceneQueryString = LuceneQueryString(searchTerm.value)

// Replace the filter with a LuceneQueryPattern.
// Replace the filter with a Lucene search statement.
TransformedFilterPattern(
None, // The FILTER has been replaced by statements.
Seq(
LuceneQueryPattern(
subj = resourceVar,
obj = rdfsLabelVar,
queryString = luceneQueryString,
literalStatement = rdfsLabelStatement
lucenePattern(resourceVar, searchTerm.value)
)
}

/**
 * Builds the statement that queries the Lucene full-text index for the given subject.
 *
 * @param subj        the variable used as the subject of the search statement.
 * @param queryString the query string, emitted as an `xsd:string` literal in object position.
 * @return a one-element sequence containing the search statement.
 */
private def lucenePattern(subj: QueryVariable, queryString: String): Seq[StatementPattern] = {
  val textQueryPredicate = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri)
  val searchLiteral = XsdLiteral(value = queryString, datatype = OntologyConstants.Xsd.String.toSmartIri)

  // In Fuseki, an index entry is associated with an entity that has a literal,
  // so that entity (not the literal) is the subject of the statement.
  Seq(StatementPattern(subj = subj, pred = textQueryPredicate, obj = searchLiteral))
}
}

/**
* Handles the function `knora-api:StandoffLink`.
Expand Down
Expand Up @@ -20,7 +20,6 @@ import org.knora.webapi.messages.util.search.BindPattern
import org.knora.webapi.messages.util.search.Entity
import org.knora.webapi.messages.util.search.FilterNotExistsPattern
import org.knora.webapi.messages.util.search.IriRef
import org.knora.webapi.messages.util.search.LuceneQueryPattern
import org.knora.webapi.messages.util.search.MinusPattern
import org.knora.webapi.messages.util.search.OptionalPattern
import org.knora.webapi.messages.util.search.QueryPattern
Expand Down Expand Up @@ -101,15 +100,14 @@ final case class InferenceOptimizationServiceLive(
/**
 * Collects the entities that occur in the given query patterns.
 *
 * Statement patterns contribute their subject, predicate and object; Lucene query patterns their
 * subject and object; VALUES patterns their values; BIND patterns their expression. UNION blocks and
 * the contents of FILTER NOT EXISTS, MINUS and OPTIONAL are searched recursively. Every other
 * pattern type contributes nothing.
 *
 * NOTE(review): the BIND arm casts the expression with `asInstanceOf[Entity]`, which is unchecked
 * at compile time -- confirm that BIND expressions reaching this method are always entities.
 *
 * @param patterns the query patterns to inspect.
 * @return the entities found, in pattern order; duplicates are not removed here.
 */
def getEntities(patterns: Seq[QueryPattern]): Seq[Entity] =
patterns.flatMap { pattern =>
pattern match {
case ValuesPattern(_, values) => values.toSeq
case BindPattern(_, expression) => List(expression.asInstanceOf[Entity])
case UnionPattern(blocks) => blocks.flatMap(block => getEntities(block))
case StatementPattern(subj, pred, obj) => List(subj, pred, obj)
case LuceneQueryPattern(subj, obj, _, _) => List(subj, obj)
case FilterNotExistsPattern(patterns) => getEntities(patterns)
case MinusPattern(patterns) => getEntities(patterns)
case OptionalPattern(patterns) => getEntities(patterns)
case _ => List.empty
case ValuesPattern(_, values) => values.toSeq
case BindPattern(_, expression) => List(expression.asInstanceOf[Entity])
case UnionPattern(blocks) => blocks.flatMap(block => getEntities(block))
case StatementPattern(subj, pred, obj) => List(subj, pred, obj)
case FilterNotExistsPattern(patterns) => getEntities(patterns)
case MinusPattern(patterns) => getEntities(patterns)
case OptionalPattern(patterns) => getEntities(patterns)
case _ => List.empty
}
}

Expand Down
Expand Up @@ -7,7 +7,6 @@ package org.knora.webapi.messages.util.search.gravsearch.transformers

import zio._

import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.SmartIri
import org.knora.webapi.messages.util.search._
import org.knora.webapi.slice.resourceinfo.domain.IriConverter
Expand Down Expand Up @@ -64,17 +63,9 @@ final case class ConstructTransformer(
case OptionalPattern(patterns) => ZIO.foreach(patterns)(transformPattern(_, limit).map(OptionalPattern))
case UnionPattern(blocks) =>
ZIO.foreach(blocks)(optimizeAndTransformPatterns(_, limit)).map(block => Seq(UnionPattern(block)))
case lucenePattern: LuceneQueryPattern => transformLuceneQueryPattern(lucenePattern)
case pattern: QueryPattern => ZIO.succeed(Seq(pattern))
case pattern: QueryPattern => ZIO.succeed(Seq(pattern))
}

/**
 * Rewrites a [[LuceneQueryPattern]] as a single statement whose predicate is the Jena text-index
 * `query` IRI and whose object is the query string as an `xsd:string` literal.
 *
 * @param pattern the Lucene query pattern to rewrite.
 * @return an effect yielding the replacement statement; it fails if either IRI conversion fails.
 */
private def transformLuceneQueryPattern(pattern: LuceneQueryPattern): Task[Seq[QueryPattern]] =
  iriConverter.asSmartIri("http://jena.apache.org/text#query").flatMap { textQueryIri =>
    iriConverter.asSmartIri(OntologyConstants.Xsd.String).map { xsdStringIri =>
      val searchLiteral = XsdLiteral(pattern.queryString.getQueryString, xsdStringIri)
      Seq(StatementPattern(pattern.subj, IriRef(textQueryIri), searchLiteral))
    }
  }

}

object ConstructTransformer {
Expand Down
Expand Up @@ -148,26 +148,6 @@ final case class OntologyInferencer(
// The predicate isn't a property IRI or no inference should be done, so no expansion needed.
ZIO.succeed(Seq(statementPattern))
}

/**
 * Rewrites a [[LuceneQueryPattern]] into the statement Fuseki uses for a full-text search.
 *
 * @param luceneQueryPattern the pattern to rewrite.
 * @return an effect yielding the Fuseki-specific statement; anything thrown while building it
 *         is captured as a failure of the effect.
 */
def transformLuceneQueryPatternForFuseki(luceneQueryPattern: LuceneQueryPattern): Task[Seq[StatementPattern]] =
  ZIO.attempt {
    val textQueryPredicate = IriRef("http://jena.apache.org/text#query".toSmartIri)
    val searchLiteral = XsdLiteral(
      value = luceneQueryPattern.queryString.getQueryString,
      datatype = OntologyConstants.Xsd.String.toSmartIri
    )

    // In Fuseki, an index entry is associated with an entity that has a literal,
    // so the pattern's subject is used as the statement's subject.
    Seq(StatementPattern(subj = luceneQueryPattern.subj, pred = textQueryPredicate, obj = searchLiteral))
  }
}

object OntologyInferencer {
Expand Down

0 comments on commit bf5d4be

Please sign in to comment.