Skip to content

Commit

Permalink
refactor: improve lucene handling in gravsearch (DEV-2148) (#2667)
Browse files Browse the repository at this point in the history
  • Loading branch information
BalduinLandolt committed May 17, 2023
1 parent 873eb46 commit bf5d4be
Show file tree
Hide file tree
Showing 13 changed files with 68 additions and 176 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.messages.util.search._
import org.knora.webapi.routing.UnsafeZioRun
import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString
import org.knora.webapi.messages.util.search.gravsearch.transformers._
import org.knora.webapi.routing.UnsafeZioRun

/**
* Tests [[SparqlTransformer]].
Expand Down Expand Up @@ -139,11 +138,13 @@ class SparqlTransformerSpec extends CoreSpec {
pred = IriRef(OntologyConstants.KnoraBase.ValueHasString.toSmartIri),
QueryVariable("text__valueHasString")
)
val luceneQueryPattern = LuceneQueryPattern(
val luceneQueryPattern = StatementPattern(
subj = QueryVariable("text"),
obj = QueryVariable("text__valueHasString"),
queryString = LuceneQueryString("Zeitglöcklein"),
literalStatement = Some(valueHasStringStatement)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "Zeitglöcklein",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
val patterns: Seq[QueryPattern] = Seq(
hasValueStatement,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package org.knora.webapi.messages.util.search.gravsearch.prequery

import scala.collection.mutable.ArrayBuffer
import dsp.errors.AssertionException
import zio.ZIO

import scala.collection.mutable.ArrayBuffer

import dsp.errors.AssertionException
import org.knora.webapi.CoreSpec
import org.knora.webapi.core.MessageRelay
import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.messages.util.search._
import org.knora.webapi.messages.util.search.gravsearch.GravsearchParser
Expand All @@ -15,7 +17,6 @@ import org.knora.webapi.messages.util.search.gravsearch.types.GravsearchTypeInsp
import org.knora.webapi.messages.util.search.gravsearch.types.GravsearchTypeInspectionUtil
import org.knora.webapi.routing.UnsafeZioRun
import org.knora.webapi.sharedtestdata.SharedTestDataADM.anythingAdminUser
import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString

class GravsearchToPrequeryTransformerSpec extends CoreSpec {

Expand Down Expand Up @@ -1366,19 +1367,12 @@ class GravsearchToPrequeryTransformerSpec extends CoreSpec {
),
obj = QueryVariable(variableName = "recipient")
),
LuceneQueryPattern(
StatementPattern(
subj = QueryVariable(variableName = "familyName"),
obj = QueryVariable(variableName = "familyName__valueHasString"),
queryString = LuceneQueryString(queryString = "Bernoulli"),
literalStatement = Some(
StatementPattern(
subj = QueryVariable(variableName = "familyName"),
pred = IriRef(
iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri,
propertyPathOperator = None
),
obj = QueryVariable(variableName = "familyName__valueHasString")
)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "Bernoulli",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
)
Expand Down Expand Up @@ -2377,19 +2371,12 @@ class GravsearchToPrequeryTransformerSpec extends CoreSpec {
datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri
)
),
LuceneQueryPattern(
StatementPattern(
subj = QueryVariable(variableName = "richtext"),
obj = QueryVariable(variableName = "richtext__valueHasString"),
queryString = LuceneQueryString(queryString = "test"),
literalStatement = Some(
StatementPattern(
subj = QueryVariable(variableName = "richtext"),
pred = IriRef(
iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri,
propertyPathOperator = None
),
obj = QueryVariable(variableName = "richtext__valueHasString")
)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "test",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
),
Expand Down Expand Up @@ -2451,19 +2438,12 @@ class GravsearchToPrequeryTransformerSpec extends CoreSpec {
datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri
)
),
LuceneQueryPattern(
StatementPattern(
subj = QueryVariable(variableName = "text"),
obj = QueryVariable(variableName = "text__valueHasString"),
queryString = LuceneQueryString(queryString = "test"),
literalStatement = Some(
StatementPattern(
subj = QueryVariable(variableName = "text"),
pred = IriRef(
iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri,
propertyPathOperator = None
),
obj = QueryVariable(variableName = "text__valueHasString")
)
pred = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri),
obj = XsdLiteral(
value = "test",
datatype = OntologyConstants.Xsd.String.toSmartIri
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,10 +588,6 @@ object OntologyConstants {
val StandoffStyleElementTag: IRI = StandoffPrefixExpansion + "StandoffStyleTag"
}

/**
 * Constants in the Ontotext OWLIM Lucene namespace.
 */
object Ontotext {

  /** IRI naming the full-text search index in the Ontotext OWLIM Lucene namespace. */
  val LuceneFulltext: IRI = "http://www.ontotext.com/owlim/lucene#fullTextSearchIndex"
}

object XPathFunctions {
val XPathPrefixExpansion: IRI = "http://www.w3.org/2005/xpath-functions#"

Expand Down Expand Up @@ -1122,4 +1118,8 @@ object OntologyConstants {
object NamedGraphs {
val DataNamedGraphStart: IRI = "http://www.knora.org/data"
}

/**
 * Constants in the Apache Jena text-index namespace, as used with Fuseki.
 */
object Fuseki {

  /** IRI of the Jena `text#query` property, used as the predicate of a Lucene full-text search statement. */
  val luceneQueryPredicate: IRI = "http://jena.apache.org/text#query"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,6 @@ final case class QueryTraverser(
}
ZIO.collectAll(transformedBlocks).map(blocks => Seq(UnionPattern(blocks)))

case luceneQueryPattern: LuceneQueryPattern =>
whereTransformer.transformLuceneQueryPattern(luceneQueryPattern)

case valuesPattern: ValuesPattern => ZIO.succeed(Seq(valuesPattern))

case bindPattern: BindPattern => ZIO.succeed(Seq(bindPattern))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@ package org.knora.webapi.messages.util.search
import akka.http.scaladsl.model.HttpCharsets
import akka.http.scaladsl.model.MediaType

import dsp.errors.AssertionException
import dsp.errors.GravsearchException
import org.knora.webapi._
import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.SmartIri
import org.knora.webapi.messages.StringFormatter
import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString

/**
* Constants used in processing SPARQL queries.
Expand Down Expand Up @@ -182,26 +180,6 @@ case class StatementPattern(subj: Entity, pred: Entity, obj: Entity) extends Que
}
}

/**
 * Placeholder pattern standing for a Lucene full-text index search. It has no SPARQL form of its own
 * and is meant to be replaced by triplestore-specific statements while Gravsearch is processed.
 *
 * @param subj             the variable standing for the subject to be found.
 * @param obj              the variable standing for the indexed literal.
 * @param queryString      the Lucene query string to match.
 * @param literalStatement a statement linking `subj` to `obj`; some triplestores need it, others do not.
 *                         Defined only when the generated SPARQL does not contain it yet.
 */
case class LuceneQueryPattern(
  subj: QueryVariable,
  obj: QueryVariable,
  queryString: LuceneQueryString,
  literalStatement: Option[StatementPattern]
) extends QueryPattern {

  /**
   * Never returns: this pattern cannot be rendered as SPARQL directly.
   *
   * @throws AssertionException on every call.
   */
  override def toSparql: String = {
    val message = "LuceneQueryPattern should have been transformed into statements"
    throw AssertionException(message)
  }
}

/**
* Represents a BIND command in a query.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,6 @@ abstract class AbstractPrequeryGenerator(
.optimiseQueryPatterns(patterns)
)

/**
 * Passes a [[LuceneQueryPattern]] through unchanged.
 *
 * @param luceneQueryPattern the pattern to transform.
 * @return a one-element sequence containing the same pattern.
 */
override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Task[Seq[QueryPattern]] = {
  val unchanged: Seq[QueryPattern] = Seq(luceneQueryPattern)
  ZIO.succeed(unchanged)
}

/**
* Transforms a [[org.knora.webapi.messages.util.search.FilterPattern]] in a WHERE clause into zero or more statement patterns.
*
Expand Down Expand Up @@ -1628,17 +1625,10 @@ abstract class AbstractPrequeryGenerator(

val searchTerms: LuceneQueryString = LuceneQueryString(searchTerm.value)

// Replace the filter with a LuceneQueryPattern.
// Replace the filter with a Lucene statement.
TransformedFilterPattern(
None, // FILTER has been replaced by statements
Seq(
LuceneQueryPattern(
subj = textValueVar,
obj = textValHasString,
queryString = searchTerms,
literalStatement = valueHasStringStatement
)
)
lucenePattern(textValueVar, searchTerm.value)
)
}

Expand Down Expand Up @@ -1713,17 +1703,10 @@ abstract class AbstractPrequeryGenerator(

val searchTerms: LuceneQueryString = LuceneQueryString(searchTerm.value)

// Replace the filter with a LuceneQueryPattern.
// Replace the filter with a Lucene statement.
TransformedFilterPattern(
None, // FILTER has been replaced by statements
Seq(
LuceneQueryPattern(
subj = textValueVar,
obj = textValHasString,
queryString = searchTerms,
literalStatement = valueHasStringStatement
)
)
lucenePattern(textValueVar, searchTerm.value)
)
}

Expand Down Expand Up @@ -1799,16 +1782,8 @@ abstract class AbstractPrequeryGenerator(

val searchTerms: LuceneQueryString = LuceneQueryString(searchTermStr.value)

// Generate a LuceneQueryPattern to search the full-text search index, to assert that text value contains
// the search terms.
val luceneQueryPattern: Seq[LuceneQueryPattern] = Seq(
LuceneQueryPattern(
subj = textValueVar,
obj = textValHasString,
queryString = searchTerms,
literalStatement = None // We have to add this statement ourselves, so LuceneQueryPattern doesn't need to.
)
)
// Generate a Lucene statement, to assert that text value contains the search terms.
val luceneStatement = lucenePattern(textValueVar, searchTermStr.value)

// Generate query patterns to assign the text in the standoff tag to a variable, if we
// haven't done so already.
Expand Down Expand Up @@ -1868,7 +1843,7 @@ abstract class AbstractPrequeryGenerator(

TransformedFilterPattern(
expression = None, // The expression has been replaced by additional patterns.
additionalPatterns = valueHasStringStatement.toSeq ++ luceneQueryPattern ++ markedUpPatternsToAdd ++ regexFilters
additionalPatterns = valueHasStringStatement.toSeq ++ luceneStatement ++ markedUpPatternsToAdd ++ regexFilters
)
}

Expand Down Expand Up @@ -1983,19 +1958,24 @@ abstract class AbstractPrequeryGenerator(
functionCallExpression.getArgAsLiteral(1, xsdDatatype = OntologyConstants.Xsd.String.toSmartIri)
val luceneQueryString: LuceneQueryString = LuceneQueryString(searchTerm.value)

// Replace the filter with a LuceneQueryPattern.
// Replace the filter with a Lucene search statement.
TransformedFilterPattern(
None, // The FILTER has been replaced by statements.
Seq(
LuceneQueryPattern(
subj = resourceVar,
obj = rdfsLabelVar,
queryString = luceneQueryString,
literalStatement = rdfsLabelStatement
lucenePattern(resourceVar, searchTerm.value)
)
}

/**
 * Builds the statement that attaches a Lucene full-text query to `subj`.
 *
 * @param subj        the variable used as the subject of the search statement.
 * @param queryString the query text, emitted as an `xsd:string` literal object.
 * @return a one-element sequence holding the search statement.
 */
private def lucenePattern(subj: QueryVariable, queryString: String): Seq[StatementPattern] = {
  val searchPredicate = IriRef(OntologyConstants.Fuseki.luceneQueryPredicate.toSmartIri)
  val searchLiteral = XsdLiteral(
    value = queryString,
    datatype = OntologyConstants.Xsd.String.toSmartIri
  )

  // In Fuseki, an index entry is associated with an entity that has a literal.
  Seq(StatementPattern(subj = subj, pred = searchPredicate, obj = searchLiteral))
}
}

/**
* Handles the function `knora-api:StandoffLink`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import org.knora.webapi.messages.util.search.BindPattern
import org.knora.webapi.messages.util.search.Entity
import org.knora.webapi.messages.util.search.FilterNotExistsPattern
import org.knora.webapi.messages.util.search.IriRef
import org.knora.webapi.messages.util.search.LuceneQueryPattern
import org.knora.webapi.messages.util.search.MinusPattern
import org.knora.webapi.messages.util.search.OptionalPattern
import org.knora.webapi.messages.util.search.QueryPattern
Expand Down Expand Up @@ -101,15 +100,14 @@ final case class InferenceOptimizationServiceLive(
/**
 * Collects the entities mentioned in a sequence of query patterns, descending into
 * UNION blocks and into FILTER NOT EXISTS, MINUS and OPTIONAL patterns.
 *
 * @param patterns the query patterns to inspect.
 * @return the entities found, in pattern order; pattern kinds not listed below contribute nothing.
 */
def getEntities(patterns: Seq[QueryPattern]): Seq[Entity] =
  patterns.flatMap {
    case ValuesPattern(_, values) => values.toSeq
    // NOTE(review): unchecked cast kept from the original; it fails with a ClassCastException
    // if the bound expression is not an Entity. Confirm that callers only bind entities.
    case BindPattern(_, expression)          => List(expression.asInstanceOf[Entity])
    case UnionPattern(blocks)                => blocks.flatMap(block => getEntities(block))
    case StatementPattern(subj, pred, obj)   => List(subj, pred, obj)
    case LuceneQueryPattern(subj, obj, _, _) => List(subj, obj)
    case FilterNotExistsPattern(nested)      => getEntities(nested)
    case MinusPattern(nested)                => getEntities(nested)
    case OptionalPattern(nested)             => getEntities(nested)
    case _                                   => List.empty
  }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package org.knora.webapi.messages.util.search.gravsearch.transformers

import zio._

import org.knora.webapi.messages.OntologyConstants
import org.knora.webapi.messages.SmartIri
import org.knora.webapi.messages.util.search._
import org.knora.webapi.slice.resourceinfo.domain.IriConverter
Expand Down Expand Up @@ -64,17 +63,9 @@ final case class ConstructTransformer(
case OptionalPattern(patterns) => ZIO.foreach(patterns)(transformPattern(_, limit).map(OptionalPattern))
case UnionPattern(blocks) =>
ZIO.foreach(blocks)(optimizeAndTransformPatterns(_, limit)).map(block => Seq(UnionPattern(block)))
case lucenePattern: LuceneQueryPattern => transformLuceneQueryPattern(lucenePattern)
case pattern: QueryPattern => ZIO.succeed(Seq(pattern))
case pattern: QueryPattern => ZIO.succeed(Seq(pattern))
}

/**
 * Rewrites a [[LuceneQueryPattern]] as a single statement whose predicate is the Jena
 * `text#query` IRI and whose object is the pattern's query string as an `xsd:string` literal.
 *
 * @param pattern the Lucene pattern to rewrite.
 * @return a one-element sequence holding the resulting statement; fails if either IRI conversion fails.
 */
private def transformLuceneQueryPattern(pattern: LuceneQueryPattern): Task[Seq[QueryPattern]] =
  iriConverter.asSmartIri("http://jena.apache.org/text#query").flatMap { predIri =>
    iriConverter.asSmartIri(OntologyConstants.Xsd.String).map { datatype =>
      val queryLiteral = XsdLiteral(pattern.queryString.getQueryString, datatype)
      Seq(StatementPattern(pattern.subj, IriRef(predIri), queryLiteral))
    }
  }

}

object ConstructTransformer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,26 +148,6 @@ final case class OntologyInferencer(
// The predicate isn't a property IRI or no inference should be done, so no expansion needed.
ZIO.succeed(Seq(statementPattern))
}

/**
 * Turns a [[LuceneQueryPattern]] into the statement Fuseki uses for a full-text search.
 *
 * @param luceneQueryPattern the pattern to rewrite.
 * @return a one-element sequence holding the Fuseki-specific statement; any exception thrown
 *         while building it is captured as a failure of the returned effect.
 */
def transformLuceneQueryPatternForFuseki(luceneQueryPattern: LuceneQueryPattern): Task[Seq[StatementPattern]] =
  ZIO.attempt {
    val textQueryPredicate = IriRef("http://jena.apache.org/text#query".toSmartIri)
    val searchLiteral = XsdLiteral(
      value = luceneQueryPattern.queryString.getQueryString,
      datatype = OntologyConstants.Xsd.String.toSmartIri
    )

    // In Fuseki, an index entry is associated with an entity that has a literal.
    Seq(StatementPattern(subj = luceneQueryPattern.subj, pred = textQueryPredicate, obj = searchLiteral))
  }
}

object OntologyInferencer {
Expand Down

0 comments on commit bf5d4be

Please sign in to comment.