Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Initial stab at Term queries and filter caching:

This commit adds support for unanalyzed fields and for caching filter queries,
which speeds up queries on Elasticsearch.
TODOs:
- Debug the commented-out line in the testUnanalyzed unit test (ElasticQueryTest:308).
  `"terms" : { "field" : [ [ "value1", "value2" ] ] }` queries don't work
  because they fail with a SearchParseException.
- Make SlashemGeoField extend SlashemUnanalyzedStringField
- Decide what to do for Solr queries on this field type (in the case of
  SlashemGeoField).  Do we want two separate field types, one for each backend?
  The current plan is to not implement Term queries for Solr because we receive
  no benefit there.
  • Loading branch information...
commit a1ac4bc976b8d924809413be84663fb1054fdc2c 1 parent c4eb3ad
@adamalix adamalix authored
View
39 src/main/scala/com/foursquare/slashem/Ast.scala
@@ -7,6 +7,7 @@ import org.elasticsearch.index.query.{FilterBuilder => ElasticFilterBuilder,
QueryBuilder => ElasticQueryBuilder,
QueryBuilders => EQueryBuilders,
QueryStringQueryBuilder}
+import scalaj.collection.Imports._
/**
* Abstract Syntax Tree used to represent queries.
@@ -407,6 +408,40 @@ object Ast {
}
}
+ /**
+ * A term query. Used for queries that don't need to be analyzed
+ *
+ * By default, elasticFilter() will always be cached!
+ */
+ case class Term[T](query: Iterable[T], escaped: Boolean = true, cached: Boolean = true) extends Query[T] {
+ // hack for single term queries
+ def this(query: T) = this(List(query))
+ /** @inheritdoc */
+ //def extend() = throw new UnimplementedException("Slashem does not support Term queries Solr")
+ def extend(): String = {
+ escaped match {
+ case true => {'"' + escape(query.toString) + '"'}
+ case false => '"' + query.toString + '"'
+ }
+ }
+ /** @inheritdoc */
+ def elasticExtend(qf: List[WeightedField], pf: List[PhraseWeightedField], mm: Option[String]): ElasticQueryBuilder = {
+ val fieldName = qf.head.fieldName
+ val weight = qf.head.weight.toFloat
+ query match {
+ case term::Nil => EQueryBuilders.termQuery(fieldName, term).boost(weight)
+ case terms => EQueryBuilders.termsQuery(fieldName, terms.asJava).boost(weight)
+ }
+ }
+ /** @inheritdoc */
+ override def elasticFilter(qf: List[WeightedField]): ElasticFilterBuilder = {
+ val fieldName = qf.head.fieldName
+ query match {
+ case term::Nil => EFilterBuilders.termFilter(fieldName, term).cache(cached)
+ case terms => EFilterBuilders.termsFilter(fieldName, terms.asJava).cache(cached)
+ }
+ }
+ }
case class Range[T](q1: Query[T],q2: Query[T]) extends Query[T] {
/** @inheritdoc */
@@ -487,7 +522,7 @@ object Ast {
}
/**
- * Class representing clauses ANDed together
+ * Class representing queries ANDed together
*/
case class And[T](queries: Query[T]*) extends Query[T] {
/** @inheritdoc */
@@ -507,7 +542,7 @@ object Ast {
}
}
/**
- * Case class representing a list of clauses ORed together
+ * Case class representing a list of queries ORed together
*/
case class Or[T](queries: Query[T]*) extends Query[T] {
/** @inheritdoc */
View
60 src/main/scala/com/foursquare/slashem/Schema.scala
@@ -55,6 +55,12 @@ case class SolrResponseException(code: Int, reason: String, solrName: String, qu
}
}
+case class UnimplementedException(reason: String) extends RuntimeException {
+ override def getMessage(): String = {
+ "Not implemented: %s".format(reason)
+ }
+}
+
/** The response header. There are normally more fields in the response header we could extract, but
* we don't at present. */
case class ResponseHeader @JsonCreator()(@JsonProperty("status")status: Int, @JsonProperty("QTime")QTime: Int)
@@ -782,28 +788,54 @@ trait SolrSchema[M <: Record[M]] extends SlashemSchema[M] {
}
+/**
+ * A field type for unanalyzed queries. Results in using Term[V] queries.
+ */
+trait SlashemUnanalyzedField[V, M <: Record[M]] extends SlashemField[V, M] {
+ self: Field[V, M] =>
+ import Helpers._
+
+ override val unanalyzed = true
+}
trait SlashemField[V, M <: Record[M]] extends OwnedField[M] {
self: Field[V, M] =>
import Helpers._
- //Note eqs and neqs results in phrase queries!
- def eqs(v: V) = Clause[V](self.queryName, Group(Phrase(v)))
- def neqs(v: V) = Clause[V](self.queryName, Phrase(v),false)
+ val unanalyzed = false
+
+ def produceQuery(v: V): Query[V] = {
+ unanalyzed match {
+ // use new to use Term's additional non-default constructor
+ case true => new Term(v)
+ case false => Phrase(v)
+ }
+ }
+
+ def produceGroupedQuery(v: Iterable[V]): Query[V] = {
+ unanalyzed match {
+ // we don't want to groupWithOr and instead take advantage of "terms" queries
+ case true => Term(v)
+ case false => groupWithOr(v.map({x: V => produceQuery(x)}))
+ }
+ }
+
+ def eqs(v: V) = Clause[V](self.queryName, Group(produceQuery(v)))
+ def neqs(v: V) = Clause[V](self.queryName, produceQuery(v),false)
//With a boost
- def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(Phrase(v)),b))
- def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(Phrase(v),b),false)
+ def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(produceQuery(v)),b))
+ def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(produceQuery(v),b),false)
//This allows for bag of words style matching.
def contains(v: V) = Clause[V](self.queryName, Group(BagOfWords(v)))
def contains(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(BagOfWords(v)),b))
- def in(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})))
- def nin(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})),false)
+ def in(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v))
+ def nin(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v),false)
- def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b))
- def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b),false)
+ def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b))
+ def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b),false)
def inRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))))
def ninRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))),false)
@@ -843,6 +875,14 @@ trait SlashemField[V, M <: Record[M]] extends OwnedField[M] {
//Slashem field types
class SlashemStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T]
+/**
+ * Field type that can be queried without analyzing whitespace.
+ *
+ * @see SlashemStringField
+ */
+class SlashemUnanalyzedStringField[T <: Record[T]](owner: T)
+ extends StringField[T](owner, 0) with SlashemUnanalyzedField[String, T]
+
//Allows for querying against the default filed in solr. This field doesn't have a name
class SlashemDefaultStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T] {
override def name = ""
@@ -951,7 +991,7 @@ class SlashemPointField[T <: Record[T]](owner: T) extends PointField[T](owner) w
class SlashemBooleanField[T <: Record[T]](owner: T) extends BooleanField[T](owner) with SlashemField[Boolean, T]
class SlashemDateTimeField[T <: Record[T]](owner: T) extends JodaDateTimeField[T](owner) with SlashemField[DateTime, T]
//More restrictive type so we can access the geohash
-class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends StringField[T](owner,0) with SlashemField[String, T] {
+class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends SlashemStringField[T](owner) {
def inRadius(geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int = owner.geohash.maxCells) = {
val cellIds = owner.geohash.coverString(geoLat, geoLong, radiusInMeters, maxCells = maxCells)
//If we have an empty cover we default to everything.
View
13 src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala
@@ -310,6 +310,17 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
}
@Test
+ def testUnanalyzed {
+ try {
+ val res1 = ESimplePanda where (_.termsfield eqs "termhit") fetch()
+ //val res2 = ESimplePanda where (_.termsfield in List("randomterm", "termhit")) fetch()
+ Assert.assertEquals(res1.response.results.length, 1)
+ } catch {
+ case e: Exception => e.printStackTrace()
+ }
+ }
+
+ @Test
def testListFieldIn {
val response1 = ESimplePanda where (_.favnums in List(2, 3, 4, 5)) fetch()
val response2 = ESimplePanda where (_.favnums in List(99)) fetch()
@@ -394,6 +405,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
val favnums1 = List(1, 2, 3, 4, 5).asJava
val favnums2 = List(1, 2, 3, 4, 5).asJava
val favnums3 = List(6, 7, 8, 9, 10).asJava
+ val terms1 = List("termhit", "nohit").asJava
val nicknames1 = List("jerry", "dawg", "xzibit").asJava
val nicknames2 = List("xzibit", "alvin").asJava
val nicknames3 = List("alvin", "nathaniel", "joiner").asJava
@@ -407,6 +419,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
.field("favnums", favnums1)
.field("nicknames", nicknames1)
.field("hugenums", hugenums1)
+ .field("termsfield", terms1)
.endObject()
).execute()
.actionGet();
View
1  src/test/scala/com/foursquare/slashem/ElasticTest.scala
@@ -18,6 +18,7 @@ class ESimplePanda extends ElasticSchema[ESimplePanda] {
object favnums extends SlashemIntListField(this)
object nicknames extends SlashemStringListField(this)
object hugenums extends SlashemLongListField(this)
+ object termsfield extends SlashemUnanalyzedStringField(this)
}
object ESimpleGeoPanda extends ESimpleGeoPanda with ElasticMeta[ESimpleGeoPanda] {
Please sign in to comment.
Something went wrong with that request. Please try again.