diff --git a/src/main/scala/com/foursquare/slashem/Ast.scala b/src/main/scala/com/foursquare/slashem/Ast.scala index 3d178c9..1404c0a 100644 --- a/src/main/scala/com/foursquare/slashem/Ast.scala +++ b/src/main/scala/com/foursquare/slashem/Ast.scala @@ -419,6 +419,48 @@ object Ast { } } + /** + * A term query. Used for queries that don't need to be analyzed + * + * By default, elasticFilter() will always be cached! + */ + case class Term[T](query: Iterable[T], escapeQuery: Boolean = true, cached: Boolean = true) extends Query[T] { + /** @inheritdoc */ + //def extend() = throw new UnimplementedException("Slashem does not support Term queries Solr") + def extend(): String = { + escapeQuery match { + // hack to fix wrapping the queries in a List() + case true => { + val queries = query.map(q => {'"' + escape(q.toString) + '"'}) + queries.mkString(" OR ") + } + case false => '"' + query.mkString(" OR ") + '"' + } + } + /** @inheritdoc */ + def elasticExtend(qf: List[WeightedField], pf: List[PhraseWeightedField], mm: Option[String]): ElasticQueryBuilder = { + val fieldName = qf.head.fieldName + val weight = qf.head.weight.toFloat + query match { + case term::Nil => EQueryBuilders.termQuery(fieldName, term).boost(weight) + case terms => { + val moarTerms = terms.toSeq.map(_.toString) + EQueryBuilders.termsQuery(fieldName, moarTerms: _*).boost(weight) + } + } + } + /** @inheritdoc */ + override def elasticFilter(qf: List[WeightedField]): ElasticFilterBuilder = { + val fieldName = qf.head.fieldName + query match { + case term::Nil => EFilterBuilders.termFilter(fieldName, term).cache(cached) + case terms => { + val moarTerms = terms.toSeq.map(_.toString) + EFilterBuilders.termsFilter(fieldName, moarTerms: _*).cache(cached) + } + } + } + } case class Range[T](q1: Query[T],q2: Query[T]) extends Query[T] { /** @inheritdoc */ @@ -505,7 +547,7 @@ object Ast { } /** - * Class representing clauses ANDed together + * Class representing queries ANDed together */ case class 
And[T](queries: Query[T]*) extends Query[T] { /** @inheritdoc */ @@ -525,7 +567,7 @@ object Ast { } } /** - * Case class representing a list of clauses ORed together + * Case class representing a list of queries ORed together */ case class Or[T](queries: Query[T]*) extends Query[T] { /** @inheritdoc */ diff --git a/src/main/scala/com/foursquare/slashem/Schema.scala b/src/main/scala/com/foursquare/slashem/Schema.scala index 5757ba9..fd4dc75 100644 --- a/src/main/scala/com/foursquare/slashem/Schema.scala +++ b/src/main/scala/com/foursquare/slashem/Schema.scala @@ -55,6 +55,12 @@ case class SolrResponseException(code: Int, reason: String, solrName: String, qu } } +case class UnimplementedException(reason: String) extends RuntimeException { + override def getMessage(): String = { + "Not implemented: %s".format(reason) + } +} + /** The response header. There are normally more fields in the response header we could extract, but * we don't at present. */ case class ResponseHeader @JsonCreator()(@JsonProperty("status")status: Int, @JsonProperty("QTime")QTime: Int) @@ -433,8 +439,8 @@ trait SolrGeoHash { } //Default geohash, does nothing. 
object NoopSolrGeoHash extends SolrGeoHash { - def coverString (geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int ): Seq[String] = List("pleaseUseaRealGeoHash") - def rectCoverString(topRight: (Double, Double), bottomLeft: (Double, Double), maxCells: Int = 0, minLevel: Int = 0, maxLevel: Int = 0): Seq[String] = List("pleaseUseaRealGeoHash") + def coverString (geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int ): Seq[String] = List("pleaseUseaRealGeoHash", "thisIsForFunctionalityTests") + def rectCoverString(topRight: (Double, Double), bottomLeft: (Double, Double), maxCells: Int = 0, minLevel: Int = 0, maxLevel: Int = 0): Seq[String] = List("pleaseUseaRealGeoHash", "thisIsForFunctionalityTests") } trait SlashemSchema[M <: Record[M]] extends Record[M] { @@ -782,40 +788,64 @@ trait SolrSchema[M <: Record[M]] extends SlashemSchema[M] { } +/** + * A field type for unanalyzed queries. Results in using Term[V] queries. + */ +trait SlashemUnanalyzedField[V, M <: Record[M]] extends SlashemField[V, M] { + self: Field[V, M] => + + override val unanalyzed = true +} trait SlashemField[V, M <: Record[M]] extends OwnedField[M] { self: Field[V, M] => import Helpers._ - //Note eqs and neqs results in phrase queries! - def eqs(v: V) = Clause[V](self.queryName, Group(Phrase(v))) - def neqs(v: V) = Clause[V](self.queryName, Phrase(v),false) + // Override this value to produce unanalyzed queries! 
+ val unanalyzed = false + + def produceQuery(v: V, escapeQuery: Boolean = true): Query[V] = { + unanalyzed match { + case true => Term(List(v),escapeQuery) + case false => Phrase(v,escapeQuery) + } + } + + def produceGroupedQuery(v: Iterable[V], escapeQuery: Boolean = true): Query[V] = { + unanalyzed match { + // we don't want to groupWithOr and instead take advantage of "terms" queries + case true => Term(v, escapeQuery) + case false => groupWithOr(v.map({x: V => produceQuery(x,escapeQuery)})) + } + } + + def eqs(v: V) = Clause[V](self.queryName, Group(produceQuery(v))) + def neqs(v: V) = Clause[V](self.queryName, produceQuery(v),false) //With a boost - def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(Phrase(v)),b)) - def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(Phrase(v),b),false) + def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(produceQuery(v)),b)) + def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(produceQuery(v),b),false) //This allows for bag of words style matching. def contains(v: V) = Clause[V](self.queryName, Group(BagOfWords(v))) def contains(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(BagOfWords(v)),b)) - //Search with explicit escaping. By normal we escape, set e to false to disable //Note eqs and neqs results in phrase queries! 
- def eqs(v: V, e: Boolean) = Clause[V](self.queryName, Group(Phrase(v,e))) - def neqs(v: V, e: Boolean) = Clause[V](self.queryName, Phrase(v,e),false) + def eqs(v: V, e: Boolean) = Clause[V](self.queryName, Group(produceQuery(v,e))) + def neqs(v: V, e: Boolean) = Clause[V](self.queryName, produceQuery(v,e),false) //With a boost - def eqs(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(Phrase(v,e)),b)) - def neqs(v: V, b:Float, e: Boolean) = Clause[V](self.queryName, Boost(Phrase(v,e),b),false) + def eqs(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(produceQuery(v,e)),b)) + def neqs(v: V, b:Float, e: Boolean) = Clause[V](self.queryName, Boost(produceQuery(v,e),b),false) //This allows for bag of words style matching. def contains(v: V, e: Boolean) = Clause[V](self.queryName, Group(BagOfWords(v,e))) def contains(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(BagOfWords(v,e)),b)) - def in(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)}))) - def nin(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})),false) + def in(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v)) + def nin(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v),false) - def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b)) - def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b),false) + def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b)) + def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b),false) def inRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2)))) def ninRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))),false) @@ -855,6 +885,17 @@ trait SlashemField[V, M <: 
Record[M]] extends OwnedField[M] { //Slashem field types class SlashemStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T] +/** + * Field type that can be queried without analyzing. + * + * Ex: multi-value field or a whitespace tokenized field where + * search terms are always for a specific token. + * + * @see SlashemStringField + */ +class SlashemUnanalyzedStringField[T <: Record[T]](owner: T) + extends StringField[T](owner, 0) with SlashemUnanalyzedField[String, T] + //Allows for querying against the default filed in solr. This field doesn't have a name class SlashemDefaultStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T] { override def name = "" @@ -963,7 +1004,7 @@ class SlashemPointField[T <: Record[T]](owner: T) extends PointField[T](owner) w class SlashemBooleanField[T <: Record[T]](owner: T) extends BooleanField[T](owner) with SlashemField[Boolean, T] class SlashemDateTimeField[T <: Record[T]](owner: T) extends JodaDateTimeField[T](owner) with SlashemField[DateTime, T] //More restrictive type so we can access the geohash -class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends StringField[T](owner,0) with SlashemField[String, T] { +class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends SlashemUnanalyzedStringField[T](owner) { def inRadius(geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int = owner.geohash.maxCells) = { val cellIds = owner.geohash.coverString(geoLat, geoLong, radiusInMeters, maxCells = maxCells) //If we have an empty cover we default to everything. 
diff --git a/src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala b/src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala index 8171c86..4efcabe 100644 --- a/src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala +++ b/src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala @@ -319,20 +319,26 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers { def testListFieldIn { val response1 = ESimplePanda where (_.favnums in List(2, 3, 4, 5)) fetch() val response2 = ESimplePanda where (_.favnums in List(99)) fetch() + val response3 = ESimplePanda where (_.termsfield in List("termhit", "lol")) fetch() Assert.assertEquals(response1.response.results.length, 2) Assert.assertEquals(response2.response.results.length, 0) + Assert.assertEquals(response3.response.results.length, 1) } @Test def testIntListFieldEmptyIn { - val response = ESimplePanda where (_.favnums in List()) fetch() - Assert.assertEquals(response.response.results.length, 0) + val response1 = ESimplePanda where (_.favnums in List()) fetch() + val response2 = ESimplePanda where (_.termsfield in List()) fetch() + Assert.assertEquals(response1.response.results.length, 0) + Assert.assertEquals(response2.response.results.length, 0) } @Test def testIntListFieldEmptyNin { - val response = ESimplePanda where (_.favnums nin List()) fetch() - Assert.assertEquals(response.response.results.length, 8) + val response1 = ESimplePanda where (_.favnums nin List()) fetch() + val response2 = ESimplePanda where (_.termsfield nin List()) fetch() + Assert.assertEquals(response1.response.results.length, 8) + Assert.assertEquals(response2.response.results.length, 8) } @Test @@ -350,6 +356,26 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers { val ids2 = response2.response.oids // All three docs with favnums should be returned, none contain 99 Assert.assertEquals(ids2.intersect(idsWithFavNums).length, 3) + + val response3 = ESimplePanda where (_.termsfield nin 
List("termhit")) fetch() + val ids3 = response3.response.oids + // Only two of the docs with favnums should be returned: the one indexed with "termhit" in termsfield is excluded + Assert.assertEquals(ids3.intersect(idsWithFavNums).length, 2) + } + + @Test + def testTermQueries { + val res1 = ESimplePanda where (_.termsfield eqs "termhit") fetch() + val res2 = ESimplePanda where (_.termsfield in List("randomterm", "termhit")) fetch() + Assert.assertEquals(res1.response.results.length, 1) + Assert.assertEquals(res2.response.results.length, 1) + } + + @Test + def testTermFilters { + // grab 2 results, filter to 1 + val res1 = ESimplePanda where (_.hugenums contains 1L) filter(_.termsfield in List("termhit", "randomterm")) fetch() + Assert.assertEquals(res1.response.results.length, 1) } @Before @@ -400,6 +426,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers { val favnums1 = List(1, 2, 3, 4, 5).asJava val favnums2 = List(1, 2, 3, 4, 5).asJava val favnums3 = List(6, 7, 8, 9, 10).asJava + val terms1 = List("termhit", "nohit").asJava val nicknames1 = List("jerry", "dawg", "xzibit").asJava val nicknames2 = List("xzibit", "alvin").asJava val nicknames3 = List("alvin", "nathaniel", "joiner").asJava @@ -413,6 +440,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers { .field("favnums", favnums1) .field("nicknames", nicknames1) .field("hugenums", hugenums1) + .field("termsfield", terms1) .endObject() ).execute() .actionGet(); diff --git a/src/test/scala/com/foursquare/slashem/ElasticTest.scala b/src/test/scala/com/foursquare/slashem/ElasticTest.scala index ec2b19f..bd46354 100644 --- a/src/test/scala/com/foursquare/slashem/ElasticTest.scala +++ b/src/test/scala/com/foursquare/slashem/ElasticTest.scala @@ -18,6 +18,7 @@ class ESimplePanda extends ElasticSchema[ESimplePanda] { object favnums extends SlashemIntListField(this) object nicknames extends SlashemStringListField(this) object hugenums extends SlashemLongListField(this) + object termsfield extends 
SlashemUnanalyzedStringField(this) } object ESimpleGeoPanda extends ESimpleGeoPanda with ElasticMeta[ESimpleGeoPanda] { diff --git a/src/test/scala/com/foursquare/slashem/QueryTest.scala b/src/test/scala/com/foursquare/slashem/QueryTest.scala index 2972fa1..b4c0182 100644 --- a/src/test/scala/com/foursquare/slashem/QueryTest.scala +++ b/src/test/scala/com/foursquare/slashem/QueryTest.scala @@ -572,7 +572,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers { "qf" -> "text", "qf" -> "ngram_name^0.2", "qf" -> "tags^0.01", - "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")", + "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")", "tieBreaker" -> "0.2", "fl" -> "id,name,userid,mayorid,category_id_0,popularity,decayedPopularity1,lat,lng,checkin_info,score,hasSpecial,address,crossstreet,city,state,zip,country,checkinCount,partitionedPopularity", "bq" -> "name:(holden's hobohut)^10.0", @@ -619,7 +619,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers { "qf" -> "text", "qf" -> "ngram_name^0.2", "qf" -> "tags^0.01", - "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")", + "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")", "tieBreaker" -> "0.2", "fl" -> "id,name,userid,mayorid,category_id_0,popularity,decayedPopularity1,lat,lng,checkin_info,score,hasSpecial,address,crossstreet,city,state,zip,country,checkinCount,partitionedPopularity", "bq" -> "name:(holden's hobohut)^10.0", @@ -640,7 +640,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers { "q" -> "(DJ Hixxy)", "start" -> "0", "rows" -> "10", - "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")") + "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")") Assert.assertEquals(Nil, ((qp.toSet &~ expected.toSet)).toList) Assert.assertEquals(Nil, (expected.toSet &~ qp.toSet).toList) }