Skip to content
Browse files

Merge remote-tracking branch '4sq/master'

Conflicts:
	src/main/scala/com/foursquare/slashem/Schema.scala
  • Loading branch information...
2 parents 9f9f78f + a71fd7d commit fb0f69bf7078dc37bfcf738b2fec0477e2848b08 @holdenk committed May 14, 2012
View
46 src/main/scala/com/foursquare/slashem/Ast.scala
@@ -419,6 +419,48 @@ object Ast {
}
}
+ /**
+ * A term query. Used for queries that don't need to be analyzed
+ *
+ * By default the filter built by elasticFilter() is cached; pass cached = false to disable caching.
+ */
+ case class Term[T](query: Iterable[T], escapeQuery: Boolean = true, cached: Boolean = true) extends Query[T] {
+ /** @inheritdoc */
+ //def extend() = throw new UnimplementedException("Slashem does not support Term queries Solr")
+ def extend(): String = {
+ escapeQuery match {
+ // hack to fix wrapping the queries in a List()
+ case true => {
+ val queries = query.map(q => {'"' + escape(q.toString) + '"'})
+ queries.mkString(" OR ")
+ }
+ case false => '"' + query.mkString(" OR ") + '"'
+ }
+ }
+ /** @inheritdoc */
+ def elasticExtend(qf: List[WeightedField], pf: List[PhraseWeightedField], mm: Option[String]): ElasticQueryBuilder = {
+ val fieldName = qf.head.fieldName
+ val weight = qf.head.weight.toFloat
+ query match {
+ case term::Nil => EQueryBuilders.termQuery(fieldName, term).boost(weight)
+ case terms => {
+ val moarTerms = terms.toSeq.map(_.toString)
+ EQueryBuilders.termsQuery(fieldName, moarTerms: _*).boost(weight)
+ }
+ }
+ }
+ /** @inheritdoc */
+ override def elasticFilter(qf: List[WeightedField]): ElasticFilterBuilder = {
+ val fieldName = qf.head.fieldName
+ query match {
+ case term::Nil => EFilterBuilders.termFilter(fieldName, term).cache(cached)
+ case terms => {
+ val moarTerms = terms.toSeq.map(_.toString)
+ EFilterBuilders.termsFilter(fieldName, moarTerms: _*).cache(cached)
+ }
+ }
+ }
+ }
case class Range[T](q1: Query[T],q2: Query[T]) extends Query[T] {
/** @inheritdoc */
@@ -505,7 +547,7 @@ object Ast {
}
/**
- * Class representing clauses ANDed together
+ * Class representing queries ANDed together
*/
case class And[T](queries: Query[T]*) extends Query[T] {
/** @inheritdoc */
@@ -525,7 +567,7 @@ object Ast {
}
}
/**
- * Case class representing a list of clauses ORed together
+ * Case class representing a list of queries ORed together
*/
case class Or[T](queries: Query[T]*) extends Query[T] {
/** @inheritdoc */
View
75 src/main/scala/com/foursquare/slashem/Schema.scala
@@ -55,6 +55,12 @@ case class SolrResponseException(code: Int, reason: String, solrName: String, qu
}
}
+case class UnimplementedException(reason: String) extends RuntimeException {
+ override def getMessage(): String = {
+ "Not implemented: %s".format(reason)
+ }
+}
+
/** The response header. There are normally more fields in the response header we could extract, but
* we don't at present. */
case class ResponseHeader @JsonCreator()(@JsonProperty("status")status: Int, @JsonProperty("QTime")QTime: Int)
@@ -433,8 +439,8 @@ trait SolrGeoHash {
}
//Default geohash, does nothing.
object NoopSolrGeoHash extends SolrGeoHash {
- def coverString (geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int ): Seq[String] = List("pleaseUseaRealGeoHash")
- def rectCoverString(topRight: (Double, Double), bottomLeft: (Double, Double), maxCells: Int = 0, minLevel: Int = 0, maxLevel: Int = 0): Seq[String] = List("pleaseUseaRealGeoHash")
+ def coverString (geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int ): Seq[String] = List("pleaseUseaRealGeoHash", "thisIsForFunctionalityTests")
+ def rectCoverString(topRight: (Double, Double), bottomLeft: (Double, Double), maxCells: Int = 0, minLevel: Int = 0, maxLevel: Int = 0): Seq[String] = List("pleaseUseaRealGeoHash", "thisIsForFunctionalityTests")
}
trait SlashemSchema[M <: Record[M]] extends Record[M] {
@@ -782,40 +788,64 @@ trait SolrSchema[M <: Record[M]] extends SlashemSchema[M] {
}
+/**
+ * A field type for unanalyzed queries. Results in using Term[V] queries.
+ */
+trait SlashemUnanalyzedField[V, M <: Record[M]] extends SlashemField[V, M] {
+ self: Field[V, M] =>
+
+ override val unanalyzed = true
+}
trait SlashemField[V, M <: Record[M]] extends OwnedField[M] {
self: Field[V, M] =>
import Helpers._
- //Note eqs and neqs results in phrase queries!
- def eqs(v: V) = Clause[V](self.queryName, Group(Phrase(v)))
- def neqs(v: V) = Clause[V](self.queryName, Phrase(v),false)
+ // Override this value to produce unanalyzed queries!
+ val unanalyzed = false
+
+ def produceQuery(v: V, escapeQuery: Boolean = true): Query[V] = {
+ unanalyzed match {
+ case true => Term(List(v),escapeQuery)
+ case false => Phrase(v,escapeQuery)
+ }
+ }
+
+ def produceGroupedQuery(v: Iterable[V], escapeQuery: Boolean = true): Query[V] = {
+ unanalyzed match {
+ // we don't want to groupWithOr and instead take advantage of "terms" queries
+ case true => Term(v, escapeQuery)
+ case false => groupWithOr(v.map({x: V => produceQuery(x,escapeQuery)}))
+ }
+ }
+
+ def eqs(v: V) = Clause[V](self.queryName, Group(produceQuery(v)))
+ def neqs(v: V) = Clause[V](self.queryName, produceQuery(v),false)
//With a boost
- def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(Phrase(v)),b))
- def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(Phrase(v),b),false)
+ def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(produceQuery(v)),b))
+ def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(produceQuery(v),b),false)
//This allows for bag of words style matching.
def contains(v: V) = Clause[V](self.queryName, Group(BagOfWords(v)))
def contains(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(BagOfWords(v)),b))
-
//Search with explicit escaping. By default we escape; set e to false to disable.
//Note eqs and neqs result in phrase queries (or term queries on unanalyzed fields)!
- def eqs(v: V, e: Boolean) = Clause[V](self.queryName, Group(Phrase(v,e)))
- def neqs(v: V, e: Boolean) = Clause[V](self.queryName, Phrase(v,e),false)
+ def eqs(v: V, e: Boolean) = Clause[V](self.queryName, Group(produceQuery(v,e)))
+ def neqs(v: V, e: Boolean) = Clause[V](self.queryName, produceQuery(v,e),false)
//With a boost
- def eqs(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(Phrase(v,e)),b))
- def neqs(v: V, b:Float, e: Boolean) = Clause[V](self.queryName, Boost(Phrase(v,e),b),false)
+ def eqs(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(produceQuery(v,e)),b))
+ def neqs(v: V, b:Float, e: Boolean) = Clause[V](self.queryName, Boost(produceQuery(v,e),b),false)
//This allows for bag of words style matching.
def contains(v: V, e: Boolean) = Clause[V](self.queryName, Group(BagOfWords(v,e)))
def contains(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(BagOfWords(v,e)),b))
- def in(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})))
- def nin(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})),false)
+ def in(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v))
+ def nin(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v),false)
- def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b))
- def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b),false)
+ def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b))
+ def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b),false)
def inRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))))
def ninRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))),false)
@@ -855,6 +885,17 @@ trait SlashemField[V, M <: Record[M]] extends OwnedField[M] {
//Slashem field types
class SlashemStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T]
+/**
+ * Field type that can be queried without analyzing.
+ *
+ * Ex: multi-value field or a whitespace tokenized field where
+ * search terms are always for a specific token.
+ *
+ * @see SlashemStringField
+ */
+class SlashemUnanalyzedStringField[T <: Record[T]](owner: T)
+ extends StringField[T](owner, 0) with SlashemUnanalyzedField[String, T]
+
//Allows for querying against the default field in Solr. This field doesn't have a name.
class SlashemDefaultStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T] {
override def name = ""
@@ -963,7 +1004,7 @@ class SlashemPointField[T <: Record[T]](owner: T) extends PointField[T](owner) w
class SlashemBooleanField[T <: Record[T]](owner: T) extends BooleanField[T](owner) with SlashemField[Boolean, T]
class SlashemDateTimeField[T <: Record[T]](owner: T) extends JodaDateTimeField[T](owner) with SlashemField[DateTime, T]
//More restrictive type so we can access the geohash
-class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends StringField[T](owner,0) with SlashemField[String, T] {
+class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends SlashemUnanalyzedStringField[T](owner) {
def inRadius(geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int = owner.geohash.maxCells) = {
val cellIds = owner.geohash.coverString(geoLat, geoLong, radiusInMeters, maxCells = maxCells)
//If we have an empty cover we default to everything.
View
36 src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala
@@ -319,20 +319,26 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
def testListFieldIn {
val response1 = ESimplePanda where (_.favnums in List(2, 3, 4, 5)) fetch()
val response2 = ESimplePanda where (_.favnums in List(99)) fetch()
+ val response3 = ESimplePanda where (_.termsfield in List("termhit", "lol")) fetch()
Assert.assertEquals(response1.response.results.length, 2)
Assert.assertEquals(response2.response.results.length, 0)
+ Assert.assertEquals(response3.response.results.length, 1)
}
@Test
def testIntListFieldEmptyIn {
- val response = ESimplePanda where (_.favnums in List()) fetch()
- Assert.assertEquals(response.response.results.length, 0)
+ val response1 = ESimplePanda where (_.favnums in List()) fetch()
+ val response2 = ESimplePanda where (_.termsfield in List()) fetch()
+ Assert.assertEquals(response1.response.results.length, 0)
+ Assert.assertEquals(response2.response.results.length, 0)
}
@Test
def testIntListFieldEmptyNin {
- val response = ESimplePanda where (_.favnums nin List()) fetch()
- Assert.assertEquals(response.response.results.length, 8)
+ val response1 = ESimplePanda where (_.favnums nin List()) fetch()
+ val response2 = ESimplePanda where (_.termsfield nin List()) fetch()
+ Assert.assertEquals(response1.response.results.length, 8)
+ Assert.assertEquals(response2.response.results.length, 8)
}
@Test
@@ -350,6 +356,26 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
val ids2 = response2.response.oids
// All three docs with favnums should be returned, none contain 99
Assert.assertEquals(ids2.intersect(idsWithFavNums).length, 3)
+
+ val response3 = ESimplePanda where (_.termsfield nin List("termhit")) fetch()
+ val ids3 = response3.response.oids
+ // Only two of the three docs with favnums should be returned; the third has "termhit" in termsfield
+ Assert.assertEquals(ids3.intersect(idsWithFavNums).length, 2)
+ }
+
+ @Test
+ def testTermQueries {
+ val res1 = ESimplePanda where (_.termsfield eqs "termhit") fetch()
+ val res2 = ESimplePanda where (_.termsfield in List("randomterm", "termhit")) fetch()
+ Assert.assertEquals(res1.response.results.length, 1)
+ Assert.assertEquals(res2.response.results.length, 1)
+ }
+
+ @Test
+ def testTermFilters {
+ // grab 2 results, filter to 1
+ val res1 = ESimplePanda where (_.hugenums contains 1L) filter(_.termsfield in List("termhit", "randomterm")) fetch()
+ Assert.assertEquals(res1.response.results.length, 1)
}
@Before
@@ -400,6 +426,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
val favnums1 = List(1, 2, 3, 4, 5).asJava
val favnums2 = List(1, 2, 3, 4, 5).asJava
val favnums3 = List(6, 7, 8, 9, 10).asJava
+ val terms1 = List("termhit", "nohit").asJava
val nicknames1 = List("jerry", "dawg", "xzibit").asJava
val nicknames2 = List("xzibit", "alvin").asJava
val nicknames3 = List("alvin", "nathaniel", "joiner").asJava
@@ -413,6 +440,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
.field("favnums", favnums1)
.field("nicknames", nicknames1)
.field("hugenums", hugenums1)
+ .field("termsfield", terms1)
.endObject()
).execute()
.actionGet();
View
1 src/test/scala/com/foursquare/slashem/ElasticTest.scala
@@ -18,6 +18,7 @@ class ESimplePanda extends ElasticSchema[ESimplePanda] {
object favnums extends SlashemIntListField(this)
object nicknames extends SlashemStringListField(this)
object hugenums extends SlashemLongListField(this)
+ object termsfield extends SlashemUnanalyzedStringField(this)
}
object ESimpleGeoPanda extends ESimpleGeoPanda with ElasticMeta[ESimpleGeoPanda] {
View
6 src/test/scala/com/foursquare/slashem/QueryTest.scala
@@ -572,7 +572,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
"qf" -> "text",
"qf" -> "ngram_name^0.2",
"qf" -> "tags^0.01",
- "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")",
+ "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")",
"tieBreaker" -> "0.2",
"fl" -> "id,name,userid,mayorid,category_id_0,popularity,decayedPopularity1,lat,lng,checkin_info,score,hasSpecial,address,crossstreet,city,state,zip,country,checkinCount,partitionedPopularity",
"bq" -> "name:(holden's hobohut)^10.0",
@@ -619,7 +619,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
"qf" -> "text",
"qf" -> "ngram_name^0.2",
"qf" -> "tags^0.01",
- "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")",
+ "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")",
"tieBreaker" -> "0.2",
"fl" -> "id,name,userid,mayorid,category_id_0,popularity,decayedPopularity1,lat,lng,checkin_info,score,hasSpecial,address,crossstreet,city,state,zip,country,checkinCount,partitionedPopularity",
"bq" -> "name:(holden's hobohut)^10.0",
@@ -640,7 +640,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
"q" -> "(DJ Hixxy)",
"start" -> "0",
"rows" -> "10",
- "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")")
+ "fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")")
Assert.assertEquals(Nil, ((qp.toSet &~ expected.toSet)).toList)
Assert.assertEquals(Nil, (expected.toSet &~ qp.toSet).toList)
}

0 comments on commit fb0f69b

Please sign in to comment.
Something went wrong with that request. Please try again.