Skip to content

Commit

Permalink
Merge remote-tracking branch '4sq/master'
Browse files Browse the repository at this point in the history
Conflicts:
	src/main/scala/com/foursquare/slashem/Schema.scala
  • Loading branch information
Holden Karau committed May 14, 2012
2 parents 9f9f78f + a71fd7d commit fb0f69b
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 26 deletions.
46 changes: 44 additions & 2 deletions src/main/scala/com/foursquare/slashem/Ast.scala
Expand Up @@ -419,6 +419,48 @@ object Ast {
}
}

/**
 * A term query. Used for queries that don't need to be analyzed.
 *
 * Every element of `query` is a single term; several terms are ORed together.
 * By default, elasticFilter() will always be cached!
 *
 * @param query the term values to match
 * @param escapeQuery when true each term is passed through escape() before it is
 *                    quoted in the Solr query string
 * @param cached value handed to cache() on the filter built by elasticFilter()
 */
case class Term[T](query: Iterable[T], escapeQuery: Boolean = true, cached: Boolean = true) extends Query[T] {
  /**
   * @inheritdoc
   *
   * Each term is wrapped in its own pair of double quotes and the quoted terms
   * are joined with " OR ".
   */
  def extend(): String = {
    // Quote term-by-term in both modes. Wrapping the already-joined string in a
    // single pair of quotes would turn several unescaped terms into one phrase
    // that contains the literal word OR.
    val quotedTerms = query.map { term =>
      val text = if (escapeQuery) escape(term.toString) else term.toString
      "\"" + text + "\""
    }
    quotedTerms.mkString(" OR ")
  }
  /**
   * @inheritdoc
   *
   * Only the first entry of qf is consulted (field name and weight); pf and mm
   * are not used by this implementation.
   */
  def elasticExtend(qf: List[WeightedField], pf: List[PhraseWeightedField], mm: Option[String]): ElasticQueryBuilder = {
    val fieldName = qf.head.fieldName
    val weight = qf.head.weight.toFloat
    query match {
      // A one-element List is sent as a single term query with the value passed through as-is.
      case term::Nil => EQueryBuilders.termQuery(fieldName, term).boost(weight)
      // Everything else (several terms, no terms, or a non-List Iterable) becomes a
      // terms query over the string form of each value.
      case terms => {
        val moarTerms = terms.toSeq.map(_.toString)
        EQueryBuilders.termsQuery(fieldName, moarTerms: _*).boost(weight)
      }
    }
  }
  /**
   * @inheritdoc
   *
   * Only the first entry of qf is consulted for the field name. The resulting
   * filter's cache flag is set from `cached`.
   */
  override def elasticFilter(qf: List[WeightedField]): ElasticFilterBuilder = {
    val fieldName = qf.head.fieldName
    query match {
      // A one-element List is sent as a single term filter with the value passed through as-is.
      case term::Nil => EFilterBuilders.termFilter(fieldName, term).cache(cached)
      // Everything else becomes a terms filter over the string form of each value.
      case terms => {
        val moarTerms = terms.toSeq.map(_.toString)
        EFilterBuilders.termsFilter(fieldName, moarTerms: _*).cache(cached)
      }
    }
  }
}

case class Range[T](q1: Query[T],q2: Query[T]) extends Query[T] {
/** @inheritdoc */
Expand Down Expand Up @@ -505,7 +547,7 @@ object Ast {
}

/**
* Class representing clauses ANDed together
* Class representing queries ANDed together
*/
case class And[T](queries: Query[T]*) extends Query[T] {
/** @inheritdoc */
Expand All @@ -525,7 +567,7 @@ object Ast {
}
}
/**
* Case class representing a list of clauses ORed together
* Case class representing a list of queries ORed together
*/
case class Or[T](queries: Query[T]*) extends Query[T] {
/** @inheritdoc */
Expand Down
75 changes: 58 additions & 17 deletions src/main/scala/com/foursquare/slashem/Schema.scala
Expand Up @@ -55,6 +55,12 @@ case class SolrResponseException(code: Int, reason: String, solrName: String, qu
}
}

/**
 * Runtime exception signalling that a requested feature has no implementation.
 *
 * @param reason text appended to the "Not implemented: " prefix of the message
 */
case class UnimplementedException(reason: String) extends RuntimeException {
  /** Returns the fixed prefix "Not implemented: " followed by the reason. */
  override def getMessage(): String = "Not implemented: " + reason
}

/** The response header. There are normally more fields in the response header we could extract, but
* we don't at present. */
case class ResponseHeader @JsonCreator()(@JsonProperty("status")status: Int, @JsonProperty("QTime")QTime: Int)
Expand Down Expand Up @@ -433,8 +439,8 @@ trait SolrGeoHash {
}
//Default geohash, does nothing.
object NoopSolrGeoHash extends SolrGeoHash {
def coverString (geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int ): Seq[String] = List("pleaseUseaRealGeoHash")
def rectCoverString(topRight: (Double, Double), bottomLeft: (Double, Double), maxCells: Int = 0, minLevel: Int = 0, maxLevel: Int = 0): Seq[String] = List("pleaseUseaRealGeoHash")
def coverString (geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int ): Seq[String] = List("pleaseUseaRealGeoHash", "thisIsForFunctionalityTests")
def rectCoverString(topRight: (Double, Double), bottomLeft: (Double, Double), maxCells: Int = 0, minLevel: Int = 0, maxLevel: Int = 0): Seq[String] = List("pleaseUseaRealGeoHash", "thisIsForFunctionalityTests")
}

trait SlashemSchema[M <: Record[M]] extends Record[M] {
Expand Down Expand Up @@ -782,40 +788,64 @@ trait SolrSchema[M <: Record[M]] extends SlashemSchema[M] {

}

/**
 * A field type for unanalyzed queries. Results in using Term[V] queries.
 *
 * It does nothing but switch the `unanalyzed` flag inherited from SlashemField
 * to true; it has to be mixed into a Field[V, M].
 */
trait SlashemUnanalyzedField[V, M <: Record[M]] extends SlashemField[V, M] { self: Field[V, M] =>
  override val unanalyzed: Boolean = true
}

trait SlashemField[V, M <: Record[M]] extends OwnedField[M] {
self: Field[V, M] =>
import Helpers._

//Note eqs and neqs results in phrase queries!
def eqs(v: V) = Clause[V](self.queryName, Group(Phrase(v)))
def neqs(v: V) = Clause[V](self.queryName, Phrase(v),false)
// Override this value to produce unanalyzed queries!
val unanalyzed = false

// Builds the query for one value: a Term over a single-element list when the
// field is unanalyzed, otherwise a Phrase. escapeQuery is forwarded unchanged.
def produceQuery(v: V, escapeQuery: Boolean = true): Query[V] = {
  if (unanalyzed) {
    Term(List(v), escapeQuery)
  } else {
    Phrase(v, escapeQuery)
  }
}

// Builds the query for a collection of values. Unanalyzed fields hand the whole
// collection to one Term (taking advantage of "terms" queries); all other
// fields build one query per value and combine them with groupWithOr.
def produceGroupedQuery(v: Iterable[V], escapeQuery: Boolean = true): Query[V] = {
  if (unanalyzed) {
    Term(v, escapeQuery)
  } else {
    groupWithOr(v.map(value => produceQuery(value, escapeQuery)))
  }
}

def eqs(v: V) = Clause[V](self.queryName, Group(produceQuery(v)))
def neqs(v: V) = Clause[V](self.queryName, produceQuery(v),false)
//With a boost
def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(Phrase(v)),b))
def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(Phrase(v),b),false)
def eqs(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(produceQuery(v)),b))
def neqs(v: V, b:Float) = Clause[V](self.queryName, Boost(produceQuery(v),b),false)

//This allows for bag of words style matching.
def contains(v: V) = Clause[V](self.queryName, Group(BagOfWords(v)))
def contains(v: V, b: Float) = Clause[V](self.queryName, Boost(Group(BagOfWords(v)),b))


//Search with explicit escaping. By default we escape; set e to false to disable
//Note eqs and neqs result in phrase queries (term queries when the field is unanalyzed)!
def eqs(v: V, e: Boolean) = Clause[V](self.queryName, Group(Phrase(v,e)))
def neqs(v: V, e: Boolean) = Clause[V](self.queryName, Phrase(v,e),false)
def eqs(v: V, e: Boolean) = Clause[V](self.queryName, Group(produceQuery(v,e)))
def neqs(v: V, e: Boolean) = Clause[V](self.queryName, produceQuery(v,e),false)
//With a boost
def eqs(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(Phrase(v,e)),b))
def neqs(v: V, b:Float, e: Boolean) = Clause[V](self.queryName, Boost(Phrase(v,e),b),false)
def eqs(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(produceQuery(v,e)),b))
def neqs(v: V, b:Float, e: Boolean) = Clause[V](self.queryName, Boost(produceQuery(v,e),b),false)
//This allows for bag of words style matching.
def contains(v: V, e: Boolean) = Clause[V](self.queryName, Group(BagOfWords(v,e)))
def contains(v: V, b: Float, e: Boolean) = Clause[V](self.queryName, Boost(Group(BagOfWords(v,e)),b))


def in(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})))
def nin(v: Iterable[V]) = Clause[V](self.queryName, groupWithOr(v.map({x: V => Phrase(x)})),false)
def in(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v))
def nin(v: Iterable[V]) = Clause[V](self.queryName, produceGroupedQuery(v),false)

def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b))
def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(groupWithOr(v.map({x: V => Phrase(x)})),b),false)
def in(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b))
def nin(v: Iterable[V], b: Float) = Clause[V](self.queryName, Boost(produceGroupedQuery(v),b),false)

def inRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))))
def ninRange(v1: V, v2: V) = Clause[V](self.queryName, Group(Range(BagOfWords(v1),BagOfWords(v2))),false)
Expand Down Expand Up @@ -855,6 +885,17 @@ trait SlashemField[V, M <: Record[M]] extends OwnedField[M] {

//Slashem field types
class SlashemStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T]
/**
 * String field type that can be queried without analyzing.
 *
 * It mixes in SlashemUnanalyzedField, so its queries are produced as term
 * queries rather than phrase queries.
 *
 * Ex: multi-value field or a whitespace tokenized field where
 * search terms are always for a specific token.
 *
 * @param owner the record this field belongs to
 * @see SlashemStringField
 */
class SlashemUnanalyzedStringField[T <: Record[T]](owner: T)
extends StringField[T](owner, 0) with SlashemUnanalyzedField[String, T]

//Allows for querying against the default field in solr. This field doesn't have a name
class SlashemDefaultStringField[T <: Record[T]](owner: T) extends StringField[T](owner, 0) with SlashemField[String, T] {
override def name = ""
Expand Down Expand Up @@ -963,7 +1004,7 @@ class SlashemPointField[T <: Record[T]](owner: T) extends PointField[T](owner) w
class SlashemBooleanField[T <: Record[T]](owner: T) extends BooleanField[T](owner) with SlashemField[Boolean, T]
class SlashemDateTimeField[T <: Record[T]](owner: T) extends JodaDateTimeField[T](owner) with SlashemField[DateTime, T]
//More restrictive type so we can access the geohash
class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends StringField[T](owner,0) with SlashemField[String, T] {
class SlashemGeoField[T <: SlashemSchema[T]](owner: T) extends SlashemUnanalyzedStringField[T](owner) {
def inRadius(geoLat: Double, geoLong: Double, radiusInMeters: Int, maxCells: Int = owner.geohash.maxCells) = {
val cellIds = owner.geohash.coverString(geoLat, geoLong, radiusInMeters, maxCells = maxCells)
//If we have an empty cover we default to everything.
Expand Down
36 changes: 32 additions & 4 deletions src/test/scala/com/foursquare/slashem/ElasticQueryTest.scala
Expand Up @@ -319,20 +319,26 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
// Checks `in` against list fields: an int list with matching values, an int
// list with no matching value, and the unanalyzed string field.
def testListFieldIn {
  val response1 = ESimplePanda where (_.favnums in List(2, 3, 4, 5)) fetch()
  val response2 = ESimplePanda where (_.favnums in List(99)) fetch()
  val response3 = ESimplePanda where (_.termsfield in List("termhit", "lol")) fetch()
  // assertEquals takes (expected, actual); keeping that order makes a failure
  // report the right value as "expected".
  Assert.assertEquals(2, response1.response.results.length)
  Assert.assertEquals(0, response2.response.results.length)
  Assert.assertEquals(1, response3.response.results.length)
}

@Test
def testIntListFieldEmptyIn {
val response = ESimplePanda where (_.favnums in List()) fetch()
Assert.assertEquals(response.response.results.length, 0)
val response1 = ESimplePanda where (_.favnums in List()) fetch()
val response2 = ESimplePanda where (_.termsfield in List()) fetch()
Assert.assertEquals(response1.response.results.length, 0)
Assert.assertEquals(response2.response.results.length, 0)
}

@Test
def testIntListFieldEmptyNin {
val response = ESimplePanda where (_.favnums nin List()) fetch()
Assert.assertEquals(response.response.results.length, 8)
val response1 = ESimplePanda where (_.favnums nin List()) fetch()
val response2 = ESimplePanda where (_.termsfield nin List()) fetch()
Assert.assertEquals(response1.response.results.length, 8)
Assert.assertEquals(response2.response.results.length, 8)
}

@Test
Expand All @@ -350,6 +356,26 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
val ids2 = response2.response.oids
// All three docs with favnums should be returned, none contain 99
Assert.assertEquals(ids2.intersect(idsWithFavNums).length, 3)

val response3 = ESimplePanda where (_.termsfield nin List("termhit")) fetch()
val ids3 = response3.response.oids
// Docs whose termsfield contains "termhit" are excluded, so only two of the three docs with favnums should remain
Assert.assertEquals(ids3.intersect(idsWithFavNums).length, 2)
}

// Term queries on the unanalyzed field: a single-value `eqs` and a multi-value
// `in` are each expected to match exactly one document.
@Test
def testTermQueries {
  val res1 = ESimplePanda where (_.termsfield eqs "termhit") fetch()
  val res2 = ESimplePanda where (_.termsfield in List("randomterm", "termhit")) fetch()
  // assertEquals takes (expected, actual); keeping that order makes a failure
  // report the right value as "expected".
  Assert.assertEquals(1, res1.response.results.length)
  Assert.assertEquals(1, res2.response.results.length)
}

// Term filter on the unanalyzed field applied on top of a hugenums query.
@Test
def testTermFilters {
  // grab 2 results, filter to 1
  val res1 = ESimplePanda where (_.hugenums contains 1L) filter(_.termsfield in List("termhit", "randomterm")) fetch()
  // assertEquals takes (expected, actual); keeping that order makes a failure
  // report the right value as "expected".
  Assert.assertEquals(1, res1.response.results.length)
}

@Before
Expand Down Expand Up @@ -400,6 +426,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
val favnums1 = List(1, 2, 3, 4, 5).asJava
val favnums2 = List(1, 2, 3, 4, 5).asJava
val favnums3 = List(6, 7, 8, 9, 10).asJava
val terms1 = List("termhit", "nohit").asJava
val nicknames1 = List("jerry", "dawg", "xzibit").asJava
val nicknames2 = List("xzibit", "alvin").asJava
val nicknames3 = List("alvin", "nathaniel", "joiner").asJava
Expand All @@ -413,6 +440,7 @@ class ElasticQueryTest extends SpecsMatchers with ScalaCheckMatchers {
.field("favnums", favnums1)
.field("nicknames", nicknames1)
.field("hugenums", hugenums1)
.field("termsfield", terms1)
.endObject()
).execute()
.actionGet();
Expand Down
1 change: 1 addition & 0 deletions src/test/scala/com/foursquare/slashem/ElasticTest.scala
Expand Up @@ -18,6 +18,7 @@ class ESimplePanda extends ElasticSchema[ESimplePanda] {
object favnums extends SlashemIntListField(this)
object nicknames extends SlashemStringListField(this)
object hugenums extends SlashemLongListField(this)
object termsfield extends SlashemUnanalyzedStringField(this)
}

object ESimpleGeoPanda extends ESimpleGeoPanda with ElasticMeta[ESimpleGeoPanda] {
Expand Down
6 changes: 3 additions & 3 deletions src/test/scala/com/foursquare/slashem/QueryTest.scala
Expand Up @@ -572,7 +572,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
"qf" -> "text",
"qf" -> "ngram_name^0.2",
"qf" -> "tags^0.01",
"fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")",
"fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")",
"tieBreaker" -> "0.2",
"fl" -> "id,name,userid,mayorid,category_id_0,popularity,decayedPopularity1,lat,lng,checkin_info,score,hasSpecial,address,crossstreet,city,state,zip,country,checkinCount,partitionedPopularity",
"bq" -> "name:(holden's hobohut)^10.0",
Expand Down Expand Up @@ -619,7 +619,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
"qf" -> "text",
"qf" -> "ngram_name^0.2",
"qf" -> "tags^0.01",
"fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")",
"fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")",
"tieBreaker" -> "0.2",
"fl" -> "id,name,userid,mayorid,category_id_0,popularity,decayedPopularity1,lat,lng,checkin_info,score,hasSpecial,address,crossstreet,city,state,zip,country,checkinCount,partitionedPopularity",
"bq" -> "name:(holden's hobohut)^10.0",
Expand All @@ -640,7 +640,7 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
"q" -> "(DJ Hixxy)",
"start" -> "0",
"rows" -> "10",
"fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\")")
"fq" -> "geo_s2_cell_ids:(\"pleaseUseaRealGeoHash\" OR \"thisIsForFunctionalityTests\")")
Assert.assertEquals(Nil, ((qp.toSet &~ expected.toSet)).toList)
Assert.assertEquals(Nil, (expected.toSet &~ qp.toSet).toList)
}
Expand Down

0 comments on commit fb0f69b

Please sign in to comment.