Permalink
Browse files

Add some comments, slight code cleanup.

  • Loading branch information...
1 parent 2f555f2 commit 46e25722a9b8fc15eac1948e6295ccbc587f4b51 Holden Karau committed Aug 12, 2011
View
@@ -14,12 +14,18 @@ Use sbt (simple-build-tool) to build:
The finished jar will be in 'target/'.
## Examples
-[QueryTest.scala] contains sample queries and shows the corresponding solr query.
+[QueryTest.scala] contains sample queries and shows the corresponding query.
[SolrRogueTest.scala] contains some sample records.
## Dependencies
-lift, joda-time, junit, finagle. These dependencies are managed by the build system.
+lift, joda-time, junit, finagle, jackson. These dependencies are managed by
+the build system.
+
+## Warnings
+
+This is still a very early version. There are likely bugs (sorry!). Let us know
+if you find any. While we can't promise timely fixes, it will help :)
## Maintainers
@@ -76,7 +76,7 @@ object Ast {
def extend: String
def and(c: Query[T]): Query[T] = And(this, c)
def or(c: Query[T]): Query[T] = Or(this, c)
- def boost(b : Float): Query[T] = Boost(this,b)
+ def boost(b : Float): Query[T] = Boost(this, b)
}
case class Empty[T]() extends Query[T] {
@@ -41,27 +41,35 @@ case class QueryBuilder[M <: Record[M], Ord, Lim, MM <: minimumMatchType](
}
+ //Filter the result set. Filter queries can be run in parallel from the main query and
+ //have a separate cache. Filter queries are great for queries that are repeated often which
+ //you want to constrain your result set by.
def filter[F](f: M => Clause[F]): QueryBuilder[M, Ord, Lim, MM] = {
  // Build the clause from the record meta, then prepend it to the filters already collected.
  val extraFilter = f(meta)
  copy(filters = extraFilter :: filters)
}
+ //A boostQuery affects the scoring of the results.
def boostQuery[F](f: M => Clause[F]): QueryBuilder[M, Ord, Lim, MM] = {
-
// Prepend the clause produced by f(meta) to boostQueries and return the updated copy.
this.copy(boostQueries=f(meta) :: boostQueries)
}
+ //Where you want to start fetching results back from
def start(s: Int): QueryBuilder[M, Ord, Lim, MM] = {
  // Record the requested offset; the builder itself is left untouched and a copy is returned.
  val offset: Option[Int] = Some(s)
  copy(start = offset)
}
+ //Only fetch back l results
def limit(l: Int)(implicit ev: Lim =:= Unlimited): QueryBuilder[M, Ord, Limited, MM] = {
  // The implicit evidence restricts this call to builders whose Lim type is still Unlimited;
  // the returned builder is typed as Limited.
  val rowCap: Option[Int] = Some(l)
  copy(limit = rowCap)
}
+ //In edismax the score is max(scores) + tieBreak * (sum of the scores of the other matching clauses)
def tieBreaker(t: Double): QueryBuilder[M, Ord, Lim, MM] = {
  // Store the tie-break multiplier on a copy of this builder.
  val multiplier: Option[Double] = Some(t)
  copy(tieBreaker = multiplier)
}
+ //Right now we only support ordering by field
+ //TODO: Support ordering by function query
def orderAsc[F](f: M => SolrField[F, M])(implicit ev: Ord =:= Unordered): QueryBuilder[M, Ordered, Lim, MM] = {
  // The sort clause is the selected field's name followed by " asc".
  val ascending: Option[String] = Some(f(meta).name + " asc")
  // Every other setting is carried over unchanged; only the sort (and the Ord type) differs.
  QueryBuilder(
    meta, clauses, filters, boostQueries, queryFields, phraseBoostFields, boostFields,
    start, limit, tieBreaker, sort = ascending, minimumMatch, queryType, fieldsToFetch)
}
@@ -70,32 +78,52 @@ case class QueryBuilder[M <: Record[M], Ord, Lim, MM <: minimumMatchType](
QueryBuilder(meta, clauses, filters, boostQueries, queryFields, phraseBoostFields, boostFields, start, limit, tieBreaker, sort=Some(f(meta).name + " desc"), minimumMatch, queryType, fieldsToFetch)
}
+ //If you are doing a phrase search, this is the percent of terms that must match, rounded down.
+ //So if you have it set to 50 and then do a search with 3 terms, at least one term must match.
+ //A search with 4 terms, however, would require 2 terms to match.
def minimumMatchPercent(percent: Int)(implicit ev: MM =:= defaultMM) : QueryBuilder[M, Ord, Lim, customMM] = {
  // Percent values are rendered as the number followed by a '%' sign.
  val spec: String = percent.toString + "%"
  copy(minimumMatch = Some(spec))
}
+
+ //This is an absolute # of terms rather than a percent of the query terms to match
+ //Note: You must choose one or the other.
def minimumMatchAbsolute(count: Int)(implicit ev: MM =:= defaultMM) : QueryBuilder[M, Ord, Lim, customMM] = {
  // Absolute values are rendered as the bare number, with no suffix.
  val spec: String = count.toString
  copy(minimumMatch = Some(spec))
}
+ //Set the query type. This corresponds to the "defType" field. Some sample values include "edismax" , "dismax"
+ //or just empty to use the default query type
def useQueryType(qt : String) : QueryBuilder[M, Ord, Lim, MM] = {
  // Remember the requested query type on a copy of this builder.
  val chosenType: Option[String] = Some(qt)
  copy(queryType = chosenType)
}
+ //Depending on the query type you set, you can specify different fields to be queried.
+ //This allows you to set a field and a boost.
+ //Fair warning: If you set this value, it may be ignored (it is by the default solr query parser)
def queryField[F](f : M => SolrField[F,M], boost: Double = 1): QueryBuilder[M, Ord, Lim, MM] = {
  // Pair the selected field's name with its boost and prepend it to the existing query fields.
  val weighted = WeightedField(f(meta).name, boost)
  copy(queryFields = weighted :: queryFields)
}
+ //Same as above but takes a list of fields.
def queryFields(fs : List[M => SolrField[_,M]], boost: Double = 1): QueryBuilder[M, Ord, Lim, MM] = {
  // Every selector in fs gets the same boost; the new entries go in front of the existing ones.
  val weighted = fs.map(selector => WeightedField(selector(meta).name, boost))
  copy(queryFields = weighted ++ queryFields)
}
+ //Certain query parsers allow you to set a phraseBoost field. Generally these are only run on the returned
+ //documents. So if I want to return all documents matching either coffee or shop but I want documents
+ //with "coffee shop" to score higher I would set this.
+ //The params for pf,pf2,and pf3 control what type of phrase boost query to generate. In edismax pf2/pf3 results
+ //in a query which will match shingled phrase queries of length 2 & 3 respectively. For example pf2=true in edismax and
+ //a query of "delicious coffee shops" would boost documents containing "delicious coffee" and "coffee shops".
def phraseBoost[F](f : M => SolrField[F,M], boost: Double = 1, pf: Boolean = true, pf2: Boolean = true, pf3: Boolean = true): QueryBuilder[M, Ord, Lim, MM] = {
  // Bundle the field name, boost and the three pf flags, then prepend to the phrase boost fields.
  val boosted = PhraseWeightedField(f(meta).name, boost, pf, pf2, pf3)
  copy(phraseBoostFields = boosted :: phraseBoostFields)
}
+ //Specify a field to be retrieved. If you want to get back all fields you can use a field of name "*"
def fetchField[F](f : M => SolrField[F,M]): QueryBuilder[M, Ord, Lim, MM] = {
  // Prepend the selected field's name to the list of fields to fetch.
  val fieldName = f(meta).name
  copy(fieldsToFetch = fieldName :: fieldsToFetch)
}
+ //Same as above but takes multiple fields
def fetchFields(fs : (M => SolrField[_,M])*): QueryBuilder[M, Ord, Lim, MM] = {
  // Resolve each selector to its field name; the new names go in front of the existing ones.
  val fieldNames = fs.map(selector => selector(meta).name).toList
  copy(fieldsToFetch = fieldNames ++ fieldsToFetch)
}
@@ -108,6 +136,7 @@ case class QueryBuilder[M <: Record[M], Ord, Lim, MM <: minimumMatchType](
this.copy(boostFields=(f(meta).name+"^"+boost)::boostFields)
}
+ //Print out some debugging information.
def test(): Unit = {
println("clauses: " + clauses.extend)
println("filters: " + filters.map(_.extend).mkString)
@@ -126,6 +155,7 @@ case class QueryBuilder[M <: Record[M], Ord, Lim, MM <: minimumMatchType](
bounds ++ queryParamsNoBounds()
}
+ //This is the part which generates most of the solr request
def queryParamsNoBounds(): Seq[(String,String)] = {
val p = List(("q" -> clauses.extend))
@@ -166,9 +196,12 @@ case class QueryBuilder[M <: Record[M], Ord, Lim, MM <: minimumMatchType](
t ++ mm ++ qt ++ bq ++ qf ++ p ++ s ++ f ++ pf ++ fl ++ bf
}
+ //Fetch the results with the limit of l
def fetch(l: Int)(implicit ev: Lim =:= Unlimited): SearchResults[M] = {
  // Apply the limit first, then run the no-argument fetch on the resulting builder.
  val bounded = this.limit(l)
  bounded.fetch()
}
+
+ //fetch the results
def fetch(): SearchResults[M] = {
// Gross++
meta.query(queryParams,fieldsToFetch)
@@ -184,7 +217,7 @@ case class QueryBuilder[M <: Record[M], Ord, Lim, MM <: minimumMatchType](
val maxResults = firstQuery.response.numFound - firstQuery.response.start
val rowsToGet : Long = maxRowsToGet.map(scala.math.min(_,maxResults)) getOrElse maxResults
// Now make rowsToGet/batchSizes calls to meta.query
- //Note the 1 is not a typo
+ //Note the 1 is not a typo since we have already fetched the first page.
f(firstQuery)++(1 to scala.math.ceil(rowsToGet*1.0/batchSize).toInt).flatMap{i =>
// cannot simply override this.start as it is a val, so removing/adding on queryParams
val starti = startPos + (i*batchSize)
@@ -34,7 +34,6 @@ case class Response[T <: Record[T]] (schema: T, numFound: Int, start: Int, docs:
def results[T <: Record[T]](B: Record[T]): List[T] = {
docs.map({doc => val q = B.meta.createRecord
doc.foreach({a =>
-
val fname = a._1
val value = a._2
q.fieldByName(fname).map(_.setFromAny(value))})
@@ -95,6 +94,7 @@ trait SolrMeta[T <: Record[T]] extends MetaRecord[T] {
}
def extractFromResponse(r: String, fieldstofetch: List[String]=Nil): SearchResults[T] = {
+ //This intentionally avoids lift extract as it is too slow for our use case.
logger.log(solrName + ".jsonExtract", "extacting json") {
val rsr = try {
mapper.readValue(r, classOf[RawSearchResults])
@@ -17,8 +17,10 @@ import org.specs.matcher.ScalaCheckMatchers
class ParseTest extends SpecsMatchers with ScalaCheckMatchers {
+ //This is the test for the extraction code.
@Test
def testParseVenueFields {
+ //Here is some json returned from solr.
val r = """{
"responseHeader":{
"status":0,
@@ -499,7 +499,6 @@ class QueryTest extends SpecsMatchers with ScalaCheckMatchers {
@Test
def sortwithPopularandAll {
- val lols="holden's hobohut"
val geoLat = 37.7519528215759
val geoLong = -122.42086887359619
val q = SVenueTest where (_.metall any) useQueryType("edismax") orderDesc(_.decayedPopularity1)

0 comments on commit 46e2572

Please sign in to comment.