Skip to content

Commit

Permalink
Merge pull request #40 from foursquare/custom-scoring
Browse files Browse the repository at this point in the history
Custom scoring support
  • Loading branch information
holdenk committed Jun 1, 2012
2 parents 3bc3b48 + 38290e9 commit 95c71e7
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 129 deletions.
180 changes: 97 additions & 83 deletions src/main/scala/com/foursquare/slashem/QueryBuilder.scala

Large diffs are not rendered by default.

85 changes: 54 additions & 31 deletions src/main/scala/com/foursquare/slashem/Schema.scala
Expand Up @@ -33,7 +33,6 @@ import org.elasticsearch.index.query.{AndFilterBuilder, CustomScoreQueryBuilder,
QueryBuilder => ElasticQueryBuilder}
import org.elasticsearch.node.Node
import org.elasticsearch.node.NodeBuilder._

import org.elasticsearch.search.facet.AbstractFacetBuilder
import org.elasticsearch.search.facet.terms.TermsFacetBuilder
import org.elasticsearch.search.facet.terms.strings.InternalStringTermsFacet
Expand Down Expand Up @@ -460,30 +459,30 @@ trait SlashemSchema[M <: Record[M]] extends Record[M] {
})
}


def where[F](c: M => Clause[F]): QueryBuilder[M, Unordered, Unlimited, defaultMM, NoSelect, NoHighlighting, NoQualityFilter, NoMinimumFacetCount, Unlimited] = {
def where[F](c: M => Clause[F]): QueryBuilder[M, Unordered, Unlimited, defaultMM, NoSelect, NoHighlighting, NoQualityFilter, NoMinimumFacetCount, Unlimited, NoScoreModifiers] = {
QueryBuilder(self, c(self), filters=Nil, boostQueries=Nil, queryFields=Nil,
phraseBoostFields=Nil, boostFields=Nil, start=None, limit=None,
tieBreaker=None, sort=None, minimumMatch=None ,queryType=None,
fieldsToFetch=Nil, facetSettings=FacetSettings(facetFieldList=Nil,
facetMinCount=None,
facetLimit=None),
hls=None, hlFragSize=None, creator=None, comment=None, fallOf=None, min=None)
customScoreScript=None, hls=None, hlFragSize=None, creator=None,
comment=None, fallOf=None, min=None)
}
def query[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](timeout: Duration, qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]): SearchResults[M, Y]
def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]): Future[SearchResults[M, Y]]
def query[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](timeout: Duration, qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]): SearchResults[M, Y]
def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]): Future[SearchResults[M, Y]]
}
trait ElasticSchema[M <: Record[M]] extends SlashemSchema[M] {
self: M with SlashemSchema[M] =>

def meta: ElasticMeta[M]

def query[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](timeout: Duration, qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]):
def query[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](timeout: Duration, qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]):
SearchResults[M, Y] = {
queryFuture(qb, Some(timeout))(timeout)
}

def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]):
def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]):
Future[SearchResults[M, Y]] = {
elasticQueryFuture(qb, buildElasticQuery(qb), None)
}
Expand All @@ -492,12 +491,12 @@ trait ElasticSchema[M <: Record[M]] extends SlashemSchema[M] {
* @qb: The query builder representing the query to be executed
* @timeoutOpt: An option type that requests a server side timeout for the query
*/
def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim], timeoutOpt: Option[Duration]):
def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST], timeoutOpt: Option[Duration]):
Future[SearchResults[M, Y]] = {
elasticQueryFuture(qb, buildElasticQuery(qb), timeoutOpt)
}

def elasticQueryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim], query: ElasticQueryBuilder, timeoutOpt: Option[Duration]): Future[SearchResults[M, Y]] = {
def elasticQueryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST], query: ElasticQueryBuilder, timeoutOpt: Option[Duration]): Future[SearchResults[M, Y]] = {
val esfp = meta.executorServiceFuturePool

val searchResultsFuture = esfp {
Expand Down Expand Up @@ -614,7 +613,8 @@ trait ElasticSchema[M <: Record[M]] extends SlashemSchema[M] {
Response(this, creator, hitCount, start, docs,
fallOff=fallOff, min=min, fieldFacet))
}
def buildElasticQuery[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]): ElasticQueryBuilder = {

def buildElasticQuery[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]): ElasticQueryBuilder = {
val baseQuery: ElasticQueryBuilder= qb.clauses.elasticExtend(qb.queryFields,
qb.phraseBoostFields,
qb.minimumMatch)
Expand All @@ -624,12 +624,16 @@ trait ElasticSchema[M <: Record[M]] extends SlashemSchema[M] {
case _ => filteredQuery(baseQuery,combineFilters(qb.filters.map(_.elasticFilter(qb.queryFields))))
}
//Apply any custom scoring rules (aka emulating Solr's bq/bf)
val boostedQuery = qb.boostFields match {
case Nil => fq
case _ => boostFields(fq, qb.boostFields)
val scoredQuery = qb.boostFields match {
case Nil => qb.customScoreScript match {
case Some((script, params)) => scoreWithScript(fq, script, params)
case None => fq
}
case _ => scoreFields(fq, qb.boostFields)
}
boostedQuery
scoredQuery
}

def termFacetQuery(facetFields: List[Ast.Field], facetLimit: Option[Int]): List[AbstractFacetBuilder] = {
val fieldNames = facetFields.map(_.boost())
val facetQueries = fieldNames.map(name => {
Expand All @@ -640,43 +644,62 @@ trait ElasticSchema[M <: Record[M]] extends SlashemSchema[M] {
}
case _ => q
}
}
)
})
facetQueries
}
def boostFields(query: ElasticQueryBuilder, boostFields: List[ScoreBoost]): ElasticQueryBuilder = {
val boostedQuery = new CustomScoreQueryBuilder(query)
val boostedQuerys = boostFields.map(_.elasticBoost)
val params = boostedQuerys.flatMap(_._1)
val scriptSrc = boostedQuerys.map(_._2).mkString(" + ")

/**
* Custom score the fields which have scoreboosts
*/
def scoreFields(query: ElasticQueryBuilder, fieldsToScore: List[ScoreBoost]): ElasticQueryBuilder = {
val scoredFields = fieldsToScore.map(_.elasticBoost)
val params = scoredFields.flatMap(_._1)
val scriptSrc = scoredFields.map(_._2).mkString(" + ")
val paramNames = (1 to params.length).map("p"+_)
val script = scriptSrc.format(paramNames:_*)
val keyedParams = paramNames zip params
keyedParams.foreach(p => {boostedQuery.param(p._1,p._2)})
val namesAndParams = paramNames.zip(params).toMap
val scoreScript = "_score * (1 +"+ script + " )"
boostedQuery.script(scoreScript)
scoreWithScript(query, scoreScript, namesAndParams, false)
}

/**
* Add the provided script and its params to the query and build a
* CustomScoreQuery with it.
*/
def scoreWithScript(query: ElasticQueryBuilder, script: String,
namesAndParams: Map[String, Any], native: Boolean = true): ElasticQueryBuilder = {
val customScoreQuery = new CustomScoreQueryBuilder(query).script(script)
native match {
case true => customScoreQuery.lang("native")
case false => customScoreQuery.lang("mvel")
}
for ((name, param) <- namesAndParams) {
customScoreQuery.param(name, param)
}
customScoreQuery
}

def combineFilters(filters: List[ElasticFilterBuilder]): ElasticFilterBuilder = {
new AndFilterBuilder(filters:_*)
}

}

trait SolrSchema[M <: Record[M]] extends SlashemSchema[M] {
self: M with SlashemSchema[M] =>

def meta: SolrMeta[M]
// 'Where' is the entry method for a SolrRogue query.

def queryParams[Ord, Lim, MM <: MinimumMatchType, Select, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Select, H, Q, FC, FLim]): Seq[(String, String)] = queryParamsWithBounds(qb,qb.start, qb.limit)
def queryParams[Ord, Lim, MM <: MinimumMatchType, Select, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Select, H, Q, FC, FLim, ST]): Seq[(String, String)] = queryParamsWithBounds(qb,qb.start, qb.limit)

def queryParamsWithBounds[Ord, Lim, MM <: MinimumMatchType, Select, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Select, H, Q, FC, FLim], qstart: Option[Long], qrows: Option[Long]): Seq[(String,String)] = {
def queryParamsWithBounds[Ord, Lim, MM <: MinimumMatchType, Select, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Select, H, Q, FC, FLim, ST], qstart: Option[Long], qrows: Option[Long]): Seq[(String,String)] = {
val bounds = List(("start" -> (qstart.getOrElse {qb.DefaultStart}).toString),
("rows" -> (qrows.getOrElse {qb.DefaultLimit}).toString))
bounds ++ queryParamsNoBounds(qb)
}

//This is the part which generates most of the solr request
def queryParamsNoBounds[Ord, Lim, MM <: MinimumMatchType, Select, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Select, H, Q, FC, FLim]): Seq[(String,String)] = {
def queryParamsNoBounds[Ord, Lim, MM <: MinimumMatchType, Select, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Select, H, Q, FC, FLim, ST]): Seq[(String,String)] = {

//The actual query
val p = List(("q" -> qb.clauses.extend))
Expand Down Expand Up @@ -752,12 +775,12 @@ trait SolrSchema[M <: Record[M]] extends SlashemSchema[M] {
}


def query[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](timeout: Duration, qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]):
def query[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](timeout: Duration, qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]):
SearchResults[M, Y] = {
queryFuture(qb)(timeout)
}

def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim]):
def queryFuture[Ord, Lim, MM <: MinimumMatchType, Y, H <: Highlighting, Q <: QualityFilter, FC <: FacetCount, FLim, ST <: ScoreType](qb: QueryBuilder[M, Ord, Lim, MM, Y, H, Q, FC, FLim, ST]):
Future[SearchResults[M, Y]] = {
solrQueryFuture(qb.creator, queryParams(qb), qb.fieldsToFetch, qb.fallOf, qb.min)
}
Expand Down
1 change: 1 addition & 0 deletions src/test/resources/es-plugin.properties
@@ -0,0 +1 @@
plugin=com.foursquare.elasticsearch.scorer.FourSquareScorePlugin
@@ -0,0 +1,17 @@
package com.foursquare.elasticsearch.scorer;

import org.elasticsearch.plugins.AbstractPlugin;
import org.elasticsearch.script.ScriptModule;

/**
* Provides a fast* score script for our primary use case
*/
class FourSquareScorePlugin extends AbstractPlugin {
override def name(): String = "foursquare";

override def description(): String = "foursquare plugin";

def onModule(module: ScriptModule): Unit = {
module.registerScript("distance_score_magic", classOf[ScoreFactory]);
}
}
@@ -0,0 +1,42 @@
package com.foursquare.elasticsearch.scorer;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.field.data.NumericDocFieldData;
import org.elasticsearch.index.mapper.geo.GeoPointDocFieldData;
import org.elasticsearch.script.AbstractFloatSearchScript;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.NativeScriptFactory;
import org.elasticsearch.search.lookup.DocLookup;

import java.util.Map;

/**
* Note: assumes that the point field is point
*/
case class CombinedDistanceDocumentScorerSearchScript(val lat: Double,
val lon: Double,
val weight1: Float,
val weight2: Float) extends AbstractFloatSearchScript {

override def runAsFloat(): Float = {
val myDoc: DocLookup = doc();
val point: GeoPointDocFieldData = myDoc.get("point").asInstanceOf[GeoPointDocFieldData];
val popularity: Double = myDoc.numeric("decayedPopularity1").asInstanceOf[NumericDocFieldData[_]].getDoubleValue()
// up to you to remove score from here or not..., also, possibly, add more weights options
val myScore: Float = (score() *
(1 + weight1 * math.pow(((1.0 * (math.pow(point.distanceInKm(lat, lon), 2.0))) + 1.0), -1.0)
+ popularity * weight2)).toFloat;
myScore
}
}

class ScoreFactory extends NativeScriptFactory {
def newScript(@Nullable params: Map[String, Object]): ExecutableScript = {
val lat: Double = if (params == null) 1 else XContentMapValues.nodeDoubleValue(params.get("lat"), 0);
val lon: Double = if (params == null) 1 else XContentMapValues.nodeDoubleValue(params.get("lon"), 0);
val weight1: Float = if(params == null) 1 else XContentMapValues.nodeFloatValue(params.get("weight1"), 5000.0f);
val weight2: Float = if(params == null) 1 else XContentMapValues.nodeFloatValue(params.get("weight2"), 0.05f);
return new CombinedDistanceDocumentScorerSearchScript(lat, lon, weight1, weight2);
}
}

0 comments on commit 95c71e7

Please sign in to comment.