Support custom score queries; add __str__ method

erikrose · Jul 6, 2011 · 37d11dc · 37d11dc
1 parent 2f11b71
commit 37d11dc
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 12 deletions.
diff --git a/docs/queries.rst b/docs/queries.rst
@@ -41,11 +41,11 @@ will do a query for "taco trucks" that are
 
 This is however equivalent to the more succinct::
 
-S('taco trucks', style='korean', price='FREE')
+    S('taco trucks', style='korean', price='FREE')
 
 
-Complicate Filtering
-~~~~~~~~~~~~~~~~~~~~
+Complicated Filtering
+~~~~~~~~~~~~~~~~~~~~~
 
 Sometimes you want something complicated.  For that we have the ``F`` (filter)
 object.
@@ -56,7 +56,7 @@ will find you "thai" or "korean" style taco trucks.
 Let's say you only want "korean" tacos if you can get it for "FREE" or "thai"
 tacos at any price::
 
-S('taco trucks').filter(F(style='korean', price='FREE')|F(style='thai'))
+    S('taco trucks').filter(F(style='korean', price='FREE')|F(style='thai'))
 
 
 Facets
@@ -71,6 +71,21 @@ Facets can also be scripted_::
 
 .. _scripted: http://www.elasticsearch.org/guide/reference/api/search/facets/terms-facet.html
 
+Custom Scoring
+--------------
+
+You can affect the match score calculated for each result by applying a custom
+score script_ to a query, which allows you to specify explicitly how the score
+is derived from each item's values::
+
+    S('taco trucks').score('_score * doc["menu_options"].value')
+
+will do a query for "taco trucks" and multiply each truck's match score by
+its number of menu options, so trucks with more options tend to rank higher.
+
+.. _script: http://www.elasticsearch.org/guide/reference/modules/scripting.html
+
+
 Results
 -------
 

diff --git a/elasticutils/__init__.py b/elasticutils/__init__.py
@@ -88,6 +88,7 @@ def __init__(self, query=None, type=None, result_transform=None,
         self.facets = {}
         self.objects = []
         self.ordering = []
+        self.score_ = None
         self.type = type
         self.total = None
         self.result_transform = result_transform
@@ -100,6 +101,7 @@ def _clone(self):
         new.facets = dict(self.facets)
         new.objects = list(self.objects)
         new.ordering = list(self.ordering)
+        new.score_ = self.score_
         new.type = self.type
         new.total = self.total
         new.result_transform = self.result_transform
@@ -129,6 +131,16 @@ def facet(self, field, script=None, global_=False):
         self.facets[field] = facet
         return self
 
+    def score(self, script, params=None):
+        """
+        Rank results by a custom score: ``script`` computes each hit's score
+        (higher first) and ``params``, if given, are passed along with it.
+        For more information:
+        http://www.elasticsearch.org/guide/reference/query-dsl/custom-score-query.html
+        """
+        self.score_ = dict(script=script, params=params)  # read by _build_query
+        return self  # mutates and returns this same object; no _clone() here
+
     def order_by(self, *fields):
         new = self._clone()
         for field in fields:
@@ -138,8 +150,7 @@ def order_by(self, *fields):
                 new.ordering.append(field)
         return new
 
-    def execute(self, start=0, stop=None):
-        es = get_es()
+    def _build_query(self, start=0, stop=None):
         query = dict(query=self.query)
         if self.filter_:
             query['filter'] = self.filter_
@@ -152,7 +163,18 @@ def execute(self, start=0, stop=None):
         if self.ordering:
             query['sort'] = self.ordering
 
-        self.offset = query.get('from', 0)
+        if self.score_:
+            query['query'] = dict(custom_score=dict(query=query['query'], script=self.score_['script']))
+            if (self.score_['params']):
+                query['query']['custom_score']['params'] = self.score_['params']
+
+        return query
+
+    def execute(self, start=0, stop=None):
+        es = get_es()
+        query = self._build_query(start, stop)
+        self.offset = query.get('from', 0);
+
         self.results = es.search(query, settings.ES_INDEX, self.type)
 
         if self.result_transform:
@@ -207,3 +229,7 @@ def __getitem__(self, k):
             k -= self.offset
 
         return self.objects.__getitem__(k)
+
+    def __str__(self):
+        query = self._build_query()  # same builder execute() uses, default paging
+        return str(query)
diff --git a/tests/tests.py b/tests/tests.py
@@ -14,11 +14,11 @@ class QueryTest(TestCase):
     @classmethod
     def setup_class(cls):
         es = get_es()
-        data1 = dict(id=1, foo='bar', tag='awesome')
-        data2 = dict(id=2, foo='barf', tag='boring')
-        data3 = dict(id=3, foo='car', tag='awesome')
-        data4 = dict(id=4, foo='duck', tag='boat')
-        data5 = dict(id=5, foo='train car', tag='awesome')
+        data1 = dict(id=1, foo='bar', tag='awesome', width='2')
+        data2 = dict(id=2, foo='barf', tag='boring', width='7')
+        data3 = dict(id=3, foo='car', tag='awesome', width='5')
+        data4 = dict(id=4, foo='duck', tag='boat', width='11')
+        data5 = dict(id=5, foo='train car', tag='awesome', width='7')
 
         for data in (data1, data2, data3, data4, data5):
             es.index(data, 'test', 'boondongles', bulk=True)
@@ -53,6 +53,21 @@ def test_facet(self):
         eq_(S().facet('tag').get_facet('tag'),
             dict(awesome=3, boring=1, boat=1))
 
+    def test_custom_score(self):
+        """
+        This query selects all the boondongles with the tag 'awesome' and then
+        applies the custom score script to rank them based on their 'width'
+        parameter. Since the tag being queried for matches exactly, each starts
+        with a score of 1.0. The script then multiplies that by the width to
+        get their final scores; hence, the calculated scores will be equal to
+        the width. The results are ordered by highest score first, so we can
+        expect them to be id 5 with a score of 7.0, then id 3 with a score of
+        5.0, then id 1 with a score of 2.0.
+        """
+        res = S(tag='awesome')  # fixtures 1, 3, 5 (widths '2', '5', '7')
+        res = res.score(script='_score * doc["width"].value').get_results()
+        eq_([d['_source']['id'] for d in res], [5, 3, 1])  # widest first
+
     @classmethod
     def teardown_class(cls):
         es = get_es()