Skip to content

Loading…

Spatial filters stomp on existing filters in ElasticSearch backend #566

Closed
wants to merge 2 commits into from

2 participants

@joshdrake

Narrow filters are applied before the spatial queries using a query filter. If spatial filtering is performed, these filters are stomped on. This can be easily reproduced with a basic case:

ninth_and_mass = Point(-95.23592948913574, 38.96753407043678)
max_dist = D(mi=2)
sqs = SearchQuerySet().models(Shop).dwithin('location', ninth_and_mass, max_dist)

This will generate a filter fragment looking like this:

{
    "filtered": {
        "filter": {
            "geo_distance": {
                "distance": 3.218688, 
                "location": {
                    "lat": 38.967534, 
                    "lon": -95.235929
                }
            }
        }
    }
}

... when it should be this:

{
    "filtered": {
        "filter": {
            "and": [
                {
                    "fquery": {
                        "_cache": true, 
                        "query": {
                            "query_string": {
                                "query": "django_ct:(shops.shop)"
                            }
                        }
                    }
                }, 
                {
                    "geo_distance": {
                        "distance": 3.218688, 
                        "location": {
                            "lat": 38.967534, 
                            "lon": -95.235929
                        }
                    }
                }
            ]
        }
    }
}

I've submitted a pull request to correct this issue.

@jezdez

I'm not sure why this is needed, can you elaborate?

@jezdez

Er, sorry, I thought I had created a separate pull for this commit. Basically, the distance information returned by ElasticSearch isn't processed by when building search result objects. This commit implements it in a similar fashion as in the Solr backend (L379). The canonical example of this usage is shown in the docs here.

There are a few things going on in this commit of note:

  • When sorting by distance, the geo_sort variable is set to True (L302)
  • The point used in the .distance() call is passed to the _process_results method, along with geo_sort (L499)
  • The _point_of_origin additional field is set to the distance point so the Haystack can calculate the distance information with GeoPy. This is needed because ElasticSearch only returns distance information when sorting by distance. (L592)
  • If distance was one of the sorting parameters, then ElasticSearch has distance information in the results. In this case, we populate the distance field with that returned directly from ElasticSearch, rather than letting GeoPy waste cycles doing it. (L594)

Sorry about the confusion, let me know if there's anything else I can help with.

@jezdez

I've pulled the first commit, please feel free to open a new pull request about d5cc42f.

@jezdez jezdez closed this
@jezdez jezdez added a commit that referenced this pull request
@jezdez jezdez Fixed distance handling in result parser of the elasticsearch backend…
…. This is basically the second part of #566. Thanks to Josh Drake for the initial patch.
9d92d4d
@floppya floppya added a commit to floppya/django-haystack that referenced this pull request
@jezdez jezdez Fixed distance handling in result parser of the elasticsearch backend…
…. This is basically the second part of #566. Thanks to Josh Drake for the initial patch.
4125e5c
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Showing with 41 additions and 4 deletions.
  1. +41 −4 haystack/backends/elasticsearch_backend.py
View
45 haystack/backends/elasticsearch_backend.py
@@ -299,6 +299,7 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
"unit" : "km"
}
}
+ geo_sort = True
else:
if field == 'distance':
warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
@@ -415,9 +416,11 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
from haystack.utils.geo import generate_bounding_box
((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
+
kwargs['query'].setdefault('filtered', {})
kwargs['query']['filtered'].setdefault('filter', {})
- kwargs['query']['filtered']['filter'] = {
+
+ within_filter = {
"geo_bounding_box": {
within['field']: {
"top_left": {
@@ -432,11 +435,25 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
},
}
+ if kwargs['query']['filtered']['filter']:
+ compound_filter = {
+ "and": [
+ kwargs['query']['filtered']['filter'],
+ within_filter,
+ ]
+ }
+ kwargs['query']['filtered']['filter'] = compound_filter
+ else:
+ kwargs['query']['filtered']['filter'] = within_filter
+
+
if dwithin is not None:
lng, lat = dwithin['point'].get_coords()
+
kwargs['query'].setdefault('filtered', {})
kwargs['query']['filtered'].setdefault('filter', {})
- kwargs['query']['filtered']['filter'] = {
+
+ dwithin_filter = {
"geo_distance": {
"distance": dwithin['distance'].km,
dwithin['field']: {
@@ -446,6 +463,17 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
}
}
+ if kwargs['query']['filtered']['filter']:
+ compound_filter = {
+ "and": [
+ kwargs['query']['filtered']['filter'],
+ dwithin_filter
+ ]
+ }
+ kwargs['query']['filtered']['filter'] = compound_filter
+ else:
+ kwargs['query']['filtered']['filter'] = dwithin_filter
+
# Remove the "filtered" key if we're not filtering. Otherwise,
# Elasticsearch will blow up.
if not kwargs['query']['filtered'].get('filter'):
@@ -468,7 +496,7 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
raw_results = {}
- return self._process_results(raw_results, highlight=highlight, result_class=result_class)
+ return self._process_results(raw_results, highlight=highlight, result_class=result_class, distance_point=distance_point, geo_sort=geo_sort)
def more_like_this(self, model_instance, additional_query_string=None,
start_offset=0, end_offset=None, models=None,
@@ -507,7 +535,7 @@ def more_like_this(self, model_instance, additional_query_string=None,
return self._process_results(raw_results, result_class=result_class)
- def _process_results(self, raw_results, highlight=False, result_class=None):
+ def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None, geo_sort=False):
from haystack import connections
results = []
hits = raw_results.get('hits', {}).get('total', 0)
@@ -560,6 +588,15 @@ def _process_results(self, raw_results, highlight=False, result_class=None):
if 'highlight' in raw_result:
additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')
+ if distance_point:
+ additional_fields['_point_of_origin'] = distance_point
+
+ if geo_sort and raw_result.get('sort'):
+ from haystack.utils.geo import Distance
+ additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
+ else:
+ additional_fields['_distance'] = None
+
result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
results.append(result)
else:
Something went wrong with that request. Please try again.