diff --git a/.travis.yml b/.travis.yml
index eee4d5886..2f15d444d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,10 +26,21 @@ addons:
 before_install:
   - mkdir -p $HOME/download-cache
+  - >
+    if [[ $VERSION_ES == '>=2.0.0,<3.0.0' ]];
+    then
+      wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
+      tar zxf elasticsearch-2.2.1.tar.gz
+      elasticsearch-2.2.1/bin/elasticsearch -d -Dhttp.port=9200
+    else
+      wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.tar.gz
+      tar zxf elasticsearch-1.7.5.tar.gz
+      elasticsearch-1.7.5/bin/elasticsearch -d -Dhttp.port=9200
+    fi

 install:
   - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi
-  - pip install requests "Django${DJANGO_VERSION}"
+  - pip install requests "Django${DJANGO_VERSION}" "elasticsearch${VERSION_ES}"
   - python setup.py clean build install

 before_script:
@@ -41,16 +52,15 @@ script:

 env:
   matrix:
-    - DJANGO_VERSION=">=1.8,<1.9"
-    - DJANGO_VERSION=">=1.9,<1.10"
+    - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=1.0.0,<2.0.0"
+    - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=1.0.0,<2.0.0"
+    - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=2.0.0,<3.0.0"
+    - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=2.0.0,<3.0.0"

 matrix:
   allow_failures:
     - python: "pypy"

-services:
-  - elasticsearch
-
 notifications:
   irc: "irc.freenode.org#haystack"
   email: false
diff --git a/haystack/backends/elasticsearch2_backend.py b/haystack/backends/elasticsearch2_backend.py
new file mode 100644
index 000000000..4c92d4c03
--- /dev/null
+++ b/haystack/backends/elasticsearch2_backend.py
@@ -0,0 +1,1045 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import datetime
+import re
+import warnings
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+
+import haystack
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
+from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, FUZZY_MAX_EXPANSIONS, FUZZY_MIN_SIM, ID
+from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument
+from haystack.inputs import Clean, Exact, PythonData, Raw
+from haystack.models import SearchResult
+from haystack.utils import get_identifier, get_model_ct
+from haystack.utils import log as logging
+from haystack.utils.app_loading import haystack_get_model
+
+try:
+    import elasticsearch
+    if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)):
+        raise ImportError
+    from elasticsearch.helpers import bulk, scan
+    from elasticsearch.exceptions import NotFoundError
+except ImportError:
+    raise MissingDependency("The 'elasticsearch2' backend requires the installation of 'elasticsearch>=2.0.0,<3.0.0'. Please refer to the documentation.")
+
+
+DATETIME_REGEX = re.compile(
+    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T'
+    r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d+)?$')
+
+
+class Elasticsearch2SearchBackend(BaseSearchBackend):
+    # Words reserved by Elasticsearch for special use.
+    RESERVED_WORDS = (
+        'AND',
+        'NOT',
+        'OR',
+        'TO',
+    )
+
+    # Characters reserved by Elasticsearch for special use.
+    # The '\\' must come first, so as not to overwrite the other slash replacements.
+    RESERVED_CHARACTERS = (
+        '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
+        '[', ']', '^', '"', '~', '*', '?', ':', '/',
+    )
+
+    # Settings to add an n-gram & edge n-gram analyzer.
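+    # The ``ngram_analyzer`` and ``edgengram_analyzer`` defined here back the
+    # ``ngram`` and ``edge_ngram`` entries in ``FIELD_MAPPINGS`` at the bottom
+    # of this module.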
+    DEFAULT_SETTINGS = {
+        'settings': {
+            "analysis": {
+                "analyzer": {
+                    "ngram_analyzer": {
+                        "type": "custom",
+                        "tokenizer": "standard",
+                        "filter": ["haystack_ngram", "lowercase"]
+                    },
+                    "edgengram_analyzer": {
+                        "type": "custom",
+                        "tokenizer": "standard",
+                        "filter": ["haystack_edgengram", "lowercase"]
+                    }
+                },
+                "tokenizer": {
+                    "haystack_ngram_tokenizer": {
+                        "type": "nGram",
+                        "min_gram": 3,
+                        "max_gram": 15,
+                    },
+                    "haystack_edgengram_tokenizer": {
+                        "type": "edgeNGram",
+                        "min_gram": 2,
+                        "max_gram": 15,
+                        "side": "front"
+                    }
+                },
+                "filter": {
+                    "haystack_ngram": {
+                        "type": "nGram",
+                        "min_gram": 3,
+                        "max_gram": 15
+                    },
+                    "haystack_edgengram": {
+                        "type": "edgeNGram",
+                        "min_gram": 2,
+                        "max_gram": 15
+                    }
+                }
+            }
+        }
+    }
+
+    def __init__(self, connection_alias, **connection_options):
+        super(Elasticsearch2SearchBackend, self).__init__(connection_alias, **connection_options)
+
+        if 'URL' not in connection_options:
+            raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)
+
+        if 'INDEX_NAME' not in connection_options:
+            raise ImproperlyConfigured("You must specify an 'INDEX_NAME' in your settings for connection '%s'." % connection_alias)
+
+        self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {}))
+        self.index_name = connection_options['INDEX_NAME']
+        self.log = logging.getLogger('haystack')
+        self.setup_complete = False
+        self.existing_mapping = {}
+        self.content_field_name = None
+
+    def setup(self):
+        """
+        Defers loading until needed.
+        """
+        # Get the existing mapping & cache it. We'll compare it
+        # during the ``update`` & if it doesn't match, we'll put the new
+        # mapping.
+        try:
+            self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name)
+        except NotFoundError:
+            pass
+        except Exception:
+            if not self.silently_fail:
+                raise
+
+        unified_index = haystack.connections[self.connection_alias].get_unified_index()
+        self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields())
+        current_mapping = {
+            'modelresult': {
+                'properties': field_mapping,
+            }
+        }
+
+        if current_mapping != self.existing_mapping:
+            try:
+                # Make sure the index is there first.
+                self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400)
+                self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping)
+                self.existing_mapping = current_mapping
+            except Exception:
+                if not self.silently_fail:
+                    raise
+
+        self.setup_complete = True
+
+    def update(self, index, iterable, commit=True):
+        """
+        Updates the backend when given a SearchIndex and a collection of
+        documents.
+
+        :param index: The SearchIndex to update.
+        :param iterable: The collection of documents.
+        :param commit: True to refresh the search index after the update.
+        """
+        if not self.setup_complete:
+            try:
+                self.setup()
+            except elasticsearch.TransportError as e:
+                if not self.silently_fail:
+                    raise
+
+                self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True)
+                return
+
+        prepped_docs = []
+
+        for obj in iterable:
+            try:
+                prepped_data = index.full_prepare(obj)
+                final_data = {}
+
+                # Convert the data to make sure it's happy.
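+                # ``_from_python`` turns dates into ISO-8601 strings and sets
+                # into lists so every value serializes cleanly to JSON.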
+ for key, value in prepped_data.items(): + final_data[key] = self._from_python(value) + final_data['_id'] = final_data[ID] + + prepped_docs.append(final_data) + except SkipDocument: + self.log.debug(u"Indexing for object `%s` skipped", obj) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + # We'll log the object identifier but won't include the actual object + # to avoid the possibility of that generating encoding errors while + # processing the log message: + self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, + extra={"data": {"index": index, + "object": get_identifier(obj)}}) + + bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult') + + if commit: + self.conn.indices.refresh(index=self.index_name) + + def remove(self, obj_or_string, commit=True): + """ + Removes a document/object from the backend. Can be either a model + instance or the identifier (i.e. ``app_name.model_name.id``) in the + event the object no longer exists. + + :param obj_or_string: The model instance or the identifier. + :param commit: True to refresh the search index after the remove. + """ + doc_id = get_identifier(obj_or_string) + + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, + exc_info=True) + return + + try: + self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404) + + if commit: + self.conn.indices.refresh(index=self.index_name) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, exc_info=True) + + def clear(self, models=None, commit=True): + """ + Clears the backend of all documents/objects for a collection of models. + + :param models: List or tuple of models to clear. + :param commit: Not used. 
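+        The index is either deleted outright (when no models are given) or
+        refreshed after the bulk delete, so ``commit`` has no effect here.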
+ """ + if models is not None: + assert isinstance(models, (list, tuple)) + + try: + if models is None: + self.conn.indices.delete(index=self.index_name, ignore=404) + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + else: + models_to_delete = [] + + for model in models: + models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) + + # Delete using scroll API + query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}} + generator = scan(self.conn, query=query, index=self.index_name, doc_type='modelresult') + actions = ({ + '_op_type': 'delete', + '_id': doc['_id'], + } for doc in generator) + bulk(self.conn, actions=actions, index=self.index_name, doc_type='modelresult') + self.conn.indices.refresh(index=self.index_name) + + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + if models is not None: + self.log.error("Failed to clear Elasticsearch index of models '%s': %s", + ','.join(models_to_delete), e, exc_info=True) + else: + self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True) + + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): + index = haystack.connections[self.connection_alias].get_unified_index() + content_field = index.document_field + + if query_string == '*:*': + kwargs = { + 'query': { + "match_all": {} + }, + } + else: + kwargs = { + 'query': { + 'query_string': { + 'default_field': content_field, + 'default_operator': DEFAULT_OPERATOR, + 'query': query_string, + 'analyze_wildcard': True, + 'auto_generate_phrase_queries': True, + 'fuzzy_min_sim': FUZZY_MIN_SIM, + 'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS, + }, + }, + } + + # so far, no filters + filters = [] + + if fields: + if isinstance(fields, (list, set)): + fields = " ".join(fields) + + kwargs['fields'] = fields + + if sort_by is not None: + order_list = [] + for field, direction in sort_by: + if field == 'distance' and distance_point: + # Do the geo-enabled sort. + lng, lat = distance_point['point'].get_coords() + sort_kwargs = { + "_geo_distance": { + distance_point['field']: [lng, lat], + "order": direction, + "unit": "km" + } + } + else: + if field == 'distance': + warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") + + # Regular sorting. + sort_kwargs = {field: {'order': direction}} + + order_list.append(sort_kwargs) + + kwargs['sort'] = order_list + + if start_offset is not None: + kwargs['from'] = start_offset + + if end_offset is not None: + kwargs['size'] = end_offset - start_offset + + if highlight is True: + kwargs['highlight'] = { + 'fields': { + content_field: {'store': 'yes'}, + } + } + + if self.include_spelling: + kwargs['suggest'] = { + 'suggest': { + 'text': spelling_query or query_string, + 'term': { + # Using content_field here will result in suggestions of stemmed words. 
+ 'field': '_all', + }, + }, + } + + if narrow_queries is None: + narrow_queries = set() + + if facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, extra_options in facets.items(): + facet_options = { + 'meta': { + '_type': 'terms', + }, + 'terms': { + 'field': facet_fieldname, + } + } + if 'order' in extra_options: + facet_options['meta']['order'] = extra_options.pop('order') + # Special cases for options applied at the facet level (not the terms level). + if extra_options.pop('global_scope', False): + # Renamed "global_scope" since "global" is a python keyword. + facet_options['global'] = True + if 'facet_filter' in extra_options: + facet_options['facet_filter'] = extra_options.pop('facet_filter') + facet_options['terms'].update(extra_options) + kwargs['aggs'][facet_fieldname] = facet_options + + if date_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in date_facets.items(): + # Need to detect on gap_by & only add amount if it's more than one. + interval = value.get('gap_by').lower() + + # Need to detect on amount (can't be applied on months or years). + if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'): + # Just the first character is valid for use. + interval = "%s%s" % (value['gap_amount'], interval[:1]) + + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'date_histogram', + }, + 'date_histogram': { + 'field': facet_fieldname, + 'interval': interval, + }, + 'aggs': { + facet_fieldname: { + 'date_range': { + 'field': facet_fieldname, + 'ranges': [ + { + 'from': self._from_python(value.get('start_date')), + 'to': self._from_python(value.get('end_date')), + } + ] + } + } + } + } + + if query_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in query_facets: + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'query', + }, + 'filter': { + 'query_string': { + 'query': value, + } + }, + } + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. 
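+            # ``build_models_list`` returns the models registered with this
+            # connection's unified index.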
+            model_choices = self.build_models_list()
+        else:
+            model_choices = []
+
+        if len(model_choices) > 0:
+            filters.append({"terms": {DJANGO_CT: model_choices}})
+
+        for q in narrow_queries:
+            filters.append({
+                'query_string': {
+                    'query': q
+                }
+            })
+
+        if within is not None:
+            from haystack.utils.geo import generate_bounding_box
+
+            ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
+            within_filter = {
+                "geo_bounding_box": {
+                    within['field']: {
+                        "top_left": {
+                            "lat": north,
+                            "lon": west
+                        },
+                        "bottom_right": {
+                            "lat": south,
+                            "lon": east
+                        }
+                    }
+                },
+            }
+            filters.append(within_filter)
+
+        if dwithin is not None:
+            lng, lat = dwithin['point'].get_coords()
+
+            # NB: the 1.0.0 release of elasticsearch introduced an
+            # incompatible change to the distance filter formatting
+            if elasticsearch.VERSION >= (1, 0, 0):
+                distance = "%(dist).6f%(unit)s" % {
+                    'dist': dwithin['distance'].km,
+                    'unit': "km"
+                }
+            else:
+                distance = dwithin['distance'].km
+
+            dwithin_filter = {
+                "geo_distance": {
+                    "distance": distance,
+                    dwithin['field']: {
+                        "lat": lat,
+                        "lon": lng
+                    }
+                }
+            }
+            filters.append(dwithin_filter)
+
+        # if we want to filter, change the query type to filtered
+        if filters:
+            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
+            if len(filters) == 1:
+                kwargs['query']['filtered']["filter"] = filters[0]
+            else:
+                kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}
+
+        return kwargs
+
+    @log_query
+    def search(self, query_string, **kwargs):
+        if len(query_string) == 0:
+            return {
+                'results': [],
+                'hits': 0,
+            }
+
+        if not self.setup_complete:
+            self.setup()
+
+        search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+        search_kwargs['from'] = kwargs.get('start_offset', 0)
+
+        order_fields = set()
+        for order in search_kwargs.get('sort', []):
+            for key in order.keys():
+                order_fields.add(key)
+
+        geo_sort = '_geo_distance' in order_fields
+
+        end_offset = kwargs.get('end_offset')
+        start_offset = kwargs.get('start_offset', 0)
+        if end_offset is not None and end_offset > start_offset:
+            search_kwargs['size'] = end_offset - start_offset
+
+        try:
+            raw_results = self.conn.search(body=search_kwargs,
+                                           index=self.index_name,
+                                           doc_type='modelresult',
+                                           _source=True)
+        except elasticsearch.TransportError as e:
+            if not self.silently_fail:
+                raise
+
+            self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True)
+            raw_results = {}
+
+        return self._process_results(raw_results,
+                                     highlight=kwargs.get('highlight'),
+                                     result_class=kwargs.get('result_class', SearchResult),
+                                     distance_point=kwargs.get('distance_point'),
+                                     geo_sort=geo_sort)
+
+    def more_like_this(self, model_instance, additional_query_string=None,
+                       start_offset=0, end_offset=None, models=None,
+                       limit_to_registered_models=None, result_class=None, **kwargs):
+        from haystack import connections
+
+        if not self.setup_complete:
+            self.setup()
+
+        # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+        # which won't be in our registry:
+        model_klass = model_instance._meta.concrete_model
+
+        index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
+        field_name = index.get_content_field()
+        params = {}
+
+        if start_offset is not None:
+            params['from_'] = start_offset
+
+        if end_offset is not None:
+            params['size'] = end_offset - start_offset
+
+        doc_id = get_identifier(model_instance)
+
+        try:
+            # More like this Query
+            # 
https://www.elastic.co/guide/en/elasticsearch/reference/2.2/query-dsl-mlt-query.html + mlt_query = { + 'query': { + 'more_like_this': { + 'fields': [field_name], + 'like': [{ + "_id": doc_id + }] + } + } + } + + narrow_queries = [] + + if additional_query_string and additional_query_string != '*:*': + additional_filter = { + "query": { + "query_string": { + "query": additional_query_string + } + } + } + narrow_queries.append(additional_filter) + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. + model_choices = self.build_models_list() + else: + model_choices = [] + + if len(model_choices) > 0: + model_filter = {"terms": {DJANGO_CT: model_choices}} + narrow_queries.append(model_filter) + + if len(narrow_queries) > 0: + mlt_query = { + "query": { + "filtered": { + 'query': mlt_query['query'], + 'filter': { + 'bool': { + 'must': list(narrow_queries) + } + } + } + } + } + + raw_results = self.conn.search( + body=mlt_query, + index=self.index_name, + doc_type='modelresult', + _source=True, **params) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", + doc_id, e, exc_info=True) + raw_results = {} + + return self._process_results(raw_results, result_class=result_class) + + def _process_results(self, raw_results, highlight=False, + result_class=None, distance_point=None, + geo_sort=False): + from haystack import connections + results = [] + hits = raw_results.get('hits', {}).get('total', 0) + facets = {} + spelling_suggestion = None + + if result_class is None: + result_class = SearchResult + + if self.include_spelling and 'suggest' in raw_results: + raw_suggest = raw_results['suggest'].get('suggest') + if raw_suggest: + spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest]) + + if 'aggregations' in raw_results: + facets = { + 'fields': {}, + 'dates': {}, + 'queries': {}, + } + + for facet_fieldname, facet_info in raw_results['aggregations'].items(): + facet_type = facet_info['meta']['_type'] + if facet_type == 'terms': + facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual in facet_info['buckets']] + if 'order' in facet_info['meta']: + if facet_info['meta']['order'] == 'reverse_count': + srt = sorted(facets['fields'][facet_fieldname], key=lambda x: x[1]) + facets['fields'][facet_fieldname] = srt + elif facet_type == 'date_histogram': + # Elasticsearch provides UTC timestamps with an extra three + # decimals of precision, which datetime barfs on. 
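+                    # Dividing the millisecond value by 1000 yields the seconds
+                    # that ``datetime.utcfromtimestamp`` expects.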
+ facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['key'] / 1000), individual['doc_count']) for individual in facet_info['buckets']] + elif facet_type == 'query': + facets['queries'][facet_fieldname] = facet_info['doc_count'] + + unified_index = connections[self.connection_alias].get_unified_index() + indexed_models = unified_index.get_indexed_models() + content_field = unified_index.document_field + + for raw_result in raw_results.get('hits', {}).get('hits', []): + source = raw_result['_source'] + app_label, model_name = source[DJANGO_CT].split('.') + additional_fields = {} + model = haystack_get_model(app_label, model_name) + + if model and model in indexed_models: + for key, value in source.items(): + index = unified_index.get_index(model) + string_key = str(key) + + if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): + additional_fields[string_key] = index.fields[string_key].convert(value) + else: + additional_fields[string_key] = self._to_python(value) + + del(additional_fields[DJANGO_CT]) + del(additional_fields[DJANGO_ID]) + + if 'highlight' in raw_result: + additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '') + + if distance_point: + additional_fields['_point_of_origin'] = distance_point + + if geo_sort and raw_result.get('sort'): + from haystack.utils.geo import Distance + additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0])) + else: + additional_fields['_distance'] = None + + result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields) + results.append(result) + else: + hits -= 1 + + return { + 'results': results, + 'hits': hits, + 'facets': facets, + 'spelling_suggestion': spelling_suggestion, + } + + def build_schema(self, fields): + content_field_name = '' + mapping = { + DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + } + + for field_name, field_class in fields.items(): + field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy() + if field_class.boost != 1.0: + field_mapping['boost'] = field_class.boost + + if field_class.document is True: + content_field_name = field_class.index_fieldname + + # Do this last to override `text` fields. + if field_mapping['type'] == 'string': + if field_class.indexed is False or hasattr(field_class, 'facet_for'): + field_mapping['index'] = 'not_analyzed' + del field_mapping['analyzer'] + + mapping[field_class.index_fieldname] = field_mapping + + return content_field_name, mapping + + def _iso_datetime(self, value): + """ + If value appears to be something datetime-like, return it in ISO format. + + Otherwise, return None. + """ + if hasattr(value, 'strftime'): + if hasattr(value, 'hour'): + return value.isoformat() + else: + return '%sT00:00:00' % value.isoformat() + + def _from_python(self, value): + """Convert more Python data types to ES-understandable JSON.""" + iso = self._iso_datetime(value) + if iso: + return iso + elif isinstance(value, six.binary_type): + # TODO: Be stricter. 
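+            # ``errors='replace'`` swaps undecodable bytes for U+FFFD instead
+            # of raising ``UnicodeDecodeError``.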
+            return six.text_type(value, errors='replace')
+        elif isinstance(value, set):
+            return list(value)
+        return value
+
+    def _to_python(self, value):
+        """Convert values from Elasticsearch to native Python values."""
+        if isinstance(value, (int, float, complex, list, tuple, bool)):
+            return value
+
+        if isinstance(value, six.string_types):
+            possible_datetime = DATETIME_REGEX.search(value)
+
+            if possible_datetime:
+                date_values = possible_datetime.groupdict()
+
+                for dk, dv in date_values.items():
+                    date_values[dk] = int(dv)
+
+                return datetime.datetime(
+                    date_values['year'], date_values['month'],
+                    date_values['day'], date_values['hour'],
+                    date_values['minute'], date_values['second'])
+
+        try:
+            # This is slightly gross but it's hard to tell otherwise what the
+            # string's original type might have been. Be careful who you trust.
+            converted_value = eval(value)
+
+            # Try to handle most built-in types.
+            if isinstance(
+                    converted_value,
+                    (int, list, tuple, set, dict, float, complex)):
+                return converted_value
+        except Exception:
+            # If it fails (SyntaxError or its ilk) or we don't trust it,
+            # continue on.
+            pass
+
+        return value
+
+# DRL_FIXME: Perhaps move to something where, if none of these
+# match, call a custom method on the form that returns, per-backend,
+# the right type of storage?
+DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'}
+FIELD_MAPPINGS = {
+    'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'},
+    'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'},
+    'date': {'type': 'date'},
+    'datetime': {'type': 'date'},
+
+    'location': {'type': 'geo_point'},
+    'boolean': {'type': 'boolean'},
+    'float': {'type': 'float'},
+    'long': {'type': 'long'},
+    'integer': {'type': 'long'},
+}
+
+
+# Sucks that this is almost an exact copy of what's in the Solr backend,
+# but we can't import due to dependencies.
+class Elasticsearch2SearchQuery(BaseSearchQuery):
+    def matching_all_fragment(self):
+        return '*:*'
+
+    def build_query_fragment(self, field, filter_type, value):
+        from haystack import connections
+        query_frag = ''
+
+        if not hasattr(value, 'input_type_name'):
+            # Handle when we've got a ``ValuesListQuerySet``...
+            if hasattr(value, 'values_list'):
+                value = list(value)
+
+            if isinstance(value, six.string_types):
+                # It's not an ``InputType``. Assume ``Clean``.
+                value = Clean(value)
+            else:
+                value = PythonData(value)
+
+        # Prepare the query using the InputType.
+        prepared_value = value.prepare(self)
+
+        if not isinstance(prepared_value, (set, list, tuple)):
+            # Then convert whatever we get back to what the backend wants if needed.
+            prepared_value = self.backend._from_python(prepared_value)
+
+        # 'content' is a special reserved word, much like 'pk' in
+        # Django's ORM layer. It indicates 'no special field'.
+        if field == 'content':
+            index_fieldname = ''
+        else:
+            index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+        filter_types = {
+            'contains': u'%s',
+            'startswith': u'%s*',
+            'exact': u'%s',
+            'gt': u'{%s TO *}',
+            'gte': u'[%s TO *]',
+            'lt': u'{* TO %s}',
+            'lte': u'[* TO %s]',
+            'fuzzy': u'%s~',
+        }
+
+        if value.post_process is False:
+            query_frag = prepared_value
+        else:
+            if filter_type in ['contains', 'startswith', 'fuzzy']:
+                if value.input_type_name == 'exact':
+                    query_frag = prepared_value
+                else:
+                    # Iterate over terms & incorporate the converted form of each into the query.
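+                    # Each whitespace-separated term has the filter template
+                    # applied to it; the pieces are then AND-ed together.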
+ terms = [] + + if isinstance(prepared_value, six.string_types): + for possible_value in prepared_value.split(' '): + terms.append(filter_types[filter_type] % self.backend._from_python(possible_value)) + else: + terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value)) + + if len(terms) == 1: + query_frag = terms[0] + else: + query_frag = u"(%s)" % " AND ".join(terms) + elif filter_type == 'in': + in_options = [] + + for possible_value in prepared_value: + in_options.append(u'"%s"' % self.backend._from_python(possible_value)) + + query_frag = u"(%s)" % " OR ".join(in_options) + elif filter_type == 'range': + start = self.backend._from_python(prepared_value[0]) + end = self.backend._from_python(prepared_value[1]) + query_frag = u'["%s" TO "%s"]' % (start, end) + elif filter_type == 'exact': + if value.input_type_name == 'exact': + query_frag = prepared_value + else: + prepared_value = Exact(prepared_value).prepare(self) + query_frag = filter_types[filter_type] % prepared_value + else: + if value.input_type_name != 'exact': + prepared_value = Exact(prepared_value).prepare(self) + + query_frag = filter_types[filter_type] % prepared_value + + if len(query_frag) and not isinstance(value, Raw): + if not query_frag.startswith('(') and not query_frag.endswith(')'): + query_frag = "(%s)" % query_frag + + return u"%s%s" % (index_fieldname, query_frag) + + def build_alt_parser_query(self, parser_name, query_string='', **kwargs): + if query_string: + kwargs['v'] = query_string + + kwarg_bits = [] + + for key in sorted(kwargs.keys()): + if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]: + kwarg_bits.append(u"%s='%s'" % (key, kwargs[key])) + else: + kwarg_bits.append(u"%s=%s" % (key, kwargs[key])) + + return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits)) + + def build_params(self, spelling_query=None, **kwargs): + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class + } + order_by_list = None + + if self.order_by: + if order_by_list is None: + order_by_list = [] + + for field in self.order_by: + direction = 'asc' + if field.startswith('-'): + direction = 'desc' + field = field[1:] + order_by_list.append((field, direction)) + + search_kwargs['sort_by'] = order_by_list + + if self.date_facets: + search_kwargs['date_facets'] = self.date_facets + + if self.distance_point: + search_kwargs['distance_point'] = self.distance_point + + if self.dwithin: + search_kwargs['dwithin'] = self.dwithin + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset + + if self.facets: + search_kwargs['facets'] = self.facets + + if self.fields: + search_kwargs['fields'] = self.fields + + if self.highlight: + search_kwargs['highlight'] = self.highlight + + if self.models: + search_kwargs['models'] = self.models + + if self.narrow_queries: + search_kwargs['narrow_queries'] = self.narrow_queries + + if self.query_facets: + search_kwargs['query_facets'] = self.query_facets + + if self.within: + search_kwargs['within'] = self.within + + if spelling_query: + search_kwargs['spelling_query'] = spelling_query + + return search_kwargs + + def run(self, spelling_query=None, **kwargs): + """Builds and executes the query. 
Returns a list of search results.""" + final_query = self.build_query() + search_kwargs = self.build_params(spelling_query, **kwargs) + + if kwargs: + search_kwargs.update(kwargs) + + results = self.backend.search(final_query, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + self._facet_counts = self.post_process_facets(results) + self._spelling_suggestion = results.get('spelling_suggestion', None) + + def run_mlt(self, **kwargs): + """Builds and executes the query. Returns a list of search results.""" + if self._more_like_this is False or self._mlt_instance is None: + raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") + + additional_query_string = self.build_query() + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class, + 'models': self.models + } + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset - self.start_offset + + results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + + +class Elasticsearch2SearchEngine(BaseEngine): + backend = Elasticsearch2SearchBackend + query = Elasticsearch2SearchQuery diff --git a/setup.py b/setup.py index a6cb4ac52..0ad15e441 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,6 @@ ] tests_require = [ - 'elasticsearch>=1.0.0,<2.0.0', 'pysolr>=3.3.2', 'whoosh>=2.5.4,<3.0', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py new file mode 100644 index 000000000..ba6384f46 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +import warnings + +from django.conf import settings + +from ..utils import unittest + +warnings.simplefilter('ignore', Warning) + + +def setup(): + try: + import elasticsearch + if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): + raise ImportError + from elasticsearch import Elasticsearch, exceptions + except ImportError: + raise unittest.SkipTest("'elasticsearch>=2.0.0,<3.0.0' not installed.") + + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + es = Elasticsearch(url) + try: + es.info() + except exceptions.ConnectionError as e: + raise unittest.SkipTest("elasticsearch not running on %r" % url, e) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py new file mode 100644 index 000000000..14fd3b1aa --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -0,0 +1,1498 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime +import logging as std_logging +import operator +import unittest +from decimal import Decimal + +import elasticsearch +from django.apps import apps +from django.conf import settings +from django.test import TestCase +from django.test.utils import override_settings + +from haystack import connections, indexes, reset_search_queries +from haystack.exceptions import SkipDocument +from haystack.inputs import AutoQuery +from haystack.models import SearchResult +from haystack.query import RelatedSearchQuerySet, SearchQuerySet, SQ +from haystack.utils import log as logging +from haystack.utils.geo import Point +from haystack.utils.loading import UnifiedIndex +from ..core.models import AFourthMockModel, AnotherMockModel, 
ASixthMockModel, MockModel +from ..mocks import MockSearchResult + +test_pickling = True + +try: + import cPickle as pickle +except ImportError: + try: + import pickle + except ImportError: + test_pickling = False + + +def clear_elasticsearch_index(): + # Wipe it clean. + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) + try: + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) + raw_es.indices.refresh() + except elasticsearch.TransportError: + pass + + # Since we've just completely deleted the index, we'll reset setup_complete so the next access will + # correctly define the mappings: + connections['elasticsearch'].get_backend().setup_complete = False + + +class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2MockSearchIndexWithSkipDocument(Elasticsearch2MockSearchIndex): + def prepare_text(self, obj): + if obj.author == 'daniel3': + raise SkipDocument + return u"Indexed!\n%s" % obj.id + + +class Elasticsearch2MockSpellingIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + def prepare_text(self, obj): + return obj.foo + + +class Elasticsearch2MaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + month = indexes.CharField(indexed=False) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def prepare_month(self, obj): + return "%02d" % obj.pub_date.month + + def get_model(self): + return MockModel + + +class Elasticsearch2MockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='foo', document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2AnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AnotherMockModel + + def prepare_text(self, obj): + return u"You might be searching for the user %s" % obj.author + + +class Elasticsearch2BoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField( + document=True, use_template=True, + template_name='search/indexes/core/mockmodel_template.txt' + ) + author = indexes.CharField(model_attr='author', weight=2.0) + editor = indexes.CharField(model_attr='editor') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AFourthMockModel + + def prepare(self, obj): + data = super(Elasticsearch2BoostMockSearchIndex, self).prepare(obj) + + if obj.pk == 4: + data['boost'] = 5.0 + + return data + + +class Elasticsearch2FacetingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + author = indexes.CharField(model_attr='author', faceted=True) + editor = indexes.CharField(model_attr='editor', faceted=True) + pub_date = 
indexes.DateField(model_attr='pub_date', faceted=True)
+    facet_field = indexes.FacetCharField(model_attr='author')
+
+    def prepare_text(self, obj):
+        return '%s %s' % (obj.author, obj.editor)
+
+    def get_model(self):
+        return AFourthMockModel
+
+
+class Elasticsearch2RoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(document=True, default='')
+    name = indexes.CharField()
+    is_active = indexes.BooleanField()
+    post_count = indexes.IntegerField()
+    average_rating = indexes.FloatField()
+    price = indexes.DecimalField()
+    pub_date = indexes.DateField()
+    created = indexes.DateTimeField()
+    tags = indexes.MultiValueField()
+    sites = indexes.MultiValueField()
+
+    def get_model(self):
+        return MockModel
+
+    def prepare(self, obj):
+        prepped = super(Elasticsearch2RoundTripSearchIndex, self).prepare(obj)
+        prepped.update({
+            'text': 'This is some example text.',
+            'name': 'Mister Pants',
+            'is_active': True,
+            'post_count': 25,
+            'average_rating': 3.6,
+            'price': Decimal('24.99'),
+            'pub_date': datetime.date(2009, 11, 21),
+            'created': datetime.datetime(2009, 11, 21, 21, 31, 00),
+            'tags': ['staff', 'outdoor', 'activist', 'scientist'],
+            'sites': [3, 5, 1],
+        })
+        return prepped
+
+
+class Elasticsearch2ComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(document=True, default='')
+    name = indexes.CharField(faceted=True)
+    is_active = indexes.BooleanField(faceted=True)
+    post_count = indexes.IntegerField()
+    post_count_i = indexes.FacetIntegerField(facet_for='post_count')
+    average_rating = indexes.FloatField(faceted=True)
+    pub_date = indexes.DateField(faceted=True)
+    created = indexes.DateTimeField(faceted=True)
+    sites = indexes.MultiValueField(faceted=True)
+
+    def get_model(self):
+        return MockModel
+
+
+class Elasticsearch2AutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(model_attr='foo', document=True)
+    name = indexes.CharField(model_attr='author')
+    pub_date = indexes.DateTimeField(model_attr='pub_date')
+    text_auto = indexes.EdgeNgramField(model_attr='foo')
+    name_auto = indexes.EdgeNgramField(model_attr='author')
+
+    def get_model(self):
+        return MockModel
+
+
+class Elasticsearch2SpatialSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(model_attr='name', document=True)
+    location = indexes.LocationField()
+
+    def prepare_location(self, obj):
+        return "%s,%s" % (obj.lat, obj.lon)
+
+    def get_model(self):
+        return ASixthMockModel
+
+
+class TestSettings(TestCase):
+    def test_kwargs_are_passed_on(self):
+        from haystack.backends.elasticsearch2_backend import Elasticsearch2SearchBackend
+        backend = Elasticsearch2SearchBackend('alias', **{
+            'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'],
+            'INDEX_NAME': 'testing',
+            'KWARGS': {'max_retries': 42}
+        })
+
+        self.assertEqual(backend.conn.transport.max_retries, 42)
+
+
+class Elasticsearch2SearchBackendTestCase(TestCase):
+    def setUp(self):
+        super(Elasticsearch2SearchBackendTestCase, self).setUp()
+
+        # Wipe it clean.
+        self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'])
+        clear_elasticsearch_index()
+
+        # Stow.
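+        # Keep a reference so ``tearDown`` can restore the original unified index.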
+ self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() + self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch']._index = self.old_ui + super(Elasticsearch2SearchBackendTestCase, self).tearDown() + self.sb.silently_fail = True + + def raw_search(self, query): + try: + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) + except elasticsearch.TransportError: + return {} + + def test_non_silent(self): + bad_sb = connections['elasticsearch'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) + + try: + bad_sb.update(self.smmi, self.sample_objs) + self.fail() + except: + pass + + try: + bad_sb.remove('core.mockmodel.1') + self.fail() + except: + pass + + try: + bad_sb.clear() + self.fail() + except: + pass + + try: + bad_sb.search('foo') + self.fail() + except: + pass + + def test_update_no_documents(self): + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'] + + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + self.assertEqual(sb.update(self.smmi, []), None) + + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) + try: + sb.update(self.smmi, []) + self.fail() + except: + pass + + def test_update(self): + self.sb.update(self.smmi, self.sample_objs) + + # Check what Elasticsearch thinks is there. + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + self.assertEqual( + sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=lambda x: x['id']), [ + { + 'django_id': '1', + 'django_ct': 'core.mockmodel', + 'name': 'daniel1', + 'name_exact': 'daniel1', + 'text': 'Indexed!\n1', + 'pub_date': '2009-02-24T00:00:00', + 'id': 'core.mockmodel.1' + }, + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_update_with_SkipDocument_raised(self): + self.sb.update(self.smmidni, self.sample_objs) + + # Check what Elasticsearch thinks is there. 
+ res = self.raw_search('*:*')['hits'] + self.assertEqual(res['total'], 2) + self.assertListEqual( + sorted([x['_source']['id'] for x in res['hits']]), + ['core.mockmodel.1', 'core.mockmodel.2'] + ) + + def test_remove(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 2) + self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], + key=operator.itemgetter('django_id')), [ + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_remove_succeeds_on_404(self): + self.sb.silently_fail = False + self.sb.remove('core.mockmodel.421') + + def test_clear(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear() + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel, MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + def test_search(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), {u'2', u'1', u'3'}) + + self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) + self.assertEqual( + sorted([result.highlighted[0] for result in self.sb.search('Index', highlight=True)['results']]), + [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) + + self.assertEqual(self.sb.search('Indx')['hits'], 0) + self.assertEqual(self.sb.search('indaxed')['spelling_suggestion'], 'indexed') + self.assertEqual(self.sb.search('arf', spelling_query='indexyd')['spelling_suggestion'], 'indexed') + + self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', facets={'name': {}}) + self.assertEqual(results['hits'], 3) + self.assertSetEqual( + set(results['facets']['fields']['name']), + {('daniel3', 1), ('daniel2', 1), ('daniel1', 1)} + ) + + self.assertEqual(self.sb.search('', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 'gap_by': 
'month', 'gap_amount': 1}}) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['dates']['pub_date'], [(datetime.datetime(2009, 2, 1, 0, 0), 3)]) + + self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) + results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['queries'], {u'name': 3}) + + self.assertEqual(self.sb.search('', narrow_queries={'name:daniel1'}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', narrow_queries={'name:daniel1'}) + self.assertEqual(results['hits'], 1) + + # Ensure that swapping the ``result_class`` works. + self.assertTrue( + isinstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult)) + + # Check the use of ``limit_to_registered_models``. + self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) + self.assertEqual( + sorted([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']]), + ['1', '2', '3']) + + # Stow. + old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(sorted([result.pk for result in self.sb.search('*:*')['results']]), ['1', '2', '3']) + + # Restore. + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models + + def test_spatial_search_parameters(self): + p1 = Point(1.23, 4.56) + kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, + sort_by=(('distance', 'desc'),)) + + self.assertIn('sort', kwargs) + self.assertEqual(1, len(kwargs['sort'])) + geo_d = kwargs['sort'][0]['_geo_distance'] + + # ElasticSearch supports the GeoJSON-style lng, lat pairs so unlike Solr the values should be + # in the same order as we used to create the Point(): + # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-filter.html#_lat_lon_as_array_4 + + self.assertDictEqual(geo_d, {'location': [1.23, 4.56], 'unit': 'km', 'order': 'desc'}) + + def test_more_like_this(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + # A functional MLT example with enough data to work is below. Rely on + # this to ensure the API is correct enough. 
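+        # With only these three small documents indexed, MLT finds nothing
+        # similar, so zero hits is the expected result.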
+ self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) + self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) + + def test_build_schema(self): + old_ui = connections['elasticsearch'].get_unified_index() + + (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 4 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date': {'type': 'date'}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'} + }) + + ui = UnifiedIndex() + ui.build(indexes=[Elasticsearch2ComplexFacetsMockSearchIndex()]) + (content_field_name, mapping) = self.sb.build_schema(ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 15 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'is_active_exact': {'type': 'boolean'}, + 'created': {'type': 'date'}, + 'post_count': {'type': 'long'}, + 'created_exact': {'type': 'date'}, + 'sites_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'is_active': {'type': 'boolean'}, + 'sites': {'type': 'string', 'analyzer': 'snowball'}, + 'post_count_i': {'type': 'long'}, + 'average_rating': {'type': 'float'}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date_exact': {'type': 'date'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'pub_date': {'type': 'date'}, + 'average_rating_exact': {'type': 'float'} + }) + + def test_verify_type(self): + old_ui = connections['elasticsearch'].get_unified_index() + ui = UnifiedIndex() + smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + ui.build(indexes=[smtmmi]) + connections['elasticsearch']._index = ui + sb = connections['elasticsearch'].get_backend() + sb.update(smtmmi, self.sample_objs) + + self.assertEqual(sb.search('*:*')['hits'], 3) + self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) + connections['elasticsearch']._index = old_ui + + +class CaptureHandler(std_logging.Handler): + logs_seen = [] + + def emit(self, record): + CaptureHandler.logs_seen.append(record) + + +class FailedElasticsearch2SearchBackendTestCase(TestCase): + def setUp(self): + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + # Stow. + # Point the backend at a URL that doesn't exist so we can watch the + # sparks fly. + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url + self.cap = CaptureHandler() + logging.getLogger('haystack').addHandler(self.cap) + config = apps.get_app_config('haystack') + logging.getLogger('haystack').removeHandler(config.stream) + + # Setup the rest of the bits. 
+ self.old_ui = connections['elasticsearch'].get_unified_index() + ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + ui.build(indexes=[self.smmi]) + connections['elasticsearch']._index = ui + self.sb = connections['elasticsearch'].get_backend() + + def tearDown(self): + # Restore. + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url + connections['elasticsearch']._index = self.old_ui + config = apps.get_app_config('haystack') + logging.getLogger('haystack').removeHandler(self.cap) + logging.getLogger('haystack').addHandler(config.stream) + + @unittest.expectedFailure + def test_all_cases(self): + # Prior to the addition of the try/except bits, these would all fail miserably. + self.assertEqual(len(CaptureHandler.logs_seen), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(len(CaptureHandler.logs_seen), 1) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 2) + + self.sb.search('search') + self.assertEqual(len(CaptureHandler.logs_seen), 3) + + self.sb.more_like_this(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 4) + + self.sb.clear([MockModel]) + self.assertEqual(len(CaptureHandler.logs_seen), 5) + + self.sb.clear() + self.assertEqual(len(CaptureHandler.logs_seen), 6) + + +class LiveElasticsearch2SearchQueryTestCase(TestCase): + fixtures = ['base_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQueryTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + self.sq = connections['elasticsearch'].get_query() + + # Force indexing of the content. + self.smmi.update(using='elasticsearch') + + def tearDown(self): + connections['elasticsearch']._index = self.old_ui + super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() + + def test_log_query(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch'].queries), 0) + + with self.settings(DEBUG=False): + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch'].queries), 0) + + with self.settings(DEBUG=True): + # Redefine it to clear out the cached results. + self.sq = connections['elasticsearch'].query(using='elasticsearch') + self.sq.add_filter(SQ(name='bar')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch'].queries), 1) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], + 'name:(bar)') + + # And again, for good measure. + self.sq = connections['elasticsearch'].query('elasticsearch') + self.sq.add_filter(SQ(name='bar')) + self.sq.add_filter(SQ(text='moof')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch'].queries), 2) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], + 'name:(bar)') + self.assertEqual(connections['elasticsearch'].queries[1]['query_string'], + u'(name:(bar) AND text:(moof))') + + +lssqstc_all_loaded = None + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SearchQuerySetTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() + + # Stow. 
+        self.old_ui = connections['elasticsearch'].get_unified_index()
+        self.ui = UnifiedIndex()
+        self.smmi = Elasticsearch2MockSearchIndex()
+        self.ui.build(indexes=[self.smmi])
+        connections['elasticsearch']._index = self.ui
+
+        self.sqs = SearchQuerySet('elasticsearch')
+        self.rsqs = RelatedSearchQuerySet('elasticsearch')
+
+        # Ugly, but skipping the constant reindexing saves us almost 50% of the runtime.
+        global lssqstc_all_loaded
+
+        if lssqstc_all_loaded is None:
+            lssqstc_all_loaded = True
+
+            # Wipe it clean.
+            clear_elasticsearch_index()
+
+            # Force indexing of the content.
+            self.smmi.update(using='elasticsearch')
+
+    def tearDown(self):
+        # Restore.
+        connections['elasticsearch']._index = self.old_ui
+        super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown()
+
+    def test_load_all(self):
+        sqs = self.sqs.order_by('pub_date').load_all()
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertTrue(len(sqs) > 0)
+        self.assertEqual(sqs[2].object.foo,
+                         u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.')
+
+    def test_iter(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        sqs = self.sqs.all()
+        results = sorted([int(result.pk) for result in sqs])
+        self.assertEqual(results, list(range(1, 24)))
+        # 23 results fetched in batches of 10 (the ITERATOR_LOAD_PER_QUERY default) means three queries.
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
+
+    def test_slice(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = self.sqs.all().order_by('pub_date')
+        self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
+
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = self.sqs.all().order_by('pub_date')
+        self.assertEqual(int(results[21].pk), 22)
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
+
+    def test_values_slicing(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+
+        # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends
+
+        # The values will come back as strings because Haystack doesn't assume PKs are integers.
+ # We'll prepare this set once since we're going to query the same results in multiple ways: + expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] + + results = self.sqs.all().order_by('pub_date').values('pk') + self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk') + self.assertListEqual([i[0] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) + self.assertListEqual(results[1:11], expected_pks) + + self.assertEqual(len(connections['elasticsearch'].queries), 3) + + def test_count(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch'].queries), 0) + sqs = self.sqs.all() + self.assertEqual(sqs.count(), 23) + self.assertEqual(sqs.count(), 23) + self.assertEqual(len(sqs), 23) + self.assertEqual(sqs.count(), 23) + # Should only execute one query to count the length of the result set. + self.assertEqual(len(connections['elasticsearch'].queries), 1) + + def test_manual_iter(self): + results = self.sqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch'].queries), 0) + results = set([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, + {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) + self.assertEqual(len(connections['elasticsearch'].queries), 3) + + def test_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch'].queries), 0) + results = self.sqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch'].queries), 2) + + def test_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch'].queries), 0) + self.assertEqual(self.sqs._cache_is_full(), False) + results = self.sqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + self.assertEqual(len(connections['elasticsearch'].queries), 3) + + def test___and__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 & sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') + + # Now for something more complex... + sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) + sqs4 = self.sqs.filter(content='bar') + sqs = sqs3 & sqs4 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 3) + self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') + + def test___or__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 | sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') + + # Now for something more complex... 
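+        # Mixing exclude() with grouped SQ objects verifies that OR-ing keeps each
+        # side's grouping (including the NOT) intact.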
+        sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz'))
+        sqs4 = self.sqs.filter(content='bar').models(MockModel)
+        sqs = sqs3 | sqs4
+
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertEqual(len(sqs.query.query_filter), 2)
+        self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))')
+
+    def test_auto_query(self):
+        # Ensure bits in exact matches get escaped properly as well.
+        # This will break horrifically if escaping isn't working.
+        sqs = self.sqs.auto_query('"pants:rule"')
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertEqual(repr(sqs.query.query_filter), '<SQ: AND content__content="pants:rule">')
+        self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")')
+        self.assertEqual(len(sqs), 0)
+
+    # Regressions
+
+    def test_regression_proper_start_offsets(self):
+        sqs = self.sqs.filter(text='index')
+        self.assertNotEqual(sqs.count(), 0)
+
+        id_counts = {}
+
+        for item in sqs:
+            if item.id in id_counts:
+                id_counts[item.id] += 1
+            else:
+                id_counts[item.id] = 1
+
+        for key, value in id_counts.items():
+            if value > 1:
+                self.fail("Result with id '%s' seen more than once in the results." % key)
+
+    def test_regression_raw_search_breaks_slicing(self):
+        sqs = self.sqs.raw_search('text:index')
+        page_1 = [result.pk for result in sqs[0:10]]
+        page_2 = [result.pk for result in sqs[10:20]]
+
+        for pk in page_2:
+            if pk in page_1:
+                self.fail("Result with id '%s' seen more than once in the results." % pk)
+
+    # RelatedSearchQuerySet Tests
+
+    def test_related_load_all(self):
+        sqs = self.rsqs.order_by('pub_date').load_all()
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertTrue(len(sqs) > 0)
+        self.assertEqual(sqs[2].object.foo,
+                         u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.')
+
+    def test_related_load_all_queryset(self):
+        sqs = self.rsqs.load_all().order_by('pub_date')
+        self.assertEqual(len(sqs._load_all_querysets), 0)
+
+        sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1))
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertEqual(len(sqs._load_all_querysets), 1)
+        self.assertEqual(sorted([obj.object.id for obj in sqs]), list(range(2, 24)))
+
+        sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10))
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertEqual(len(sqs._load_all_querysets), 1)
+        self.assertEqual(set([obj.object.id for obj in sqs]), {12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20})
+        self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), {21, 22, 23})
+
+    def test_related_iter(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        sqs = self.rsqs.all()
+        results = set([int(result.pk) for result in sqs])
+        self.assertEqual(results,
+                         {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20})
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
+
+    def test_related_slice(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = self.rsqs.all().order_by('pub_date')
+        self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
+
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = self.rsqs.all().order_by('pub_date')
+        self.assertEqual(int(results[21].pk), 22)
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
+
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = self.rsqs.all().order_by('pub_date')
+        self.assertEqual(set([int(result.pk) for result in results[20:30]]), {21, 22, 23})
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
+
+    def test_related_manual_iter(self):
+        results = self.rsqs.all()
+
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = sorted([int(result.pk) for result in results._manual_iter()])
+        self.assertEqual(results, list(range(1, 24)))
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
+
+    def test_related_fill_cache(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results = self.rsqs.all()
+        self.assertEqual(len(results._result_cache), 0)
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        results._fill_cache(0, 10)
+        self.assertEqual(len([result for result in results._result_cache if result is not None]), 10)
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
+        results._fill_cache(10, 20)
+        self.assertEqual(len([result for result in results._result_cache if result is not None]), 20)
+        self.assertEqual(len(connections['elasticsearch'].queries), 2)
+
+    def test_related_cache_is_full(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch'].queries), 0)
+        self.assertEqual(self.rsqs._cache_is_full(), False)
+        results = self.rsqs.all()
+        fire_the_iterator_and_fill_cache = [result for result in results]
+        self.assertEqual(results._cache_is_full(), True)
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
+
+    def test_quotes_regression(self):
+        sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E")
+        # Should not have empty terms.
+        self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)")
+        # Should not cause Elasticsearch to 500.
+        self.assertEqual(sqs.count(), 0)
+
+        sqs = self.sqs.auto_query('blazing')
+        self.assertEqual(sqs.query.build_query(), u'(blazing)')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('blazing saddles')
+        self.assertEqual(sqs.query.build_query(), u'(blazing saddles)')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('"blazing saddles')
+        self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('"blazing saddles"')
+        self.assertEqual(sqs.query.build_query(), u'("blazing saddles")')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing saddles"')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing \'saddles"')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing \'\'saddles"')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('"blazing saddles" mel')
+        self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('"blazing saddles" mel brooks')
+        self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing saddles" brooks')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)')
+        self.assertEqual(sqs.count(), 0)
+        sqs = self.sqs.auto_query('mel "blazing saddles" "brooks')
+        self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" \\"brooks)')
+        self.assertEqual(sqs.count(), 0)
+
+    def test_query_generation(self):
+        sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world")))
+        self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))")
+
+    def test_result_class(self):
+        # Assert that we're defaulting to ``SearchResult``.
+        sqs = self.sqs.all()
+        self.assertTrue(isinstance(sqs[0], SearchResult))
+
+        # Custom class.
+        sqs = self.sqs.result_class(MockSearchResult).all()
+        self.assertTrue(isinstance(sqs[0], MockSearchResult))
+
+        # Reset to default.
+        sqs = self.sqs.result_class(None).all()
+        self.assertTrue(isinstance(sqs[0], SearchResult))
+
+
+@override_settings(DEBUG=True)
+class LiveElasticsearch2SpellingTestCase(TestCase):
+    """Used to test actual implementation details of the SearchQuerySet."""
+    fixtures = ['bulk_data.json']
+
+    def setUp(self):
+        super(LiveElasticsearch2SpellingTestCase, self).setUp()
+
+        # Stow.
+ self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSpellingIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch') + + def tearDown(self): + # Restore. + connections['elasticsearch']._index = self.old_ui + super(LiveElasticsearch2SpellingTestCase, self).tearDown() + + def test_spelling(self): + self.assertEqual(self.sqs.auto_query('structurd').spelling_suggestion(), 'structured') + self.assertEqual(self.sqs.spelling_suggestion('structurd'), 'structured') + self.assertEqual(self.sqs.auto_query('srchindex instanc').spelling_suggestion(), 'searchindex instance') + self.assertEqual(self.sqs.spelling_suggestion('srchindex instanc'), 'searchindex instance') + + +class LiveElasticsearch2MoreLikeThisTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2MoreLikeThisTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch') + + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') + + def tearDown(self): + # Restore. + connections['elasticsearch']._index = self.old_ui + super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() + + def test_more_like_this(self): + mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in mlt] + self.assertEqual(mlt.count(), 11) + self.assertEqual(set(results), {u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'}) + self.assertEqual(len(results), 10) + + alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) + results = [result.pk for result in alt_mlt] + self.assertEqual(alt_mlt.count(), 9) + self.assertEqual(set(results), {u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'}) + self.assertEqual(len(results), 9) + + alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in alt_mlt_with_models] + self.assertEqual(alt_mlt_with_models.count(), 10) + self.assertEqual(set(results), {u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'}) + self.assertEqual(len(results), 10) + + if hasattr(MockModel.objects, 'defer'): + # Make sure MLT works with deferred bits. + mi = MockModel.objects.defer('foo').get(pk=1) + self.assertEqual(mi._deferred, True) + deferred = self.sqs.models(MockModel).more_like_this(mi) + self.assertEqual(deferred.count(), 0) + self.assertEqual([result.pk for result in deferred], []) + self.assertEqual(len([result.pk for result in deferred]), 0) + + # Ensure that swapping the ``result_class`` works. 
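+        # more_like_this() should return instances of the swapped-in class,
+        # just as search() does.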
+ self.assertTrue( + isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0], + MockSearchResult)) + + +class LiveElasticsearch2AutocompleteTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2AutocompleteTestCase, self).setUp() + + # Stow. + self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch') + + def tearDown(self): + # Restore. + connections['elasticsearch']._index = self.old_ui + super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() + + def test_build_schema(self): + self.sb = connections['elasticsearch'].get_backend() + content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + }, + 'text': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'pub_date': { + 'type': 'date' + }, + 'name': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'text_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + } + }) + + def test_autocomplete(self): + autocomplete = self.sqs.autocomplete(text_auto='mod') + self.assertEqual(autocomplete.count(), 16) + self.assertEqual(set([result.pk for result in autocomplete]), + {'1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'}) + self.assertTrue('mod' in autocomplete[0].text.lower()) + self.assertTrue('mod' in autocomplete[1].text.lower()) + self.assertTrue('mod' in autocomplete[2].text.lower()) + self.assertTrue('mod' in autocomplete[3].text.lower()) + self.assertTrue('mod' in autocomplete[4].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete]), 16) + + # Test multiple words. + autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') + self.assertEqual(autocomplete_2.count(), 13) + self.assertEqual(set([result.pk for result in autocomplete_2]), + {'1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'}) + map_results = {result.pk: result for result in autocomplete_2} + self.assertTrue('your' in map_results['1'].text.lower()) + self.assertTrue('mod' in map_results['1'].text.lower()) + self.assertTrue('your' in map_results['6'].text.lower()) + self.assertTrue('mod' in map_results['6'].text.lower()) + self.assertTrue('your' in map_results['2'].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete_2]), 13) + + # Test multiple fields. 
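+        # autocomplete() ANDs its keyword arguments together, so a hit must match both prefixes.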
+ autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') + self.assertEqual(autocomplete_3.count(), 4) + self.assertEqual(set([result.pk for result in autocomplete_3]), {'12', '1', '22', '14'}) + self.assertEqual(len([result.pk for result in autocomplete_3]), 4) + + # Test numbers in phrases + autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) + + # Test numbers alone + autocomplete_4 = self.sqs.autocomplete(text_auto='867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) + + +class LiveElasticsearch2RoundTripTestCase(TestCase): + def setUp(self): + super(LiveElasticsearch2RoundTripTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.srtsi = Elasticsearch2RoundTripSearchIndex() + self.ui.build(indexes=[self.srtsi]) + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + + self.sqs = SearchQuerySet('elasticsearch') + + # Fake indexing. + mock = MockModel() + mock.id = 1 + self.sb.update(self.srtsi, [mock]) + + def tearDown(self): + # Restore. + connections['elasticsearch']._index = self.old_ui + super(LiveElasticsearch2RoundTripTestCase, self).tearDown() + + def test_round_trip(self): + results = self.sqs.filter(id='core.mockmodel.1') + + # Sanity check. + self.assertEqual(results.count(), 1) + + # Check the individual fields. + result = results[0] + self.assertEqual(result.id, 'core.mockmodel.1') + self.assertEqual(result.text, 'This is some example text.') + self.assertEqual(result.name, 'Mister Pants') + self.assertEqual(result.is_active, True) + self.assertEqual(result.post_count, 25) + self.assertEqual(result.average_rating, 3.6) + self.assertEqual(result.price, u'24.99') + self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) + self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) + self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) + self.assertEqual(result.sites, [3, 5, 1]) + + +@unittest.skipUnless(test_pickling, 'Skipping pickling tests') +class LiveElasticsearch2PickleTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2PickleTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch') + + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') + + def tearDown(self): + # Restore. + connections['elasticsearch']._index = self.old_ui + super(LiveElasticsearch2PickleTestCase, self).tearDown() + + def test_pickling(self): + results = self.sqs.all() + + for res in results: + # Make sure the cache is full. 
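+            # Iterating the full result set is what fills the cache; the loop body is intentionally empty.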
+            pass
+
+        in_a_pickle = pickle.dumps(results)
+        like_a_cuke = pickle.loads(in_a_pickle)
+        self.assertEqual(len(like_a_cuke), len(results))
+        self.assertEqual(like_a_cuke[0].id, results[0].id)
+
+
+class Elasticsearch2BoostBackendTestCase(TestCase):
+    def setUp(self):
+        super(Elasticsearch2BoostBackendTestCase, self).setUp()
+
+        # Wipe it clean.
+        self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'])
+        clear_elasticsearch_index()
+
+        # Stow.
+        self.old_ui = connections['elasticsearch'].get_unified_index()
+        self.ui = UnifiedIndex()
+        self.smmi = Elasticsearch2BoostMockSearchIndex()
+        self.ui.build(indexes=[self.smmi])
+        connections['elasticsearch']._index = self.ui
+        self.sb = connections['elasticsearch'].get_backend()
+
+        self.sample_objs = []
+
+        for i in range(1, 5):
+            mock = AFourthMockModel()
+            mock.id = i
+
+            if i % 2:
+                mock.author = 'daniel'
+                mock.editor = 'david'
+            else:
+                mock.author = 'david'
+                mock.editor = 'daniel'
+
+            mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i)
+            self.sample_objs.append(mock)
+
+    def tearDown(self):
+        connections['elasticsearch']._index = self.old_ui
+        super(Elasticsearch2BoostBackendTestCase, self).tearDown()
+
+    def raw_search(self, query):
+        # Pass the caller's query through rather than hard-coding '*:*'.
+        return self.raw_es.search(q=query, index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'])
+
+    def test_boost(self):
+        self.sb.update(self.smmi, self.sample_objs)
+        self.assertEqual(self.raw_search('*:*')['hits']['total'], 4)
+
+        results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel'))
+
+        self.assertEqual(set([result.id for result in results]),
+                         {'core.afourthmockmodel.4', 'core.afourthmockmodel.3', 'core.afourthmockmodel.1',
+                          'core.afourthmockmodel.2'})
+
+    def test__to_python(self):
+        self.assertEqual(self.sb._to_python('abc'), 'abc')
+        self.assertEqual(self.sb._to_python('1'), 1)
+        self.assertEqual(self.sb._to_python('2653'), 2653)
+        self.assertEqual(self.sb._to_python('25.5'), 25.5)
+        self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3])
+        self.assertEqual(self.sb._to_python('{"a": 1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2})
+        self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime.datetime(2009, 5, 9, 16, 14))
+        self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime.datetime(2009, 5, 9, 0, 0))
+        self.assertEqual(self.sb._to_python(None), None)
+
+
+class RecreateIndexTestCase(TestCase):
+    def setUp(self):
+        self.raw_es = elasticsearch.Elasticsearch(
+            settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'])
+
+    def test_recreate_index(self):
+        clear_elasticsearch_index()
+
+        sb = connections['elasticsearch'].get_backend()
+        sb.silently_fail = True
+        sb.setup()
+
+        original_mapping = self.raw_es.indices.get_mapping(index=sb.index_name)
+
+        sb.clear()
+        sb.setup()
+
+        try:
+            updated_mapping = self.raw_es.indices.get_mapping(index=sb.index_name)
+        except elasticsearch.NotFoundError:
+            self.fail("There is no mapping after recreating the index")
+
+        self.assertEqual(original_mapping, updated_mapping,
+                         "Mapping after recreating the index differs from the original one")
+
+
+class Elasticsearch2FacetingTestCase(TestCase):
+    def setUp(self):
+        super(Elasticsearch2FacetingTestCase, self).setUp()
+
+        # Wipe it clean.
+        clear_elasticsearch_index()
+
+        # Stow.
+ self.old_ui = connections['elasticsearch'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2FacetingMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 10): + mock = AFourthMockModel() + mock.id = i + if i > 5: + mock.editor = 'George Taylor' + else: + mock.editor = 'Perry White' + if i % 2: + mock.author = 'Daniel Lindsley' + else: + mock.author = 'Dan Watson' + mock.pub_date = datetime.date(2013, 9, (i % 4) + 1) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch']._index = self.old_ui + super(Elasticsearch2FacetingTestCase, self).tearDown() + + def test_facet(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 5), + ('Dan Watson', 4), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ('George Taylor', 4), + ]) + counts = SearchQuerySet('elasticsearch').filter(content='white').facet('facet_field', + order='reverse_count').facet_counts() + self.assertEqual(counts['fields']['facet_field'], [ + ('Dan Watson', 2), + ('Daniel Lindsley', 3), + ]) + + def test_multiple_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch').narrow('editor_exact:"Perry White"').narrow( + 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ]) + + def test_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').narrow( + 'editor_exact:"Perry White"').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ('Dan Watson', 2), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ]) + + def test_date_facet(self): + self.sb.update(self.smmi, self.sample_objs) + start = datetime.date(2013, 9, 1) + end = datetime.date(2013, 9, 30) + # Facet by day + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 2), + (datetime.datetime(2013, 9, 2), 3), + (datetime.datetime(2013, 9, 3), 2), + (datetime.datetime(2013, 9, 4), 2), + ]) + # By month + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 9), + ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py new file mode 100644 index 000000000..adc87d16d --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_inputs.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, division, print_function, unicode_literals + +from django.test import TestCase + +from haystack import connections, inputs + + +class Elasticsearch2InputTestCase(TestCase): + def setUp(self): + super(Elasticsearch2InputTestCase, self).setUp() + self.query_obj = connections['elasticsearch'].get_query() + + def test_raw_init(self): 
+        raw = inputs.Raw('hello OR there, :you')
+        self.assertEqual(raw.query_string, 'hello OR there, :you')
+        self.assertEqual(raw.kwargs, {})
+        self.assertEqual(raw.post_process, False)
+
+        raw = inputs.Raw('hello OR there, :you', test='really')
+        self.assertEqual(raw.query_string, 'hello OR there, :you')
+        self.assertEqual(raw.kwargs, {'test': 'really'})
+        self.assertEqual(raw.post_process, False)
+
+    def test_raw_prepare(self):
+        raw = inputs.Raw('hello OR there, :you')
+        self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you')
+
+    def test_clean_init(self):
+        clean = inputs.Clean('hello OR there, :you')
+        self.assertEqual(clean.query_string, 'hello OR there, :you')
+        self.assertEqual(clean.post_process, True)
+
+    def test_clean_prepare(self):
+        clean = inputs.Clean('hello OR there, :you')
+        self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you')
+
+    def test_exact_init(self):
+        exact = inputs.Exact('hello OR there, :you')
+        self.assertEqual(exact.query_string, 'hello OR there, :you')
+        self.assertEqual(exact.post_process, True)
+
+    def test_exact_prepare(self):
+        exact = inputs.Exact('hello OR there, :you')
+        self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"')
+
+        exact = inputs.Exact('hello OR there, :you', clean=True)
+        self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"')
+
+    def test_not_init(self):
+        not_it = inputs.Not('hello OR there, :you')
+        self.assertEqual(not_it.query_string, 'hello OR there, :you')
+        self.assertEqual(not_it.post_process, True)
+
+    def test_not_prepare(self):
+        not_it = inputs.Not('hello OR there, :you')
+        self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)')
+
+    def test_autoquery_init(self):
+        autoquery = inputs.AutoQuery('panic -don\'t "froody dude"')
+        self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"')
+        self.assertEqual(autoquery.post_process, False)
+
+    def test_autoquery_prepare(self):
+        autoquery = inputs.AutoQuery('panic -don\'t "froody dude"')
+        self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"')
+
+    def test_altparser_init(self):
+        altparser = inputs.AltParser('dismax')
+        self.assertEqual(altparser.parser_name, 'dismax')
+        self.assertEqual(altparser.query_string, '')
+        self.assertEqual(altparser.kwargs, {})
+        self.assertEqual(altparser.post_process, False)
+
+        altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1)
+        self.assertEqual(altparser.parser_name, 'dismax')
+        self.assertEqual(altparser.query_string, 'douglas adams')
+        self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'})
+        self.assertEqual(altparser.post_process, False)
+
+    def test_altparser_prepare(self):
+        altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1)
+        self.assertEqual(altparser.prepare(self.query_obj),
+                         u"""{!dismax mm=1 qf=author v='douglas adams'}""")
diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py
new file mode 100644
index 000000000..c66191c59
--- /dev/null
+++ b/test_haystack/elasticsearch2_tests/test_query.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import datetime
+
+import elasticsearch
+from django.test import TestCase
+
+from haystack import connections
+from haystack.inputs import Exact
+from haystack.models import SearchResult
+from haystack.query import SearchQuerySet, SQ
+from haystack.utils.geo import D, Point
+from ..core.models import AnotherMockModel, MockModel
+
+
+class Elasticsearch2SearchQueryTestCase(TestCase):
+    def setUp(self):
+        super(Elasticsearch2SearchQueryTestCase, self).setUp()
+        self.sq = connections['elasticsearch'].get_query()
+
+    def test_build_query_all(self):
+        self.assertEqual(self.sq.build_query(), '*:*')
+
+    def test_build_query_single_word(self):
+        self.sq.add_filter(SQ(content='hello'))
+        self.assertEqual(self.sq.build_query(), '(hello)')
+
+    def test_build_query_boolean(self):
+        self.sq.add_filter(SQ(content=True))
+        self.assertEqual(self.sq.build_query(), '(True)')
+
+    def test_regression_slash_search(self):
+        self.sq.add_filter(SQ(content='hello/'))
+        self.assertEqual(self.sq.build_query(), '(hello\\/)')
+
+    def test_build_query_datetime(self):
+        self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
+        self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00)')
+
+    def test_build_query_multiple_words_and(self):
+        self.sq.add_filter(SQ(content='hello'))
+        self.sq.add_filter(SQ(content='world'))
+        self.assertEqual(self.sq.build_query(), '((hello) AND (world))')
+
+    def test_build_query_multiple_words_not(self):
+        self.sq.add_filter(~SQ(content='hello'))
+        self.sq.add_filter(~SQ(content='world'))
+        self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))')
+
+    def test_build_query_multiple_words_or(self):
+        self.sq.add_filter(~SQ(content='hello'))
+        self.sq.add_filter(SQ(content='hello'), use_or=True)
+        self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))')
+
+    def test_build_query_multiple_words_mixed(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(content='hello'), use_or=True)
+        self.sq.add_filter(~SQ(content='world'))
+        self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))')
+
+    def test_build_query_phrase(self):
+        self.sq.add_filter(SQ(content='hello world'))
+        self.assertEqual(self.sq.build_query(), '(hello AND world)')
+
+        self.sq.add_filter(SQ(content__exact='hello world'))
+        self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))')
+
+    def test_build_query_boost(self):
+        self.sq.add_filter(SQ(content='hello'))
+        self.sq.add_boost('world', 5)
+        self.assertEqual(self.sq.build_query(), "(hello) world^5")
+
+    def test_build_query_multiple_filter_types(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00')))
+        self.sq.add_filter(SQ(author__gt='daniel'))
+        self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00')))
+        self.sq.add_filter(SQ(title__gte='B'))
+        self.sq.add_filter(SQ(id__in=[1, 2, 3]))
+        self.sq.add_filter(SQ(rating__range=[3, 5]))
+        self.assertEqual(self.sq.build_query(),
+                         u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))')
+
+    def test_build_query_multiple_filter_types_with_datetimes(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0)))
+        self.sq.add_filter(SQ(author__gt='daniel'))
+        self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0)))
+        self.sq.add_filter(SQ(title__gte='B'))
+        self.sq.add_filter(SQ(id__in=[1, 2, 3]))
+        self.sq.add_filter(SQ(rating__range=[3, 5]))
+        self.assertEqual(self.sq.build_query(),
+                         u'((why) AND pub_date:([* TO "2009-02-10T01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12T12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))')
+
+    def test_build_query_in_filter_multiple_words(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"]))
+        self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))')
+
+    def test_build_query_in_filter_datetime(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)]))
+        self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21"))')
+
+    def test_build_query_in_with_set(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(title__in={"A Famous Paper", "An Infamous Article"}))
+        self.assertTrue('((why) AND title:(' in self.sq.build_query())
+        self.assertTrue('"A Famous Paper"' in self.sq.build_query())
+        self.assertTrue('"An Infamous Article"' in self.sq.build_query())
+
+    def test_build_query_wildcard_filter_types(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(title__startswith='haystack'))
+        self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))')
+
+    def test_build_query_fuzzy_filter_types(self):
+        self.sq.add_filter(SQ(content='why'))
+        self.sq.add_filter(SQ(title__fuzzy='haystack'))
+        self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))')
+
+    def test_clean(self):
+        self.assertEqual(self.sq.clean('hello world'), 'hello world')
+        self.assertEqual(self.sq.clean('hello AND world'), 'hello and world')
+        self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'),
+                         'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world')
+        self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'),
+                         'so please NOTe i am in a bAND and bORed')
+
+    def test_build_query_with_models(self):
+        self.sq.add_filter(SQ(content='hello'))
+        self.sq.add_model(MockModel)
+        self.assertEqual(self.sq.build_query(), '(hello)')
+
+        self.sq.add_model(AnotherMockModel)
+        self.assertEqual(self.sq.build_query(), u'(hello)')
+
+    def test_set_result_class(self):
+        # Assert that we're defaulting to ``SearchResult``.
+        self.assertTrue(issubclass(self.sq.result_class, SearchResult))
+
+        # Custom class.
+        class IttyBittyResult(object):
+            pass
+
+        self.sq.set_result_class(IttyBittyResult)
+        self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult))
+
+        # Reset to default.
+ self.sq.set_result_class(None) + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + def test_in_filter_values_list(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=[1, 2, 3])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') + + def test_narrow_sq(self): + sqs = SearchQuerySet(using='elasticsearch').narrow(SQ(foo='moof')) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.narrow_queries), 1) + self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') + + +class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (0, 9, 9) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions < 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': 0.5, 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) + + +class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (1, 0, 0) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions >= 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': "0.500000km", 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) diff --git a/test_haystack/elasticsearch_tests/__init__.py b/test_haystack/elasticsearch_tests/__init__.py index 4066af099..1736e1590 100644 --- a/test_haystack/elasticsearch_tests/__init__.py +++ b/test_haystack/elasticsearch_tests/__init__.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# -*- coding: utf-8 -*- import unittest import warnings @@ -8,8 +8,12 @@ warnings.simplefilter('ignore', Warning) + def setup(): try: + import elasticsearch + if not ((1, 0, 0) <= elasticsearch.__version__ < (2, 0, 0)): + raise ImportError from elasticsearch import Elasticsearch, ElasticsearchException except ImportError: raise unittest.SkipTest("elasticsearch-py not installed.") @@ -18,5 +22,5 @@ def setup(): try: es.info() except ElasticsearchException as e: - raise unittest.SkipTest("elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) - + raise unittest.SkipTest( + "elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index d676c0de0..14cfb7517 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -93,3 +93,11 @@ 
'INCLUDE_SPELLING': True, }, } + +if os.getenv('VERSION_ES') == ">=2.0.0,<3.0.0": + HAYSTACK_CONNECTIONS['elasticsearch'] = { + 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', + 'URL': '127.0.0.1:9200/', + 'INDEX_NAME': 'test_default', + 'INCLUDE_SPELLING': True, + } diff --git a/tox.ini b/tox.ini index b63dc9c0f..1c71c9bbc 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,21 @@ [tox] envlist = docs, - py27-django1.8, - py27-django1.9, - py34-django1.8, - py34-django1.9, - py35-django1.8, - py35-django1.9, - pypy-django1.8, - pypy-django1.9, + py27-django1.8-es1.x, + py27-django1.9-es1.x, + py34-django1.8-es1.x, + py34-django1.9-es1.x, + py35-django1.8-es1.x, + py35-django1.9-es1.x, + pypy-django1.8-es1.x, + pypy-django1.9-es1.x, + py27-django1.8-es2.x, + py27-django1.9-es2.x, + py34-django1.8-es2.x, + py34-django1.9-es2.x, + py35-django1.8-es2.x, + py35-django1.9-es2.x, + pypy-django1.8-es2.x, + pypy-django1.9-es2.x, [base] deps = requests @@ -20,54 +28,140 @@ deps = deps = Django>=1.8,<1.9 +[es2.x] +deps = + elasticsearch>=2.0.0,<3.0.0 + +[es1.x] +deps = + elasticsearch>=1.0.0,<2.0.0 + [testenv] commands = python test_haystack/solr_tests/server/wait-for-solr python {toxinidir}/setup.py test -[testenv:pypy-django1.8] +[testenv:pypy-django1.8-es1.x] +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:pypy-django1.9-es1.x] +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py27-django1.8-es1.x] +basepython = python2.7 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py27-django1.9-es1.x] +basepython = python2.7 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py34-django1.8-es1.x] +basepython = python3.4 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py34-django1.9-es1.x] +basepython = python3.4 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py35-django1.8-es1.x] +basepython = python3.5 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py35-django1.9-es1.x] +basepython = python3.5 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:pypy-django1.8-es2.x] +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:pypy-django1.9] +[testenv:pypy-django1.9-es2.x] +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py27-django1.8] +[testenv:py27-django1.8-es2.x] basepython = python2.7 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py27-django1.9] +[testenv:py27-django1.9-es2.x] basepython = python2.7 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py34-django1.8] +[testenv:py34-django1.8-es2.x] basepython = python3.4 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py34-django1.9] +[testenv:py34-django1.9-es2.x] basepython = python3.4 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py35-django1.8] +[testenv:py35-django1.8-es2.x] basepython = python3.5 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} 
{[django1.8]deps} {[base]deps} -[testenv:py35-django1.9] +[testenv:py35-django1.9-es2.x] basepython = python3.5 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps}