From c69344c51f5210f5e115d479a3427245faa9c01b Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 16:18:19 +0100
Subject: [PATCH 01/51] Elasticsearch 2.x support

---
 haystack/backends/elasticsearch2_backend.py         | 1045 ++++++++++++
 setup.py                                            |    2 +-
 .../elasticsearch2_tests/__init__.py                |   22 +
 .../elasticsearch2_tests/test_backend.py            | 1500 +++++++++++++++++
 .../elasticsearch2_tests/test_inputs.py             |   85 +
 .../elasticsearch2_tests/test_query.py              |  209 +++
 test_haystack/mocks.py                              |    3 +-
 test_haystack/settings.py                           |    6 +
 8 files changed, 2870 insertions(+), 2 deletions(-)
 create mode 100644 haystack/backends/elasticsearch2_backend.py
 create mode 100644 test_haystack/elasticsearch2_tests/__init__.py
 create mode 100644 test_haystack/elasticsearch2_tests/test_backend.py
 create mode 100644 test_haystack/elasticsearch2_tests/test_inputs.py
 create mode 100644 test_haystack/elasticsearch2_tests/test_query.py

diff --git a/haystack/backends/elasticsearch2_backend.py b/haystack/backends/elasticsearch2_backend.py
new file mode 100644
index 000000000..4c92d4c03
--- /dev/null
+++ b/haystack/backends/elasticsearch2_backend.py
@@ -0,0 +1,1045 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import datetime
+import re
+import warnings
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+
+import haystack
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
+from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, FUZZY_MAX_EXPANSIONS, FUZZY_MIN_SIM, ID
+from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument
+from haystack.inputs import Clean, Exact, PythonData, Raw
+from haystack.models import SearchResult
+from haystack.utils import get_identifier, get_model_ct
+from haystack.utils import log as logging
+from haystack.utils.app_loading import haystack_get_model
+
+try:
+    import elasticsearch
+    if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)):
+        raise ImportError
+    from elasticsearch.helpers import bulk, scan
+    from elasticsearch.exceptions import NotFoundError
+except ImportError:
+    raise MissingDependency("The 'elasticsearch2' backend requires the installation of 'elasticsearch>=2.0.0,<3.0.0'. Please refer to the documentation.")
+
+
+DATETIME_REGEX = re.compile(
+    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T'
+    r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d+)?$')
+
+
+class Elasticsearch2SearchBackend(BaseSearchBackend):
+    # Words reserved by Elasticsearch for special use.
+    RESERVED_WORDS = (
+        'AND',
+        'NOT',
+        'OR',
+        'TO',
+    )
+
+    # Characters reserved by Elasticsearch for special use.
+    # The '\\' must come first, so as not to overwrite the other slash replacements.
+    RESERVED_CHARACTERS = (
+        '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
+        '[', ']', '^', '"', '~', '*', '?', ':', '/',
+    )
+
+    # Settings to add an n-gram & edge n-gram analyzer.
+ DEFAULT_SETTINGS = { + 'settings': { + "analysis": { + "analyzer": { + "ngram_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["haystack_ngram", "lowercase"] + }, + "edgengram_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["haystack_edgengram", "lowercase"] + } + }, + "tokenizer": { + "haystack_ngram_tokenizer": { + "type": "nGram", + "min_gram": 3, + "max_gram": 15, + }, + "haystack_edgengram_tokenizer": { + "type": "edgeNGram", + "min_gram": 2, + "max_gram": 15, + "side": "front" + } + }, + "filter": { + "haystack_ngram": { + "type": "nGram", + "min_gram": 3, + "max_gram": 15 + }, + "haystack_edgengram": { + "type": "edgeNGram", + "min_gram": 2, + "max_gram": 15 + } + } + } + } + } + + def __init__(self, connection_alias, **connection_options): + super(Elasticsearch2SearchBackend, self).__init__(connection_alias, **connection_options) + + if 'URL' not in connection_options: + raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias) + + if 'INDEX_NAME' not in connection_options: + raise ImproperlyConfigured("You must specify a 'INDEX_NAME' in your settings for connection '%s'." % connection_alias) + + self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {})) + self.index_name = connection_options['INDEX_NAME'] + self.log = logging.getLogger('haystack') + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + + def setup(self): + """ + Defers loading until needed. + """ + # Get the existing mapping & cache it. We'll compare it + # during the ``update`` & if it doesn't match, we'll put the new + # mapping. + try: + self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name) + except NotFoundError: + pass + except Exception: + if not self.silently_fail: + raise + + unified_index = haystack.connections[self.connection_alias].get_unified_index() + self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields()) + current_mapping = { + 'modelresult': { + 'properties': field_mapping, + } + } + + if current_mapping != self.existing_mapping: + try: + # Make sure the index is there first. + self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400) + self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping) + self.existing_mapping = current_mapping + except Exception: + if not self.silently_fail: + raise + + self.setup_complete = True + + def update(self, index, iterable, commit=True): + """ + Updates the backend when given a SearchIndex and a collection of + documents. + + :param index: The SearchIndex to update. + :param iterable: The collection of documents. + :param commit: True to refresh the search index after the update. + """ + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True) + return + + prepped_docs = [] + + for obj in iterable: + try: + prepped_data = index.full_prepare(obj) + final_data = {} + + # Convert the data to make sure it's happy. 
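+                # A rough sketch of what _from_python() (defined below) does here:
+                # e.g. datetime.date(2009, 2, 25) -> '2009-02-25T00:00:00',
+                # set([1, 2]) -> [1, 2]; other values pass through unchanged.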
+ for key, value in prepped_data.items(): + final_data[key] = self._from_python(value) + final_data['_id'] = final_data[ID] + + prepped_docs.append(final_data) + except SkipDocument: + self.log.debug(u"Indexing for object `%s` skipped", obj) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + # We'll log the object identifier but won't include the actual object + # to avoid the possibility of that generating encoding errors while + # processing the log message: + self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, + extra={"data": {"index": index, + "object": get_identifier(obj)}}) + + bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult') + + if commit: + self.conn.indices.refresh(index=self.index_name) + + def remove(self, obj_or_string, commit=True): + """ + Removes a document/object from the backend. Can be either a model + instance or the identifier (i.e. ``app_name.model_name.id``) in the + event the object no longer exists. + + :param obj_or_string: The model instance or the identifier. + :param commit: True to refresh the search index after the remove. + """ + doc_id = get_identifier(obj_or_string) + + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, + exc_info=True) + return + + try: + self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404) + + if commit: + self.conn.indices.refresh(index=self.index_name) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, exc_info=True) + + def clear(self, models=None, commit=True): + """ + Clears the backend of all documents/objects for a collection of models. + + :param models: List or tuple of models to clear. + :param commit: Not used. 
+ """ + if models is not None: + assert isinstance(models, (list, tuple)) + + try: + if models is None: + self.conn.indices.delete(index=self.index_name, ignore=404) + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + else: + models_to_delete = [] + + for model in models: + models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) + + # Delete using scroll API + query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}} + generator = scan(self.conn, query=query, index=self.index_name, doc_type='modelresult') + actions = ({ + '_op_type': 'delete', + '_id': doc['_id'], + } for doc in generator) + bulk(self.conn, actions=actions, index=self.index_name, doc_type='modelresult') + self.conn.indices.refresh(index=self.index_name) + + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + if models is not None: + self.log.error("Failed to clear Elasticsearch index of models '%s': %s", + ','.join(models_to_delete), e, exc_info=True) + else: + self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True) + + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): + index = haystack.connections[self.connection_alias].get_unified_index() + content_field = index.document_field + + if query_string == '*:*': + kwargs = { + 'query': { + "match_all": {} + }, + } + else: + kwargs = { + 'query': { + 'query_string': { + 'default_field': content_field, + 'default_operator': DEFAULT_OPERATOR, + 'query': query_string, + 'analyze_wildcard': True, + 'auto_generate_phrase_queries': True, + 'fuzzy_min_sim': FUZZY_MIN_SIM, + 'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS, + }, + }, + } + + # so far, no filters + filters = [] + + if fields: + if isinstance(fields, (list, set)): + fields = " ".join(fields) + + kwargs['fields'] = fields + + if sort_by is not None: + order_list = [] + for field, direction in sort_by: + if field == 'distance' and distance_point: + # Do the geo-enabled sort. + lng, lat = distance_point['point'].get_coords() + sort_kwargs = { + "_geo_distance": { + distance_point['field']: [lng, lat], + "order": direction, + "unit": "km" + } + } + else: + if field == 'distance': + warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") + + # Regular sorting. + sort_kwargs = {field: {'order': direction}} + + order_list.append(sort_kwargs) + + kwargs['sort'] = order_list + + if start_offset is not None: + kwargs['from'] = start_offset + + if end_offset is not None: + kwargs['size'] = end_offset - start_offset + + if highlight is True: + kwargs['highlight'] = { + 'fields': { + content_field: {'store': 'yes'}, + } + } + + if self.include_spelling: + kwargs['suggest'] = { + 'suggest': { + 'text': spelling_query or query_string, + 'term': { + # Using content_field here will result in suggestions of stemmed words. 
+ 'field': '_all', + }, + }, + } + + if narrow_queries is None: + narrow_queries = set() + + if facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, extra_options in facets.items(): + facet_options = { + 'meta': { + '_type': 'terms', + }, + 'terms': { + 'field': facet_fieldname, + } + } + if 'order' in extra_options: + facet_options['meta']['order'] = extra_options.pop('order') + # Special cases for options applied at the facet level (not the terms level). + if extra_options.pop('global_scope', False): + # Renamed "global_scope" since "global" is a python keyword. + facet_options['global'] = True + if 'facet_filter' in extra_options: + facet_options['facet_filter'] = extra_options.pop('facet_filter') + facet_options['terms'].update(extra_options) + kwargs['aggs'][facet_fieldname] = facet_options + + if date_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in date_facets.items(): + # Need to detect on gap_by & only add amount if it's more than one. + interval = value.get('gap_by').lower() + + # Need to detect on amount (can't be applied on months or years). + if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'): + # Just the first character is valid for use. + interval = "%s%s" % (value['gap_amount'], interval[:1]) + + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'date_histogram', + }, + 'date_histogram': { + 'field': facet_fieldname, + 'interval': interval, + }, + 'aggs': { + facet_fieldname: { + 'date_range': { + 'field': facet_fieldname, + 'ranges': [ + { + 'from': self._from_python(value.get('start_date')), + 'to': self._from_python(value.get('end_date')), + } + ] + } + } + } + } + + if query_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in query_facets: + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'query', + }, + 'filter': { + 'query_string': { + 'query': value, + } + }, + } + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. 
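+            # build_models_list() returns the 'app_label.modelname' strings
+            # (as stored in the django_ct field) for every routed model.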
+            model_choices = self.build_models_list()
+        else:
+            model_choices = []
+
+        if len(model_choices) > 0:
+            filters.append({"terms": {DJANGO_CT: model_choices}})
+
+        for q in narrow_queries:
+            filters.append({
+                'query_string': {
+                    'query': q
+                }
+            })
+
+        if within is not None:
+            from haystack.utils.geo import generate_bounding_box
+
+            ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
+            within_filter = {
+                "geo_bounding_box": {
+                    within['field']: {
+                        "top_left": {
+                            "lat": north,
+                            "lon": west
+                        },
+                        "bottom_right": {
+                            "lat": south,
+                            "lon": east
+                        }
+                    }
+                },
+            }
+            filters.append(within_filter)
+
+        if dwithin is not None:
+            lng, lat = dwithin['point'].get_coords()
+
+            # NB: the 1.0.0 release of elasticsearch introduced an
+            # incompatible change in the distance filter formatting.
+            if elasticsearch.VERSION >= (1, 0, 0):
+                distance = "%(dist).6f%(unit)s" % {
+                    'dist': dwithin['distance'].km,
+                    'unit': "km"
+                }
+            else:
+                distance = dwithin['distance'].km
+
+            dwithin_filter = {
+                "geo_distance": {
+                    "distance": distance,
+                    dwithin['field']: {
+                        "lat": lat,
+                        "lon": lng
+                    }
+                }
+            }
+            filters.append(dwithin_filter)
+
+        # If we need to filter, change the query type to 'filtered'.
+        if filters:
+            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
+            if len(filters) == 1:
+                kwargs['query']['filtered']["filter"] = filters[0]
+            else:
+                kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}
+
+        return kwargs
+
+    @log_query
+    def search(self, query_string, **kwargs):
+        if len(query_string) == 0:
+            return {
+                'results': [],
+                'hits': 0,
+            }
+
+        if not self.setup_complete:
+            self.setup()
+
+        search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+        search_kwargs['from'] = kwargs.get('start_offset', 0)
+
+        order_fields = set()
+        for order in search_kwargs.get('sort', []):
+            for key in order.keys():
+                order_fields.add(key)
+
+        geo_sort = '_geo_distance' in order_fields
+
+        end_offset = kwargs.get('end_offset')
+        start_offset = kwargs.get('start_offset', 0)
+        if end_offset is not None and end_offset > start_offset:
+            search_kwargs['size'] = end_offset - start_offset
+
+        try:
+            raw_results = self.conn.search(body=search_kwargs,
+                                           index=self.index_name,
+                                           doc_type='modelresult',
+                                           _source=True)
+        except elasticsearch.TransportError as e:
+            if not self.silently_fail:
+                raise
+
+            self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True)
+            raw_results = {}
+
+        return self._process_results(raw_results,
+                                     highlight=kwargs.get('highlight'),
+                                     result_class=kwargs.get('result_class', SearchResult),
+                                     distance_point=kwargs.get('distance_point'),
+                                     geo_sort=geo_sort)
+
+    def more_like_this(self, model_instance, additional_query_string=None,
+                       start_offset=0, end_offset=None, models=None,
+                       limit_to_registered_models=None, result_class=None, **kwargs):
+        from haystack import connections
+
+        if not self.setup_complete:
+            self.setup()
+
+        # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+        # which won't be in our registry:
+        model_klass = model_instance._meta.concrete_model
+
+        index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
+        field_name = index.get_content_field()
+        params = {}
+
+        if start_offset is not None:
+            params['from_'] = start_offset
+
+        if end_offset is not None:
+            params['size'] = end_offset - start_offset
+
+        doc_id = get_identifier(model_instance)
+
+        try:
+            # More Like This query:
+            # https://www.elastic.co/guide/en/elasticsearch/reference/2.2/query-dsl-mlt-query.html
+            mlt_query = {
+                'query': {
+                    'more_like_this': {
+                        'fields': [field_name],
+                        'like': [{
+                            "_id": doc_id
+                        }]
+                    }
+                }
+            }
+
+            narrow_queries = []
+
+            if additional_query_string and additional_query_string != '*:*':
+                additional_filter = {
+                    "query": {
+                        "query_string": {
+                            "query": additional_query_string
+                        }
+                    }
+                }
+                narrow_queries.append(additional_filter)
+
+            if limit_to_registered_models is None:
+                limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+            if models and len(models):
+                model_choices = sorted(get_model_ct(model) for model in models)
+            elif limit_to_registered_models:
+                # Using narrow queries, limit the results to only models handled
+                # with the current routers.
+                model_choices = self.build_models_list()
+            else:
+                model_choices = []
+
+            if len(model_choices) > 0:
+                model_filter = {"terms": {DJANGO_CT: model_choices}}
+                narrow_queries.append(model_filter)
+
+            if len(narrow_queries) > 0:
+                mlt_query = {
+                    "query": {
+                        "filtered": {
+                            'query': mlt_query['query'],
+                            'filter': {
+                                'bool': {
+                                    'must': list(narrow_queries)
+                                }
+                            }
+                        }
+                    }
+                }
+
+            raw_results = self.conn.search(
+                body=mlt_query,
+                index=self.index_name,
+                doc_type='modelresult',
+                _source=True, **params)
+        except elasticsearch.TransportError as e:
+            if not self.silently_fail:
+                raise
+
+            self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s",
+                           doc_id, e, exc_info=True)
+            raw_results = {}
+
+        return self._process_results(raw_results, result_class=result_class)
+
+    def _process_results(self, raw_results, highlight=False,
+                         result_class=None, distance_point=None,
+                         geo_sort=False):
+        from haystack import connections
+        results = []
+        hits = raw_results.get('hits', {}).get('total', 0)
+        facets = {}
+        spelling_suggestion = None
+
+        if result_class is None:
+            result_class = SearchResult
+
+        if self.include_spelling and 'suggest' in raw_results:
+            raw_suggest = raw_results['suggest'].get('suggest')
+            if raw_suggest:
+                spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])
+
+        if 'aggregations' in raw_results:
+            facets = {
+                'fields': {},
+                'dates': {},
+                'queries': {},
+            }
+
+            for facet_fieldname, facet_info in raw_results['aggregations'].items():
+                facet_type = facet_info['meta']['_type']
+                if facet_type == 'terms':
+                    facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual in facet_info['buckets']]
+                    if 'order' in facet_info['meta']:
+                        if facet_info['meta']['order'] == 'reverse_count':
+                            srt = sorted(facets['fields'][facet_fieldname], key=lambda x: x[1])
+                            facets['fields'][facet_fieldname] = srt
+                elif facet_type == 'date_histogram':
+                    # Elasticsearch provides UTC timestamps with an extra three
+                    # decimals of precision, which datetime barfs on.
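+                    # e.g. a bucket key of 1330560000000 (milliseconds) becomes
+                    # utcfromtimestamp(1330560000.0) == datetime(2012, 3, 1, 0, 0).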
+ facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['key'] / 1000), individual['doc_count']) for individual in facet_info['buckets']] + elif facet_type == 'query': + facets['queries'][facet_fieldname] = facet_info['doc_count'] + + unified_index = connections[self.connection_alias].get_unified_index() + indexed_models = unified_index.get_indexed_models() + content_field = unified_index.document_field + + for raw_result in raw_results.get('hits', {}).get('hits', []): + source = raw_result['_source'] + app_label, model_name = source[DJANGO_CT].split('.') + additional_fields = {} + model = haystack_get_model(app_label, model_name) + + if model and model in indexed_models: + for key, value in source.items(): + index = unified_index.get_index(model) + string_key = str(key) + + if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): + additional_fields[string_key] = index.fields[string_key].convert(value) + else: + additional_fields[string_key] = self._to_python(value) + + del(additional_fields[DJANGO_CT]) + del(additional_fields[DJANGO_ID]) + + if 'highlight' in raw_result: + additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '') + + if distance_point: + additional_fields['_point_of_origin'] = distance_point + + if geo_sort and raw_result.get('sort'): + from haystack.utils.geo import Distance + additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0])) + else: + additional_fields['_distance'] = None + + result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields) + results.append(result) + else: + hits -= 1 + + return { + 'results': results, + 'hits': hits, + 'facets': facets, + 'spelling_suggestion': spelling_suggestion, + } + + def build_schema(self, fields): + content_field_name = '' + mapping = { + DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + } + + for field_name, field_class in fields.items(): + field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy() + if field_class.boost != 1.0: + field_mapping['boost'] = field_class.boost + + if field_class.document is True: + content_field_name = field_class.index_fieldname + + # Do this last to override `text` fields. + if field_mapping['type'] == 'string': + if field_class.indexed is False or hasattr(field_class, 'facet_for'): + field_mapping['index'] = 'not_analyzed' + del field_mapping['analyzer'] + + mapping[field_class.index_fieldname] = field_mapping + + return content_field_name, mapping + + def _iso_datetime(self, value): + """ + If value appears to be something datetime-like, return it in ISO format. + + Otherwise, return None. + """ + if hasattr(value, 'strftime'): + if hasattr(value, 'hour'): + return value.isoformat() + else: + return '%sT00:00:00' % value.isoformat() + + def _from_python(self, value): + """Convert more Python data types to ES-understandable JSON.""" + iso = self._iso_datetime(value) + if iso: + return iso + elif isinstance(value, six.binary_type): + # TODO: Be stricter. 
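+            # Decodes byte strings with the default codec, substituting the
+            # replacement character for undecodable bytes rather than raising.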
+            return six.text_type(value, errors='replace')
+        elif isinstance(value, set):
+            return list(value)
+        return value
+
+    def _to_python(self, value):
+        """Convert values from Elasticsearch to native Python values."""
+        if isinstance(value, (int, float, complex, list, tuple, bool)):
+            return value
+
+        if isinstance(value, six.string_types):
+            possible_datetime = DATETIME_REGEX.search(value)
+
+            if possible_datetime:
+                date_values = possible_datetime.groupdict()
+
+                for dk, dv in date_values.items():
+                    date_values[dk] = int(dv)
+
+                return datetime.datetime(
+                    date_values['year'], date_values['month'],
+                    date_values['day'], date_values['hour'],
+                    date_values['minute'], date_values['second'])
+
+        try:
+            # This is slightly gross but it's hard to tell otherwise what the
+            # string's original type might have been. Be careful who you trust.
+            converted_value = eval(value)
+
+            # Try to handle most built-in types.
+            if isinstance(
+                    converted_value,
+                    (int, list, tuple, set, dict, float, complex)):
+                return converted_value
+        except Exception:
+            # If it fails (SyntaxError or its ilk) or we don't trust it,
+            # continue on.
+            pass
+
+        return value
+
+# DRL_FIXME: Perhaps move to something where, if none of these
+# match, call a custom method on the form that returns, per-backend,
+# the right type of storage?
+DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'}
+FIELD_MAPPINGS = {
+    'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'},
+    'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'},
+    'date': {'type': 'date'},
+    'datetime': {'type': 'date'},
+
+    'location': {'type': 'geo_point'},
+    'boolean': {'type': 'boolean'},
+    'float': {'type': 'float'},
+    'long': {'type': 'long'},
+    'integer': {'type': 'long'},
+}
+
+
+# Sucks that this is almost an exact copy of what's in the Solr backend,
+# but we can't import due to dependencies.
+class Elasticsearch2SearchQuery(BaseSearchQuery):
+    def matching_all_fragment(self):
+        return '*:*'
+
+    def build_query_fragment(self, field, filter_type, value):
+        from haystack import connections
+        query_frag = ''
+
+        if not hasattr(value, 'input_type_name'):
+            # Handle when we've got a ``ValuesListQuerySet``...
+            if hasattr(value, 'values_list'):
+                value = list(value)
+
+            if isinstance(value, six.string_types):
+                # It's not an ``InputType``. Assume ``Clean``.
+                value = Clean(value)
+            else:
+                value = PythonData(value)
+
+        # Prepare the query using the InputType.
+        prepared_value = value.prepare(self)
+
+        if not isinstance(prepared_value, (set, list, tuple)):
+            # Then convert whatever we get back to what the backend wants, if needed.
+            prepared_value = self.backend._from_python(prepared_value)
+
+        # 'content' is a special reserved word, much like 'pk' in
+        # Django's ORM layer. It indicates 'no special field'.
+        if field == 'content':
+            index_fieldname = ''
+        else:
+            index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+        filter_types = {
+            'contains': u'%s',
+            'startswith': u'%s*',
+            'exact': u'%s',
+            'gt': u'{%s TO *}',
+            'gte': u'[%s TO *]',
+            'lt': u'{* TO %s}',
+            'lte': u'[* TO %s]',
+            'fuzzy': u'%s~',
+        }
+
+        if value.post_process is False:
+            query_frag = prepared_value
+        else:
+            if filter_type in ['contains', 'startswith', 'fuzzy']:
+                if value.input_type_name == 'exact':
+                    query_frag = prepared_value
+                else:
+                    # Iterate over terms & incorporate the converted form of each into the query.
+ terms = [] + + if isinstance(prepared_value, six.string_types): + for possible_value in prepared_value.split(' '): + terms.append(filter_types[filter_type] % self.backend._from_python(possible_value)) + else: + terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value)) + + if len(terms) == 1: + query_frag = terms[0] + else: + query_frag = u"(%s)" % " AND ".join(terms) + elif filter_type == 'in': + in_options = [] + + for possible_value in prepared_value: + in_options.append(u'"%s"' % self.backend._from_python(possible_value)) + + query_frag = u"(%s)" % " OR ".join(in_options) + elif filter_type == 'range': + start = self.backend._from_python(prepared_value[0]) + end = self.backend._from_python(prepared_value[1]) + query_frag = u'["%s" TO "%s"]' % (start, end) + elif filter_type == 'exact': + if value.input_type_name == 'exact': + query_frag = prepared_value + else: + prepared_value = Exact(prepared_value).prepare(self) + query_frag = filter_types[filter_type] % prepared_value + else: + if value.input_type_name != 'exact': + prepared_value = Exact(prepared_value).prepare(self) + + query_frag = filter_types[filter_type] % prepared_value + + if len(query_frag) and not isinstance(value, Raw): + if not query_frag.startswith('(') and not query_frag.endswith(')'): + query_frag = "(%s)" % query_frag + + return u"%s%s" % (index_fieldname, query_frag) + + def build_alt_parser_query(self, parser_name, query_string='', **kwargs): + if query_string: + kwargs['v'] = query_string + + kwarg_bits = [] + + for key in sorted(kwargs.keys()): + if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]: + kwarg_bits.append(u"%s='%s'" % (key, kwargs[key])) + else: + kwarg_bits.append(u"%s=%s" % (key, kwargs[key])) + + return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits)) + + def build_params(self, spelling_query=None, **kwargs): + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class + } + order_by_list = None + + if self.order_by: + if order_by_list is None: + order_by_list = [] + + for field in self.order_by: + direction = 'asc' + if field.startswith('-'): + direction = 'desc' + field = field[1:] + order_by_list.append((field, direction)) + + search_kwargs['sort_by'] = order_by_list + + if self.date_facets: + search_kwargs['date_facets'] = self.date_facets + + if self.distance_point: + search_kwargs['distance_point'] = self.distance_point + + if self.dwithin: + search_kwargs['dwithin'] = self.dwithin + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset + + if self.facets: + search_kwargs['facets'] = self.facets + + if self.fields: + search_kwargs['fields'] = self.fields + + if self.highlight: + search_kwargs['highlight'] = self.highlight + + if self.models: + search_kwargs['models'] = self.models + + if self.narrow_queries: + search_kwargs['narrow_queries'] = self.narrow_queries + + if self.query_facets: + search_kwargs['query_facets'] = self.query_facets + + if self.within: + search_kwargs['within'] = self.within + + if spelling_query: + search_kwargs['spelling_query'] = spelling_query + + return search_kwargs + + def run(self, spelling_query=None, **kwargs): + """Builds and executes the query. 
Returns a list of search results.""" + final_query = self.build_query() + search_kwargs = self.build_params(spelling_query, **kwargs) + + if kwargs: + search_kwargs.update(kwargs) + + results = self.backend.search(final_query, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + self._facet_counts = self.post_process_facets(results) + self._spelling_suggestion = results.get('spelling_suggestion', None) + + def run_mlt(self, **kwargs): + """Builds and executes the query. Returns a list of search results.""" + if self._more_like_this is False or self._mlt_instance is None: + raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") + + additional_query_string = self.build_query() + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class, + 'models': self.models + } + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset - self.start_offset + + results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + + +class Elasticsearch2SearchEngine(BaseEngine): + backend = Elasticsearch2SearchBackend + query = Elasticsearch2SearchQuery diff --git a/setup.py b/setup.py index c8b77e7c7..5c44d97d2 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ ] tests_require = [ - 'elasticsearch>=1.0.0,<2.0.0', + 'elasticsearch>=2.0.0,<3.0.0', 'pysolr>=3.3.2', 'whoosh==2.5.4', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py new file mode 100644 index 000000000..8433081cd --- /dev/null +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +import warnings + +from django.conf import settings + +from ..utils import unittest + +warnings.simplefilter('ignore', Warning) + + +def setup(): + try: + from elasticsearch import Elasticsearch, ElasticsearchException + except ImportError: + raise unittest.SkipTest("elasticsearch-py not installed.") + + url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + es = Elasticsearch(url) + try: + es.info() + except ElasticsearchException as e: + raise unittest.SkipTest("elasticsearch not running on %r" % url, e) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py new file mode 100644 index 000000000..0ba061eda --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -0,0 +1,1500 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime +import logging as std_logging +import operator +from decimal import Decimal + +import elasticsearch +from django.conf import settings +from django.test import TestCase +from django.test.utils import override_settings + +from haystack import connections, indexes, reset_search_queries +from haystack.exceptions import SkipDocument +from haystack.inputs import AutoQuery +from haystack.models import SearchResult +from haystack.query import RelatedSearchQuerySet, SearchQuerySet, SQ +from haystack.utils import log as logging +from haystack.utils.geo import Point +from haystack.utils.loading import UnifiedIndex +from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel +from ..mocks import MockSearchResult +from ..utils import unittest + +test_pickling = True + +try: + import 
cPickle as pickle +except ImportError: + try: + import pickle + except ImportError: + test_pickling = False + + +def clear_elasticsearch_index(): + # Wipe it clean. + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + try: + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + raw_es.indices.refresh() + except elasticsearch.TransportError: + pass + + # Since we've just completely deleted the index, we'll reset setup_complete so the next access will + # correctly define the mappings: + connections['elasticsearch2'].get_backend().setup_complete = False + + +class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2MockSearchIndexWithSkipDocument(Elasticsearch2MockSearchIndex): + def prepare_text(self, obj): + if obj.author == 'daniel3': + raise SkipDocument + return u"Indexed!\n%s" % obj.id + + +class Elasticsearch2MockSpellingIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + def prepare_text(self, obj): + return obj.foo + + +class Elasticsearch2MaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + month = indexes.CharField(indexed=False) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def prepare_month(self, obj): + return "%02d" % obj.pub_date.month + + def get_model(self): + return MockModel + + +class Elasticsearch2MockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='foo', document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2AnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AnotherMockModel + + def prepare_text(self, obj): + return u"You might be searching for the user %s" % obj.author + + +class Elasticsearch2BoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField( + document=True, use_template=True, + template_name='search/indexes/core/mockmodel_template.txt' + ) + author = indexes.CharField(model_attr='author', weight=2.0) + editor = indexes.CharField(model_attr='editor') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AFourthMockModel + + def prepare(self, obj): + data = super(Elasticsearch2BoostMockSearchIndex, self).prepare(obj) + + if obj.pk == 4: + data['boost'] = 5.0 + + return data + + +class Elasticsearch2FacetingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + author = indexes.CharField(model_attr='author', faceted=True) + editor = indexes.CharField(model_attr='editor', faceted=True) + pub_date = indexes.DateField(model_attr='pub_date', faceted=True) + facet_field = indexes.FacetCharField(model_attr='author') + + def 
prepare_text(self, obj): + return '%s %s' % (obj.author, obj.editor) + + def get_model(self): + return AFourthMockModel + + +class Elasticsearch2RoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, default='') + name = indexes.CharField() + is_active = indexes.BooleanField() + post_count = indexes.IntegerField() + average_rating = indexes.FloatField() + price = indexes.DecimalField() + pub_date = indexes.DateField() + created = indexes.DateTimeField() + tags = indexes.MultiValueField() + sites = indexes.MultiValueField() + + def get_model(self): + return MockModel + + def prepare(self, obj): + prepped = super(Elasticsearch2RoundTripSearchIndex, self).prepare(obj) + prepped.update({ + 'text': 'This is some example text.', + 'name': 'Mister Pants', + 'is_active': True, + 'post_count': 25, + 'average_rating': 3.6, + 'price': Decimal('24.99'), + 'pub_date': datetime.date(2009, 11, 21), + 'created': datetime.datetime(2009, 11, 21, 21, 31, 00), + 'tags': ['staff', 'outdoor', 'activist', 'scientist'], + 'sites': [3, 5, 1], + }) + return prepped + + +class Elasticsearch2ComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, default='') + name = indexes.CharField(faceted=True) + is_active = indexes.BooleanField(faceted=True) + post_count = indexes.IntegerField() + post_count_i = indexes.FacetIntegerField(facet_for='post_count') + average_rating = indexes.FloatField(faceted=True) + pub_date = indexes.DateField(faceted=True) + created = indexes.DateTimeField(faceted=True) + sites = indexes.MultiValueField(faceted=True) + + def get_model(self): + return MockModel + + +class Elasticsearch2AutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='foo', document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + text_auto = indexes.EdgeNgramField(model_attr='foo') + name_auto = indexes.EdgeNgramField(model_attr='author') + + def get_model(self): + return MockModel + + +class Elasticsearch2SpatialSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='name', document=True) + location = indexes.LocationField() + + def prepare_location(self, obj): + return "%s,%s" % (obj.lat, obj.lon) + + def get_model(self): + return ASixthMockModel + + +class TestSettings(TestCase): + def test_kwargs_are_passed_on(self): + from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend + backend = ElasticsearchSearchBackend('alias', **{ + 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'], + 'INDEX_NAME': 'testing', + 'KWARGS': {'max_retries': 42} + }) + + self.assertEqual(backend.conn.transport.max_retries, 42) + + +class Elasticsearch2SearchBackendTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchBackendTestCase, self).setUp() + + # Wipe it clean. + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + clear_elasticsearch_index() + + # Stow. 
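+        # Hold on to the current UnifiedIndex so tearDown() can restore it
+        # once we've swapped in the mock indexes below.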
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() + self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2SearchBackendTestCase, self).tearDown() + self.sb.silently_fail = True + + def raw_search(self, query): + try: + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + except elasticsearch.TransportError: + return {} + + def test_non_silent(self): + bad_sb = connections['elasticsearch2'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) + + try: + bad_sb.update(self.smmi, self.sample_objs) + self.fail() + except: + pass + + try: + bad_sb.remove('core.mockmodel.1') + self.fail() + except: + pass + + try: + bad_sb.clear() + self.fail() + except: + pass + + try: + bad_sb.search('foo') + self.fail() + except: + pass + + def test_update_no_documents(self): + url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME'] + + sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + self.assertEqual(sb.update(self.smmi, []), None) + + sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) + try: + sb.update(self.smmi, []) + self.fail() + except: + pass + + def test_update(self): + self.sb.update(self.smmi, self.sample_objs) + + # Check what Elasticsearch thinks is there. + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + self.assertEqual( + sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=lambda x: x['id']), [ + { + 'django_id': '1', + 'django_ct': 'core.mockmodel', + 'name': 'daniel1', + 'name_exact': 'daniel1', + 'text': 'Indexed!\n1', + 'pub_date': '2009-02-24T00:00:00', + 'id': 'core.mockmodel.1' + }, + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_update_with_SkipDocument_raised(self): + self.sb.update(self.smmidni, self.sample_objs) + + # Check what Elasticsearch thinks is there. 
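+        # Only two of the three sample objects should make it in, since
+        # prepare_text() raises SkipDocument for author 'daniel3'.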
+ res = self.raw_search('*:*')['hits'] + self.assertEqual(res['total'], 2) + self.assertListEqual( + sorted([x['_source']['id'] for x in res['hits']]), + ['core.mockmodel.1', 'core.mockmodel.2'] + ) + + def test_remove(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 2) + self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], + key=operator.itemgetter('django_id')), [ + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_remove_succeeds_on_404(self): + self.sb.silently_fail = False + self.sb.remove('core.mockmodel.421') + + def test_clear(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear() + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel, MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + def test_search(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3'])) + + self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) + self.assertEqual( + sorted([result.highlighted[0] for result in self.sb.search('Index', highlight=True)['results']]), + [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) + + self.assertEqual(self.sb.search('Indx')['hits'], 0) + self.assertEqual(self.sb.search('indaxed')['spelling_suggestion'], 'indexed') + self.assertEqual(self.sb.search('arf', spelling_query='indexyd')['spelling_suggestion'], 'indexed') + + self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', facets={'name': {}}) + self.assertEqual(results['hits'], 3) + self.assertSetEqual( + set(results['facets']['fields']['name']), + set([('daniel3', 1), ('daniel2', 1), ('daniel1', 1)]) + ) + + self.assertEqual(self.sb.search('', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 
'gap_by': 'month', 'gap_amount': 1}}) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['dates']['pub_date'], [(datetime.datetime(2009, 2, 1, 0, 0), 3)]) + + self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) + results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['queries'], {u'name': 3}) + + self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) + results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) + self.assertEqual(results['hits'], 1) + + # Ensure that swapping the ``result_class`` works. + self.assertTrue( + isinstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult)) + + # Check the use of ``limit_to_registered_models``. + self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) + self.assertEqual( + sorted([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']]), + ['1', '2', '3']) + + # Stow. + old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(sorted([result.pk for result in self.sb.search('*:*')['results']]), ['1', '2', '3']) + + # Restore. + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models + + def test_spatial_search_parameters(self): + p1 = Point(1.23, 4.56) + kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, + sort_by=(('distance', 'desc'),)) + + self.assertIn('sort', kwargs) + self.assertEqual(1, len(kwargs['sort'])) + geo_d = kwargs['sort'][0]['_geo_distance'] + + # ElasticSearch supports the GeoJSON-style lng, lat pairs so unlike Solr the values should be + # in the same order as we used to create the Point(): + # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-filter.html#_lat_lon_as_array_4 + + self.assertDictEqual(geo_d, {'location': [1.23, 4.56], 'unit': 'km', 'order': 'desc'}) + + def test_more_like_this(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + # A functional MLT example with enough data to work is below. Rely on + # this to ensure the API is correct enough. 
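+        # With just three small documents indexed there isn't enough term
+        # overlap for MLT to find anything similar, hence the zero hits.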
+ self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) + self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) + + def test_build_schema(self): + old_ui = connections['elasticsearch2'].get_unified_index() + + (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 4 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date': {'type': 'date'}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'} + }) + + ui = UnifiedIndex() + ui.build(indexes=[Elasticsearch2ComplexFacetsMockSearchIndex()]) + (content_field_name, mapping) = self.sb.build_schema(ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 15 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'is_active_exact': {'type': 'boolean'}, + 'created': {'type': 'date'}, + 'post_count': {'type': 'long'}, + 'created_exact': {'type': 'date'}, + 'sites_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'is_active': {'type': 'boolean'}, + 'sites': {'type': 'string', 'analyzer': 'snowball'}, + 'post_count_i': {'type': 'long'}, + 'average_rating': {'type': 'float'}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date_exact': {'type': 'date'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'pub_date': {'type': 'date'}, + 'average_rating_exact': {'type': 'float'} + }) + + def test_verify_type(self): + old_ui = connections['elasticsearch2'].get_unified_index() + ui = UnifiedIndex() + smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + ui.build(indexes=[smtmmi]) + connections['elasticsearch2']._index = ui + sb = connections['elasticsearch2'].get_backend() + sb.update(smtmmi, self.sample_objs) + + self.assertEqual(sb.search('*:*')['hits'], 3) + self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) + connections['elasticsearch2']._index = old_ui + + +class CaptureHandler(std_logging.Handler): + logs_seen = [] + + def emit(self, record): + CaptureHandler.logs_seen.append(record) + + +class FailedElasticsearch2SearchBackendTestCase(TestCase): + def setUp(self): + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + # Stow. + # Point the backend at a URL that doesn't exist so we can watch the + # sparks fly. + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = "%s/foo/" % self.old_es_url + self.cap = CaptureHandler() + logging.getLogger('haystack').addHandler(self.cap) + import haystack + logging.getLogger('haystack').removeHandler(haystack.stream) + + # Setup the rest of the bits. 
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = ui + self.sb = connections['elasticsearch2'].get_backend() + + def tearDown(self): + import haystack + # Restore. + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = self.old_es_url + connections['elasticsearch2']._index = self.old_ui + logging.getLogger('haystack').removeHandler(self.cap) + logging.getLogger('haystack').addHandler(haystack.stream) + + @unittest.expectedFailure + def test_all_cases(self): + # Prior to the addition of the try/except bits, these would all fail miserably. + self.assertEqual(len(CaptureHandler.logs_seen), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(len(CaptureHandler.logs_seen), 1) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 2) + + self.sb.search('search') + self.assertEqual(len(CaptureHandler.logs_seen), 3) + + self.sb.more_like_this(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 4) + + self.sb.clear([MockModel]) + self.assertEqual(len(CaptureHandler.logs_seen), 5) + + self.sb.clear() + self.assertEqual(len(CaptureHandler.logs_seen), 6) + + +class LiveElasticsearch2SearchQueryTestCase(TestCase): + fixtures = ['initial_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQueryTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + self.sq = connections['elasticsearch2'].get_query() + + # Force indexing of the content. + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() + + def test_log_query(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + + with self.settings(DEBUG=False): + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + + with self.settings(DEBUG=True): + # Redefine it to clear out the cached results. + self.sq = connections['elasticsearch2'].query(using='elasticsearch2') + self.sq.add_filter(SQ(name='bar')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + 'name:(bar)') + + # And again, for good measure. + self.sq = connections['elasticsearch2'].query('elasticsearch2') + self.sq.add_filter(SQ(name='bar')) + self.sq.add_filter(SQ(text='moof')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + 'name:(bar)') + self.assertEqual(connections['elasticsearch2'].queries[1]['query_string'], + u'(name:(bar) AND text:(moof))') + + +lssqstc_all_loaded = None + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SearchQuerySetTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() + + # Stow. 
+        self.old_ui = connections['elasticsearch2'].get_unified_index()
+        self.ui = UnifiedIndex()
+        self.smmi = Elasticsearch2MockSearchIndex()
+        self.ui.build(indexes=[self.smmi])
+        connections['elasticsearch2']._index = self.ui
+
+        self.sqs = SearchQuerySet('elasticsearch2')
+        self.rsqs = RelatedSearchQuerySet('elasticsearch2')
+
+        # Ugly, but not constantly reindexing saves us almost 50% runtime.
+        global lssqstc_all_loaded
+
+        if lssqstc_all_loaded is None:
+            lssqstc_all_loaded = True
+
+            # Wipe it clean.
+            clear_elasticsearch_index()
+
+            # Force indexing of the content.
+            self.smmi.update(using='elasticsearch2')
+
+    def tearDown(self):
+        # Restore.
+        connections['elasticsearch2']._index = self.old_ui
+        super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown()
+
+    def test_load_all(self):
+        sqs = self.sqs.order_by('pub_date').load_all()
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertTrue(len(sqs) > 0)
+        self.assertEqual(sqs[2].object.foo,
+                         u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.')
+
+    def test_iter(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+        sqs = self.sqs.all()
+        results = sorted([int(result.pk) for result in sqs])
+        self.assertEqual(results, list(range(1, 24)))
+        self.assertEqual(len(connections['elasticsearch2'].queries), 3)
+
+    def test_slice(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+        results = self.sqs.all().order_by('pub_date')
+        self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
+        self.assertEqual(len(connections['elasticsearch2'].queries), 1)
+
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+        results = self.sqs.all().order_by('pub_date')
+        self.assertEqual(int(results[21].pk), 22)
+        self.assertEqual(len(connections['elasticsearch2'].queries), 1)
+
+    def test_values_slicing(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+
+        # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends
+
+        # The values will come back as strings because Haystack doesn't assume PKs are integers.
+ # We'll prepare this set once since we're going to query the same results in multiple ways: + expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] + + results = self.sqs.all().order_by('pub_date').values('pk') + self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk') + self.assertListEqual([i[0] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) + self.assertListEqual(results[1:11], expected_pks) + + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test_count(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + sqs = self.sqs.all() + self.assertEqual(sqs.count(), 23) + self.assertEqual(sqs.count(), 23) + self.assertEqual(len(sqs), 23) + self.assertEqual(sqs.count(), 23) + # Should only execute one query to count the length of the result set. + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + + def test_manual_iter(self): + results = self.sqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = set([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, + set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.sqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + + def test_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(self.sqs._cache_is_full(), False) + results = self.sqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test___and__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 & sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') + + # Now for something more complex... + sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) + sqs4 = self.sqs.filter(content='bar') + sqs = sqs3 & sqs4 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 3) + self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') + + def test___or__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 | sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') + + # Now for something more complex... 
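+        # (``|`` merges both querysets' filters into a single OR'd SQ tree; the
+        # build_query() assertion below makes the resulting grouping visible.)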
+ sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) + sqs4 = self.sqs.filter(content='bar').models(MockModel) + sqs = sqs3 | sqs4 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))') + + def test_auto_query(self): + # Ensure bits in exact matches get escaped properly as well. + # This will break horrifically if escaping isn't working. + sqs = self.sqs.auto_query('"pants:rule"') + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(repr(sqs.query.query_filter), '') + self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")') + self.assertEqual(len(sqs), 0) + + # Regressions + + def test_regression_proper_start_offsets(self): + sqs = self.sqs.filter(text='index') + self.assertNotEqual(sqs.count(), 0) + + id_counts = {} + + for item in sqs: + if item.id in id_counts: + id_counts[item.id] += 1 + else: + id_counts[item.id] = 1 + + for key, value in id_counts.items(): + if value > 1: + self.fail("Result with id '%s' seen more than once in the results." % key) + + def test_regression_raw_search_breaks_slicing(self): + sqs = self.sqs.raw_search('text:index') + page_1 = [result.pk for result in sqs[0:10]] + page_2 = [result.pk for result in sqs[10:20]] + + for pk in page_2: + if pk in page_1: + self.fail("Result with id '%s' seen more than once in the results." % pk) + + # RelatedSearchQuerySet Tests + + def test_related_load_all(self): + sqs = self.rsqs.order_by('pub_date').load_all() + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertTrue(len(sqs) > 0) + self.assertEqual(sqs[2].object.foo, + u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. 
You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') + + def test_related_load_all_queryset(self): + sqs = self.rsqs.load_all().order_by('pub_date') + self.assertEqual(len(sqs._load_all_querysets), 0) + + sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1)) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs._load_all_querysets), 1) + self.assertEqual(sorted([obj.object.id for obj in sqs]), list(range(2, 24))) + + sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs._load_all_querysets), 1) + self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20])) + self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23])) + + def test_related_iter(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + sqs = self.rsqs.all() + results = set([int(result.pk) for result in sqs]) + self.assertEqual(results, + set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_slice(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual(int(results[21].pk), 22) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_manual_iter(self): + results = self.rsqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = sorted([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, list(range(1, 24))) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + + def test_related_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(self.rsqs._cache_is_full(), False) + results = self.rsqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + 
self.assertEqual(len(connections['elasticsearch2'].queries), 5) + + def test_quotes_regression(self): + sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") + # Should not have empty terms. + self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)") + # Should not cause Elasticsearch to 500. + self.assertEqual(sqs.count(), 0) + + sqs = self.sqs.auto_query('blazing') + self.assertEqual(sqs.query.build_query(), u'(blazing)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('blazing saddles') + self.assertEqual(sqs.query.build_query(), u'(blazing saddles)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles') + self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles"') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles" mel') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles" mel brooks') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles" brooks') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles" "brooks') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" \\"brooks)') + self.assertEqual(sqs.count(), 0) + + def test_query_generation(self): + sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) + self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") + + def test_result_class(self): + # Assert that we're defaulting to ``SearchResult``. + sqs = self.sqs.all() + self.assertTrue(isinstance(sqs[0], SearchResult)) + + # Custom class. + sqs = self.sqs.result_class(MockSearchResult).all() + self.assertTrue(isinstance(sqs[0], MockSearchResult)) + + # Reset to default. + sqs = self.sqs.result_class(None).all() + self.assertTrue(isinstance(sqs[0], SearchResult)) + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SpellingTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SpellingTestCase, self).setUp() + + # Stow. 
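+        # (This case swaps in a spelling-enabled index and rebuilds the schema
+        # below, so suggestion queries can be asserted against the live backend.)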
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSpellingIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch2'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2SpellingTestCase, self).tearDown() + + def test_spelling(self): + self.assertEqual(self.sqs.auto_query('structurd').spelling_suggestion(), 'structured') + self.assertEqual(self.sqs.spelling_suggestion('structurd'), 'structured') + self.assertEqual(self.sqs.auto_query('srchindex instanc').spelling_suggestion(), 'searchindex instance') + self.assertEqual(self.sqs.spelling_suggestion('srchindex instanc'), 'searchindex instance') + + +class LiveElasticsearch2MoreLikeThisTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2MoreLikeThisTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + self.smmi.update(using='elasticsearch2') + self.sammi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() + + def test_more_like_this(self): + mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in mlt] + self.assertEqual(mlt.count(), 11) + self.assertEqual(set(results), set([u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'])) + self.assertEqual(len(results), 10) + + alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) + results = [result.pk for result in alt_mlt] + self.assertEqual(alt_mlt.count(), 9) + self.assertEqual(set(results), set([u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'])) + self.assertEqual(len(results), 9) + + alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in alt_mlt_with_models] + self.assertEqual(alt_mlt_with_models.count(), 10) + self.assertEqual(set(results), set([u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'])) + self.assertEqual(len(results), 10) + + if hasattr(MockModel.objects, 'defer'): + # Make sure MLT works with deferred bits. + mi = MockModel.objects.defer('foo').get(pk=1) + self.assertEqual(mi._deferred, True) + deferred = self.sqs.models(MockModel).more_like_this(mi) + self.assertEqual(deferred.count(), 0) + self.assertEqual([result.pk for result in deferred], []) + self.assertEqual(len([result.pk for result in deferred]), 0) + + # Ensure that swapping the ``result_class`` works. 
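+        # (more_like_this() should honor a custom result_class just as search() does.)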
+ self.assertTrue( + isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0], + MockSearchResult)) + + +class LiveElasticsearch2AutocompleteTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2AutocompleteTestCase, self).setUp() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch2'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() + + def test_build_schema(self): + self.sb = connections['elasticsearch2'].get_backend() + content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + }, + 'text': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'pub_date': { + 'type': 'date' + }, + 'name': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'text_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + } + }) + + def test_autocomplete(self): + autocomplete = self.sqs.autocomplete(text_auto='mod') + self.assertEqual(autocomplete.count(), 16) + self.assertEqual(set([result.pk for result in autocomplete]), set( + ['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'])) + self.assertTrue('mod' in autocomplete[0].text.lower()) + self.assertTrue('mod' in autocomplete[1].text.lower()) + self.assertTrue('mod' in autocomplete[2].text.lower()) + self.assertTrue('mod' in autocomplete[3].text.lower()) + self.assertTrue('mod' in autocomplete[4].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete]), 16) + + # Test multiple words. + autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') + self.assertEqual(autocomplete_2.count(), 13) + self.assertEqual(set([result.pk for result in autocomplete_2]), + set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'])) + map_results = {result.pk: result for result in autocomplete_2} + self.assertTrue('your' in map_results['1'].text.lower()) + self.assertTrue('mod' in map_results['1'].text.lower()) + self.assertTrue('your' in map_results['6'].text.lower()) + self.assertTrue('mod' in map_results['6'].text.lower()) + self.assertTrue('your' in map_results['2'].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete_2]), 13) + + # Test multiple fields. 
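+        # (Each keyword argument targets its own edge n-gram field; autocomplete()
+        # splits the values into words and ANDs all of the resulting terms.)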
+ autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') + self.assertEqual(autocomplete_3.count(), 4) + self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14'])) + self.assertEqual(len([result.pk for result in autocomplete_3]), 4) + + # Test numbers in phrases + autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + + # Test numbers alone + autocomplete_4 = self.sqs.autocomplete(text_auto='867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + + +class LiveElasticsearch2RoundTripTestCase(TestCase): + def setUp(self): + super(LiveElasticsearch2RoundTripTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.srtsi = Elasticsearch2RoundTripSearchIndex() + self.ui.build(indexes=[self.srtsi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + self.sqs = SearchQuerySet('elasticsearch2') + + # Fake indexing. + mock = MockModel() + mock.id = 1 + self.sb.update(self.srtsi, [mock]) + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2RoundTripTestCase, self).tearDown() + + def test_round_trip(self): + results = self.sqs.filter(id='core.mockmodel.1') + + # Sanity check. + self.assertEqual(results.count(), 1) + + # Check the individual fields. + result = results[0] + self.assertEqual(result.id, 'core.mockmodel.1') + self.assertEqual(result.text, 'This is some example text.') + self.assertEqual(result.name, 'Mister Pants') + self.assertEqual(result.is_active, True) + self.assertEqual(result.post_count, 25) + self.assertEqual(result.average_rating, 3.6) + self.assertEqual(result.price, u'24.99') + self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) + self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) + self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) + self.assertEqual(result.sites, [3, 5, 1]) + + +@unittest.skipUnless(test_pickling, 'Skipping pickling tests') +class LiveElasticsearch2PickleTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2PickleTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + self.smmi.update(using='elasticsearch2') + self.sammi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2PickleTestCase, self).tearDown() + + def test_pickling(self): + results = self.sqs.all() + + for res in results: + # Make sure the cache is full. 
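+            # (Simply iterating the SearchQuerySet fills its result cache, so the
+            # pickle round-trip below operates on fully loaded results.)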
+ pass + + in_a_pickle = pickle.dumps(results) + like_a_cuke = pickle.loads(in_a_pickle) + self.assertEqual(len(like_a_cuke), len(results)) + self.assertEqual(like_a_cuke[0].id, results[0].id) + + +class Elasticsearch2BoostBackendTestCase(TestCase): + def setUp(self): + super(Elasticsearch2BoostBackendTestCase, self).setUp() + + # Wipe it clean. + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2BoostMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + self.sample_objs = [] + + for i in range(1, 5): + mock = AFourthMockModel() + mock.id = i + + if i % 2: + mock.author = 'daniel' + mock.editor = 'david' + else: + mock.author = 'david' + mock.editor = 'daniel' + + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2BoostBackendTestCase, self).tearDown() + + def raw_search(self, query): + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + + def test_boost(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) + + results = SearchQuerySet(using='elasticsearch2').filter(SQ(author='daniel') | SQ(editor='daniel')) + + self.assertEqual(set([result.id for result in results]), set([ + 'core.afourthmockmodel.4', + 'core.afourthmockmodel.3', + 'core.afourthmockmodel.1', + 'core.afourthmockmodel.2' + ])) + + def test__to_python(self): + self.assertEqual(self.sb._to_python('abc'), 'abc') + self.assertEqual(self.sb._to_python('1'), 1) + self.assertEqual(self.sb._to_python('2653'), 2653) + self.assertEqual(self.sb._to_python('25.5'), 25.5) + self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3]) + self.assertEqual(self.sb._to_python('{"a": 1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2}) + self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime.datetime(2009, 5, 9, 16, 14)) + self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime.datetime(2009, 5, 9, 0, 0)) + self.assertEqual(self.sb._to_python(None), None) + + +class RecreateIndexTestCase(TestCase): + def setUp(self): + self.raw_es = elasticsearch.Elasticsearch( + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + + def test_recreate_index(self): + clear_elasticsearch_index() + + sb = connections['elasticsearch2'].get_backend() + sb.silently_fail = True + sb.setup() + + original_mapping = self.raw_es.indices.get_mapping(index=sb.index_name) + + sb.clear() + sb.setup() + + try: + updated_mapping = self.raw_es.indices.get_mapping(sb.index_name) + except elasticsearch.NotFoundError: + self.fail("There is no mapping after recreating the index") + + self.assertEqual(original_mapping, updated_mapping, + "Mapping after recreating the index differs from the original one") + + +class Elasticsearch2FacetingTestCase(TestCase): + def setUp(self): + super(Elasticsearch2FacetingTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. 
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2FacetingMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 10): + mock = AFourthMockModel() + mock.id = i + if i > 5: + mock.editor = 'George Taylor' + else: + mock.editor = 'Perry White' + if i % 2: + mock.author = 'Daniel Lindsley' + else: + mock.author = 'Dan Watson' + mock.pub_date = datetime.date(2013, 9, (i % 4) + 1) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2FacetingTestCase, self).tearDown() + + def test_facet(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 5), + ('Dan Watson', 4), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ('George Taylor', 4), + ]) + counts = SearchQuerySet('elasticsearch2').filter(content='white').facet('facet_field', + order='reverse_count').facet_counts() + self.assertEqual(counts['fields']['facet_field'], [ + ('Dan Watson', 2), + ('Daniel Lindsley', 3), + ]) + + def test_multiple_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').narrow('editor_exact:"Perry White"').narrow( + 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ]) + + def test_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').narrow( + 'editor_exact:"Perry White"').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ('Dan Watson', 2), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ]) + + def test_date_facet(self): + self.sb.update(self.smmi, self.sample_objs) + start = datetime.date(2013, 9, 1) + end = datetime.date(2013, 9, 30) + # Facet by day + counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 2), + (datetime.datetime(2013, 9, 2), 3), + (datetime.datetime(2013, 9, 3), 2), + (datetime.datetime(2013, 9, 4), 2), + ]) + # By month + counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 9), + ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py new file mode 100644 index 000000000..777334fb2 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_inputs.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, division, print_function, unicode_literals + +from django.test import TestCase + +from haystack import connections, inputs + + +class Elasticsearch2InputTestCase(TestCase): + def setUp(self): + super(Elasticsearch2InputTestCase, self).setUp() + self.query_obj = connections['elasticsearch2'].get_query() + + def 
test_raw_init(self): + raw = inputs.Raw('hello OR there, :you') + self.assertEqual(raw.query_string, 'hello OR there, :you') + self.assertEqual(raw.kwargs, {}) + self.assertEqual(raw.post_process, False) + + raw = inputs.Raw('hello OR there, :you', test='really') + self.assertEqual(raw.query_string, 'hello OR there, :you') + self.assertEqual(raw.kwargs, {'test': 'really'}) + self.assertEqual(raw.post_process, False) + + def test_raw_prepare(self): + raw = inputs.Raw('hello OR there, :you') + self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') + + def test_clean_init(self): + clean = inputs.Clean('hello OR there, :you') + self.assertEqual(clean.query_string, 'hello OR there, :you') + self.assertEqual(clean.post_process, True) + + def test_clean_prepare(self): + clean = inputs.Clean('hello OR there, :you') + self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you') + + def test_exact_init(self): + exact = inputs.Exact('hello OR there, :you') + self.assertEqual(exact.query_string, 'hello OR there, :you') + self.assertEqual(exact.post_process, True) + + def test_exact_prepare(self): + exact = inputs.Exact('hello OR there, :you') + self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') + + exact = inputs.Exact('hello OR there, :you', clean=True) + self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"') + + def test_not_init(self): + not_it = inputs.Not('hello OR there, :you') + self.assertEqual(not_it.query_string, 'hello OR there, :you') + self.assertEqual(not_it.post_process, True) + + def test_not_prepare(self): + not_it = inputs.Not('hello OR there, :you') + self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)') + + def test_autoquery_init(self): + autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') + self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') + self.assertEqual(autoquery.post_process, False) + + def test_autoquery_prepare(self): + autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') + self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') + + def test_altparser_init(self): + altparser = inputs.AltParser('dismax') + self.assertEqual(altparser.parser_name, 'dismax') + self.assertEqual(altparser.query_string, '') + self.assertEqual(altparser.kwargs, {}) + self.assertEqual(altparser.post_process, False) + + altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) + self.assertEqual(altparser.parser_name, 'dismax') + self.assertEqual(altparser.query_string, 'douglas adams') + self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) + self.assertEqual(altparser.post_process, False) + + def test_altparser_prepare(self): + altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) + self.assertEqual(altparser.prepare(self.query_obj), + u"""{!dismax mm=1 qf=author v='douglas adams'}""") diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py new file mode 100644 index 000000000..968180686 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime + +import elasticsearch +from django.test import TestCase + +from haystack import connections +from haystack.inputs import Exact +from haystack.models import SearchResult +from haystack.query import SearchQuerySet, SQ +from 
haystack.utils.geo import D, Point +from ..core.models import AnotherMockModel, MockModel + + +class Elasticsearch2SearchQueryTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQueryTestCase, self).setUp() + self.sq = connections['elasticsearch2'].get_query() + + def test_build_query_all(self): + self.assertEqual(self.sq.build_query(), '*:*') + + def test_build_query_single_word(self): + self.sq.add_filter(SQ(content='hello')) + self.assertEqual(self.sq.build_query(), '(hello)') + + def test_build_query_boolean(self): + self.sq.add_filter(SQ(content=True)) + self.assertEqual(self.sq.build_query(), '(True)') + + def test_regression_slash_search(self): + self.sq.add_filter(SQ(content='hello/')) + self.assertEqual(self.sq.build_query(), '(hello\\/)') + + def test_build_query_datetime(self): + self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28))) + self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00)') + + def test_build_query_multiple_words_and(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_filter(SQ(content='world')) + self.assertEqual(self.sq.build_query(), '((hello) AND (world))') + + def test_build_query_multiple_words_not(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))') + + def test_build_query_multiple_words_or(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(SQ(content='hello'), use_or=True) + self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))') + + def test_build_query_multiple_words_mixed(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(content='hello'), use_or=True) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') + + def test_build_query_phrase(self): + self.sq.add_filter(SQ(content='hello world')) + self.assertEqual(self.sq.build_query(), '(hello AND world)') + + self.sq.add_filter(SQ(content__exact='hello world')) + self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') + + def test_build_query_boost(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_boost('world', 5) + self.assertEqual(self.sq.build_query(), "(hello) world^5") + + def test_build_query_multiple_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) + self.sq.add_filter(SQ(author__gt='daniel')) + self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) + self.sq.add_filter(SQ(title__gte='B')) + self.sq.add_filter(SQ(id__in=[1, 2, 3])) + self.sq.add_filter(SQ(rating__range=[3, 5])) + self.assertEqual(self.sq.build_query(), + u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') + + def test_build_query_multiple_filter_types_with_datetimes(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0))) + self.sq.add_filter(SQ(author__gt='daniel')) + self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0))) + self.sq.add_filter(SQ(title__gte='B')) + self.sq.add_filter(SQ(id__in=[1, 2, 3])) + self.sq.add_filter(SQ(rating__range=[3, 5])) + self.assertEqual(self.sq.build_query(), + u'((why) AND pub_date:([* TO "2009-02-10T01:59:00"]) AND author:({"daniel" 
TO *}) AND created:({* TO "2009-02-12T12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') + + def test_build_query_in_filter_multiple_words(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))') + + def test_build_query_in_filter_datetime(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) + self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21"))') + + def test_build_query_in_with_set(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) + self.assertTrue('((why) AND title:(' in self.sq.build_query()) + self.assertTrue('"A Famous Paper"' in self.sq.build_query()) + self.assertTrue('"An Infamous Article"' in self.sq.build_query()) + + def test_build_query_wildcard_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__startswith='haystack')) + self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') + + def test_build_query_fuzzy_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__fuzzy='haystack')) + self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') + + def test_clean(self): + self.assertEqual(self.sq.clean('hello world'), 'hello world') + self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') + self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'), + 'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world') + self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), + 'so please NOTe i am in a bAND and bORed') + + def test_build_query_with_models(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_model(MockModel) + self.assertEqual(self.sq.build_query(), '(hello)') + + self.sq.add_model(AnotherMockModel) + self.assertEqual(self.sq.build_query(), u'(hello)') + + def test_set_result_class(self): + # Assert that we're defaulting to ``SearchResult``. + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + # Custom class. + class IttyBittyResult(object): + pass + + self.sq.set_result_class(IttyBittyResult) + self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) + + # Reset to default. 
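+        # (Passing None falls back to the stock SearchResult class.)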
+ self.sq.set_result_class(None) + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + def test_in_filter_values_list(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=[1, 2, 3])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') + + def test_narrow_sq(self): + sqs = SearchQuerySet(using='elasticsearch2').narrow(SQ(foo='moof')) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.narrow_queries), 1) + self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') + + +class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch2'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (0, 9, 9) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions < 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': 0.5, 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) + + +class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch2'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (1, 0, 0) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions >= 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': "0.500000km", 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) diff --git a/test_haystack/mocks.py b/test_haystack/mocks.py index 537af986a..3dad617fe 100644 --- a/test_haystack/mocks.py +++ b/test_haystack/mocks.py @@ -2,7 +2,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from django.db.models.loading import get_model +from django.apps import apps +get_model = apps.get_model from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query from haystack.models import SearchResult diff --git a/test_haystack/settings.py b/test_haystack/settings.py index bfd25f26d..334e64951 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -67,6 +67,12 @@ 'INDEX_NAME': 'test_default', 'INCLUDE_SPELLING': True, }, + 'elasticsearch2': { + 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', + 'URL': 'http://192.168.99.100:9200/', + 'INDEX_NAME': 'test_default', + 'INCLUDE_SPELLING': True, + }, 'simple': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, From f09f47f4867ba38aa5ca5101c0caa9cd83b4b6ec Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Wed, 23 Mar 2016 16:29:00 +0100 Subject: [PATCH 02/51] Elasticsearch 2.x 
support - Fix localhost IP in elasticsearch2 settings
---
 test_haystack/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index 334e64951..be8758d73 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -69,7 +69,7 @@
     },
     'elasticsearch2': {
         'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://192.168.99.100:9200/',
+        'URL': 'http://127.0.0.1:9200/',
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },

From c79f6757e8b4b54304bd92b53502bd5008744166 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:26:30 +0100
Subject: [PATCH 03/51] Launches ES 2.x on Travis - Port to connect 29200

---
 .travis.yml               | 3 +++
 test_haystack/settings.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 6007a6bdb..4ba1b95f3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,9 @@ addons:
 
 before_install:
   - mkdir -p $HOME/download-cache
+  - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
+  - tar zxf elasticsearch-2.2.1.tar.gz
+  - elasticsearch-2.2.1/bin/elasticsearch -Dtransport.tcp.port=29300 -Dhttp.port=29200
 
 install:
   - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi

diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index be8758d73..54cbf500b 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -69,7 +69,7 @@
     },
     'elasticsearch2': {
         'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://127.0.0.1:9200/',
+        'URL': 'http://127.0.0.1:29200/',
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },

From c2b01646da1d853f8d787ffc0913ef726a79f609 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:30:34 +0100
Subject: [PATCH 04/51] Launches ES 2.x on Travis - daemonize ES 2.x

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4ba1b95f3..7f5cb1b7c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,7 @@ before_install:
   - mkdir -p $HOME/download-cache
   - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
   - tar zxf elasticsearch-2.2.1.tar.gz
-  - elasticsearch-2.2.1/bin/elasticsearch -Dtransport.tcp.port=29300 -Dhttp.port=29200
+  - elasticsearch-2.2.1/bin/elasticsearch -d -Dtransport.tcp.port=29300 -Dhttp.port=29200
 
 install:
   - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi

From 546dd7ef1b5ca618261f2aac0ead05ecfa0c11b4 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:39:33 +0100
Subject: [PATCH 05/51] Launches ES 2.x on Travis - commented out ES 1.x
 service

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 7f5cb1b7c..ea0ff3203 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,8 +52,8 @@ matrix:
   - env: DJANGO_VERSION=">=1.9,<1.10"
   - python: "pypy"
 
-services:
-  - elasticsearch
+#services:
+#  - elasticsearch
 
 notifications:
   irc: "irc.freenode.org#haystack"
   email: false

From 8a2f2b69593fd0bce165754ff1a1526bd1b0b8d1 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 18:28:28 +0100
Subject: [PATCH 06/51] Launches ES 2.x on Travis - Fix catching exception on
 skipping tests

---
test_haystack/elasticsearch2_tests/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py index 8433081cd..a456c9829 100644 --- a/test_haystack/elasticsearch2_tests/__init__.py +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -10,7 +10,7 @@ def setup(): try: - from elasticsearch import Elasticsearch, ElasticsearchException + from elasticsearch import Elasticsearch, exceptions except ImportError: raise unittest.SkipTest("elasticsearch-py not installed.") @@ -18,5 +18,5 @@ def setup(): es = Elasticsearch(url) try: es.info() - except ElasticsearchException as e: + except exceptions.ConnectionError as e: raise unittest.SkipTest("elasticsearch not running on %r" % url, e) From c19ca4efc5785b7c036f553494f13fa3c7bf410b Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Wed, 23 Mar 2016 20:33:43 +0100 Subject: [PATCH 07/51] Use haystack_get_model on mocks.py --- test_haystack/mocks.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/test_haystack/mocks.py b/test_haystack/mocks.py index 3dad617fe..4d6cf0f25 100644 --- a/test_haystack/mocks.py +++ b/test_haystack/mocks.py @@ -1,14 +1,11 @@ # encoding: utf-8 - from __future__ import absolute_import, division, print_function, unicode_literals -from django.apps import apps -get_model = apps.get_model - from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query from haystack.models import SearchResult from haystack.routers import BaseRouter from haystack.utils import get_identifier +from haystack.utils.app_loading import haystack_get_model class MockMasterSlaveRouter(BaseRouter): @@ -36,7 +33,7 @@ def for_write(self, **hints): class MockSearchResult(SearchResult): def __init__(self, app_label, model_name, pk, score, **kwargs): super(MockSearchResult, self).__init__(app_label, model_name, pk, score, **kwargs) - self._model = get_model('core', model_name) + self._model = haystack_get_model('core', model_name) MOCK_SEARCH_RESULTS = [MockSearchResult('core', 'MockModel', i, 1 - (i / 100.0)) for i in range(1, 100)] MOCK_INDEX_DATA = {} @@ -79,7 +76,7 @@ def junk_sort(key): for i, result in enumerate(sliced): app_label, model_name, pk = result.split('.') - model = get_model(app_label, model_name) + model = haystack_get_model(app_label, model_name) if model: if model in indexed_models: From 6c956de219d45f481ac2f4d564cca95660325b8e Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 01:59:57 +0100 Subject: [PATCH 08/51] Uses a build matrix to test ES 1.x or ES 2.x --- .travis.yml | 29 +- setup.py | 1 - .../elasticsearch2_tests/__init__.py | 7 +- .../elasticsearch2_tests/test_backend.py | 270 +++++++++--------- .../elasticsearch2_tests/test_inputs.py | 2 +- .../elasticsearch2_tests/test_query.py | 8 +- test_haystack/elasticsearch_tests/__init__.py | 11 +- test_haystack/settings.py | 14 +- tox.ini | 126 ++++++-- 9 files changed, 289 insertions(+), 179 deletions(-) diff --git a/.travis.yml b/.travis.yml index ea0ff3203..2c06c7198 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,17 +26,26 @@ addons: before_install: - mkdir -p $HOME/download-cache - - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz - - tar zxf elasticsearch-2.2.1.tar.gz - - elasticsearch-2.2.1/bin/elasticsearch -d -Dtransport.tcp.port=29300 -Dhttp.port=29200 + - > + if [[ $VERSION_ES == '>=1.0.0,<2.0.0' 
]]; + then + wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.tar.gz + tar zxf elasticsearch-1.7.5.tar.gz + elasticsearch-1.7.5/bin/elasticsearch -d -Dhttp.port=9200 + else + wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz + tar zxf elasticsearch-2.2.1.tar.gz + elasticsearch-2.2.1/bin/elasticsearch -d -Dhttp.port=9200 + fi install: - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi - - pip install requests "Django${DJANGO_VERSION}" + - pip install requests "Django${DJANGO_VERSION}" "elasticsearch${VERSION_ES}" - python setup.py clean build install before_script: - BACKGROUND_SOLR=true test_haystack/solr_tests/server/start-solr-test-server.sh + - sleep 15 script: - python test_haystack/solr_tests/server/wait-for-solr @@ -44,17 +53,17 @@ script: env: matrix: - - DJANGO_VERSION=">=1.8,<1.9" - - DJANGO_VERSION=">=1.9,<1.10" + - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=1.0.0,<2.0.0" + - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=1.0.0,<2.0.0" + - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=2.0.0,<3.0.0" + - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=2.0.0,<3.0.0" matrix: allow_failures: - - env: DJANGO_VERSION=">=1.9,<1.10" + - env: DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=1.0.0,<2.0.0" + - env: DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=2.0.0,<3.0.0" - python: "pypy" -#services: -# - elasticsearch - notifications: irc: "irc.freenode.org#haystack" email: false diff --git a/setup.py b/setup.py index 5c44d97d2..073c37701 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,6 @@ ] tests_require = [ - 'elasticsearch>=2.0.0,<3.0.0', 'pysolr>=3.3.2', 'whoosh==2.5.4', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py index a456c9829..ba6384f46 100644 --- a/test_haystack/elasticsearch2_tests/__init__.py +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -10,11 +10,14 @@ def setup(): try: + import elasticsearch + if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): + raise ImportError from elasticsearch import Elasticsearch, exceptions except ImportError: - raise unittest.SkipTest("elasticsearch-py not installed.") + raise unittest.SkipTest("'elasticsearch>=2.0.0,<3.0.0' not installed.") - url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] es = Elasticsearch(url) try: es.info() diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py index 0ba061eda..cdefaacc7 100644 --- a/test_haystack/elasticsearch2_tests/test_backend.py +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -36,16 +36,16 @@ def clear_elasticsearch_index(): # Wipe it clean. 
- raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) try: - raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) raw_es.indices.refresh() except elasticsearch.TransportError: pass # Since we've just completely deleted the index, we'll reset setup_complete so the next access will # correctly define the mappings: - connections['elasticsearch2'].get_backend().setup_complete = False + connections['elasticsearch'].get_backend().setup_complete = False class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): @@ -217,7 +217,7 @@ class TestSettings(TestCase): def test_kwargs_are_passed_on(self): from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend backend = ElasticsearchSearchBackend('alias', **{ - 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'], + 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], 'INDEX_NAME': 'testing', 'KWARGS': {'max_retries': 42} }) @@ -230,18 +230,18 @@ def setUp(self): super(Elasticsearch2SearchBackendTestCase, self).setUp() # Wipe it clean. - self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. 
self.sb.existing_mapping = {} @@ -257,19 +257,19 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2SearchBackendTestCase, self).tearDown() self.sb.silently_fail = True def raw_search(self, query): try: - return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) except elasticsearch.TransportError: return {} def test_non_silent(self): - bad_sb = connections['elasticsearch2'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', - SILENTLY_FAIL=False, TIMEOUT=1) + bad_sb = connections['elasticsearch'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) try: bad_sb.update(self.smmi, self.sample_objs) @@ -296,14 +296,14 @@ def test_non_silent(self): pass def test_update_no_documents(self): - url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] - index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME'] + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'] - sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) self.assertEqual(sb.update(self.smmi, []), None) - sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, - SILENTLY_FAIL=False) + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) try: sb.update(self.smmi, []) self.fail() @@ -502,7 +502,7 @@ def test_more_like_this(self): self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) def test_build_schema(self): - old_ui = connections['elasticsearch2'].get_unified_index() + old_ui = connections['elasticsearch'].get_unified_index() (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) self.assertEqual(content_field_name, 'text') @@ -542,17 +542,17 @@ def test_build_schema(self): }) def test_verify_type(self): - old_ui = connections['elasticsearch2'].get_unified_index() + old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() ui.build(indexes=[smtmmi]) - connections['elasticsearch2']._index = ui - sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = ui + sb = connections['elasticsearch'].get_backend() sb.update(smtmmi, self.sample_objs) self.assertEqual(sb.search('*:*')['hits'], 3) self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) - connections['elasticsearch2']._index = old_ui + connections['elasticsearch']._index = old_ui class CaptureHandler(std_logging.Handler): @@ -576,26 +576,26 @@ def setUp(self): # Stow. # Point the backend at a URL that doesn't exist so we can watch the # sparks fly. 
- self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = "%s/foo/" % self.old_es_url + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url self.cap = CaptureHandler() logging.getLogger('haystack').addHandler(self.cap) import haystack logging.getLogger('haystack').removeHandler(haystack.stream) # Setup the rest of the bits. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = ui + self.sb = connections['elasticsearch'].get_backend() def tearDown(self): import haystack # Restore. - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = self.old_es_url - connections['elasticsearch2']._index = self.old_ui + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url + connections['elasticsearch']._index = self.old_ui logging.getLogger('haystack').removeHandler(self.cap) logging.getLogger('haystack').addHandler(haystack.stream) @@ -633,47 +633,47 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() - self.sq = connections['elasticsearch2'].get_query() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + self.sq = connections['elasticsearch'].get_query() # Force indexing of the content. - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() def test_log_query(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=False): len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. - self.sq = connections['elasticsearch2'].query(using='elasticsearch2') + self.sq = connections['elasticsearch'].query(using='elasticsearch') self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) - self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + self.assertEqual(len(connections['elasticsearch'].queries), 1) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. 
- self.sq = connections['elasticsearch2'].query('elasticsearch2') + self.sq = connections['elasticsearch'].query('elasticsearch') self.sq.add_filter(SQ(name='bar')) self.sq.add_filter(SQ(text='moof')) len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) - self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + self.assertEqual(len(connections['elasticsearch'].queries), 2) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') - self.assertEqual(connections['elasticsearch2'].queries[1]['query_string'], + self.assertEqual(connections['elasticsearch'].queries[1]['query_string'], u'(name:(bar) AND text:(moof))') @@ -689,14 +689,14 @@ def setUp(self): super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') - self.rsqs = RelatedSearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') + self.rsqs = RelatedSearchQuerySet('elasticsearch') # Ugly but not constantly reindexing saves us almost 50% runtime. global lssqstc_all_loaded @@ -708,11 +708,11 @@ def setUp(self): clear_elasticsearch_index() # Force indexing of the content. - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown() def test_load_all(self): @@ -724,28 +724,28 @@ def test_load_all(self): def test_iter(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() results = sorted([int(result.pk) for result in sqs]) self.assertEqual(results, list(range(1, 24))) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_slice(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_values_slicing(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends @@ -762,50 +762,50 @@ def test_values_slicing(self): results = self.sqs.all().order_by('pub_date').values_list('pk', 
flat=True) self.assertListEqual(results[1:11], expected_pks) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_count(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() self.assertEqual(sqs.count(), 23) self.assertEqual(sqs.count(), 23) self.assertEqual(len(sqs), 23) self.assertEqual(sqs.count(), 23) # Should only execute one query to count the length of the result set. - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_manual_iter(self): results = self.sqs.all() reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all() self.assertEqual(len(results._result_cache), 0) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_cache_is_full(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), False) results = self.sqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test___and__(self): sqs1 = self.sqs.filter(content='foo') @@ -905,62 +905,62 @@ def test_related_load_all_queryset(self): def test_related_iter(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.rsqs.all() results = set([int(result.pk) for result in sqs]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_slice(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in 
results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_manual_iter(self): results = self.rsqs.all() reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = sorted([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, list(range(1, 24))) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_fill_cache(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all() self.assertEqual(len(results._result_cache), 0) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_related_cache_is_full(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.rsqs._cache_is_full(), False) results = self.rsqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) - self.assertEqual(len(connections['elasticsearch2'].queries), 5) + self.assertEqual(len(connections['elasticsearch'].queries), 5) def test_quotes_regression(self): sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") @@ -1036,26 +1036,26 @@ def setUp(self): super(LiveElasticsearch2SpellingTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSpellingIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. 
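# (Re-running setup() pushes the spelling-enabled mapping before any documents are indexed.)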
- self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() self.sb.setup() - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SpellingTestCase, self).tearDown() def test_spelling(self): @@ -1074,21 +1074,21 @@ def setUp(self): # Wipe it clean. clear_elasticsearch_index() - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') - self.smmi.update(using='elasticsearch2') - self.sammi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() def test_more_like_this(self): @@ -1132,30 +1132,30 @@ def setUp(self): super(LiveElasticsearch2AutocompleteTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. - self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() self.sb.setup() - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() def test_build_schema(self): - self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, @@ -1231,14 +1231,14 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.srtsi = Elasticsearch2RoundTripSearchIndex() self.ui.build(indexes=[self.srtsi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Fake indexing. mock = MockModel() @@ -1247,7 +1247,7 @@ def setUp(self): def tearDown(self): # Restore. 
- connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2RoundTripTestCase, self).tearDown() def test_round_trip(self): @@ -1282,21 +1282,21 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') - self.smmi.update(using='elasticsearch2') - self.sammi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2PickleTestCase, self).tearDown() def test_pickling(self): @@ -1317,16 +1317,16 @@ def setUp(self): super(Elasticsearch2BoostBackendTestCase, self).setUp() # Wipe it clean. - self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2BoostMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() self.sample_objs = [] @@ -1345,17 +1345,17 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2BoostBackendTestCase, self).tearDown() def raw_search(self, query): - return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) def test_boost(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) - results = SearchQuerySet(using='elasticsearch2').filter(SQ(author='daniel') | SQ(editor='daniel')) + results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual(set([result.id for result in results]), set([ 'core.afourthmockmodel.4', @@ -1379,12 +1379,12 @@ def test__to_python(self): class RecreateIndexTestCase(TestCase): def setUp(self): self.raw_es = elasticsearch.Elasticsearch( - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) def test_recreate_index(self): clear_elasticsearch_index() - sb = connections['elasticsearch2'].get_backend() + sb = connections['elasticsearch'].get_backend() sb.silently_fail = True sb.setup() @@ -1410,12 +1410,12 @@ def setUp(self): clear_elasticsearch_index() # Stow. 
- self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2FacetingMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. self.sb.existing_mapping = {} @@ -1438,12 +1438,12 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2FacetingTestCase, self).tearDown() def test_facet(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').facet_counts() + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 5), ('Dan Watson', 4), @@ -1452,7 +1452,7 @@ def test_facet(self): ('Perry White', 5), ('George Taylor', 4), ]) - counts = SearchQuerySet('elasticsearch2').filter(content='white').facet('facet_field', + counts = SearchQuerySet('elasticsearch').filter(content='white').facet('facet_field', order='reverse_count').facet_counts() self.assertEqual(counts['fields']['facet_field'], [ ('Dan Watson', 2), @@ -1461,7 +1461,7 @@ def test_facet(self): def test_multiple_narrow(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').narrow('editor_exact:"Perry White"').narrow( + counts = SearchQuerySet('elasticsearch').narrow('editor_exact:"Perry White"').narrow( 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), @@ -1469,7 +1469,7 @@ def test_multiple_narrow(self): def test_narrow(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').narrow( + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').narrow( 'editor_exact:"Perry White"').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), @@ -1484,8 +1484,8 @@ def test_date_facet(self): start = datetime.date(2013, 9, 1) end = datetime.date(2013, 9, 30) # Facet by day - counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, - gap_by='day').facet_counts() + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 2), (datetime.datetime(2013, 9, 2), 3), @@ -1493,8 +1493,8 @@ def test_date_facet(self): (datetime.datetime(2013, 9, 4), 2), ]) # By month - counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, - gap_by='month').facet_counts() + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 9), ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py index 777334fb2..adc87d16d 100644 --- a/test_haystack/elasticsearch2_tests/test_inputs.py +++ b/test_haystack/elasticsearch2_tests/test_inputs.py 
@@ -10,7 +10,7 @@ class Elasticsearch2InputTestCase(TestCase): def setUp(self): super(Elasticsearch2InputTestCase, self).setUp() - self.query_obj = connections['elasticsearch2'].get_query() + self.query_obj = connections['elasticsearch'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py index 968180686..65d3cfef0 100644 --- a/test_haystack/elasticsearch2_tests/test_query.py +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -17,7 +17,7 @@ class Elasticsearch2SearchQueryTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQueryTestCase, self).setUp() - self.sq = connections['elasticsearch2'].get_query() + self.sq = connections['elasticsearch'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*:*') @@ -157,7 +157,7 @@ def test_in_filter_values_list(self): self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') def test_narrow_sq(self): - sqs = SearchQuerySet(using='elasticsearch2').narrow(SQ(foo='moof')) + sqs = SearchQuerySet(using='elasticsearch').narrow(SQ(foo='moof')) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') @@ -166,7 +166,7 @@ def test_narrow_sq(self): class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() - self.backend = connections['elasticsearch2'].get_backend() + self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (0, 9, 9) @@ -189,7 +189,7 @@ def test_build_query_with_dwithin_range(self): class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() - self.backend = connections['elasticsearch2'].get_backend() + self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (1, 0, 0) diff --git a/test_haystack/elasticsearch_tests/__init__.py b/test_haystack/elasticsearch_tests/__init__.py index a4cae9bca..b220d7ef6 100644 --- a/test_haystack/elasticsearch_tests/__init__.py +++ b/test_haystack/elasticsearch_tests/__init__.py @@ -1,5 +1,4 @@ -# encoding: utf-8 - +# -*- coding: utf-8 -*- import warnings from django.conf import settings @@ -8,8 +7,12 @@ warnings.simplefilter('ignore', Warning) + def setup(): try: + import elasticsearch + if not ((1, 0, 0) <= elasticsearch.__version__ < (2, 0, 0)): + raise ImportError from elasticsearch import Elasticsearch, ElasticsearchException except ImportError: raise unittest.SkipTest("elasticsearch-py not installed.") @@ -18,5 +21,5 @@ def setup(): try: es.info() except ElasticsearchException as e: - raise unittest.SkipTest("elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) - + raise unittest.SkipTest( + "elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index 54cbf500b..bec538476 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -67,12 +67,6 @@ 'INDEX_NAME': 'test_default', 'INCLUDE_SPELLING': True, }, - 'elasticsearch2': { - 'ENGINE': 
'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', - 'URL': 'http://127.0.0.1:29200/', - 'INDEX_NAME': 'test_default', - 'INCLUDE_SPELLING': True, - }, 'simple': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, @@ -83,6 +77,14 @@ }, } +if os.getenv('VERSION_ES') == ">=2.0.0,<3.0.0": + HAYSTACK_CONNECTIONS['elasticsearch'] = { + 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', + 'URL': '127.0.0.1:9200/', + 'INDEX_NAME': 'test_default', + 'INCLUDE_SPELLING': True, + } + MIDDLEWARE_CLASSES = ('django.middleware.common.CommonMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', diff --git a/tox.ini b/tox.ini index b63dc9c0f..1c71c9bbc 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,21 @@ [tox] envlist = docs, - py27-django1.8, - py27-django1.9, - py34-django1.8, - py34-django1.9, - py35-django1.8, - py35-django1.9, - pypy-django1.8, - pypy-django1.9, + py27-django1.8-es1.x, + py27-django1.9-es1.x, + py34-django1.8-es1.x, + py34-django1.9-es1.x, + py35-django1.8-es1.x, + py35-django1.9-es1.x, + pypy-django1.8-es1.x, + pypy-django1.9-es1.x, + py27-django1.8-es2.x, + py27-django1.9-es2.x, + py34-django1.8-es2.x, + py34-django1.9-es2.x, + py35-django1.8-es2.x, + py35-django1.9-es2.x, + pypy-django1.8-es2.x, + pypy-django1.9-es2.x, [base] deps = requests @@ -20,54 +28,140 @@ deps = deps = Django>=1.8,<1.9 +[es2.x] +deps = + elasticsearch>=2.0.0,<3.0.0 + +[es1.x] +deps = + elasticsearch>=1.0.0,<2.0.0 + [testenv] commands = python test_haystack/solr_tests/server/wait-for-solr python {toxinidir}/setup.py test -[testenv:pypy-django1.8] +[testenv:pypy-django1.8-es1.x] +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:pypy-django1.9-es1.x] +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py27-django1.8-es1.x] +basepython = python2.7 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py27-django1.9-es1.x] +basepython = python2.7 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py34-django1.8-es1.x] +basepython = python3.4 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py34-django1.9-es1.x] +basepython = python3.4 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py35-django1.8-es1.x] +basepython = python3.5 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py35-django1.9-es1.x] +basepython = python3.5 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:pypy-django1.8-es2.x] +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:pypy-django1.9] +[testenv:pypy-django1.9-es2.x] +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py27-django1.8] +[testenv:py27-django1.8-es2.x] basepython = python2.7 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py27-django1.9] +[testenv:py27-django1.9-es2.x] basepython = python2.7 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py34-django1.8] +[testenv:py34-django1.8-es2.x] basepython = python3.4 +setenv = 
VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py34-django1.9] +[testenv:py34-django1.9-es2.x] basepython = python3.4 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py35-django1.8] +[testenv:py35-django1.8-es2.x] basepython = python3.5 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py35-django1.9] +[testenv:py35-django1.9-es2.x] basepython = python3.5 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} From 4a75130601ef1f0b937beffa2ff0139a89582cd9 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 21:54:30 +0100 Subject: [PATCH 09/51] Uses a build matrix to test ES 1.x or ES 2.x - set literals on tests --- .../elasticsearch2_tests/test_backend.py | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py index cdefaacc7..b321a4eb4 100644 --- a/test_haystack/elasticsearch2_tests/test_backend.py +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -417,7 +417,7 @@ def test_search(self): self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) - self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3'])) + self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), {u'2', u'1', u'3'}) self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) @@ -434,7 +434,7 @@ def test_search(self): self.assertEqual(results['hits'], 3) self.assertSetEqual( set(results['facets']['fields']['name']), - set([('daniel3', 1), ('daniel2', 1), ('daniel1', 1)]) + {('daniel3', 1), ('daniel2', 1), ('daniel1', 1)} ) self.assertEqual(self.sb.search('', date_facets={ @@ -451,8 +451,8 @@ def test_search(self): self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['queries'], {u'name': 3}) - self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) - results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) + self.assertEqual(self.sb.search('', narrow_queries={'name:daniel1'}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', narrow_queries={'name:daniel1'}) self.assertEqual(results['hits'], 1) # Ensure that swapping the ``result_class`` works. 
@@ -782,7 +782,7 @@ def test_manual_iter(self): self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, - set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): @@ -900,8 +900,8 @@ def test_related_load_all_queryset(self): sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) - self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20])) - self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23])) + self.assertEqual(set([obj.object.id for obj in sqs]), {12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20}) + self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), {21, 22, 23}) def test_related_iter(self): reset_search_queries() @@ -909,7 +909,7 @@ def test_related_iter(self): sqs = self.rsqs.all() results = set([int(result.pk) for result in sqs]) self.assertEqual(results, - set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_slice(self): @@ -928,7 +928,7 @@ def test_related_slice(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') - self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) + self.assertEqual(set([int(result.pk) for result in results[20:30]]), {21, 22, 23}) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_manual_iter(self): @@ -1095,19 +1095,19 @@ def test_more_like_this(self): mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) results = [result.pk for result in mlt] self.assertEqual(mlt.count(), 11) - self.assertEqual(set(results), set([u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'])) + self.assertEqual(set(results), {u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'}) self.assertEqual(len(results), 10) alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) results = [result.pk for result in alt_mlt] self.assertEqual(alt_mlt.count(), 9) - self.assertEqual(set(results), set([u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'])) + self.assertEqual(set(results), {u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'}) self.assertEqual(len(results), 9) alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) results = [result.pk for result in alt_mlt_with_models] self.assertEqual(alt_mlt_with_models.count(), 10) - self.assertEqual(set(results), set([u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'])) + self.assertEqual(set(results), {u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'}) self.assertEqual(len(results), 10) if hasattr(MockModel.objects, 'defer'): @@ -1184,8 +1184,8 @@ def test_build_schema(self): def test_autocomplete(self): autocomplete = self.sqs.autocomplete(text_auto='mod') self.assertEqual(autocomplete.count(), 16) - self.assertEqual(set([result.pk for result in autocomplete]), 
set( - ['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'])) + self.assertEqual(set([result.pk for result in autocomplete]), + {'1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'}) self.assertTrue('mod' in autocomplete[0].text.lower()) self.assertTrue('mod' in autocomplete[1].text.lower()) self.assertTrue('mod' in autocomplete[2].text.lower()) @@ -1197,7 +1197,7 @@ def test_autocomplete(self): autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') self.assertEqual(autocomplete_2.count(), 13) self.assertEqual(set([result.pk for result in autocomplete_2]), - set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'])) + {'1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'}) map_results = {result.pk: result for result in autocomplete_2} self.assertTrue('your' in map_results['1'].text.lower()) self.assertTrue('mod' in map_results['1'].text.lower()) @@ -1209,18 +1209,18 @@ def test_autocomplete(self): # Test multiple fields. autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') self.assertEqual(autocomplete_3.count(), 4) - self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14'])) + self.assertEqual(set([result.pk for result in autocomplete_3]), {'12', '1', '22', '14'}) self.assertEqual(len([result.pk for result in autocomplete_3]), 4) # Test numbers in phrases autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') self.assertEqual(autocomplete_4.count(), 1) - self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) # Test numbers alone autocomplete_4 = self.sqs.autocomplete(text_auto='867') self.assertEqual(autocomplete_4.count(), 1) - self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) class LiveElasticsearch2RoundTripTestCase(TestCase): @@ -1357,12 +1357,9 @@ def test_boost(self): results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) - self.assertEqual(set([result.id for result in results]), set([ - 'core.afourthmockmodel.4', - 'core.afourthmockmodel.3', - 'core.afourthmockmodel.1', - 'core.afourthmockmodel.2' - ])) + self.assertEqual(set([result.id for result in results]), + {'core.afourthmockmodel.4', 'core.afourthmockmodel.3', 'core.afourthmockmodel.1', + 'core.afourthmockmodel.2'}) def test__to_python(self): self.assertEqual(self.sb._to_python('abc'), 'abc') From 69d83025cdcc1bc1bfbfc7341730dbf3aed6e38c Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 22:33:27 +0100 Subject: [PATCH 10/51] Uses a build matrix to test ES 1.x or ES 2.x - set literals on tests --- test_haystack/elasticsearch2_tests/test_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py index 65d3cfef0..c66191c59 100644 --- a/test_haystack/elasticsearch2_tests/test_query.py +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -105,7 +105,7 @@ def test_build_query_in_filter_datetime(self): def test_build_query_in_with_set(self): self.sq.add_filter(SQ(content='why')) - self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) + self.sq.add_filter(SQ(title__in={"A Famous Paper", "An Infamous Article"})) self.assertTrue('((why) AND title:(' in 
self.sq.build_query()) self.assertTrue('"A Famous Paper"' in self.sq.build_query()) self.assertTrue('"An Infamous Article"' in self.sq.build_query()) From 76ad88dc1690a0575d7c9a37d533735439cb3e37 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 23:49:50 +0100 Subject: [PATCH 11/51] Adds delay on test_multiprocessing test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing --- test_haystack/solr_tests/test_management_commands.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py index f686d9741..578349112 100644 --- a/test_haystack/solr_tests/test_management_commands.py +++ b/test_haystack/solr_tests/test_management_commands.py @@ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals import datetime +import time from tempfile import mkdtemp import pysolr @@ -160,6 +161,7 @@ def test_multiprocessing(self): # TODO: Watch the output, make sure there are multiple pids. call_command('update_index', verbosity=2, workers=2, batchsize=5) + time.sleep(2) self.assertEqual(self.solr.search('*:*').hits, 23) call_command('clear_index', interactive=False, verbosity=0) From c262e1309bbdfba9f9e4b8768edffa709d8add3e Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Fri, 25 Mar 2016 00:01:39 +0100 Subject: [PATCH 12/51] Revert: Adds delay on test_multiprocessing test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing --- test_haystack/solr_tests/test_management_commands.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py index 578349112..f686d9741 100644 --- a/test_haystack/solr_tests/test_management_commands.py +++ b/test_haystack/solr_tests/test_management_commands.py @@ -3,7 +3,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals import datetime -import time from tempfile import mkdtemp import pysolr @@ -161,7 +160,6 @@ def test_multiprocessing(self): # TODO: Watch the output, make sure there are multiple pids. 
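# (workers=2 fans the update out over a multiprocessing pool; batchsize=5 splits the 23 fixtures into small chunks.)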
call_command('update_index', verbosity=2, workers=2, batchsize=5) - time.sleep(2) self.assertEqual(self.solr.search('*:*').hits, 23) call_command('clear_index', interactive=False, verbosity=0) From 24668f95453268fe2d09848018b3e506996d5a65 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Fri, 25 Mar 2016 12:58:02 +0100 Subject: [PATCH 13/51] Removes pool.join on command update_index --- haystack/management/commands/update_index.py | 1 - 1 file changed, 1 deletion(-) diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py index 9be5288dc..cf4f44559 100755 --- a/haystack/management/commands/update_index.py +++ b/haystack/management/commands/update_index.py @@ -238,7 +238,6 @@ def update_backend(self, label, using): pool = multiprocessing.Pool(self.workers) pool.map(worker, ghetto_queue) pool.close() - pool.join() if self.remove: if self.start_date or self.end_date or total <= 0: From 3b187371f0bd09f0fe498269186077b5395f5d60 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Fri, 25 Mar 2016 13:04:57 +0100 Subject: [PATCH 14/51] Revert: Test multiprocessing with context manager --- haystack/management/commands/update_index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py index cf4f44559..9be5288dc 100755 --- a/haystack/management/commands/update_index.py +++ b/haystack/management/commands/update_index.py @@ -238,6 +238,7 @@ def update_backend(self, label, using): pool = multiprocessing.Pool(self.workers) pool.map(worker, ghetto_queue) pool.close() + pool.join() if self.remove: if self.start_date or self.end_date or total <= 0: From 8bf05b416ba689162c6553e3bb3ac2e94864b0c2 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Wed, 23 Mar 2016 16:18:19 +0100 Subject: [PATCH 15/51] Elasticsearch 2.x support --- haystack/backends/elasticsearch2_backend.py | 1045 ++++++++++++ setup.py | 2 +- .../elasticsearch2_tests/__init__.py | 22 + .../elasticsearch2_tests/test_backend.py | 1500 +++++++++++++++++ .../elasticsearch2_tests/test_inputs.py | 85 + .../elasticsearch2_tests/test_query.py | 209 +++ test_haystack/settings.py | 6 + 7 files changed, 2868 insertions(+), 1 deletion(-) create mode 100644 haystack/backends/elasticsearch2_backend.py create mode 100644 test_haystack/elasticsearch2_tests/__init__.py create mode 100644 test_haystack/elasticsearch2_tests/test_backend.py create mode 100644 test_haystack/elasticsearch2_tests/test_inputs.py create mode 100644 test_haystack/elasticsearch2_tests/test_query.py diff --git a/haystack/backends/elasticsearch2_backend.py b/haystack/backends/elasticsearch2_backend.py new file mode 100644 index 000000000..4c92d4c03 --- /dev/null +++ b/haystack/backends/elasticsearch2_backend.py @@ -0,0 +1,1045 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime +import re +import warnings + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.utils import six + +import haystack +from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query +from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, FUZZY_MAX_EXPANSIONS, FUZZY_MIN_SIM, ID +from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument +from haystack.inputs import Clean, Exact, PythonData, Raw +from haystack.models import SearchResult +from haystack.utils import get_identifier, 
get_model_ct +from haystack.utils import log as logging +from haystack.utils.app_loading import haystack_get_model + +try: + import elasticsearch + if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): + raise ImportError + from elasticsearch.helpers import bulk, scan + from elasticsearch.exceptions import NotFoundError +except ImportError: + raise MissingDependency("The 'elasticsearch2' backend requires the installation of 'elasticsearch>=2.0.0,<3.0.0'. Please refer to the documentation.") + + +DATETIME_REGEX = re.compile( + r'^(?P\d{4})-(?P\d{2})-(?P\d{2})T' + r'(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d+)?$') + + +class Elasticsearch2SearchBackend(BaseSearchBackend): + # Word reserved by Elasticsearch for special use. + RESERVED_WORDS = ( + 'AND', + 'NOT', + 'OR', + 'TO', + ) + + # Characters reserved by Elasticsearch for special use. + # The '\\' must come first, so as not to overwrite the other slash replacements. + RESERVED_CHARACTERS = ( + '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', + '[', ']', '^', '"', '~', '*', '?', ':', '/', + ) + + # Settings to add an n-gram & edge n-gram analyzer. + DEFAULT_SETTINGS = { + 'settings': { + "analysis": { + "analyzer": { + "ngram_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["haystack_ngram", "lowercase"] + }, + "edgengram_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["haystack_edgengram", "lowercase"] + } + }, + "tokenizer": { + "haystack_ngram_tokenizer": { + "type": "nGram", + "min_gram": 3, + "max_gram": 15, + }, + "haystack_edgengram_tokenizer": { + "type": "edgeNGram", + "min_gram": 2, + "max_gram": 15, + "side": "front" + } + }, + "filter": { + "haystack_ngram": { + "type": "nGram", + "min_gram": 3, + "max_gram": 15 + }, + "haystack_edgengram": { + "type": "edgeNGram", + "min_gram": 2, + "max_gram": 15 + } + } + } + } + } + + def __init__(self, connection_alias, **connection_options): + super(Elasticsearch2SearchBackend, self).__init__(connection_alias, **connection_options) + + if 'URL' not in connection_options: + raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias) + + if 'INDEX_NAME' not in connection_options: + raise ImproperlyConfigured("You must specify a 'INDEX_NAME' in your settings for connection '%s'." % connection_alias) + + self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {})) + self.index_name = connection_options['INDEX_NAME'] + self.log = logging.getLogger('haystack') + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + + def setup(self): + """ + Defers loading until needed. + """ + # Get the existing mapping & cache it. We'll compare it + # during the ``update`` & if it doesn't match, we'll put the new + # mapping. + try: + self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name) + except NotFoundError: + pass + except Exception: + if not self.silently_fail: + raise + + unified_index = haystack.connections[self.connection_alias].get_unified_index() + self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields()) + current_mapping = { + 'modelresult': { + 'properties': field_mapping, + } + } + + if current_mapping != self.existing_mapping: + try: + # Make sure the index is there first. 
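+ # (ignore=400 keeps the client from raising when the index already exists.)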
+ self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400) + self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping) + self.existing_mapping = current_mapping + except Exception: + if not self.silently_fail: + raise + + self.setup_complete = True + + def update(self, index, iterable, commit=True): + """ + Updates the backend when given a SearchIndex and a collection of + documents. + + :param index: The SearchIndex to update. + :param iterable: The collection of documents. + :param commit: True to refresh the search index after the update. + """ + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True) + return + + prepped_docs = [] + + for obj in iterable: + try: + prepped_data = index.full_prepare(obj) + final_data = {} + + # Convert the data to make sure it's happy. + for key, value in prepped_data.items(): + final_data[key] = self._from_python(value) + final_data['_id'] = final_data[ID] + + prepped_docs.append(final_data) + except SkipDocument: + self.log.debug(u"Indexing for object `%s` skipped", obj) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + # We'll log the object identifier but won't include the actual object + # to avoid the possibility of that generating encoding errors while + # processing the log message: + self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, + extra={"data": {"index": index, + "object": get_identifier(obj)}}) + + bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult') + + if commit: + self.conn.indices.refresh(index=self.index_name) + + def remove(self, obj_or_string, commit=True): + """ + Removes a document/object from the backend. Can be either a model + instance or the identifier (i.e. ``app_name.model_name.id``) in the + event the object no longer exists. + + :param obj_or_string: The model instance or the identifier. + :param commit: True to refresh the search index after the remove. + """ + doc_id = get_identifier(obj_or_string) + + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, + exc_info=True) + return + + try: + self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404) + + if commit: + self.conn.indices.refresh(index=self.index_name) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, exc_info=True) + + def clear(self, models=None, commit=True): + """ + Clears the backend of all documents/objects for a collection of models. + + :param models: List or tuple of models to clear. + :param commit: Not used. 
+ """ + if models is not None: + assert isinstance(models, (list, tuple)) + + try: + if models is None: + self.conn.indices.delete(index=self.index_name, ignore=404) + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + else: + models_to_delete = [] + + for model in models: + models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) + + # Delete using scroll API + query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}} + generator = scan(self.conn, query=query, index=self.index_name, doc_type='modelresult') + actions = ({ + '_op_type': 'delete', + '_id': doc['_id'], + } for doc in generator) + bulk(self.conn, actions=actions, index=self.index_name, doc_type='modelresult') + self.conn.indices.refresh(index=self.index_name) + + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + if models is not None: + self.log.error("Failed to clear Elasticsearch index of models '%s': %s", + ','.join(models_to_delete), e, exc_info=True) + else: + self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True) + + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): + index = haystack.connections[self.connection_alias].get_unified_index() + content_field = index.document_field + + if query_string == '*:*': + kwargs = { + 'query': { + "match_all": {} + }, + } + else: + kwargs = { + 'query': { + 'query_string': { + 'default_field': content_field, + 'default_operator': DEFAULT_OPERATOR, + 'query': query_string, + 'analyze_wildcard': True, + 'auto_generate_phrase_queries': True, + 'fuzzy_min_sim': FUZZY_MIN_SIM, + 'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS, + }, + }, + } + + # so far, no filters + filters = [] + + if fields: + if isinstance(fields, (list, set)): + fields = " ".join(fields) + + kwargs['fields'] = fields + + if sort_by is not None: + order_list = [] + for field, direction in sort_by: + if field == 'distance' and distance_point: + # Do the geo-enabled sort. + lng, lat = distance_point['point'].get_coords() + sort_kwargs = { + "_geo_distance": { + distance_point['field']: [lng, lat], + "order": direction, + "unit": "km" + } + } + else: + if field == 'distance': + warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") + + # Regular sorting. + sort_kwargs = {field: {'order': direction}} + + order_list.append(sort_kwargs) + + kwargs['sort'] = order_list + + if start_offset is not None: + kwargs['from'] = start_offset + + if end_offset is not None: + kwargs['size'] = end_offset - start_offset + + if highlight is True: + kwargs['highlight'] = { + 'fields': { + content_field: {'store': 'yes'}, + } + } + + if self.include_spelling: + kwargs['suggest'] = { + 'suggest': { + 'text': spelling_query or query_string, + 'term': { + # Using content_field here will result in suggestions of stemmed words. 
+ 'field': '_all', + }, + }, + } + + if narrow_queries is None: + narrow_queries = set() + + if facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, extra_options in facets.items(): + facet_options = { + 'meta': { + '_type': 'terms', + }, + 'terms': { + 'field': facet_fieldname, + } + } + if 'order' in extra_options: + facet_options['meta']['order'] = extra_options.pop('order') + # Special cases for options applied at the facet level (not the terms level). + if extra_options.pop('global_scope', False): + # Renamed "global_scope" since "global" is a python keyword. + facet_options['global'] = True + if 'facet_filter' in extra_options: + facet_options['facet_filter'] = extra_options.pop('facet_filter') + facet_options['terms'].update(extra_options) + kwargs['aggs'][facet_fieldname] = facet_options + + if date_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in date_facets.items(): + # Need to detect on gap_by & only add amount if it's more than one. + interval = value.get('gap_by').lower() + + # Need to detect on amount (can't be applied on months or years). + if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'): + # Just the first character is valid for use. + interval = "%s%s" % (value['gap_amount'], interval[:1]) + + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'date_histogram', + }, + 'date_histogram': { + 'field': facet_fieldname, + 'interval': interval, + }, + 'aggs': { + facet_fieldname: { + 'date_range': { + 'field': facet_fieldname, + 'ranges': [ + { + 'from': self._from_python(value.get('start_date')), + 'to': self._from_python(value.get('end_date')), + } + ] + } + } + } + } + + if query_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in query_facets: + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'query', + }, + 'filter': { + 'query_string': { + 'query': value, + } + }, + } + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. 
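+ # (build_models_list() returns the django_ct strings for every model this connection indexes.)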
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ filters.append({"terms": {DJANGO_CT: model_choices}})
+
+ for q in narrow_queries:
+ filters.append({
+ 'query_string': {
+ 'query': q
+ }
+ })
+
+ if within is not None:
+ from haystack.utils.geo import generate_bounding_box
+
+ ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
+ within_filter = {
+ "geo_bounding_box": {
+ within['field']: {
+ "top_left": {
+ "lat": north,
+ "lon": west
+ },
+ "bottom_right": {
+ "lat": south,
+ "lon": east
+ }
+ }
+ },
+ }
+ filters.append(within_filter)
+
+ if dwithin is not None:
+ lng, lat = dwithin['point'].get_coords()
+
+ # NB: the 1.0.0 release of elasticsearch introduced an
+ # incompatible change in the distance filter formatting.
+ if elasticsearch.VERSION >= (1, 0, 0):
+ distance = "%(dist).6f%(unit)s" % {
+ 'dist': dwithin['distance'].km,
+ 'unit': "km"
+ }
+ else:
+ distance = dwithin['distance'].km
+
+ dwithin_filter = {
+ "geo_distance": {
+ "distance": distance,
+ dwithin['field']: {
+ "lat": lat,
+ "lon": lng
+ }
+ }
+ }
+ filters.append(dwithin_filter)
+
+ # If there are filters, wrap the query in a filtered query.
+ if filters:
+ kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
+ if len(filters) == 1:
+ kwargs['query']['filtered']["filter"] = filters[0]
+ else:
+ kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}
+
+ return kwargs
+
+ @log_query
+ def search(self, query_string, **kwargs):
+ if len(query_string) == 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ if not self.setup_complete:
+ self.setup()
+
+ search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+ search_kwargs['from'] = kwargs.get('start_offset', 0)
+
+ order_fields = set()
+ for order in search_kwargs.get('sort', []):
+ for key in order.keys():
+ order_fields.add(key)
+
+ geo_sort = '_geo_distance' in order_fields
+
+ end_offset = kwargs.get('end_offset')
+ start_offset = kwargs.get('start_offset', 0)
+ if end_offset is not None and end_offset > start_offset:
+ search_kwargs['size'] = end_offset - start_offset
+
+ try:
+ raw_results = self.conn.search(body=search_kwargs,
+ index=self.index_name,
+ doc_type='modelresult',
+ _source=True)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True)
+ raw_results = {}
+
+ return self._process_results(raw_results,
+ highlight=kwargs.get('highlight'),
+ result_class=kwargs.get('result_class', SearchResult),
+ distance_point=kwargs.get('distance_point'),
+ geo_sort=geo_sort)
+
+ def more_like_this(self, model_instance, additional_query_string=None,
+ start_offset=0, end_offset=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ from haystack import connections
+
+ if not self.setup_complete:
+ self.setup()
+
+ # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+ # which won't be in our registry:
+ model_klass = model_instance._meta.concrete_model
+
+ index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
+ field_name = index.get_content_field()
+ params = {}
+
+ if start_offset is not None:
+ params['from_'] = start_offset
+
+ if end_offset is not None:
+ params['size'] = end_offset - start_offset
+
+ doc_id = get_identifier(model_instance)
+
+ try:
+ # More like this Query
+ #
https://www.elastic.co/guide/en/elasticsearch/reference/2.2/query-dsl-mlt-query.html + mlt_query = { + 'query': { + 'more_like_this': { + 'fields': [field_name], + 'like': [{ + "_id": doc_id + }] + } + } + } + + narrow_queries = [] + + if additional_query_string and additional_query_string != '*:*': + additional_filter = { + "query": { + "query_string": { + "query": additional_query_string + } + } + } + narrow_queries.append(additional_filter) + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. + model_choices = self.build_models_list() + else: + model_choices = [] + + if len(model_choices) > 0: + model_filter = {"terms": {DJANGO_CT: model_choices}} + narrow_queries.append(model_filter) + + if len(narrow_queries) > 0: + mlt_query = { + "query": { + "filtered": { + 'query': mlt_query['query'], + 'filter': { + 'bool': { + 'must': list(narrow_queries) + } + } + } + } + } + + raw_results = self.conn.search( + body=mlt_query, + index=self.index_name, + doc_type='modelresult', + _source=True, **params) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", + doc_id, e, exc_info=True) + raw_results = {} + + return self._process_results(raw_results, result_class=result_class) + + def _process_results(self, raw_results, highlight=False, + result_class=None, distance_point=None, + geo_sort=False): + from haystack import connections + results = [] + hits = raw_results.get('hits', {}).get('total', 0) + facets = {} + spelling_suggestion = None + + if result_class is None: + result_class = SearchResult + + if self.include_spelling and 'suggest' in raw_results: + raw_suggest = raw_results['suggest'].get('suggest') + if raw_suggest: + spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest]) + + if 'aggregations' in raw_results: + facets = { + 'fields': {}, + 'dates': {}, + 'queries': {}, + } + + for facet_fieldname, facet_info in raw_results['aggregations'].items(): + facet_type = facet_info['meta']['_type'] + if facet_type == 'terms': + facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual in facet_info['buckets']] + if 'order' in facet_info['meta']: + if facet_info['meta']['order'] == 'reverse_count': + srt = sorted(facets['fields'][facet_fieldname], key=lambda x: x[1]) + facets['fields'][facet_fieldname] = srt + elif facet_type == 'date_histogram': + # Elasticsearch provides UTC timestamps with an extra three + # decimals of precision, which datetime barfs on. 
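+ # The bucket 'key' is epoch milliseconds; dividing by 1000 yields the seconds utcfromtimestamp() expects.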
+ facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['key'] / 1000), individual['doc_count']) for individual in facet_info['buckets']] + elif facet_type == 'query': + facets['queries'][facet_fieldname] = facet_info['doc_count'] + + unified_index = connections[self.connection_alias].get_unified_index() + indexed_models = unified_index.get_indexed_models() + content_field = unified_index.document_field + + for raw_result in raw_results.get('hits', {}).get('hits', []): + source = raw_result['_source'] + app_label, model_name = source[DJANGO_CT].split('.') + additional_fields = {} + model = haystack_get_model(app_label, model_name) + + if model and model in indexed_models: + for key, value in source.items(): + index = unified_index.get_index(model) + string_key = str(key) + + if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): + additional_fields[string_key] = index.fields[string_key].convert(value) + else: + additional_fields[string_key] = self._to_python(value) + + del(additional_fields[DJANGO_CT]) + del(additional_fields[DJANGO_ID]) + + if 'highlight' in raw_result: + additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '') + + if distance_point: + additional_fields['_point_of_origin'] = distance_point + + if geo_sort and raw_result.get('sort'): + from haystack.utils.geo import Distance + additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0])) + else: + additional_fields['_distance'] = None + + result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields) + results.append(result) + else: + hits -= 1 + + return { + 'results': results, + 'hits': hits, + 'facets': facets, + 'spelling_suggestion': spelling_suggestion, + } + + def build_schema(self, fields): + content_field_name = '' + mapping = { + DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + } + + for field_name, field_class in fields.items(): + field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy() + if field_class.boost != 1.0: + field_mapping['boost'] = field_class.boost + + if field_class.document is True: + content_field_name = field_class.index_fieldname + + # Do this last to override `text` fields. + if field_mapping['type'] == 'string': + if field_class.indexed is False or hasattr(field_class, 'facet_for'): + field_mapping['index'] = 'not_analyzed' + del field_mapping['analyzer'] + + mapping[field_class.index_fieldname] = field_mapping + + return content_field_name, mapping + + def _iso_datetime(self, value): + """ + If value appears to be something datetime-like, return it in ISO format. + + Otherwise, return None. + """ + if hasattr(value, 'strftime'): + if hasattr(value, 'hour'): + return value.isoformat() + else: + return '%sT00:00:00' % value.isoformat() + + def _from_python(self, value): + """Convert more Python data types to ES-understandable JSON.""" + iso = self._iso_datetime(value) + if iso: + return iso + elif isinstance(value, six.binary_type): + # TODO: Be stricter. 
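+ # errors='replace' substitutes U+FFFD for any undecodable bytes instead of raising.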
+ return six.text_type(value, errors='replace')
+ elif isinstance(value, set):
+ return list(value)
+ return value
+
+ def _to_python(self, value):
+ """Convert values from Elasticsearch to native Python values."""
+ if isinstance(value, (int, float, complex, list, tuple, bool)):
+ return value
+
+ if isinstance(value, six.string_types):
+ possible_datetime = DATETIME_REGEX.search(value)
+
+ if possible_datetime:
+ date_values = possible_datetime.groupdict()
+
+ for dk, dv in date_values.items():
+ date_values[dk] = int(dv)
+
+ return datetime.datetime(
+ date_values['year'], date_values['month'],
+ date_values['day'], date_values['hour'],
+ date_values['minute'], date_values['second'])
+
+ try:
+ # This is slightly gross but it's hard to tell otherwise what the
+ # string's original type might have been. Be careful who you trust.
+ converted_value = eval(value)
+
+ # Try to handle most built-in types.
+ if isinstance(
+ converted_value,
+ (int, list, tuple, set, dict, float, complex)):
+ return converted_value
+ except Exception:
+ # If it fails (SyntaxError or its ilk) or we don't trust it,
+ # continue on.
+ pass
+
+ return value
+
+# DRL_FIXME: Perhaps move to something where, if none of these
+# match, call a custom method on the form that returns, per-backend,
+# the right type of storage?
+DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'}
+FIELD_MAPPINGS = {
+ 'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'},
+ 'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'},
+ 'date': {'type': 'date'},
+ 'datetime': {'type': 'date'},
+
+ 'location': {'type': 'geo_point'},
+ 'boolean': {'type': 'boolean'},
+ 'float': {'type': 'float'},
+ 'long': {'type': 'long'},
+ 'integer': {'type': 'long'},
+}
+
+
+# Sucks that this is almost an exact copy of what's in the Solr backend,
+# but we can't import due to dependencies.
+class Elasticsearch2SearchQuery(BaseSearchQuery):
+ def matching_all_fragment(self):
+ return '*:*'
+
+ def build_query_fragment(self, field, filter_type, value):
+ from haystack import connections
+ query_frag = ''
+
+ if not hasattr(value, 'input_type_name'):
+ # Handle when we've got a ``ValuesListQuerySet``...
+ if hasattr(value, 'values_list'):
+ value = list(value)
+
+ if isinstance(value, six.string_types):
+ # It's not an ``InputType``. Assume ``Clean``.
+ value = Clean(value)
+ else:
+ value = PythonData(value)
+
+ # Prepare the query using the InputType.
+ prepared_value = value.prepare(self)
+
+ if not isinstance(prepared_value, (set, list, tuple)):
+ # Then convert whatever we get back to what Elasticsearch wants if needed.
+ prepared_value = self.backend._from_python(prepared_value)
+
+ # 'content' is a special reserved word, much like 'pk' in
+ # Django's ORM layer. It indicates 'no special field'.
+ if field == 'content':
+ index_fieldname = ''
+ else:
+ index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+ filter_types = {
+ 'contains': u'%s',
+ 'startswith': u'%s*',
+ 'exact': u'%s',
+ 'gt': u'{%s TO *}',
+ 'gte': u'[%s TO *]',
+ 'lt': u'{* TO %s}',
+ 'lte': u'[* TO %s]',
+ 'fuzzy': u'%s~',
+ }
+
+ if value.post_process is False:
+ query_frag = prepared_value
+ else:
+ if filter_type in ['contains', 'startswith', 'fuzzy']:
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ # Iterate over terms & incorporate the converted form of each into the query.
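+ # e.g. a 'startswith' filter on 'hello world' renders as (hello* AND world*).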
+ terms = [] + + if isinstance(prepared_value, six.string_types): + for possible_value in prepared_value.split(' '): + terms.append(filter_types[filter_type] % self.backend._from_python(possible_value)) + else: + terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value)) + + if len(terms) == 1: + query_frag = terms[0] + else: + query_frag = u"(%s)" % " AND ".join(terms) + elif filter_type == 'in': + in_options = [] + + for possible_value in prepared_value: + in_options.append(u'"%s"' % self.backend._from_python(possible_value)) + + query_frag = u"(%s)" % " OR ".join(in_options) + elif filter_type == 'range': + start = self.backend._from_python(prepared_value[0]) + end = self.backend._from_python(prepared_value[1]) + query_frag = u'["%s" TO "%s"]' % (start, end) + elif filter_type == 'exact': + if value.input_type_name == 'exact': + query_frag = prepared_value + else: + prepared_value = Exact(prepared_value).prepare(self) + query_frag = filter_types[filter_type] % prepared_value + else: + if value.input_type_name != 'exact': + prepared_value = Exact(prepared_value).prepare(self) + + query_frag = filter_types[filter_type] % prepared_value + + if len(query_frag) and not isinstance(value, Raw): + if not query_frag.startswith('(') and not query_frag.endswith(')'): + query_frag = "(%s)" % query_frag + + return u"%s%s" % (index_fieldname, query_frag) + + def build_alt_parser_query(self, parser_name, query_string='', **kwargs): + if query_string: + kwargs['v'] = query_string + + kwarg_bits = [] + + for key in sorted(kwargs.keys()): + if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]: + kwarg_bits.append(u"%s='%s'" % (key, kwargs[key])) + else: + kwarg_bits.append(u"%s=%s" % (key, kwargs[key])) + + return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits)) + + def build_params(self, spelling_query=None, **kwargs): + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class + } + order_by_list = None + + if self.order_by: + if order_by_list is None: + order_by_list = [] + + for field in self.order_by: + direction = 'asc' + if field.startswith('-'): + direction = 'desc' + field = field[1:] + order_by_list.append((field, direction)) + + search_kwargs['sort_by'] = order_by_list + + if self.date_facets: + search_kwargs['date_facets'] = self.date_facets + + if self.distance_point: + search_kwargs['distance_point'] = self.distance_point + + if self.dwithin: + search_kwargs['dwithin'] = self.dwithin + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset + + if self.facets: + search_kwargs['facets'] = self.facets + + if self.fields: + search_kwargs['fields'] = self.fields + + if self.highlight: + search_kwargs['highlight'] = self.highlight + + if self.models: + search_kwargs['models'] = self.models + + if self.narrow_queries: + search_kwargs['narrow_queries'] = self.narrow_queries + + if self.query_facets: + search_kwargs['query_facets'] = self.query_facets + + if self.within: + search_kwargs['within'] = self.within + + if spelling_query: + search_kwargs['spelling_query'] = spelling_query + + return search_kwargs + + def run(self, spelling_query=None, **kwargs): + """Builds and executes the query. 
Returns a list of search results.""" + final_query = self.build_query() + search_kwargs = self.build_params(spelling_query, **kwargs) + + if kwargs: + search_kwargs.update(kwargs) + + results = self.backend.search(final_query, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + self._facet_counts = self.post_process_facets(results) + self._spelling_suggestion = results.get('spelling_suggestion', None) + + def run_mlt(self, **kwargs): + """Builds and executes the query. Returns a list of search results.""" + if self._more_like_this is False or self._mlt_instance is None: + raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") + + additional_query_string = self.build_query() + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class, + 'models': self.models + } + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset - self.start_offset + + results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + + +class Elasticsearch2SearchEngine(BaseEngine): + backend = Elasticsearch2SearchBackend + query = Elasticsearch2SearchQuery diff --git a/setup.py b/setup.py index c8b77e7c7..5c44d97d2 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ ] tests_require = [ - 'elasticsearch>=1.0.0,<2.0.0', + 'elasticsearch>=2.0.0,<3.0.0', 'pysolr>=3.3.2', 'whoosh==2.5.4', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py new file mode 100644 index 000000000..8433081cd --- /dev/null +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +import warnings + +from django.conf import settings + +from ..utils import unittest + +warnings.simplefilter('ignore', Warning) + + +def setup(): + try: + from elasticsearch import Elasticsearch, ElasticsearchException + except ImportError: + raise unittest.SkipTest("elasticsearch-py not installed.") + + url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + es = Elasticsearch(url) + try: + es.info() + except ElasticsearchException as e: + raise unittest.SkipTest("elasticsearch not running on %r" % url, e) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py new file mode 100644 index 000000000..0ba061eda --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -0,0 +1,1500 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime +import logging as std_logging +import operator +from decimal import Decimal + +import elasticsearch +from django.conf import settings +from django.test import TestCase +from django.test.utils import override_settings + +from haystack import connections, indexes, reset_search_queries +from haystack.exceptions import SkipDocument +from haystack.inputs import AutoQuery +from haystack.models import SearchResult +from haystack.query import RelatedSearchQuerySet, SearchQuerySet, SQ +from haystack.utils import log as logging +from haystack.utils.geo import Point +from haystack.utils.loading import UnifiedIndex +from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel +from ..mocks import MockSearchResult +from ..utils import unittest + +test_pickling = True + +try: + import 
cPickle as pickle +except ImportError: + try: + import pickle + except ImportError: + test_pickling = False + + +def clear_elasticsearch_index(): + # Wipe it clean. + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + try: + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + raw_es.indices.refresh() + except elasticsearch.TransportError: + pass + + # Since we've just completely deleted the index, we'll reset setup_complete so the next access will + # correctly define the mappings: + connections['elasticsearch2'].get_backend().setup_complete = False + + +class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2MockSearchIndexWithSkipDocument(Elasticsearch2MockSearchIndex): + def prepare_text(self, obj): + if obj.author == 'daniel3': + raise SkipDocument + return u"Indexed!\n%s" % obj.id + + +class Elasticsearch2MockSpellingIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + def prepare_text(self, obj): + return obj.foo + + +class Elasticsearch2MaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + month = indexes.CharField(indexed=False) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def prepare_month(self, obj): + return "%02d" % obj.pub_date.month + + def get_model(self): + return MockModel + + +class Elasticsearch2MockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='foo', document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2AnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AnotherMockModel + + def prepare_text(self, obj): + return u"You might be searching for the user %s" % obj.author + + +class Elasticsearch2BoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField( + document=True, use_template=True, + template_name='search/indexes/core/mockmodel_template.txt' + ) + author = indexes.CharField(model_attr='author', weight=2.0) + editor = indexes.CharField(model_attr='editor') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AFourthMockModel + + def prepare(self, obj): + data = super(Elasticsearch2BoostMockSearchIndex, self).prepare(obj) + + if obj.pk == 4: + data['boost'] = 5.0 + + return data + + +class Elasticsearch2FacetingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + author = indexes.CharField(model_attr='author', faceted=True) + editor = indexes.CharField(model_attr='editor', faceted=True) + pub_date = indexes.DateField(model_attr='pub_date', faceted=True) + facet_field = indexes.FacetCharField(model_attr='author') + + def 
prepare_text(self, obj): + return '%s %s' % (obj.author, obj.editor) + + def get_model(self): + return AFourthMockModel + + +class Elasticsearch2RoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, default='') + name = indexes.CharField() + is_active = indexes.BooleanField() + post_count = indexes.IntegerField() + average_rating = indexes.FloatField() + price = indexes.DecimalField() + pub_date = indexes.DateField() + created = indexes.DateTimeField() + tags = indexes.MultiValueField() + sites = indexes.MultiValueField() + + def get_model(self): + return MockModel + + def prepare(self, obj): + prepped = super(Elasticsearch2RoundTripSearchIndex, self).prepare(obj) + prepped.update({ + 'text': 'This is some example text.', + 'name': 'Mister Pants', + 'is_active': True, + 'post_count': 25, + 'average_rating': 3.6, + 'price': Decimal('24.99'), + 'pub_date': datetime.date(2009, 11, 21), + 'created': datetime.datetime(2009, 11, 21, 21, 31, 00), + 'tags': ['staff', 'outdoor', 'activist', 'scientist'], + 'sites': [3, 5, 1], + }) + return prepped + + +class Elasticsearch2ComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, default='') + name = indexes.CharField(faceted=True) + is_active = indexes.BooleanField(faceted=True) + post_count = indexes.IntegerField() + post_count_i = indexes.FacetIntegerField(facet_for='post_count') + average_rating = indexes.FloatField(faceted=True) + pub_date = indexes.DateField(faceted=True) + created = indexes.DateTimeField(faceted=True) + sites = indexes.MultiValueField(faceted=True) + + def get_model(self): + return MockModel + + +class Elasticsearch2AutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='foo', document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + text_auto = indexes.EdgeNgramField(model_attr='foo') + name_auto = indexes.EdgeNgramField(model_attr='author') + + def get_model(self): + return MockModel + + +class Elasticsearch2SpatialSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='name', document=True) + location = indexes.LocationField() + + def prepare_location(self, obj): + return "%s,%s" % (obj.lat, obj.lon) + + def get_model(self): + return ASixthMockModel + + +class TestSettings(TestCase): + def test_kwargs_are_passed_on(self): + from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend + backend = ElasticsearchSearchBackend('alias', **{ + 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'], + 'INDEX_NAME': 'testing', + 'KWARGS': {'max_retries': 42} + }) + + self.assertEqual(backend.conn.transport.max_retries, 42) + + +class Elasticsearch2SearchBackendTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchBackendTestCase, self).setUp() + + # Wipe it clean. + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + clear_elasticsearch_index() + + # Stow. 
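+ # Stash the stock UnifiedIndex so tearDown() can restore it after the test swaps in its own.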
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() + self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2SearchBackendTestCase, self).tearDown() + self.sb.silently_fail = True + + def raw_search(self, query): + try: + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + except elasticsearch.TransportError: + return {} + + def test_non_silent(self): + bad_sb = connections['elasticsearch2'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) + + try: + bad_sb.update(self.smmi, self.sample_objs) + self.fail() + except: + pass + + try: + bad_sb.remove('core.mockmodel.1') + self.fail() + except: + pass + + try: + bad_sb.clear() + self.fail() + except: + pass + + try: + bad_sb.search('foo') + self.fail() + except: + pass + + def test_update_no_documents(self): + url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME'] + + sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + self.assertEqual(sb.update(self.smmi, []), None) + + sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) + try: + sb.update(self.smmi, []) + self.fail() + except: + pass + + def test_update(self): + self.sb.update(self.smmi, self.sample_objs) + + # Check what Elasticsearch thinks is there. + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + self.assertEqual( + sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=lambda x: x['id']), [ + { + 'django_id': '1', + 'django_ct': 'core.mockmodel', + 'name': 'daniel1', + 'name_exact': 'daniel1', + 'text': 'Indexed!\n1', + 'pub_date': '2009-02-24T00:00:00', + 'id': 'core.mockmodel.1' + }, + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_update_with_SkipDocument_raised(self): + self.sb.update(self.smmidni, self.sample_objs) + + # Check what Elasticsearch thinks is there. 
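+ # Only two documents should have made it in; 'daniel3' raises SkipDocument in prepare_text().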
+ res = self.raw_search('*:*')['hits'] + self.assertEqual(res['total'], 2) + self.assertListEqual( + sorted([x['_source']['id'] for x in res['hits']]), + ['core.mockmodel.1', 'core.mockmodel.2'] + ) + + def test_remove(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 2) + self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], + key=operator.itemgetter('django_id')), [ + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_remove_succeeds_on_404(self): + self.sb.silently_fail = False + self.sb.remove('core.mockmodel.421') + + def test_clear(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear() + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel, MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + def test_search(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3'])) + + self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) + self.assertEqual( + sorted([result.highlighted[0] for result in self.sb.search('Index', highlight=True)['results']]), + [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) + + self.assertEqual(self.sb.search('Indx')['hits'], 0) + self.assertEqual(self.sb.search('indaxed')['spelling_suggestion'], 'indexed') + self.assertEqual(self.sb.search('arf', spelling_query='indexyd')['spelling_suggestion'], 'indexed') + + self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', facets={'name': {}}) + self.assertEqual(results['hits'], 3) + self.assertSetEqual( + set(results['facets']['fields']['name']), + set([('daniel3', 1), ('daniel2', 1), ('daniel1', 1)]) + ) + + self.assertEqual(self.sb.search('', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 
'gap_by': 'month', 'gap_amount': 1}}) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['dates']['pub_date'], [(datetime.datetime(2009, 2, 1, 0, 0), 3)]) + + self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) + results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['queries'], {u'name': 3}) + + self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) + results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) + self.assertEqual(results['hits'], 1) + + # Ensure that swapping the ``result_class`` works. + self.assertTrue( + isinstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult)) + + # Check the use of ``limit_to_registered_models``. + self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) + self.assertEqual( + sorted([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']]), + ['1', '2', '3']) + + # Stow. + old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(sorted([result.pk for result in self.sb.search('*:*')['results']]), ['1', '2', '3']) + + # Restore. + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models + + def test_spatial_search_parameters(self): + p1 = Point(1.23, 4.56) + kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, + sort_by=(('distance', 'desc'),)) + + self.assertIn('sort', kwargs) + self.assertEqual(1, len(kwargs['sort'])) + geo_d = kwargs['sort'][0]['_geo_distance'] + + # ElasticSearch supports the GeoJSON-style lng, lat pairs so unlike Solr the values should be + # in the same order as we used to create the Point(): + # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-filter.html#_lat_lon_as_array_4 + + self.assertDictEqual(geo_d, {'location': [1.23, 4.56], 'unit': 'km', 'order': 'desc'}) + + def test_more_like_this(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + # A functional MLT example with enough data to work is below. Rely on + # this to ensure the API is correct enough. 
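+ # With only three tiny documents indexed, MLT has nothing sufficiently similar to return, so zero hits is the expected result.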
+ self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) + self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) + + def test_build_schema(self): + old_ui = connections['elasticsearch2'].get_unified_index() + + (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 4 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date': {'type': 'date'}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'} + }) + + ui = UnifiedIndex() + ui.build(indexes=[Elasticsearch2ComplexFacetsMockSearchIndex()]) + (content_field_name, mapping) = self.sb.build_schema(ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 15 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'is_active_exact': {'type': 'boolean'}, + 'created': {'type': 'date'}, + 'post_count': {'type': 'long'}, + 'created_exact': {'type': 'date'}, + 'sites_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'is_active': {'type': 'boolean'}, + 'sites': {'type': 'string', 'analyzer': 'snowball'}, + 'post_count_i': {'type': 'long'}, + 'average_rating': {'type': 'float'}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date_exact': {'type': 'date'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'pub_date': {'type': 'date'}, + 'average_rating_exact': {'type': 'float'} + }) + + def test_verify_type(self): + old_ui = connections['elasticsearch2'].get_unified_index() + ui = UnifiedIndex() + smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + ui.build(indexes=[smtmmi]) + connections['elasticsearch2']._index = ui + sb = connections['elasticsearch2'].get_backend() + sb.update(smtmmi, self.sample_objs) + + self.assertEqual(sb.search('*:*')['hits'], 3) + self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) + connections['elasticsearch2']._index = old_ui + + +class CaptureHandler(std_logging.Handler): + logs_seen = [] + + def emit(self, record): + CaptureHandler.logs_seen.append(record) + + +class FailedElasticsearch2SearchBackendTestCase(TestCase): + def setUp(self): + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + # Stow. + # Point the backend at a URL that doesn't exist so we can watch the + # sparks fly. + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = "%s/foo/" % self.old_es_url + self.cap = CaptureHandler() + logging.getLogger('haystack').addHandler(self.cap) + import haystack + logging.getLogger('haystack').removeHandler(haystack.stream) + + # Setup the rest of the bits. 
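+ # With the connection URL now pointing at a nonexistent endpoint, every backend call below should fail and be logged.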
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = ui + self.sb = connections['elasticsearch2'].get_backend() + + def tearDown(self): + import haystack + # Restore. + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = self.old_es_url + connections['elasticsearch2']._index = self.old_ui + logging.getLogger('haystack').removeHandler(self.cap) + logging.getLogger('haystack').addHandler(haystack.stream) + + @unittest.expectedFailure + def test_all_cases(self): + # Prior to the addition of the try/except bits, these would all fail miserably. + self.assertEqual(len(CaptureHandler.logs_seen), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(len(CaptureHandler.logs_seen), 1) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 2) + + self.sb.search('search') + self.assertEqual(len(CaptureHandler.logs_seen), 3) + + self.sb.more_like_this(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 4) + + self.sb.clear([MockModel]) + self.assertEqual(len(CaptureHandler.logs_seen), 5) + + self.sb.clear() + self.assertEqual(len(CaptureHandler.logs_seen), 6) + + +class LiveElasticsearch2SearchQueryTestCase(TestCase): + fixtures = ['initial_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQueryTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + self.sq = connections['elasticsearch2'].get_query() + + # Force indexing of the content. + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() + + def test_log_query(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + + with self.settings(DEBUG=False): + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + + with self.settings(DEBUG=True): + # Redefine it to clear out the cached results. + self.sq = connections['elasticsearch2'].query(using='elasticsearch2') + self.sq.add_filter(SQ(name='bar')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + 'name:(bar)') + + # And again, for good measure. + self.sq = connections['elasticsearch2'].query('elasticsearch2') + self.sq.add_filter(SQ(name='bar')) + self.sq.add_filter(SQ(text='moof')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + 'name:(bar)') + self.assertEqual(connections['elasticsearch2'].queries[1]['query_string'], + u'(name:(bar) AND text:(moof))') + + +lssqstc_all_loaded = None + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SearchQuerySetTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() + + # Stow. 
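+ # The lssqstc_all_loaded guard below ensures the fixtures are only indexed once for this whole TestCase.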
+ self.old_ui = connections['elasticsearch2'].get_unified_index()
+ self.ui = UnifiedIndex()
+ self.smmi = Elasticsearch2MockSearchIndex()
+ self.ui.build(indexes=[self.smmi])
+ connections['elasticsearch2']._index = self.ui
+
+ self.sqs = SearchQuerySet('elasticsearch2')
+ self.rsqs = RelatedSearchQuerySet('elasticsearch2')
+
+ # Ugly, but skipping the constant reindexing saves us almost 50% of the runtime.
+ global lssqstc_all_loaded
+
+ if lssqstc_all_loaded is None:
+ lssqstc_all_loaded = True
+
+ # Wipe it clean.
+ clear_elasticsearch_index()
+
+ # Force indexing of the content.
+ self.smmi.update(using='elasticsearch2')
+
+ def tearDown(self):
+ # Restore.
+ connections['elasticsearch2']._index = self.old_ui
+ super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown()
+
+ def test_load_all(self):
+ sqs = self.sqs.order_by('pub_date').load_all()
+ self.assertTrue(isinstance(sqs, SearchQuerySet))
+ self.assertTrue(len(sqs) > 0)
+ self.assertEqual(sqs[2].object.foo,
+ u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.')
+
+ def test_iter(self):
+ reset_search_queries()
+ self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+ sqs = self.sqs.all()
+ results = sorted([int(result.pk) for result in sqs])
+ self.assertEqual(results, list(range(1, 24)))
+ self.assertEqual(len(connections['elasticsearch2'].queries), 3)
+
+ def test_slice(self):
+ reset_search_queries()
+ self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+ results = self.sqs.all().order_by('pub_date')
+ self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
+ self.assertEqual(len(connections['elasticsearch2'].queries), 1)
+
+ reset_search_queries()
+ self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+ results = self.sqs.all().order_by('pub_date')
+ self.assertEqual(int(results[21].pk), 22)
+ self.assertEqual(len(connections['elasticsearch2'].queries), 1)
+
+ def test_values_slicing(self):
+ reset_search_queries()
+ self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+
+ # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends
+
+ # The values will come back as strings because Haystack doesn't assume PKs are integers.
+ # We'll prepare this set once since we're going to query the same results in multiple ways: + expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] + + results = self.sqs.all().order_by('pub_date').values('pk') + self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk') + self.assertListEqual([i[0] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) + self.assertListEqual(results[1:11], expected_pks) + + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test_count(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + sqs = self.sqs.all() + self.assertEqual(sqs.count(), 23) + self.assertEqual(sqs.count(), 23) + self.assertEqual(len(sqs), 23) + self.assertEqual(sqs.count(), 23) + # Should only execute one query to count the length of the result set. + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + + def test_manual_iter(self): + results = self.sqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = set([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, + set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.sqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + + def test_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(self.sqs._cache_is_full(), False) + results = self.sqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test___and__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 & sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') + + # Now for something more complex... + sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) + sqs4 = self.sqs.filter(content='bar') + sqs = sqs3 & sqs4 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 3) + self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') + + def test___or__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 | sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') + + # Now for something more complex... 
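+ # Combining exclude() with OR'd SQ objects checks that the generated query string parenthesizes the NOT clause correctly.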
+ sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) + sqs4 = self.sqs.filter(content='bar').models(MockModel) + sqs = sqs3 | sqs4 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))') + + def test_auto_query(self): + # Ensure bits in exact matches get escaped properly as well. + # This will break horrifically if escaping isn't working. + sqs = self.sqs.auto_query('"pants:rule"') + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(repr(sqs.query.query_filter), '') + self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")') + self.assertEqual(len(sqs), 0) + + # Regressions + + def test_regression_proper_start_offsets(self): + sqs = self.sqs.filter(text='index') + self.assertNotEqual(sqs.count(), 0) + + id_counts = {} + + for item in sqs: + if item.id in id_counts: + id_counts[item.id] += 1 + else: + id_counts[item.id] = 1 + + for key, value in id_counts.items(): + if value > 1: + self.fail("Result with id '%s' seen more than once in the results." % key) + + def test_regression_raw_search_breaks_slicing(self): + sqs = self.sqs.raw_search('text:index') + page_1 = [result.pk for result in sqs[0:10]] + page_2 = [result.pk for result in sqs[10:20]] + + for pk in page_2: + if pk in page_1: + self.fail("Result with id '%s' seen more than once in the results." % pk) + + # RelatedSearchQuerySet Tests + + def test_related_load_all(self): + sqs = self.rsqs.order_by('pub_date').load_all() + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertTrue(len(sqs) > 0) + self.assertEqual(sqs[2].object.foo, + u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. 
You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') + + def test_related_load_all_queryset(self): + sqs = self.rsqs.load_all().order_by('pub_date') + self.assertEqual(len(sqs._load_all_querysets), 0) + + sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1)) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs._load_all_querysets), 1) + self.assertEqual(sorted([obj.object.id for obj in sqs]), list(range(2, 24))) + + sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs._load_all_querysets), 1) + self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20])) + self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23])) + + def test_related_iter(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + sqs = self.rsqs.all() + results = set([int(result.pk) for result in sqs]) + self.assertEqual(results, + set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_slice(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual(int(results[21].pk), 22) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_manual_iter(self): + results = self.rsqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = sorted([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, list(range(1, 24))) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + + def test_related_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(self.rsqs._cache_is_full(), False) + results = self.rsqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + 
self.assertEqual(len(connections['elasticsearch2'].queries), 5) + + def test_quotes_regression(self): + sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") + # Should not have empty terms. + self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)") + # Should not cause Elasticsearch to 500. + self.assertEqual(sqs.count(), 0) + + sqs = self.sqs.auto_query('blazing') + self.assertEqual(sqs.query.build_query(), u'(blazing)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('blazing saddles') + self.assertEqual(sqs.query.build_query(), u'(blazing saddles)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles') + self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles"') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles" mel') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles" mel brooks') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles" brooks') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles" "brooks') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" \\"brooks)') + self.assertEqual(sqs.count(), 0) + + def test_query_generation(self): + sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) + self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") + + def test_result_class(self): + # Assert that we're defaulting to ``SearchResult``. + sqs = self.sqs.all() + self.assertTrue(isinstance(sqs[0], SearchResult)) + + # Custom class. + sqs = self.sqs.result_class(MockSearchResult).all() + self.assertTrue(isinstance(sqs[0], MockSearchResult)) + + # Reset to default. + sqs = self.sqs.result_class(None).all() + self.assertTrue(isinstance(sqs[0], SearchResult)) + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SpellingTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SpellingTestCase, self).setUp() + + # Stow. 
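+ # Elasticsearch2MockSpellingIndex indexes the raw model text, giving the term suggester a realistic vocabulary to correct against.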
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSpellingIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch2'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2SpellingTestCase, self).tearDown() + + def test_spelling(self): + self.assertEqual(self.sqs.auto_query('structurd').spelling_suggestion(), 'structured') + self.assertEqual(self.sqs.spelling_suggestion('structurd'), 'structured') + self.assertEqual(self.sqs.auto_query('srchindex instanc').spelling_suggestion(), 'searchindex instance') + self.assertEqual(self.sqs.spelling_suggestion('srchindex instanc'), 'searchindex instance') + + +class LiveElasticsearch2MoreLikeThisTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2MoreLikeThisTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + self.smmi.update(using='elasticsearch2') + self.sammi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() + + def test_more_like_this(self): + mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in mlt] + self.assertEqual(mlt.count(), 11) + self.assertEqual(set(results), set([u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'])) + self.assertEqual(len(results), 10) + + alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) + results = [result.pk for result in alt_mlt] + self.assertEqual(alt_mlt.count(), 9) + self.assertEqual(set(results), set([u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'])) + self.assertEqual(len(results), 9) + + alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in alt_mlt_with_models] + self.assertEqual(alt_mlt_with_models.count(), 10) + self.assertEqual(set(results), set([u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'])) + self.assertEqual(len(results), 10) + + if hasattr(MockModel.objects, 'defer'): + # Make sure MLT works with deferred bits. + mi = MockModel.objects.defer('foo').get(pk=1) + self.assertEqual(mi._deferred, True) + deferred = self.sqs.models(MockModel).more_like_this(mi) + self.assertEqual(deferred.count(), 0) + self.assertEqual([result.pk for result in deferred], []) + self.assertEqual(len([result.pk for result in deferred]), 0) + + # Ensure that swapping the ``result_class`` works. 
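+        # (Inspecting the first hit should be enough to show the swapped-in
+        # class is actually used for MLT results.)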
+ self.assertTrue( + isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0], + MockSearchResult)) + + +class LiveElasticsearch2AutocompleteTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2AutocompleteTestCase, self).setUp() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch2'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() + + def test_build_schema(self): + self.sb = connections['elasticsearch2'].get_backend() + content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + }, + 'text': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'pub_date': { + 'type': 'date' + }, + 'name': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'text_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + } + }) + + def test_autocomplete(self): + autocomplete = self.sqs.autocomplete(text_auto='mod') + self.assertEqual(autocomplete.count(), 16) + self.assertEqual(set([result.pk for result in autocomplete]), set( + ['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'])) + self.assertTrue('mod' in autocomplete[0].text.lower()) + self.assertTrue('mod' in autocomplete[1].text.lower()) + self.assertTrue('mod' in autocomplete[2].text.lower()) + self.assertTrue('mod' in autocomplete[3].text.lower()) + self.assertTrue('mod' in autocomplete[4].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete]), 16) + + # Test multiple words. + autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') + self.assertEqual(autocomplete_2.count(), 13) + self.assertEqual(set([result.pk for result in autocomplete_2]), + set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'])) + map_results = {result.pk: result for result in autocomplete_2} + self.assertTrue('your' in map_results['1'].text.lower()) + self.assertTrue('mod' in map_results['1'].text.lower()) + self.assertTrue('your' in map_results['6'].text.lower()) + self.assertTrue('mod' in map_results['6'].text.lower()) + self.assertTrue('your' in map_results['2'].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete_2]), 13) + + # Test multiple fields. 
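+        # (Each keyword argument contributes its own startswith terms, ANDed
+        # together, so hits should match the partial input in both fields.)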
+ autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') + self.assertEqual(autocomplete_3.count(), 4) + self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14'])) + self.assertEqual(len([result.pk for result in autocomplete_3]), 4) + + # Test numbers in phrases + autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + + # Test numbers alone + autocomplete_4 = self.sqs.autocomplete(text_auto='867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + + +class LiveElasticsearch2RoundTripTestCase(TestCase): + def setUp(self): + super(LiveElasticsearch2RoundTripTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.srtsi = Elasticsearch2RoundTripSearchIndex() + self.ui.build(indexes=[self.srtsi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + self.sqs = SearchQuerySet('elasticsearch2') + + # Fake indexing. + mock = MockModel() + mock.id = 1 + self.sb.update(self.srtsi, [mock]) + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2RoundTripTestCase, self).tearDown() + + def test_round_trip(self): + results = self.sqs.filter(id='core.mockmodel.1') + + # Sanity check. + self.assertEqual(results.count(), 1) + + # Check the individual fields. + result = results[0] + self.assertEqual(result.id, 'core.mockmodel.1') + self.assertEqual(result.text, 'This is some example text.') + self.assertEqual(result.name, 'Mister Pants') + self.assertEqual(result.is_active, True) + self.assertEqual(result.post_count, 25) + self.assertEqual(result.average_rating, 3.6) + self.assertEqual(result.price, u'24.99') + self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) + self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) + self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) + self.assertEqual(result.sites, [3, 5, 1]) + + +@unittest.skipUnless(test_pickling, 'Skipping pickling tests') +class LiveElasticsearch2PickleTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2PickleTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + self.smmi.update(using='elasticsearch2') + self.sammi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2PickleTestCase, self).tearDown() + + def test_pickling(self): + results = self.sqs.all() + + for res in results: + # Make sure the cache is full. 
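+            # (The loop body is intentionally empty; iterating is what fills
+            # the result cache before it gets pickled.)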
+ pass + + in_a_pickle = pickle.dumps(results) + like_a_cuke = pickle.loads(in_a_pickle) + self.assertEqual(len(like_a_cuke), len(results)) + self.assertEqual(like_a_cuke[0].id, results[0].id) + + +class Elasticsearch2BoostBackendTestCase(TestCase): + def setUp(self): + super(Elasticsearch2BoostBackendTestCase, self).setUp() + + # Wipe it clean. + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2BoostMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + self.sample_objs = [] + + for i in range(1, 5): + mock = AFourthMockModel() + mock.id = i + + if i % 2: + mock.author = 'daniel' + mock.editor = 'david' + else: + mock.author = 'david' + mock.editor = 'daniel' + + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2BoostBackendTestCase, self).tearDown() + + def raw_search(self, query): + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + + def test_boost(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) + + results = SearchQuerySet(using='elasticsearch2').filter(SQ(author='daniel') | SQ(editor='daniel')) + + self.assertEqual(set([result.id for result in results]), set([ + 'core.afourthmockmodel.4', + 'core.afourthmockmodel.3', + 'core.afourthmockmodel.1', + 'core.afourthmockmodel.2' + ])) + + def test__to_python(self): + self.assertEqual(self.sb._to_python('abc'), 'abc') + self.assertEqual(self.sb._to_python('1'), 1) + self.assertEqual(self.sb._to_python('2653'), 2653) + self.assertEqual(self.sb._to_python('25.5'), 25.5) + self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3]) + self.assertEqual(self.sb._to_python('{"a": 1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2}) + self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime.datetime(2009, 5, 9, 16, 14)) + self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime.datetime(2009, 5, 9, 0, 0)) + self.assertEqual(self.sb._to_python(None), None) + + +class RecreateIndexTestCase(TestCase): + def setUp(self): + self.raw_es = elasticsearch.Elasticsearch( + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + + def test_recreate_index(self): + clear_elasticsearch_index() + + sb = connections['elasticsearch2'].get_backend() + sb.silently_fail = True + sb.setup() + + original_mapping = self.raw_es.indices.get_mapping(index=sb.index_name) + + sb.clear() + sb.setup() + + try: + updated_mapping = self.raw_es.indices.get_mapping(sb.index_name) + except elasticsearch.NotFoundError: + self.fail("There is no mapping after recreating the index") + + self.assertEqual(original_mapping, updated_mapping, + "Mapping after recreating the index differs from the original one") + + +class Elasticsearch2FacetingTestCase(TestCase): + def setUp(self): + super(Elasticsearch2FacetingTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. 
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2FacetingMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 10): + mock = AFourthMockModel() + mock.id = i + if i > 5: + mock.editor = 'George Taylor' + else: + mock.editor = 'Perry White' + if i % 2: + mock.author = 'Daniel Lindsley' + else: + mock.author = 'Dan Watson' + mock.pub_date = datetime.date(2013, 9, (i % 4) + 1) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2FacetingTestCase, self).tearDown() + + def test_facet(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 5), + ('Dan Watson', 4), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ('George Taylor', 4), + ]) + counts = SearchQuerySet('elasticsearch2').filter(content='white').facet('facet_field', + order='reverse_count').facet_counts() + self.assertEqual(counts['fields']['facet_field'], [ + ('Dan Watson', 2), + ('Daniel Lindsley', 3), + ]) + + def test_multiple_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').narrow('editor_exact:"Perry White"').narrow( + 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ]) + + def test_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').narrow( + 'editor_exact:"Perry White"').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ('Dan Watson', 2), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ]) + + def test_date_facet(self): + self.sb.update(self.smmi, self.sample_objs) + start = datetime.date(2013, 9, 1) + end = datetime.date(2013, 9, 30) + # Facet by day + counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 2), + (datetime.datetime(2013, 9, 2), 3), + (datetime.datetime(2013, 9, 3), 2), + (datetime.datetime(2013, 9, 4), 2), + ]) + # By month + counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 9), + ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py new file mode 100644 index 000000000..777334fb2 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_inputs.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, division, print_function, unicode_literals + +from django.test import TestCase + +from haystack import connections, inputs + + +class Elasticsearch2InputTestCase(TestCase): + def setUp(self): + super(Elasticsearch2InputTestCase, self).setUp() + self.query_obj = connections['elasticsearch2'].get_query() + + def 
test_raw_init(self): + raw = inputs.Raw('hello OR there, :you') + self.assertEqual(raw.query_string, 'hello OR there, :you') + self.assertEqual(raw.kwargs, {}) + self.assertEqual(raw.post_process, False) + + raw = inputs.Raw('hello OR there, :you', test='really') + self.assertEqual(raw.query_string, 'hello OR there, :you') + self.assertEqual(raw.kwargs, {'test': 'really'}) + self.assertEqual(raw.post_process, False) + + def test_raw_prepare(self): + raw = inputs.Raw('hello OR there, :you') + self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') + + def test_clean_init(self): + clean = inputs.Clean('hello OR there, :you') + self.assertEqual(clean.query_string, 'hello OR there, :you') + self.assertEqual(clean.post_process, True) + + def test_clean_prepare(self): + clean = inputs.Clean('hello OR there, :you') + self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you') + + def test_exact_init(self): + exact = inputs.Exact('hello OR there, :you') + self.assertEqual(exact.query_string, 'hello OR there, :you') + self.assertEqual(exact.post_process, True) + + def test_exact_prepare(self): + exact = inputs.Exact('hello OR there, :you') + self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') + + exact = inputs.Exact('hello OR there, :you', clean=True) + self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"') + + def test_not_init(self): + not_it = inputs.Not('hello OR there, :you') + self.assertEqual(not_it.query_string, 'hello OR there, :you') + self.assertEqual(not_it.post_process, True) + + def test_not_prepare(self): + not_it = inputs.Not('hello OR there, :you') + self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)') + + def test_autoquery_init(self): + autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') + self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') + self.assertEqual(autoquery.post_process, False) + + def test_autoquery_prepare(self): + autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') + self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') + + def test_altparser_init(self): + altparser = inputs.AltParser('dismax') + self.assertEqual(altparser.parser_name, 'dismax') + self.assertEqual(altparser.query_string, '') + self.assertEqual(altparser.kwargs, {}) + self.assertEqual(altparser.post_process, False) + + altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) + self.assertEqual(altparser.parser_name, 'dismax') + self.assertEqual(altparser.query_string, 'douglas adams') + self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) + self.assertEqual(altparser.post_process, False) + + def test_altparser_prepare(self): + altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) + self.assertEqual(altparser.prepare(self.query_obj), + u"""{!dismax mm=1 qf=author v='douglas adams'}""") diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py new file mode 100644 index 000000000..968180686 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime + +import elasticsearch +from django.test import TestCase + +from haystack import connections +from haystack.inputs import Exact +from haystack.models import SearchResult +from haystack.query import SearchQuerySet, SQ +from 
haystack.utils.geo import D, Point +from ..core.models import AnotherMockModel, MockModel + + +class Elasticsearch2SearchQueryTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQueryTestCase, self).setUp() + self.sq = connections['elasticsearch2'].get_query() + + def test_build_query_all(self): + self.assertEqual(self.sq.build_query(), '*:*') + + def test_build_query_single_word(self): + self.sq.add_filter(SQ(content='hello')) + self.assertEqual(self.sq.build_query(), '(hello)') + + def test_build_query_boolean(self): + self.sq.add_filter(SQ(content=True)) + self.assertEqual(self.sq.build_query(), '(True)') + + def test_regression_slash_search(self): + self.sq.add_filter(SQ(content='hello/')) + self.assertEqual(self.sq.build_query(), '(hello\\/)') + + def test_build_query_datetime(self): + self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28))) + self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00)') + + def test_build_query_multiple_words_and(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_filter(SQ(content='world')) + self.assertEqual(self.sq.build_query(), '((hello) AND (world))') + + def test_build_query_multiple_words_not(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))') + + def test_build_query_multiple_words_or(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(SQ(content='hello'), use_or=True) + self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))') + + def test_build_query_multiple_words_mixed(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(content='hello'), use_or=True) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') + + def test_build_query_phrase(self): + self.sq.add_filter(SQ(content='hello world')) + self.assertEqual(self.sq.build_query(), '(hello AND world)') + + self.sq.add_filter(SQ(content__exact='hello world')) + self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') + + def test_build_query_boost(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_boost('world', 5) + self.assertEqual(self.sq.build_query(), "(hello) world^5") + + def test_build_query_multiple_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) + self.sq.add_filter(SQ(author__gt='daniel')) + self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) + self.sq.add_filter(SQ(title__gte='B')) + self.sq.add_filter(SQ(id__in=[1, 2, 3])) + self.sq.add_filter(SQ(rating__range=[3, 5])) + self.assertEqual(self.sq.build_query(), + u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') + + def test_build_query_multiple_filter_types_with_datetimes(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0))) + self.sq.add_filter(SQ(author__gt='daniel')) + self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0))) + self.sq.add_filter(SQ(title__gte='B')) + self.sq.add_filter(SQ(id__in=[1, 2, 3])) + self.sq.add_filter(SQ(rating__range=[3, 5])) + self.assertEqual(self.sq.build_query(), + u'((why) AND pub_date:([* TO "2009-02-10T01:59:00"]) AND author:({"daniel" 
TO *}) AND created:({* TO "2009-02-12T12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') + + def test_build_query_in_filter_multiple_words(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))') + + def test_build_query_in_filter_datetime(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) + self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21"))') + + def test_build_query_in_with_set(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) + self.assertTrue('((why) AND title:(' in self.sq.build_query()) + self.assertTrue('"A Famous Paper"' in self.sq.build_query()) + self.assertTrue('"An Infamous Article"' in self.sq.build_query()) + + def test_build_query_wildcard_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__startswith='haystack')) + self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') + + def test_build_query_fuzzy_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__fuzzy='haystack')) + self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') + + def test_clean(self): + self.assertEqual(self.sq.clean('hello world'), 'hello world') + self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') + self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'), + 'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world') + self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), + 'so please NOTe i am in a bAND and bORed') + + def test_build_query_with_models(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_model(MockModel) + self.assertEqual(self.sq.build_query(), '(hello)') + + self.sq.add_model(AnotherMockModel) + self.assertEqual(self.sq.build_query(), u'(hello)') + + def test_set_result_class(self): + # Assert that we're defaulting to ``SearchResult``. + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + # Custom class. + class IttyBittyResult(object): + pass + + self.sq.set_result_class(IttyBittyResult) + self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) + + # Reset to default. 
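+        # (Passing None falls back to the stock SearchResult class.)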
+ self.sq.set_result_class(None) + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + def test_in_filter_values_list(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=[1, 2, 3])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') + + def test_narrow_sq(self): + sqs = SearchQuerySet(using='elasticsearch2').narrow(SQ(foo='moof')) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.narrow_queries), 1) + self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') + + +class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch2'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (0, 9, 9) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions < 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': 0.5, 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) + + +class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch2'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (1, 0, 0) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions >= 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': "0.500000km", 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index d676c0de0..7a796164d 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -84,6 +84,12 @@ 'INDEX_NAME': 'test_default', 'INCLUDE_SPELLING': True, }, + 'elasticsearch2': { + 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', + 'URL': 'http://192.168.99.100:9200/', + 'INDEX_NAME': 'test_default', + 'INCLUDE_SPELLING': True, + }, 'simple': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, From b5cb345917979a4cc87eb9462bec943d1bdda83c Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Wed, 23 Mar 2016 16:29:00 +0100 Subject: [PATCH 16/51] Elasticsearch 2.x support - Fix localhost IP in elasticsearch2 settings --- test_haystack/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index 7a796164d..dca6dd4e7 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -86,7 +86,7 @@ }, 'elasticsearch2': { 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', - 'URL': 'http://192.168.99.100:9200/', + 'URL': 
'http://127.0.0.1:9200/',
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },

From 22288d720eb248bc9df66eeeb7ec876aa5218ea7 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:26:30 +0100
Subject: [PATCH 17/51] Launches ES 2.x on Travis - connect on port 29200

---
 .travis.yml               | 3 +++
 test_haystack/settings.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index eee4d5886..9063cbd8c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,9 @@ addons:
 
 before_install:
   - mkdir -p $HOME/download-cache
+  - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
+  - tar zxf elasticsearch-2.2.1.tar.gz
+  - elasticsearch-2.2.1/bin/elasticsearch -Dtransport.tcp.port=29300 -Dhttp.port=29200
 
 install:
   - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi
diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index dca6dd4e7..0fa301297 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -86,7 +86,7 @@
     },
     'elasticsearch2': {
         'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://127.0.0.1:9200/',
+        'URL': 'http://127.0.0.1:29200/',
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },

From c2b94cd19c0695723e8e296352429bb9e3af446c Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:30:34 +0100
Subject: [PATCH 18/51] Launches ES 2.x on Travis - daemonize ES 2.x

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 9063cbd8c..d435f060e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,7 @@ before_install:
   - mkdir -p $HOME/download-cache
   - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
   - tar zxf elasticsearch-2.2.1.tar.gz
-  - elasticsearch-2.2.1/bin/elasticsearch -Dtransport.tcp.port=29300 -Dhttp.port=29200
+  - elasticsearch-2.2.1/bin/elasticsearch -d -Dtransport.tcp.port=29300 -Dhttp.port=29200
 
 install:
   - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi

From 2fd632ff31972305b52820da14c869c26457447a Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:39:33 +0100
Subject: [PATCH 19/51] Launches ES 2.x on Travis - comment out the ES 1.x service

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d435f060e..bb01867e2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -51,8 +51,8 @@ matrix:
   allow_failures:
     - python: "pypy"
 
-services:
-  - elasticsearch
+#services:
+#  - elasticsearch
 
 notifications:
   irc: "irc.freenode.org#haystack"
   email: false

From f50c38e1d35b779cf21bd29dea3a403ec9ff1e55 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 18:28:28 +0100
Subject: [PATCH 20/51] Launches ES 2.x on Travis - fix the exception caught
 when skipping tests

---
 test_haystack/elasticsearch2_tests/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py
index 8433081cd..a456c9829 100644
--- a/test_haystack/elasticsearch2_tests/__init__.py
+++ b/test_haystack/elasticsearch2_tests/__init__.py
@@ -10,7 +10,7 @@ def setup():
     try:
-        from elasticsearch import Elasticsearch, ElasticsearchException
+        from elasticsearch import Elasticsearch, exceptions
     except ImportError:
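+        # (elasticsearch-py is an optional test dependency, so a clean skip
+        # here keeps the rest of the suite runnable without it.)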
raise unittest.SkipTest("elasticsearch-py not installed.") @@ -18,5 +18,5 @@ def setup(): es = Elasticsearch(url) try: es.info() - except ElasticsearchException as e: + except exceptions.ConnectionError as e: raise unittest.SkipTest("elasticsearch not running on %r" % url, e) From 7a7f99977c573cd315b28650ef851007f89b4a0d Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 01:59:57 +0100 Subject: [PATCH 21/51] Uses a build matrix to test ES 1.x or ES 2.x --- .travis.yml | 25 +- setup.py | 1 - .../elasticsearch2_tests/__init__.py | 7 +- .../elasticsearch2_tests/test_backend.py | 270 +++++++++--------- .../elasticsearch2_tests/test_inputs.py | 2 +- .../elasticsearch2_tests/test_query.py | 8 +- test_haystack/elasticsearch_tests/__init__.py | 10 +- test_haystack/settings.py | 14 +- tox.ini | 126 ++++++-- 9 files changed, 286 insertions(+), 177 deletions(-) diff --git a/.travis.yml b/.travis.yml index bb01867e2..eac4d35d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,13 +26,21 @@ addons: before_install: - mkdir -p $HOME/download-cache - - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz - - tar zxf elasticsearch-2.2.1.tar.gz - - elasticsearch-2.2.1/bin/elasticsearch -d -Dtransport.tcp.port=29300 -Dhttp.port=29200 + - > + if [[ $VERSION_ES == '>=1.0.0,<2.0.0' ]]; + then + wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.tar.gz + tar zxf elasticsearch-1.7.5.tar.gz + elasticsearch-1.7.5/bin/elasticsearch -d -Dhttp.port=9200 + else + wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz + tar zxf elasticsearch-2.2.1.tar.gz + elasticsearch-2.2.1/bin/elasticsearch -d -Dhttp.port=9200 + fi install: - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi - - pip install requests "Django${DJANGO_VERSION}" + - pip install requests "Django${DJANGO_VERSION}" "elasticsearch${VERSION_ES}" - python setup.py clean build install before_script: @@ -44,16 +52,15 @@ script: env: matrix: - - DJANGO_VERSION=">=1.8,<1.9" - - DJANGO_VERSION=">=1.9,<1.10" + - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=1.0.0,<2.0.0" + - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=1.0.0,<2.0.0" + - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=2.0.0,<3.0.0" + - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=2.0.0,<3.0.0" matrix: allow_failures: - python: "pypy" -#services: -# - elasticsearch - notifications: irc: "irc.freenode.org#haystack" email: false diff --git a/setup.py b/setup.py index 5c44d97d2..073c37701 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,6 @@ ] tests_require = [ - 'elasticsearch>=2.0.0,<3.0.0', 'pysolr>=3.3.2', 'whoosh==2.5.4', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py index a456c9829..ba6384f46 100644 --- a/test_haystack/elasticsearch2_tests/__init__.py +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -10,11 +10,14 @@ def setup(): try: + import elasticsearch + if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): + raise ImportError from elasticsearch import Elasticsearch, exceptions except ImportError: - raise unittest.SkipTest("elasticsearch-py not installed.") + raise unittest.SkipTest("'elasticsearch>=2.0.0,<3.0.0' not installed.") - url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] es = 
Elasticsearch(url) try: es.info() diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py index 0ba061eda..cdefaacc7 100644 --- a/test_haystack/elasticsearch2_tests/test_backend.py +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -36,16 +36,16 @@ def clear_elasticsearch_index(): # Wipe it clean. - raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) try: - raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) raw_es.indices.refresh() except elasticsearch.TransportError: pass # Since we've just completely deleted the index, we'll reset setup_complete so the next access will # correctly define the mappings: - connections['elasticsearch2'].get_backend().setup_complete = False + connections['elasticsearch'].get_backend().setup_complete = False class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): @@ -217,7 +217,7 @@ class TestSettings(TestCase): def test_kwargs_are_passed_on(self): from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend backend = ElasticsearchSearchBackend('alias', **{ - 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'], + 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], 'INDEX_NAME': 'testing', 'KWARGS': {'max_retries': 42} }) @@ -230,18 +230,18 @@ def setUp(self): super(Elasticsearch2SearchBackendTestCase, self).setUp() # Wipe it clean. - self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. 
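+        # (An empty existing_mapping guarantees setup() sees a difference and
+        # pushes the mapping fresh for every test run.)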
self.sb.existing_mapping = {} @@ -257,19 +257,19 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2SearchBackendTestCase, self).tearDown() self.sb.silently_fail = True def raw_search(self, query): try: - return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) except elasticsearch.TransportError: return {} def test_non_silent(self): - bad_sb = connections['elasticsearch2'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', - SILENTLY_FAIL=False, TIMEOUT=1) + bad_sb = connections['elasticsearch'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) try: bad_sb.update(self.smmi, self.sample_objs) @@ -296,14 +296,14 @@ def test_non_silent(self): pass def test_update_no_documents(self): - url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] - index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME'] + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'] - sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) self.assertEqual(sb.update(self.smmi, []), None) - sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, - SILENTLY_FAIL=False) + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) try: sb.update(self.smmi, []) self.fail() @@ -502,7 +502,7 @@ def test_more_like_this(self): self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) def test_build_schema(self): - old_ui = connections['elasticsearch2'].get_unified_index() + old_ui = connections['elasticsearch'].get_unified_index() (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) self.assertEqual(content_field_name, 'text') @@ -542,17 +542,17 @@ def test_build_schema(self): }) def test_verify_type(self): - old_ui = connections['elasticsearch2'].get_unified_index() + old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() ui.build(indexes=[smtmmi]) - connections['elasticsearch2']._index = ui - sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = ui + sb = connections['elasticsearch'].get_backend() sb.update(smtmmi, self.sample_objs) self.assertEqual(sb.search('*:*')['hits'], 3) self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) - connections['elasticsearch2']._index = old_ui + connections['elasticsearch']._index = old_ui class CaptureHandler(std_logging.Handler): @@ -576,26 +576,26 @@ def setUp(self): # Stow. # Point the backend at a URL that doesn't exist so we can watch the # sparks fly. 
- self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = "%s/foo/" % self.old_es_url + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url self.cap = CaptureHandler() logging.getLogger('haystack').addHandler(self.cap) import haystack logging.getLogger('haystack').removeHandler(haystack.stream) # Setup the rest of the bits. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = ui + self.sb = connections['elasticsearch'].get_backend() def tearDown(self): import haystack # Restore. - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = self.old_es_url - connections['elasticsearch2']._index = self.old_ui + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url + connections['elasticsearch']._index = self.old_ui logging.getLogger('haystack').removeHandler(self.cap) logging.getLogger('haystack').addHandler(haystack.stream) @@ -633,47 +633,47 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() - self.sq = connections['elasticsearch2'].get_query() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + self.sq = connections['elasticsearch'].get_query() # Force indexing of the content. - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() def test_log_query(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=False): len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. - self.sq = connections['elasticsearch2'].query(using='elasticsearch2') + self.sq = connections['elasticsearch'].query(using='elasticsearch') self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) - self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + self.assertEqual(len(connections['elasticsearch'].queries), 1) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. 
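+        # (A second search under DEBUG=True should leave two entries in the
+        # connection's query log, one per executed query.)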
- self.sq = connections['elasticsearch2'].query('elasticsearch2') + self.sq = connections['elasticsearch'].query('elasticsearch') self.sq.add_filter(SQ(name='bar')) self.sq.add_filter(SQ(text='moof')) len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) - self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + self.assertEqual(len(connections['elasticsearch'].queries), 2) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') - self.assertEqual(connections['elasticsearch2'].queries[1]['query_string'], + self.assertEqual(connections['elasticsearch'].queries[1]['query_string'], u'(name:(bar) AND text:(moof))') @@ -689,14 +689,14 @@ def setUp(self): super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') - self.rsqs = RelatedSearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') + self.rsqs = RelatedSearchQuerySet('elasticsearch') # Ugly but not constantly reindexing saves us almost 50% runtime. global lssqstc_all_loaded @@ -708,11 +708,11 @@ def setUp(self): clear_elasticsearch_index() # Force indexing of the content. - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown() def test_load_all(self): @@ -724,28 +724,28 @@ def test_load_all(self): def test_iter(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() results = sorted([int(result.pk) for result in sqs]) self.assertEqual(results, list(range(1, 24))) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_slice(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_values_slicing(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends @@ -762,50 +762,50 @@ def test_values_slicing(self): results = self.sqs.all().order_by('pub_date').values_list('pk', 
flat=True) self.assertListEqual(results[1:11], expected_pks) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_count(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() self.assertEqual(sqs.count(), 23) self.assertEqual(sqs.count(), 23) self.assertEqual(len(sqs), 23) self.assertEqual(sqs.count(), 23) # Should only execute one query to count the length of the result set. - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_manual_iter(self): results = self.sqs.all() reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all() self.assertEqual(len(results._result_cache), 0) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_cache_is_full(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), False) results = self.sqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test___and__(self): sqs1 = self.sqs.filter(content='foo') @@ -905,62 +905,62 @@ def test_related_load_all_queryset(self): def test_related_iter(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.rsqs.all() results = set([int(result.pk) for result in sqs]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_slice(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in 
results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_manual_iter(self): results = self.rsqs.all() reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = sorted([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, list(range(1, 24))) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_fill_cache(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all() self.assertEqual(len(results._result_cache), 0) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_related_cache_is_full(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.rsqs._cache_is_full(), False) results = self.rsqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) - self.assertEqual(len(connections['elasticsearch2'].queries), 5) + self.assertEqual(len(connections['elasticsearch'].queries), 5) def test_quotes_regression(self): sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") @@ -1036,26 +1036,26 @@ def setUp(self): super(LiveElasticsearch2SpellingTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSpellingIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. 
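+        # (Re-running setup() recreates the index, including its analysis
+        # settings and field mapping, after the wipe above.)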
- self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() self.sb.setup() - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SpellingTestCase, self).tearDown() def test_spelling(self): @@ -1074,21 +1074,21 @@ def setUp(self): # Wipe it clean. clear_elasticsearch_index() - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') - self.smmi.update(using='elasticsearch2') - self.sammi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() def test_more_like_this(self): @@ -1132,30 +1132,30 @@ def setUp(self): super(LiveElasticsearch2AutocompleteTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. - self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() self.sb.setup() - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() def test_build_schema(self): - self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, @@ -1231,14 +1231,14 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.srtsi = Elasticsearch2RoundTripSearchIndex() self.ui.build(indexes=[self.srtsi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Fake indexing. mock = MockModel() @@ -1247,7 +1247,7 @@ def setUp(self): def tearDown(self): # Restore. 
- connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2RoundTripTestCase, self).tearDown() def test_round_trip(self): @@ -1282,21 +1282,21 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') - self.smmi.update(using='elasticsearch2') - self.sammi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2PickleTestCase, self).tearDown() def test_pickling(self): @@ -1317,16 +1317,16 @@ def setUp(self): super(Elasticsearch2BoostBackendTestCase, self).setUp() # Wipe it clean. - self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2BoostMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() self.sample_objs = [] @@ -1345,17 +1345,17 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2BoostBackendTestCase, self).tearDown() def raw_search(self, query): - return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) def test_boost(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) - results = SearchQuerySet(using='elasticsearch2').filter(SQ(author='daniel') | SQ(editor='daniel')) + results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual(set([result.id for result in results]), set([ 'core.afourthmockmodel.4', @@ -1379,12 +1379,12 @@ def test__to_python(self): class RecreateIndexTestCase(TestCase): def setUp(self): self.raw_es = elasticsearch.Elasticsearch( - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) def test_recreate_index(self): clear_elasticsearch_index() - sb = connections['elasticsearch2'].get_backend() + sb = connections['elasticsearch'].get_backend() sb.silently_fail = True sb.setup() @@ -1410,12 +1410,12 @@ def setUp(self): clear_elasticsearch_index() # Stow. 
- self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2FacetingMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. self.sb.existing_mapping = {} @@ -1438,12 +1438,12 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2FacetingTestCase, self).tearDown() def test_facet(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').facet_counts() + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 5), ('Dan Watson', 4), @@ -1452,7 +1452,7 @@ def test_facet(self): ('Perry White', 5), ('George Taylor', 4), ]) - counts = SearchQuerySet('elasticsearch2').filter(content='white').facet('facet_field', + counts = SearchQuerySet('elasticsearch').filter(content='white').facet('facet_field', order='reverse_count').facet_counts() self.assertEqual(counts['fields']['facet_field'], [ ('Dan Watson', 2), @@ -1461,7 +1461,7 @@ def test_facet(self): def test_multiple_narrow(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').narrow('editor_exact:"Perry White"').narrow( + counts = SearchQuerySet('elasticsearch').narrow('editor_exact:"Perry White"').narrow( 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), @@ -1469,7 +1469,7 @@ def test_multiple_narrow(self): def test_narrow(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').narrow( + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').narrow( 'editor_exact:"Perry White"').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), @@ -1484,8 +1484,8 @@ def test_date_facet(self): start = datetime.date(2013, 9, 1) end = datetime.date(2013, 9, 30) # Facet by day - counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, - gap_by='day').facet_counts() + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 2), (datetime.datetime(2013, 9, 2), 3), @@ -1493,8 +1493,8 @@ def test_date_facet(self): (datetime.datetime(2013, 9, 4), 2), ]) # By month - counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, - gap_by='month').facet_counts() + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 9), ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py index 777334fb2..adc87d16d 100644 --- a/test_haystack/elasticsearch2_tests/test_inputs.py +++ b/test_haystack/elasticsearch2_tests/test_inputs.py 
@@ -10,7 +10,7 @@ class Elasticsearch2InputTestCase(TestCase): def setUp(self): super(Elasticsearch2InputTestCase, self).setUp() - self.query_obj = connections['elasticsearch2'].get_query() + self.query_obj = connections['elasticsearch'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py index 968180686..65d3cfef0 100644 --- a/test_haystack/elasticsearch2_tests/test_query.py +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -17,7 +17,7 @@ class Elasticsearch2SearchQueryTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQueryTestCase, self).setUp() - self.sq = connections['elasticsearch2'].get_query() + self.sq = connections['elasticsearch'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*:*') @@ -157,7 +157,7 @@ def test_in_filter_values_list(self): self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') def test_narrow_sq(self): - sqs = SearchQuerySet(using='elasticsearch2').narrow(SQ(foo='moof')) + sqs = SearchQuerySet(using='elasticsearch').narrow(SQ(foo='moof')) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') @@ -166,7 +166,7 @@ def test_narrow_sq(self): class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() - self.backend = connections['elasticsearch2'].get_backend() + self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (0, 9, 9) @@ -189,7 +189,7 @@ def test_build_query_with_dwithin_range(self): class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() - self.backend = connections['elasticsearch2'].get_backend() + self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (1, 0, 0) diff --git a/test_haystack/elasticsearch_tests/__init__.py b/test_haystack/elasticsearch_tests/__init__.py index 4066af099..1736e1590 100644 --- a/test_haystack/elasticsearch_tests/__init__.py +++ b/test_haystack/elasticsearch_tests/__init__.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# -*- coding: utf-8 -*- import unittest import warnings @@ -8,8 +8,12 @@ warnings.simplefilter('ignore', Warning) + def setup(): try: + import elasticsearch + if not ((1, 0, 0) <= elasticsearch.__version__ < (2, 0, 0)): + raise ImportError from elasticsearch import Elasticsearch, ElasticsearchException except ImportError: raise unittest.SkipTest("elasticsearch-py not installed.") @@ -18,5 +22,5 @@ def setup(): try: es.info() except ElasticsearchException as e: - raise unittest.SkipTest("elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) - + raise unittest.SkipTest( + "elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index 0fa301297..14cfb7517 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -84,12 +84,6 @@ 'INDEX_NAME': 'test_default', 'INCLUDE_SPELLING': True, }, - 'elasticsearch2': { - 'ENGINE': 
'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', - 'URL': 'http://127.0.0.1:29200/', - 'INDEX_NAME': 'test_default', - 'INCLUDE_SPELLING': True, - }, 'simple': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, @@ -99,3 +93,11 @@ 'INCLUDE_SPELLING': True, }, } + +if os.getenv('VERSION_ES') == ">=2.0.0,<3.0.0": + HAYSTACK_CONNECTIONS['elasticsearch'] = { + 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', + 'URL': '127.0.0.1:9200/', + 'INDEX_NAME': 'test_default', + 'INCLUDE_SPELLING': True, + } diff --git a/tox.ini b/tox.ini index b63dc9c0f..1c71c9bbc 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,21 @@ [tox] envlist = docs, - py27-django1.8, - py27-django1.9, - py34-django1.8, - py34-django1.9, - py35-django1.8, - py35-django1.9, - pypy-django1.8, - pypy-django1.9, + py27-django1.8-es1.x, + py27-django1.9-es1.x, + py34-django1.8-es1.x, + py34-django1.9-es1.x, + py35-django1.8-es1.x, + py35-django1.9-es1.x, + pypy-django1.8-es1.x, + pypy-django1.9-es1.x, + py27-django1.8-es2.x, + py27-django1.9-es2.x, + py34-django1.8-es2.x, + py34-django1.9-es2.x, + py35-django1.8-es2.x, + py35-django1.9-es2.x, + pypy-django1.8-es2.x, + pypy-django1.9-es2.x, [base] deps = requests @@ -20,54 +28,140 @@ deps = deps = Django>=1.8,<1.9 +[es2.x] +deps = + elasticsearch>=2.0.0,<3.0.0 + +[es1.x] +deps = + elasticsearch>=1.0.0,<2.0.0 + [testenv] commands = python test_haystack/solr_tests/server/wait-for-solr python {toxinidir}/setup.py test -[testenv:pypy-django1.8] +[testenv:pypy-django1.8-es1.x] +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:pypy-django1.9-es1.x] +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py27-django1.8-es1.x] +basepython = python2.7 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py27-django1.9-es1.x] +basepython = python2.7 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py34-django1.8-es1.x] +basepython = python3.4 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py34-django1.9-es1.x] +basepython = python3.4 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:py35-django1.8-es1.x] +basepython = python3.5 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.8]deps} + {[base]deps} + +[testenv:py35-django1.9-es1.x] +basepython = python3.5 +setenv = VERSION_ES=>=1.0.0,<2.0.0 +deps = + {[es1.x]deps} + {[django1.9]deps} + {[base]deps} + +[testenv:pypy-django1.8-es2.x] +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:pypy-django1.9] +[testenv:pypy-django1.9-es2.x] +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py27-django1.8] +[testenv:py27-django1.8-es2.x] basepython = python2.7 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py27-django1.9] +[testenv:py27-django1.9-es2.x] basepython = python2.7 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py34-django1.8] +[testenv:py34-django1.8-es2.x] basepython = python3.4 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py34-django1.9] +[testenv:py34-django1.9-es2.x] 
basepython = python3.4 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} -[testenv:py35-django1.8] +[testenv:py35-django1.8-es2.x] basepython = python3.5 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.8]deps} {[base]deps} -[testenv:py35-django1.9] +[testenv:py35-django1.9-es2.x] basepython = python3.5 +setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = + {[es2.x]deps} {[django1.9]deps} {[base]deps} From ffebab946c2a40c08ad6120cc74cc090ed712c2e Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 21:54:30 +0100 Subject: [PATCH 22/51] Uses a build matrix to test ES 1.x or ES 2.x - set literals on tests --- .../elasticsearch2_tests/test_backend.py | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py index cdefaacc7..b321a4eb4 100644 --- a/test_haystack/elasticsearch2_tests/test_backend.py +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -417,7 +417,7 @@ def test_search(self): self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) - self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3'])) + self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), {u'2', u'1', u'3'}) self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) @@ -434,7 +434,7 @@ def test_search(self): self.assertEqual(results['hits'], 3) self.assertSetEqual( set(results['facets']['fields']['name']), - set([('daniel3', 1), ('daniel2', 1), ('daniel1', 1)]) + {('daniel3', 1), ('daniel2', 1), ('daniel1', 1)} ) self.assertEqual(self.sb.search('', date_facets={ @@ -451,8 +451,8 @@ def test_search(self): self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['queries'], {u'name': 3}) - self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) - results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) + self.assertEqual(self.sb.search('', narrow_queries={'name:daniel1'}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', narrow_queries={'name:daniel1'}) self.assertEqual(results['hits'], 1) # Ensure that swapping the ``result_class`` works. 
@@ -782,7 +782,7 @@ def test_manual_iter(self): self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, - set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): @@ -900,8 +900,8 @@ def test_related_load_all_queryset(self): sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) - self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20])) - self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23])) + self.assertEqual(set([obj.object.id for obj in sqs]), {12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20}) + self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), {21, 22, 23}) def test_related_iter(self): reset_search_queries() @@ -909,7 +909,7 @@ def test_related_iter(self): sqs = self.rsqs.all() results = set([int(result.pk) for result in sqs]) self.assertEqual(results, - set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_slice(self): @@ -928,7 +928,7 @@ def test_related_slice(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') - self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) + self.assertEqual(set([int(result.pk) for result in results[20:30]]), {21, 22, 23}) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_manual_iter(self): @@ -1095,19 +1095,19 @@ def test_more_like_this(self): mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) results = [result.pk for result in mlt] self.assertEqual(mlt.count(), 11) - self.assertEqual(set(results), set([u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'])) + self.assertEqual(set(results), {u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'}) self.assertEqual(len(results), 10) alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) results = [result.pk for result in alt_mlt] self.assertEqual(alt_mlt.count(), 9) - self.assertEqual(set(results), set([u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'])) + self.assertEqual(set(results), {u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'}) self.assertEqual(len(results), 9) alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) results = [result.pk for result in alt_mlt_with_models] self.assertEqual(alt_mlt_with_models.count(), 10) - self.assertEqual(set(results), set([u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'])) + self.assertEqual(set(results), {u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'}) self.assertEqual(len(results), 10) if hasattr(MockModel.objects, 'defer'): @@ -1184,8 +1184,8 @@ def test_build_schema(self): def test_autocomplete(self): autocomplete = self.sqs.autocomplete(text_auto='mod') self.assertEqual(autocomplete.count(), 16) - self.assertEqual(set([result.pk for result in autocomplete]), 
set( - ['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'])) + self.assertEqual(set([result.pk for result in autocomplete]), + {'1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'}) self.assertTrue('mod' in autocomplete[0].text.lower()) self.assertTrue('mod' in autocomplete[1].text.lower()) self.assertTrue('mod' in autocomplete[2].text.lower()) @@ -1197,7 +1197,7 @@ def test_autocomplete(self): autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') self.assertEqual(autocomplete_2.count(), 13) self.assertEqual(set([result.pk for result in autocomplete_2]), - set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'])) + {'1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'}) map_results = {result.pk: result for result in autocomplete_2} self.assertTrue('your' in map_results['1'].text.lower()) self.assertTrue('mod' in map_results['1'].text.lower()) @@ -1209,18 +1209,18 @@ def test_autocomplete(self): # Test multiple fields. autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') self.assertEqual(autocomplete_3.count(), 4) - self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14'])) + self.assertEqual(set([result.pk for result in autocomplete_3]), {'12', '1', '22', '14'}) self.assertEqual(len([result.pk for result in autocomplete_3]), 4) # Test numbers in phrases autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') self.assertEqual(autocomplete_4.count(), 1) - self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) # Test numbers alone autocomplete_4 = self.sqs.autocomplete(text_auto='867') self.assertEqual(autocomplete_4.count(), 1) - self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) class LiveElasticsearch2RoundTripTestCase(TestCase): @@ -1357,12 +1357,9 @@ def test_boost(self): results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) - self.assertEqual(set([result.id for result in results]), set([ - 'core.afourthmockmodel.4', - 'core.afourthmockmodel.3', - 'core.afourthmockmodel.1', - 'core.afourthmockmodel.2' - ])) + self.assertEqual(set([result.id for result in results]), + {'core.afourthmockmodel.4', 'core.afourthmockmodel.3', 'core.afourthmockmodel.1', + 'core.afourthmockmodel.2'}) def test__to_python(self): self.assertEqual(self.sb._to_python('abc'), 'abc') From e2b584f557e1fe1e0d7a4d594a37509ff2080e63 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 22:33:27 +0100 Subject: [PATCH 23/51] Uses a build matrix to test ES 1.x or ES 2.x - set literals on tests --- test_haystack/elasticsearch2_tests/test_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py index 65d3cfef0..c66191c59 100644 --- a/test_haystack/elasticsearch2_tests/test_query.py +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -105,7 +105,7 @@ def test_build_query_in_filter_datetime(self): def test_build_query_in_with_set(self): self.sq.add_filter(SQ(content='why')) - self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) + self.sq.add_filter(SQ(title__in={"A Famous Paper", "An Infamous Article"})) self.assertTrue('((why) AND title:(' in 
self.sq.build_query()) self.assertTrue('"A Famous Paper"' in self.sq.build_query()) self.assertTrue('"An Infamous Article"' in self.sq.build_query()) From 217f99659a9eed00a8f12e59637285a01ccc1cf8 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 23:49:50 +0100 Subject: [PATCH 24/51] Adds delay on test_multiprocessing test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing --- test_haystack/solr_tests/test_management_commands.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py index 9cdaf7948..fbf475e4d 100644 --- a/test_haystack/solr_tests/test_management_commands.py +++ b/test_haystack/solr_tests/test_management_commands.py @@ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals import datetime +import time from tempfile import mkdtemp import pysolr @@ -157,6 +158,7 @@ def test_multiprocessing(self): # TODO: Watch the output, make sure there are multiple pids. call_command('update_index', verbosity=2, workers=2, batchsize=5) + time.sleep(2) self.assertEqual(self.solr.search('*:*').hits, 23) call_command('clear_index', interactive=False, verbosity=0) From 851c2c8a3a8a8235055514a9084d4f6f52d6d239 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Fri, 25 Mar 2016 00:01:39 +0100 Subject: [PATCH 25/51] Revert: Adds delay on test_multiprocessing test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing --- test_haystack/solr_tests/test_management_commands.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py index fbf475e4d..9cdaf7948 100644 --- a/test_haystack/solr_tests/test_management_commands.py +++ b/test_haystack/solr_tests/test_management_commands.py @@ -3,7 +3,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals import datetime -import time from tempfile import mkdtemp import pysolr @@ -158,7 +157,6 @@ def test_multiprocessing(self): # TODO: Watch the output, make sure there are multiple pids. 
call_command('update_index', verbosity=2, workers=2, batchsize=5) - time.sleep(2) self.assertEqual(self.solr.search('*:*').hits, 23) call_command('clear_index', interactive=False, verbosity=0) From 05ce4e7fb8143205805007d980e0b3da10144614 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Fri, 25 Mar 2016 12:58:02 +0100 Subject: [PATCH 26/51] Removes pool.join on command update_index --- haystack/management/commands/update_index.py | 1 - 1 file changed, 1 deletion(-) diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py index 995a1db78..52c489f37 100755 --- a/haystack/management/commands/update_index.py +++ b/haystack/management/commands/update_index.py @@ -217,7 +217,6 @@ def update_backend(self, label, using): pool = multiprocessing.Pool(self.workers) pool.map(worker, ghetto_queue) pool.close() - pool.join() if self.remove: if self.start_date or self.end_date or total <= 0: From 71af281777ffc37d4245cc013948d93ca04502b0 Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Fri, 25 Mar 2016 13:04:57 +0100 Subject: [PATCH 27/51] Revert: Test multiprocessing with context manager --- haystack/management/commands/update_index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py index 52c489f37..995a1db78 100755 --- a/haystack/management/commands/update_index.py +++ b/haystack/management/commands/update_index.py @@ -217,6 +217,7 @@ def update_backend(self, label, using): pool = multiprocessing.Pool(self.workers) pool.map(worker, ghetto_queue) pool.close() + pool.join() if self.remove: if self.start_date or self.end_date or total <= 0: From 6ba957cc6d49668903cfe2f9438394355dd8575a Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Sun, 27 Mar 2016 16:37:36 +0200 Subject: [PATCH 28/51] Renames a non-existent fixture file --- test_haystack/elasticsearch2_tests/test_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py index b321a4eb4..d8388359d 100644 --- a/test_haystack/elasticsearch2_tests/test_backend.py +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -624,7 +624,7 @@ def test_all_cases(self): class LiveElasticsearch2SearchQueryTestCase(TestCase): - fixtures = ['initial_data.json'] + fixtures = ['base_data.json'] From b3a360cf45189787b2536cc030d03542a781b51f Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Wed, 23 Mar 2016 16:18:19 +0100 Subject: [PATCH 29/51] Elasticsearch 2.x support --- haystack/backends/elasticsearch2_backend.py | 1045 ++++++++++++ setup.py | 2 +- .../elasticsearch2_tests/__init__.py | 22 + .../elasticsearch2_tests/test_backend.py | 1500 +++++++++++++++++ .../elasticsearch2_tests/test_inputs.py | 85 + .../elasticsearch2_tests/test_query.py | 209 +++ test_haystack/settings.py | 6 + 7 files changed, 2868 insertions(+), 1 deletion(-) create mode 100644 haystack/backends/elasticsearch2_backend.py create mode 100644 test_haystack/elasticsearch2_tests/__init__.py create mode 100644 test_haystack/elasticsearch2_tests/test_backend.py create mode 100644 test_haystack/elasticsearch2_tests/test_inputs.py create mode 100644 test_haystack/elasticsearch2_tests/test_query.py diff --git a/haystack/backends/elasticsearch2_backend.py b/haystack/backends/elasticsearch2_backend.py new file mode 100644 index 000000000..4c92d4c03 ---
/dev/null +++ b/haystack/backends/elasticsearch2_backend.py @@ -0,0 +1,1045 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime +import re +import warnings + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.utils import six + +import haystack +from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query +from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, FUZZY_MAX_EXPANSIONS, FUZZY_MIN_SIM, ID +from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument +from haystack.inputs import Clean, Exact, PythonData, Raw +from haystack.models import SearchResult +from haystack.utils import get_identifier, get_model_ct +from haystack.utils import log as logging +from haystack.utils.app_loading import haystack_get_model + +try: + import elasticsearch + if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): + raise ImportError + from elasticsearch.helpers import bulk, scan + from elasticsearch.exceptions import NotFoundError +except ImportError: + raise MissingDependency("The 'elasticsearch2' backend requires the installation of 'elasticsearch>=2.0.0,<3.0.0'. Please refer to the documentation.") + + +DATETIME_REGEX = re.compile( + r'^(?P\d{4})-(?P\d{2})-(?P\d{2})T' + r'(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d+)?$') + + +class Elasticsearch2SearchBackend(BaseSearchBackend): + # Word reserved by Elasticsearch for special use. + RESERVED_WORDS = ( + 'AND', + 'NOT', + 'OR', + 'TO', + ) + + # Characters reserved by Elasticsearch for special use. + # The '\\' must come first, so as not to overwrite the other slash replacements. + RESERVED_CHARACTERS = ( + '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', + '[', ']', '^', '"', '~', '*', '?', ':', '/', + ) + + # Settings to add an n-gram & edge n-gram analyzer. + DEFAULT_SETTINGS = { + 'settings': { + "analysis": { + "analyzer": { + "ngram_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["haystack_ngram", "lowercase"] + }, + "edgengram_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["haystack_edgengram", "lowercase"] + } + }, + "tokenizer": { + "haystack_ngram_tokenizer": { + "type": "nGram", + "min_gram": 3, + "max_gram": 15, + }, + "haystack_edgengram_tokenizer": { + "type": "edgeNGram", + "min_gram": 2, + "max_gram": 15, + "side": "front" + } + }, + "filter": { + "haystack_ngram": { + "type": "nGram", + "min_gram": 3, + "max_gram": 15 + }, + "haystack_edgengram": { + "type": "edgeNGram", + "min_gram": 2, + "max_gram": 15 + } + } + } + } + } + + def __init__(self, connection_alias, **connection_options): + super(Elasticsearch2SearchBackend, self).__init__(connection_alias, **connection_options) + + if 'URL' not in connection_options: + raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias) + + if 'INDEX_NAME' not in connection_options: + raise ImproperlyConfigured("You must specify a 'INDEX_NAME' in your settings for connection '%s'." % connection_alias) + + self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {})) + self.index_name = connection_options['INDEX_NAME'] + self.log = logging.getLogger('haystack') + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + + def setup(self): + """ + Defers loading until needed. 
+ """ + # Get the existing mapping & cache it. We'll compare it + # during the ``update`` & if it doesn't match, we'll put the new + # mapping. + try: + self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name) + except NotFoundError: + pass + except Exception: + if not self.silently_fail: + raise + + unified_index = haystack.connections[self.connection_alias].get_unified_index() + self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields()) + current_mapping = { + 'modelresult': { + 'properties': field_mapping, + } + } + + if current_mapping != self.existing_mapping: + try: + # Make sure the index is there first. + self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400) + self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping) + self.existing_mapping = current_mapping + except Exception: + if not self.silently_fail: + raise + + self.setup_complete = True + + def update(self, index, iterable, commit=True): + """ + Updates the backend when given a SearchIndex and a collection of + documents. + + :param index: The SearchIndex to update. + :param iterable: The collection of documents. + :param commit: True to refresh the search index after the update. + """ + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True) + return + + prepped_docs = [] + + for obj in iterable: + try: + prepped_data = index.full_prepare(obj) + final_data = {} + + # Convert the data to make sure it's happy. + for key, value in prepped_data.items(): + final_data[key] = self._from_python(value) + final_data['_id'] = final_data[ID] + + prepped_docs.append(final_data) + except SkipDocument: + self.log.debug(u"Indexing for object `%s` skipped", obj) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + # We'll log the object identifier but won't include the actual object + # to avoid the possibility of that generating encoding errors while + # processing the log message: + self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, + extra={"data": {"index": index, + "object": get_identifier(obj)}}) + + bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult') + + if commit: + self.conn.indices.refresh(index=self.index_name) + + def remove(self, obj_or_string, commit=True): + """ + Removes a document/object from the backend. Can be either a model + instance or the identifier (i.e. ``app_name.model_name.id``) in the + event the object no longer exists. + + :param obj_or_string: The model instance or the identifier. + :param commit: True to refresh the search index after the remove. 
+ """ + doc_id = get_identifier(obj_or_string) + + if not self.setup_complete: + try: + self.setup() + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, + exc_info=True) + return + + try: + self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404) + + if commit: + self.conn.indices.refresh(index=self.index_name) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, exc_info=True) + + def clear(self, models=None, commit=True): + """ + Clears the backend of all documents/objects for a collection of models. + + :param models: List or tuple of models to clear. + :param commit: Not used. + """ + if models is not None: + assert isinstance(models, (list, tuple)) + + try: + if models is None: + self.conn.indices.delete(index=self.index_name, ignore=404) + self.setup_complete = False + self.existing_mapping = {} + self.content_field_name = None + else: + models_to_delete = [] + + for model in models: + models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) + + # Delete using scroll API + query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}} + generator = scan(self.conn, query=query, index=self.index_name, doc_type='modelresult') + actions = ({ + '_op_type': 'delete', + '_id': doc['_id'], + } for doc in generator) + bulk(self.conn, actions=actions, index=self.index_name, doc_type='modelresult') + self.conn.indices.refresh(index=self.index_name) + + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + if models is not None: + self.log.error("Failed to clear Elasticsearch index of models '%s': %s", + ','.join(models_to_delete), e, exc_info=True) + else: + self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True) + + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): + index = haystack.connections[self.connection_alias].get_unified_index() + content_field = index.document_field + + if query_string == '*:*': + kwargs = { + 'query': { + "match_all": {} + }, + } + else: + kwargs = { + 'query': { + 'query_string': { + 'default_field': content_field, + 'default_operator': DEFAULT_OPERATOR, + 'query': query_string, + 'analyze_wildcard': True, + 'auto_generate_phrase_queries': True, + 'fuzzy_min_sim': FUZZY_MIN_SIM, + 'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS, + }, + }, + } + + # so far, no filters + filters = [] + + if fields: + if isinstance(fields, (list, set)): + fields = " ".join(fields) + + kwargs['fields'] = fields + + if sort_by is not None: + order_list = [] + for field, direction in sort_by: + if field == 'distance' and distance_point: + # Do the geo-enabled sort. + lng, lat = distance_point['point'].get_coords() + sort_kwargs = { + "_geo_distance": { + distance_point['field']: [lng, lat], + "order": direction, + "unit": "km" + } + } + else: + if field == 'distance': + warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") + + # Regular sorting. 
+ sort_kwargs = {field: {'order': direction}} + + order_list.append(sort_kwargs) + + kwargs['sort'] = order_list + + if start_offset is not None: + kwargs['from'] = start_offset + + if end_offset is not None: + kwargs['size'] = end_offset - start_offset + + if highlight is True: + kwargs['highlight'] = { + 'fields': { + content_field: {'store': 'yes'}, + } + } + + if self.include_spelling: + kwargs['suggest'] = { + 'suggest': { + 'text': spelling_query or query_string, + 'term': { + # Using content_field here will result in suggestions of stemmed words. + 'field': '_all', + }, + }, + } + + if narrow_queries is None: + narrow_queries = set() + + if facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, extra_options in facets.items(): + facet_options = { + 'meta': { + '_type': 'terms', + }, + 'terms': { + 'field': facet_fieldname, + } + } + if 'order' in extra_options: + facet_options['meta']['order'] = extra_options.pop('order') + # Special cases for options applied at the facet level (not the terms level). + if extra_options.pop('global_scope', False): + # Renamed "global_scope" since "global" is a python keyword. + facet_options['global'] = True + if 'facet_filter' in extra_options: + facet_options['facet_filter'] = extra_options.pop('facet_filter') + facet_options['terms'].update(extra_options) + kwargs['aggs'][facet_fieldname] = facet_options + + if date_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in date_facets.items(): + # Need to detect on gap_by & only add amount if it's more than one. + interval = value.get('gap_by').lower() + + # Need to detect on amount (can't be applied on months or years). + if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'): + # Just the first character is valid for use. + interval = "%s%s" % (value['gap_amount'], interval[:1]) + + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'date_histogram', + }, + 'date_histogram': { + 'field': facet_fieldname, + 'interval': interval, + }, + 'aggs': { + facet_fieldname: { + 'date_range': { + 'field': facet_fieldname, + 'ranges': [ + { + 'from': self._from_python(value.get('start_date')), + 'to': self._from_python(value.get('end_date')), + } + ] + } + } + } + } + + if query_facets is not None: + kwargs.setdefault('aggs', {}) + + for facet_fieldname, value in query_facets: + kwargs['aggs'][facet_fieldname] = { + 'meta': { + '_type': 'query', + }, + 'filter': { + 'query_string': { + 'query': value, + } + }, + } + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. 
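+ # The resulting content types are matched against DJANGO_CT with a terms filter below.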
+ model_choices = self.build_models_list() + else: + model_choices = [] + + if len(model_choices) > 0: + filters.append({"terms": {DJANGO_CT: model_choices}}) + + for q in narrow_queries: + filters.append({ + 'query_string': { + 'query': q + } + }) + + if within is not None: + from haystack.utils.geo import generate_bounding_box + + ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2']) + within_filter = { + "geo_bounding_box": { + within['field']: { + "top_left": { + "lat": north, + "lon": west + }, + "bottom_right": { + "lat": south, + "lon": east + } + } + }, + } + filters.append(within_filter) + + if dwithin is not None: + lng, lat = dwithin['point'].get_coords() + + # NB: the 1.0.0 release of elasticsearch introduce an + # incompatible change on the distance filter formating + if elasticsearch.VERSION >= (1, 0, 0): + distance = "%(dist).6f%(unit)s" % { + 'dist': dwithin['distance'].km, + 'unit': "km" + } + else: + distance = dwithin['distance'].km + + dwithin_filter = { + "geo_distance": { + "distance": distance, + dwithin['field']: { + "lat": lat, + "lon": lng + } + } + } + filters.append(dwithin_filter) + + # if we want to filter, change the query type to filteres + if filters: + kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}} + if len(filters) == 1: + kwargs['query']['filtered']["filter"] = filters[0] + else: + kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}} + + return kwargs + + @log_query + def search(self, query_string, **kwargs): + if len(query_string) == 0: + return { + 'results': [], + 'hits': 0, + } + + if not self.setup_complete: + self.setup() + + search_kwargs = self.build_search_kwargs(query_string, **kwargs) + search_kwargs['from'] = kwargs.get('start_offset', 0) + + order_fields = set() + for order in search_kwargs.get('sort', []): + for key in order.keys(): + order_fields.add(key) + + geo_sort = '_geo_distance' in order_fields + + end_offset = kwargs.get('end_offset') + start_offset = kwargs.get('start_offset', 0) + if end_offset is not None and end_offset > start_offset: + search_kwargs['size'] = end_offset - start_offset + + try: + raw_results = self.conn.search(body=search_kwargs, + index=self.index_name, + doc_type='modelresult', + _source=True) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True) + raw_results = {} + + return self._process_results(raw_results, + highlight=kwargs.get('highlight'), + result_class=kwargs.get('result_class', SearchResult), + distance_point=kwargs.get('distance_point'), + geo_sort=geo_sort) + + def more_like_this(self, model_instance, additional_query_string=None, + start_offset=0, end_offset=None, models=None, + limit_to_registered_models=None, result_class=None, **kwargs): + from haystack import connections + + if not self.setup_complete: + self.setup() + + # Deferred models will have a different class ("RealClass_Deferred_fieldname") + # which won't be in our registry: + model_klass = model_instance._meta.concrete_model + + index = connections[self.connection_alias].get_unified_index().get_index(model_klass) + field_name = index.get_content_field() + params = {} + + if start_offset is not None: + params['from_'] = start_offset + + if end_offset is not None: + params['size'] = end_offset - start_offset + + doc_id = get_identifier(model_instance) + + try: + # More like this Query + # 
https://www.elastic.co/guide/en/elasticsearch/reference/2.2/query-dsl-mlt-query.html + mlt_query = { + 'query': { + 'more_like_this': { + 'fields': [field_name], + 'like': [{ + "_id": doc_id + }] + } + } + } + + narrow_queries = [] + + if additional_query_string and additional_query_string != '*:*': + additional_filter = { + "query": { + "query_string": { + "query": additional_query_string + } + } + } + narrow_queries.append(additional_filter) + + if limit_to_registered_models is None: + limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + + if models and len(models): + model_choices = sorted(get_model_ct(model) for model in models) + elif limit_to_registered_models: + # Using narrow queries, limit the results to only models handled + # with the current routers. + model_choices = self.build_models_list() + else: + model_choices = [] + + if len(model_choices) > 0: + model_filter = {"terms": {DJANGO_CT: model_choices}} + narrow_queries.append(model_filter) + + if len(narrow_queries) > 0: + mlt_query = { + "query": { + "filtered": { + 'query': mlt_query['query'], + 'filter': { + 'bool': { + 'must': list(narrow_queries) + } + } + } + } + } + + raw_results = self.conn.search( + body=mlt_query, + index=self.index_name, + doc_type='modelresult', + _source=True, **params) + except elasticsearch.TransportError as e: + if not self.silently_fail: + raise + + self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", + doc_id, e, exc_info=True) + raw_results = {} + + return self._process_results(raw_results, result_class=result_class) + + def _process_results(self, raw_results, highlight=False, + result_class=None, distance_point=None, + geo_sort=False): + from haystack import connections + results = [] + hits = raw_results.get('hits', {}).get('total', 0) + facets = {} + spelling_suggestion = None + + if result_class is None: + result_class = SearchResult + + if self.include_spelling and 'suggest' in raw_results: + raw_suggest = raw_results['suggest'].get('suggest') + if raw_suggest: + spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest]) + + if 'aggregations' in raw_results: + facets = { + 'fields': {}, + 'dates': {}, + 'queries': {}, + } + + for facet_fieldname, facet_info in raw_results['aggregations'].items(): + facet_type = facet_info['meta']['_type'] + if facet_type == 'terms': + facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual in facet_info['buckets']] + if 'order' in facet_info['meta']: + if facet_info['meta']['order'] == 'reverse_count': + srt = sorted(facets['fields'][facet_fieldname], key=lambda x: x[1]) + facets['fields'][facet_fieldname] = srt + elif facet_type == 'date_histogram': + # Elasticsearch provides UTC timestamps with an extra three + # decimals of precision, which datetime barfs on. 
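+ # The bucket key is milliseconds since the epoch, hence the division by 1000 below.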
+ facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['key'] / 1000), individual['doc_count']) for individual in facet_info['buckets']] + elif facet_type == 'query': + facets['queries'][facet_fieldname] = facet_info['doc_count'] + + unified_index = connections[self.connection_alias].get_unified_index() + indexed_models = unified_index.get_indexed_models() + content_field = unified_index.document_field + + for raw_result in raw_results.get('hits', {}).get('hits', []): + source = raw_result['_source'] + app_label, model_name = source[DJANGO_CT].split('.') + additional_fields = {} + model = haystack_get_model(app_label, model_name) + + if model and model in indexed_models: + for key, value in source.items(): + index = unified_index.get_index(model) + string_key = str(key) + + if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): + additional_fields[string_key] = index.fields[string_key].convert(value) + else: + additional_fields[string_key] = self._to_python(value) + + del(additional_fields[DJANGO_CT]) + del(additional_fields[DJANGO_ID]) + + if 'highlight' in raw_result: + additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '') + + if distance_point: + additional_fields['_point_of_origin'] = distance_point + + if geo_sort and raw_result.get('sort'): + from haystack.utils.geo import Distance + additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0])) + else: + additional_fields['_distance'] = None + + result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields) + results.append(result) + else: + hits -= 1 + + return { + 'results': results, + 'hits': hits, + 'facets': facets, + 'spelling_suggestion': spelling_suggestion, + } + + def build_schema(self, fields): + content_field_name = '' + mapping = { + DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, + } + + for field_name, field_class in fields.items(): + field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy() + if field_class.boost != 1.0: + field_mapping['boost'] = field_class.boost + + if field_class.document is True: + content_field_name = field_class.index_fieldname + + # Do this last to override `text` fields. + if field_mapping['type'] == 'string': + if field_class.indexed is False or hasattr(field_class, 'facet_for'): + field_mapping['index'] = 'not_analyzed' + del field_mapping['analyzer'] + + mapping[field_class.index_fieldname] = field_mapping + + return content_field_name, mapping + + def _iso_datetime(self, value): + """ + If value appears to be something datetime-like, return it in ISO format. + + Otherwise, return None. + """ + if hasattr(value, 'strftime'): + if hasattr(value, 'hour'): + return value.isoformat() + else: + return '%sT00:00:00' % value.isoformat() + + def _from_python(self, value): + """Convert more Python data types to ES-understandable JSON.""" + iso = self._iso_datetime(value) + if iso: + return iso + elif isinstance(value, six.binary_type): + # TODO: Be stricter. 
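+ # errors='replace' swaps undecodable bytes for the Unicode replacement character instead of raising.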
+ return six.text_type(value, errors='replace') + elif isinstance(value, set): + return list(value) + return value + + def _to_python(self, value): + """Convert values from ElasticSearch to native Python values.""" + if isinstance(value, (int, float, complex, list, tuple, bool)): + return value + + if isinstance(value, six.string_types): + possible_datetime = DATETIME_REGEX.search(value) + + if possible_datetime: + date_values = possible_datetime.groupdict() + + for dk, dv in date_values.items(): + date_values[dk] = int(dv) + + return datetime.datetime( + date_values['year'], date_values['month'], + date_values['day'], date_values['hour'], + date_values['minute'], date_values['second']) + + try: + # This is slightly gross but it's hard to tell otherwise what the + # string's original type might have been. Be careful who you trust. + converted_value = eval(value) + + # Try to handle most built-in types. + if isinstance( + converted_value, + (int, list, tuple, set, dict, float, complex)): + return converted_value + except Exception: + # If it fails (SyntaxError or its ilk) or we don't trust it, + # continue on. + pass + + return value + +# DRL_FIXME: Perhaps move to something where, if none of these +# match, call a custom method on the form that returns, per-backend, +# the right type of storage? +DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'} +FIELD_MAPPINGS = { + 'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'}, + 'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'}, + 'date': {'type': 'date'}, + 'datetime': {'type': 'date'}, + + 'location': {'type': 'geo_point'}, + 'boolean': {'type': 'boolean'}, + 'float': {'type': 'float'}, + 'long': {'type': 'long'}, + 'integer': {'type': 'long'}, +} + + +# Sucks that this is almost an exact copy of what's in the Solr backend, +# but we can't import due to dependencies. +class Elasticsearch2SearchQuery(BaseSearchQuery): + def matching_all_fragment(self): + return '*:*' + + def build_query_fragment(self, field, filter_type, value): + from haystack import connections + query_frag = '' + + if not hasattr(value, 'input_type_name'): + # Handle when we've got a ``ValuesListQuerySet``... + if hasattr(value, 'values_list'): + value = list(value) + + if isinstance(value, six.string_types): + # It's not an ``InputType``. Assume ``Clean``. + value = Clean(value) + else: + value = PythonData(value) + + # Prepare the query using the InputType. + prepared_value = value.prepare(self) + + if not isinstance(prepared_value, (set, list, tuple)): + # Then convert whatever we get back to what pysolr wants if needed. + prepared_value = self.backend._from_python(prepared_value) + + # 'content' is a special reserved word, much like 'pk' in + # Django's ORM layer. It indicates 'no special field'. + if field == 'content': + index_fieldname = '' + else: + index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field) + + filter_types = { + 'contains': u'%s', + 'startswith': u'%s*', + 'exact': u'%s', + 'gt': u'{%s TO *}', + 'gte': u'[%s TO *]', + 'lt': u'{* TO %s}', + 'lte': u'[* TO %s]', + 'fuzzy': u'%s~', + } + + if value.post_process is False: + query_frag = prepared_value + else: + if filter_type in ['contains', 'startswith', 'fuzzy']: + if value.input_type_name == 'exact': + query_frag = prepared_value + else: + # Iterate over terms & incorportate the converted form of each into the query. 
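+ # Each whitespace-separated term is converted, then the pieces are ANDed together.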
+ terms = [] + + if isinstance(prepared_value, six.string_types): + for possible_value in prepared_value.split(' '): + terms.append(filter_types[filter_type] % self.backend._from_python(possible_value)) + else: + terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value)) + + if len(terms) == 1: + query_frag = terms[0] + else: + query_frag = u"(%s)" % " AND ".join(terms) + elif filter_type == 'in': + in_options = [] + + for possible_value in prepared_value: + in_options.append(u'"%s"' % self.backend._from_python(possible_value)) + + query_frag = u"(%s)" % " OR ".join(in_options) + elif filter_type == 'range': + start = self.backend._from_python(prepared_value[0]) + end = self.backend._from_python(prepared_value[1]) + query_frag = u'["%s" TO "%s"]' % (start, end) + elif filter_type == 'exact': + if value.input_type_name == 'exact': + query_frag = prepared_value + else: + prepared_value = Exact(prepared_value).prepare(self) + query_frag = filter_types[filter_type] % prepared_value + else: + if value.input_type_name != 'exact': + prepared_value = Exact(prepared_value).prepare(self) + + query_frag = filter_types[filter_type] % prepared_value + + if len(query_frag) and not isinstance(value, Raw): + if not query_frag.startswith('(') and not query_frag.endswith(')'): + query_frag = "(%s)" % query_frag + + return u"%s%s" % (index_fieldname, query_frag) + + def build_alt_parser_query(self, parser_name, query_string='', **kwargs): + if query_string: + kwargs['v'] = query_string + + kwarg_bits = [] + + for key in sorted(kwargs.keys()): + if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]: + kwarg_bits.append(u"%s='%s'" % (key, kwargs[key])) + else: + kwarg_bits.append(u"%s=%s" % (key, kwargs[key])) + + return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits)) + + def build_params(self, spelling_query=None, **kwargs): + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class + } + order_by_list = None + + if self.order_by: + if order_by_list is None: + order_by_list = [] + + for field in self.order_by: + direction = 'asc' + if field.startswith('-'): + direction = 'desc' + field = field[1:] + order_by_list.append((field, direction)) + + search_kwargs['sort_by'] = order_by_list + + if self.date_facets: + search_kwargs['date_facets'] = self.date_facets + + if self.distance_point: + search_kwargs['distance_point'] = self.distance_point + + if self.dwithin: + search_kwargs['dwithin'] = self.dwithin + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset + + if self.facets: + search_kwargs['facets'] = self.facets + + if self.fields: + search_kwargs['fields'] = self.fields + + if self.highlight: + search_kwargs['highlight'] = self.highlight + + if self.models: + search_kwargs['models'] = self.models + + if self.narrow_queries: + search_kwargs['narrow_queries'] = self.narrow_queries + + if self.query_facets: + search_kwargs['query_facets'] = self.query_facets + + if self.within: + search_kwargs['within'] = self.within + + if spelling_query: + search_kwargs['spelling_query'] = spelling_query + + return search_kwargs + + def run(self, spelling_query=None, **kwargs): + """Builds and executes the query. 
Returns a list of search results.""" + final_query = self.build_query() + search_kwargs = self.build_params(spelling_query, **kwargs) + + if kwargs: + search_kwargs.update(kwargs) + + results = self.backend.search(final_query, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + self._facet_counts = self.post_process_facets(results) + self._spelling_suggestion = results.get('spelling_suggestion', None) + + def run_mlt(self, **kwargs): + """Builds and executes the query. Returns a list of search results.""" + if self._more_like_this is False or self._mlt_instance is None: + raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") + + additional_query_string = self.build_query() + search_kwargs = { + 'start_offset': self.start_offset, + 'result_class': self.result_class, + 'models': self.models + } + + if self.end_offset is not None: + search_kwargs['end_offset'] = self.end_offset - self.start_offset + + results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) + self._results = results.get('results', []) + self._hit_count = results.get('hits', 0) + + +class Elasticsearch2SearchEngine(BaseEngine): + backend = Elasticsearch2SearchBackend + query = Elasticsearch2SearchQuery diff --git a/setup.py b/setup.py index a6cb4ac52..6345d858d 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ ] tests_require = [ - 'elasticsearch>=1.0.0,<2.0.0', + 'elasticsearch>=2.0.0,<3.0.0', 'pysolr>=3.3.2', 'whoosh>=2.5.4,<3.0', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py new file mode 100644 index 000000000..8433081cd --- /dev/null +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +import warnings + +from django.conf import settings + +from ..utils import unittest + +warnings.simplefilter('ignore', Warning) + + +def setup(): + try: + from elasticsearch import Elasticsearch, ElasticsearchException + except ImportError: + raise unittest.SkipTest("elasticsearch-py not installed.") + + url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + es = Elasticsearch(url) + try: + es.info() + except ElasticsearchException as e: + raise unittest.SkipTest("elasticsearch not running on %r" % url, e) diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py new file mode 100644 index 000000000..0ba061eda --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -0,0 +1,1500 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime +import logging as std_logging +import operator +from decimal import Decimal + +import elasticsearch +from django.conf import settings +from django.test import TestCase +from django.test.utils import override_settings + +from haystack import connections, indexes, reset_search_queries +from haystack.exceptions import SkipDocument +from haystack.inputs import AutoQuery +from haystack.models import SearchResult +from haystack.query import RelatedSearchQuerySet, SearchQuerySet, SQ +from haystack.utils import log as logging +from haystack.utils.geo import Point +from haystack.utils.loading import UnifiedIndex +from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel +from ..mocks import MockSearchResult +from ..utils import unittest + +test_pickling = True + +try: + 
import cPickle as pickle +except ImportError: + try: + import pickle + except ImportError: + test_pickling = False + + +def clear_elasticsearch_index(): + # Wipe it clean. + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + try: + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + raw_es.indices.refresh() + except elasticsearch.TransportError: + pass + + # Since we've just completely deleted the index, we'll reset setup_complete so the next access will + # correctly define the mappings: + connections['elasticsearch2'].get_backend().setup_complete = False + + +class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2MockSearchIndexWithSkipDocument(Elasticsearch2MockSearchIndex): + def prepare_text(self, obj): + if obj.author == 'daniel3': + raise SkipDocument + return u"Indexed!\n%s" % obj.id + + +class Elasticsearch2MockSpellingIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author', faceted=True) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + def prepare_text(self, obj): + return obj.foo + + +class Elasticsearch2MaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + month = indexes.CharField(indexed=False) + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def prepare_month(self, obj): + return "%02d" % obj.pub_date.month + + def get_model(self): + return MockModel + + +class Elasticsearch2MockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(model_attr='foo', document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return MockModel + + +class Elasticsearch2AnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + name = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AnotherMockModel + + def prepare_text(self, obj): + return u"You might be searching for the user %s" % obj.author + + +class Elasticsearch2BoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField( + document=True, use_template=True, + template_name='search/indexes/core/mockmodel_template.txt' + ) + author = indexes.CharField(model_attr='author', weight=2.0) + editor = indexes.CharField(model_attr='editor') + pub_date = indexes.DateTimeField(model_attr='pub_date') + + def get_model(self): + return AFourthMockModel + + def prepare(self, obj): + data = super(Elasticsearch2BoostMockSearchIndex, self).prepare(obj) + + if obj.pk == 4: + data['boost'] = 5.0 + + return data + + +class Elasticsearch2FacetingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True) + author = indexes.CharField(model_attr='author', faceted=True) + editor = indexes.CharField(model_attr='editor', faceted=True) + pub_date = indexes.DateField(model_attr='pub_date', faceted=True) + facet_field = indexes.FacetCharField(model_attr='author') + + def 
prepare_text(self, obj):
+        return '%s %s' % (obj.author, obj.editor)
+
+    def get_model(self):
+        return AFourthMockModel
+
+
+class Elasticsearch2RoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(document=True, default='')
+    name = indexes.CharField()
+    is_active = indexes.BooleanField()
+    post_count = indexes.IntegerField()
+    average_rating = indexes.FloatField()
+    price = indexes.DecimalField()
+    pub_date = indexes.DateField()
+    created = indexes.DateTimeField()
+    tags = indexes.MultiValueField()
+    sites = indexes.MultiValueField()
+
+    def get_model(self):
+        return MockModel
+
+    def prepare(self, obj):
+        prepped = super(Elasticsearch2RoundTripSearchIndex, self).prepare(obj)
+        prepped.update({
+            'text': 'This is some example text.',
+            'name': 'Mister Pants',
+            'is_active': True,
+            'post_count': 25,
+            'average_rating': 3.6,
+            'price': Decimal('24.99'),
+            'pub_date': datetime.date(2009, 11, 21),
+            'created': datetime.datetime(2009, 11, 21, 21, 31, 00),
+            'tags': ['staff', 'outdoor', 'activist', 'scientist'],
+            'sites': [3, 5, 1],
+        })
+        return prepped
+
+
+class Elasticsearch2ComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(document=True, default='')
+    name = indexes.CharField(faceted=True)
+    is_active = indexes.BooleanField(faceted=True)
+    post_count = indexes.IntegerField()
+    post_count_i = indexes.FacetIntegerField(facet_for='post_count')
+    average_rating = indexes.FloatField(faceted=True)
+    pub_date = indexes.DateField(faceted=True)
+    created = indexes.DateTimeField(faceted=True)
+    sites = indexes.MultiValueField(faceted=True)
+
+    def get_model(self):
+        return MockModel
+
+
+class Elasticsearch2AutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(model_attr='foo', document=True)
+    name = indexes.CharField(model_attr='author')
+    pub_date = indexes.DateTimeField(model_attr='pub_date')
+    text_auto = indexes.EdgeNgramField(model_attr='foo')
+    name_auto = indexes.EdgeNgramField(model_attr='author')
+
+    def get_model(self):
+        return MockModel
+
+
+class Elasticsearch2SpatialSearchIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(model_attr='name', document=True)
+    location = indexes.LocationField()
+
+    def prepare_location(self, obj):
+        return "%s,%s" % (obj.lat, obj.lon)
+
+    def get_model(self):
+        return ASixthMockModel
+
+
+class TestSettings(TestCase):
+    def test_kwargs_are_passed_on(self):
+        from haystack.backends.elasticsearch2_backend import Elasticsearch2SearchBackend
+        backend = Elasticsearch2SearchBackend('alias', **{
+            'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'],
+            'INDEX_NAME': 'testing',
+            'KWARGS': {'max_retries': 42}
+        })
+
+        self.assertEqual(backend.conn.transport.max_retries, 42)
+
+
+class Elasticsearch2SearchBackendTestCase(TestCase):
+    def setUp(self):
+        super(Elasticsearch2SearchBackendTestCase, self).setUp()
+
+        # Wipe it clean.
+        self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'])
+        clear_elasticsearch_index()
+
+        # Stow.
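+        # Stash the original UnifiedIndex so tearDown() can restore it and
+        # keep the tests isolated from one another.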
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() + self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2SearchBackendTestCase, self).tearDown() + self.sb.silently_fail = True + + def raw_search(self, query): + try: + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + except elasticsearch.TransportError: + return {} + + def test_non_silent(self): + bad_sb = connections['elasticsearch2'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) + + try: + bad_sb.update(self.smmi, self.sample_objs) + self.fail() + except: + pass + + try: + bad_sb.remove('core.mockmodel.1') + self.fail() + except: + pass + + try: + bad_sb.clear() + self.fail() + except: + pass + + try: + bad_sb.search('foo') + self.fail() + except: + pass + + def test_update_no_documents(self): + url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME'] + + sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + self.assertEqual(sb.update(self.smmi, []), None) + + sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) + try: + sb.update(self.smmi, []) + self.fail() + except: + pass + + def test_update(self): + self.sb.update(self.smmi, self.sample_objs) + + # Check what Elasticsearch thinks is there. + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + self.assertEqual( + sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=lambda x: x['id']), [ + { + 'django_id': '1', + 'django_ct': 'core.mockmodel', + 'name': 'daniel1', + 'name_exact': 'daniel1', + 'text': 'Indexed!\n1', + 'pub_date': '2009-02-24T00:00:00', + 'id': 'core.mockmodel.1' + }, + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_update_with_SkipDocument_raised(self): + self.sb.update(self.smmidni, self.sample_objs) + + # Check what Elasticsearch thinks is there. 
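+        # prepare_text() raised SkipDocument for 'daniel3', so only two of the
+        # three sample objects should have made it into the index.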
+ res = self.raw_search('*:*')['hits'] + self.assertEqual(res['total'], 2) + self.assertListEqual( + sorted([x['_source']['id'] for x in res['hits']]), + ['core.mockmodel.1', 'core.mockmodel.2'] + ) + + def test_remove(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 2) + self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], + key=operator.itemgetter('django_id')), [ + { + 'django_id': '2', + 'django_ct': 'core.mockmodel', + 'name': 'daniel2', + 'name_exact': 'daniel2', + 'text': 'Indexed!\n2', + 'pub_date': '2009-02-23T00:00:00', + 'id': 'core.mockmodel.2' + }, + { + 'django_id': '3', + 'django_ct': 'core.mockmodel', + 'name': 'daniel3', + 'name_exact': 'daniel3', + 'text': 'Indexed!\n3', + 'pub_date': '2009-02-22T00:00:00', + 'id': 'core.mockmodel.3' + } + ]) + + def test_remove_succeeds_on_404(self): + self.sb.silently_fail = False + self.sb.remove('core.mockmodel.421') + + def test_clear(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear() + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) + + self.sb.clear([AnotherMockModel, MockModel]) + self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) + + def test_search(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3'])) + + self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) + self.assertEqual( + sorted([result.highlighted[0] for result in self.sb.search('Index', highlight=True)['results']]), + [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) + + self.assertEqual(self.sb.search('Indx')['hits'], 0) + self.assertEqual(self.sb.search('indaxed')['spelling_suggestion'], 'indexed') + self.assertEqual(self.sb.search('arf', spelling_query='indexyd')['spelling_suggestion'], 'indexed') + + self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', facets={'name': {}}) + self.assertEqual(results['hits'], 3) + self.assertSetEqual( + set(results['facets']['fields']['name']), + set([('daniel3', 1), ('daniel2', 1), ('daniel1', 1)]) + ) + + self.assertEqual(self.sb.search('', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) + results = self.sb.search('Index', date_facets={ + 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), + 
'gap_by': 'month', 'gap_amount': 1}}) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['dates']['pub_date'], [(datetime.datetime(2009, 2, 1, 0, 0), 3)]) + + self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) + results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['queries'], {u'name': 3}) + + self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) + results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) + self.assertEqual(results['hits'], 1) + + # Ensure that swapping the ``result_class`` works. + self.assertTrue( + isinstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult)) + + # Check the use of ``limit_to_registered_models``. + self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) + self.assertEqual( + sorted([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']]), + ['1', '2', '3']) + + # Stow. + old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False + + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('*:*')['hits'], 3) + self.assertEqual(sorted([result.pk for result in self.sb.search('*:*')['results']]), ['1', '2', '3']) + + # Restore. + settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models + + def test_spatial_search_parameters(self): + p1 = Point(1.23, 4.56) + kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, + sort_by=(('distance', 'desc'),)) + + self.assertIn('sort', kwargs) + self.assertEqual(1, len(kwargs['sort'])) + geo_d = kwargs['sort'][0]['_geo_distance'] + + # ElasticSearch supports the GeoJSON-style lng, lat pairs so unlike Solr the values should be + # in the same order as we used to create the Point(): + # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-filter.html#_lat_lon_as_array_4 + + self.assertDictEqual(geo_d, {'location': [1.23, 4.56], 'unit': 'km', 'order': 'desc'}) + + def test_more_like_this(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) + + # A functional MLT example with enough data to work is below. Rely on + # this to ensure the API is correct enough. 
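+        # With only three tiny documents indexed, MLT has nothing meaningful
+        # to correlate, so an empty result set is the expected outcome.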
+ self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) + self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) + + def test_build_schema(self): + old_ui = connections['elasticsearch2'].get_unified_index() + + (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 4 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date': {'type': 'date'}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'} + }) + + ui = UnifiedIndex() + ui.build(indexes=[Elasticsearch2ComplexFacetsMockSearchIndex()]) + (content_field_name, mapping) = self.sb.build_schema(ui.all_searchfields()) + self.assertEqual(content_field_name, 'text') + self.assertEqual(len(mapping), 15 + 2) # +2 management fields + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name': {'type': 'string', 'analyzer': 'snowball'}, + 'is_active_exact': {'type': 'boolean'}, + 'created': {'type': 'date'}, + 'post_count': {'type': 'long'}, + 'created_exact': {'type': 'date'}, + 'sites_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'is_active': {'type': 'boolean'}, + 'sites': {'type': 'string', 'analyzer': 'snowball'}, + 'post_count_i': {'type': 'long'}, + 'average_rating': {'type': 'float'}, + 'text': {'type': 'string', 'analyzer': 'snowball'}, + 'pub_date_exact': {'type': 'date'}, + 'name_exact': {'index': 'not_analyzed', 'type': 'string'}, + 'pub_date': {'type': 'date'}, + 'average_rating_exact': {'type': 'float'} + }) + + def test_verify_type(self): + old_ui = connections['elasticsearch2'].get_unified_index() + ui = UnifiedIndex() + smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() + ui.build(indexes=[smtmmi]) + connections['elasticsearch2']._index = ui + sb = connections['elasticsearch2'].get_backend() + sb.update(smtmmi, self.sample_objs) + + self.assertEqual(sb.search('*:*')['hits'], 3) + self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) + connections['elasticsearch2']._index = old_ui + + +class CaptureHandler(std_logging.Handler): + logs_seen = [] + + def emit(self, record): + CaptureHandler.logs_seen.append(record) + + +class FailedElasticsearch2SearchBackendTestCase(TestCase): + def setUp(self): + self.sample_objs = [] + + for i in range(1, 4): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + # Stow. + # Point the backend at a URL that doesn't exist so we can watch the + # sparks fly. + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = "%s/foo/" % self.old_es_url + self.cap = CaptureHandler() + logging.getLogger('haystack').addHandler(self.cap) + import haystack + logging.getLogger('haystack').removeHandler(haystack.stream) + + # Setup the rest of the bits. 
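+        # Build a throwaway UnifiedIndex against the unreachable URL so each
+        # backend call below exercises the silent-failure logging path.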
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = ui + self.sb = connections['elasticsearch2'].get_backend() + + def tearDown(self): + import haystack + # Restore. + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = self.old_es_url + connections['elasticsearch2']._index = self.old_ui + logging.getLogger('haystack').removeHandler(self.cap) + logging.getLogger('haystack').addHandler(haystack.stream) + + @unittest.expectedFailure + def test_all_cases(self): + # Prior to the addition of the try/except bits, these would all fail miserably. + self.assertEqual(len(CaptureHandler.logs_seen), 0) + + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(len(CaptureHandler.logs_seen), 1) + + self.sb.remove(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 2) + + self.sb.search('search') + self.assertEqual(len(CaptureHandler.logs_seen), 3) + + self.sb.more_like_this(self.sample_objs[0]) + self.assertEqual(len(CaptureHandler.logs_seen), 4) + + self.sb.clear([MockModel]) + self.assertEqual(len(CaptureHandler.logs_seen), 5) + + self.sb.clear() + self.assertEqual(len(CaptureHandler.logs_seen), 6) + + +class LiveElasticsearch2SearchQueryTestCase(TestCase): + fixtures = ['initial_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQueryTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + self.sq = connections['elasticsearch2'].get_query() + + # Force indexing of the content. + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() + + def test_log_query(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + + with self.settings(DEBUG=False): + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + + with self.settings(DEBUG=True): + # Redefine it to clear out the cached results. + self.sq = connections['elasticsearch2'].query(using='elasticsearch2') + self.sq.add_filter(SQ(name='bar')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + 'name:(bar)') + + # And again, for good measure. + self.sq = connections['elasticsearch2'].query('elasticsearch2') + self.sq.add_filter(SQ(name='bar')) + self.sq.add_filter(SQ(text='moof')) + len(self.sq.get_results()) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + 'name:(bar)') + self.assertEqual(connections['elasticsearch2'].queries[1]['query_string'], + u'(name:(bar) AND text:(moof))') + + +lssqstc_all_loaded = None + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SearchQuerySetTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() + + # Stow. 
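+        # Keep a handle on the original UnifiedIndex; tearDown() puts it back.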
+        self.old_ui = connections['elasticsearch2'].get_unified_index()
+        self.ui = UnifiedIndex()
+        self.smmi = Elasticsearch2MockSearchIndex()
+        self.ui.build(indexes=[self.smmi])
+        connections['elasticsearch2']._index = self.ui
+
+        self.sqs = SearchQuerySet('elasticsearch2')
+        self.rsqs = RelatedSearchQuerySet('elasticsearch2')
+
+        # Ugly, but skipping the constant reindexing saves us almost 50% of the runtime.
+        global lssqstc_all_loaded
+
+        if lssqstc_all_loaded is None:
+            lssqstc_all_loaded = True
+
+            # Wipe it clean.
+            clear_elasticsearch_index()
+
+            # Force indexing of the content.
+            self.smmi.update(using='elasticsearch2')
+
+    def tearDown(self):
+        # Restore.
+        connections['elasticsearch2']._index = self.old_ui
+        super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown()
+
+    def test_load_all(self):
+        sqs = self.sqs.order_by('pub_date').load_all()
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertTrue(len(sqs) > 0)
+        self.assertEqual(sqs[2].object.foo,
+                         u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.')
+
+    def test_iter(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+        sqs = self.sqs.all()
+        results = sorted([int(result.pk) for result in sqs])
+        self.assertEqual(results, list(range(1, 24)))
+        self.assertEqual(len(connections['elasticsearch2'].queries), 3)
+
+    def test_slice(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+        results = self.sqs.all().order_by('pub_date')
+        self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
+        self.assertEqual(len(connections['elasticsearch2'].queries), 1)
+
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+        results = self.sqs.all().order_by('pub_date')
+        self.assertEqual(int(results[21].pk), 22)
+        self.assertEqual(len(connections['elasticsearch2'].queries), 1)
+
+    def test_values_slicing(self):
+        reset_search_queries()
+        self.assertEqual(len(connections['elasticsearch2'].queries), 0)
+
+        # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends
+
+        # The values will come back as strings because Haystack doesn't assume PKs are integers.
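+        # e.g. the object with pk=3 comes back from values('pk') as {'pk': '3'}.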
+ # We'll prepare this set once since we're going to query the same results in multiple ways: + expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] + + results = self.sqs.all().order_by('pub_date').values('pk') + self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk') + self.assertListEqual([i[0] for i in results[1:11]], expected_pks) + + results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) + self.assertListEqual(results[1:11], expected_pks) + + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test_count(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + sqs = self.sqs.all() + self.assertEqual(sqs.count(), 23) + self.assertEqual(sqs.count(), 23) + self.assertEqual(len(sqs), 23) + self.assertEqual(sqs.count(), 23) + # Should only execute one query to count the length of the result set. + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + + def test_manual_iter(self): + results = self.sqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = set([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, + set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.sqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + + def test_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(self.sqs._cache_is_full(), False) + results = self.sqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + def test___and__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 & sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') + + # Now for something more complex... + sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) + sqs4 = self.sqs.filter(content='bar') + sqs = sqs3 & sqs4 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 3) + self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') + + def test___or__(self): + sqs1 = self.sqs.filter(content='foo') + sqs2 = self.sqs.filter(content='bar') + sqs = sqs1 | sqs2 + + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.query_filter), 2) + self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') + + # Now for something more complex... 
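+        # ORing the querysets merges their root filters, so the NOT title and
+        # the (foo OR baz) group stay ANDed together on one side of the OR.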
+        sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz'))
+        sqs4 = self.sqs.filter(content='bar').models(MockModel)
+        sqs = sqs3 | sqs4
+
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertEqual(len(sqs.query.query_filter), 2)
+        self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))')
+
+    def test_auto_query(self):
+        # Ensure bits in exact matches get escaped properly as well.
+        # This will break horrifically if escaping isn't working.
+        sqs = self.sqs.auto_query('"pants:rule"')
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertEqual(repr(sqs.query.query_filter), '<SQ: AND content__content="pants:rule">')
+        self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")')
+        self.assertEqual(len(sqs), 0)
+
+    # Regressions
+
+    def test_regression_proper_start_offsets(self):
+        sqs = self.sqs.filter(text='index')
+        self.assertNotEqual(sqs.count(), 0)
+
+        id_counts = {}
+
+        for item in sqs:
+            if item.id in id_counts:
+                id_counts[item.id] += 1
+            else:
+                id_counts[item.id] = 1
+
+        for key, value in id_counts.items():
+            if value > 1:
+                self.fail("Result with id '%s' seen more than once in the results." % key)
+
+    def test_regression_raw_search_breaks_slicing(self):
+        sqs = self.sqs.raw_search('text:index')
+        page_1 = [result.pk for result in sqs[0:10]]
+        page_2 = [result.pk for result in sqs[10:20]]
+
+        for pk in page_2:
+            if pk in page_1:
+                self.fail("Result with id '%s' seen more than once in the results." % pk)
+
+    # RelatedSearchQuerySet Tests
+
+    def test_related_load_all(self):
+        sqs = self.rsqs.order_by('pub_date').load_all()
+        self.assertTrue(isinstance(sqs, SearchQuerySet))
+        self.assertTrue(len(sqs) > 0)
+        self.assertEqual(sqs[2].object.foo,
+                         u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. 
You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') + + def test_related_load_all_queryset(self): + sqs = self.rsqs.load_all().order_by('pub_date') + self.assertEqual(len(sqs._load_all_querysets), 0) + + sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1)) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs._load_all_querysets), 1) + self.assertEqual(sorted([obj.object.id for obj in sqs]), list(range(2, 24))) + + sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs._load_all_querysets), 1) + self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20])) + self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23])) + + def test_related_iter(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + sqs = self.rsqs.all() + results = set([int(result.pk) for result in sqs]) + self.assertEqual(results, + set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_slice(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) + self.assertEqual(len(connections['elasticsearch2'].queries), 3) + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual(int(results[21].pk), 22) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all().order_by('pub_date') + self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_manual_iter(self): + results = self.rsqs.all() + + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = sorted([int(result.pk) for result in results._manual_iter()]) + self.assertEqual(results, list(range(1, 24))) + self.assertEqual(len(connections['elasticsearch2'].queries), 4) + + def test_related_fill_cache(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results = self.rsqs.all() + self.assertEqual(len(results._result_cache), 0) + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + results._fill_cache(0, 10) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) + self.assertEqual(len(connections['elasticsearch2'].queries), 1) + results._fill_cache(10, 20) + self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) + self.assertEqual(len(connections['elasticsearch2'].queries), 2) + + def test_related_cache_is_full(self): + reset_search_queries() + self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(self.rsqs._cache_is_full(), False) + results = self.rsqs.all() + fire_the_iterator_and_fill_cache = [result for result in results] + self.assertEqual(results._cache_is_full(), True) + 
self.assertEqual(len(connections['elasticsearch2'].queries), 5) + + def test_quotes_regression(self): + sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") + # Should not have empty terms. + self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)") + # Should not cause Elasticsearch to 500. + self.assertEqual(sqs.count(), 0) + + sqs = self.sqs.auto_query('blazing') + self.assertEqual(sqs.query.build_query(), u'(blazing)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('blazing saddles') + self.assertEqual(sqs.query.build_query(), u'(blazing saddles)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles') + self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles"') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles" mel') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('"blazing saddles" mel brooks') + self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles" brooks') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)') + self.assertEqual(sqs.count(), 0) + sqs = self.sqs.auto_query('mel "blazing saddles" "brooks') + self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" \\"brooks)') + self.assertEqual(sqs.count(), 0) + + def test_query_generation(self): + sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) + self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") + + def test_result_class(self): + # Assert that we're defaulting to ``SearchResult``. + sqs = self.sqs.all() + self.assertTrue(isinstance(sqs[0], SearchResult)) + + # Custom class. + sqs = self.sqs.result_class(MockSearchResult).all() + self.assertTrue(isinstance(sqs[0], MockSearchResult)) + + # Reset to default. + sqs = self.sqs.result_class(None).all() + self.assertTrue(isinstance(sqs[0], SearchResult)) + + +@override_settings(DEBUG=True) +class LiveElasticsearch2SpellingTestCase(TestCase): + """Used to test actual implementation details of the SearchQuerySet.""" + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2SpellingTestCase, self).setUp() + + # Stow. 
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockSpellingIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch2'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2SpellingTestCase, self).tearDown() + + def test_spelling(self): + self.assertEqual(self.sqs.auto_query('structurd').spelling_suggestion(), 'structured') + self.assertEqual(self.sqs.spelling_suggestion('structurd'), 'structured') + self.assertEqual(self.sqs.auto_query('srchindex instanc').spelling_suggestion(), 'searchindex instance') + self.assertEqual(self.sqs.spelling_suggestion('srchindex instanc'), 'searchindex instance') + + +class LiveElasticsearch2MoreLikeThisTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2MoreLikeThisTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + self.smmi.update(using='elasticsearch2') + self.sammi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() + + def test_more_like_this(self): + mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in mlt] + self.assertEqual(mlt.count(), 11) + self.assertEqual(set(results), set([u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'])) + self.assertEqual(len(results), 10) + + alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) + results = [result.pk for result in alt_mlt] + self.assertEqual(alt_mlt.count(), 9) + self.assertEqual(set(results), set([u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'])) + self.assertEqual(len(results), 9) + + alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) + results = [result.pk for result in alt_mlt_with_models] + self.assertEqual(alt_mlt_with_models.count(), 10) + self.assertEqual(set(results), set([u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'])) + self.assertEqual(len(results), 10) + + if hasattr(MockModel.objects, 'defer'): + # Make sure MLT works with deferred bits. + mi = MockModel.objects.defer('foo').get(pk=1) + self.assertEqual(mi._deferred, True) + deferred = self.sqs.models(MockModel).more_like_this(mi) + self.assertEqual(deferred.count(), 0) + self.assertEqual([result.pk for result in deferred], []) + self.assertEqual(len([result.pk for result in deferred]), 0) + + # Ensure that swapping the ``result_class`` works. 
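+        # result_class() swaps the class used to hydrate each hit, so these
+        # MLT results should come back as MockSearchResult instances.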
+ self.assertTrue( + isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0], + MockSearchResult)) + + +class LiveElasticsearch2AutocompleteTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2AutocompleteTestCase, self).setUp() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + # Wipe it clean. + clear_elasticsearch_index() + + # Reboot the schema. + self.sb = connections['elasticsearch2'].get_backend() + self.sb.setup() + + self.smmi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() + + def test_build_schema(self): + self.sb = connections['elasticsearch2'].get_backend() + content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) + self.assertEqual(mapping, { + 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'name_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + }, + 'text': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'pub_date': { + 'type': 'date' + }, + 'name': { + 'type': 'string', + 'analyzer': 'snowball', + }, + 'text_auto': { + 'type': 'string', + 'analyzer': 'edgengram_analyzer', + } + }) + + def test_autocomplete(self): + autocomplete = self.sqs.autocomplete(text_auto='mod') + self.assertEqual(autocomplete.count(), 16) + self.assertEqual(set([result.pk for result in autocomplete]), set( + ['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'])) + self.assertTrue('mod' in autocomplete[0].text.lower()) + self.assertTrue('mod' in autocomplete[1].text.lower()) + self.assertTrue('mod' in autocomplete[2].text.lower()) + self.assertTrue('mod' in autocomplete[3].text.lower()) + self.assertTrue('mod' in autocomplete[4].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete]), 16) + + # Test multiple words. + autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') + self.assertEqual(autocomplete_2.count(), 13) + self.assertEqual(set([result.pk for result in autocomplete_2]), + set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'])) + map_results = {result.pk: result for result in autocomplete_2} + self.assertTrue('your' in map_results['1'].text.lower()) + self.assertTrue('mod' in map_results['1'].text.lower()) + self.assertTrue('your' in map_results['6'].text.lower()) + self.assertTrue('mod' in map_results['6'].text.lower()) + self.assertTrue('your' in map_results['2'].text.lower()) + self.assertEqual(len([result.pk for result in autocomplete_2]), 13) + + # Test multiple fields. 
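+        # Multiple *_auto kwargs are ANDed, so a hit must prefix-match both
+        # 'Django' in the text and 'dan' in the author's name.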
+ autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') + self.assertEqual(autocomplete_3.count(), 4) + self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14'])) + self.assertEqual(len([result.pk for result in autocomplete_3]), 4) + + # Test numbers in phrases + autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + + # Test numbers alone + autocomplete_4 = self.sqs.autocomplete(text_auto='867') + self.assertEqual(autocomplete_4.count(), 1) + self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) + + +class LiveElasticsearch2RoundTripTestCase(TestCase): + def setUp(self): + super(LiveElasticsearch2RoundTripTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.srtsi = Elasticsearch2RoundTripSearchIndex() + self.ui.build(indexes=[self.srtsi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + self.sqs = SearchQuerySet('elasticsearch2') + + # Fake indexing. + mock = MockModel() + mock.id = 1 + self.sb.update(self.srtsi, [mock]) + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2RoundTripTestCase, self).tearDown() + + def test_round_trip(self): + results = self.sqs.filter(id='core.mockmodel.1') + + # Sanity check. + self.assertEqual(results.count(), 1) + + # Check the individual fields. + result = results[0] + self.assertEqual(result.id, 'core.mockmodel.1') + self.assertEqual(result.text, 'This is some example text.') + self.assertEqual(result.name, 'Mister Pants') + self.assertEqual(result.is_active, True) + self.assertEqual(result.post_count, 25) + self.assertEqual(result.average_rating, 3.6) + self.assertEqual(result.price, u'24.99') + self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) + self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) + self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) + self.assertEqual(result.sites, [3, 5, 1]) + + +@unittest.skipUnless(test_pickling, 'Skipping pickling tests') +class LiveElasticsearch2PickleTestCase(TestCase): + fixtures = ['bulk_data.json'] + + def setUp(self): + super(LiveElasticsearch2PickleTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2MockModelSearchIndex() + self.sammi = Elasticsearch2AnotherMockModelSearchIndex() + self.ui.build(indexes=[self.smmi, self.sammi]) + connections['elasticsearch2']._index = self.ui + + self.sqs = SearchQuerySet('elasticsearch2') + + self.smmi.update(using='elasticsearch2') + self.sammi.update(using='elasticsearch2') + + def tearDown(self): + # Restore. + connections['elasticsearch2']._index = self.old_ui + super(LiveElasticsearch2PickleTestCase, self).tearDown() + + def test_pickling(self): + results = self.sqs.all() + + for res in results: + # Make sure the cache is full. 
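+            # (consuming the iterator forces every result to load)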
+ pass + + in_a_pickle = pickle.dumps(results) + like_a_cuke = pickle.loads(in_a_pickle) + self.assertEqual(len(like_a_cuke), len(results)) + self.assertEqual(like_a_cuke[0].id, results[0].id) + + +class Elasticsearch2BoostBackendTestCase(TestCase): + def setUp(self): + super(Elasticsearch2BoostBackendTestCase, self).setUp() + + # Wipe it clean. + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + clear_elasticsearch_index() + + # Stow. + self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2BoostMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + self.sample_objs = [] + + for i in range(1, 5): + mock = AFourthMockModel() + mock.id = i + + if i % 2: + mock.author = 'daniel' + mock.editor = 'david' + else: + mock.author = 'david' + mock.editor = 'daniel' + + mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2BoostBackendTestCase, self).tearDown() + + def raw_search(self, query): + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + + def test_boost(self): + self.sb.update(self.smmi, self.sample_objs) + self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) + + results = SearchQuerySet(using='elasticsearch2').filter(SQ(author='daniel') | SQ(editor='daniel')) + + self.assertEqual(set([result.id for result in results]), set([ + 'core.afourthmockmodel.4', + 'core.afourthmockmodel.3', + 'core.afourthmockmodel.1', + 'core.afourthmockmodel.2' + ])) + + def test__to_python(self): + self.assertEqual(self.sb._to_python('abc'), 'abc') + self.assertEqual(self.sb._to_python('1'), 1) + self.assertEqual(self.sb._to_python('2653'), 2653) + self.assertEqual(self.sb._to_python('25.5'), 25.5) + self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3]) + self.assertEqual(self.sb._to_python('{"a": 1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2}) + self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime.datetime(2009, 5, 9, 16, 14)) + self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime.datetime(2009, 5, 9, 0, 0)) + self.assertEqual(self.sb._to_python(None), None) + + +class RecreateIndexTestCase(TestCase): + def setUp(self): + self.raw_es = elasticsearch.Elasticsearch( + settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + + def test_recreate_index(self): + clear_elasticsearch_index() + + sb = connections['elasticsearch2'].get_backend() + sb.silently_fail = True + sb.setup() + + original_mapping = self.raw_es.indices.get_mapping(index=sb.index_name) + + sb.clear() + sb.setup() + + try: + updated_mapping = self.raw_es.indices.get_mapping(sb.index_name) + except elasticsearch.NotFoundError: + self.fail("There is no mapping after recreating the index") + + self.assertEqual(original_mapping, updated_mapping, + "Mapping after recreating the index differs from the original one") + + +class Elasticsearch2FacetingTestCase(TestCase): + def setUp(self): + super(Elasticsearch2FacetingTestCase, self).setUp() + + # Wipe it clean. + clear_elasticsearch_index() + + # Stow. 
+ self.old_ui = connections['elasticsearch2'].get_unified_index() + self.ui = UnifiedIndex() + self.smmi = Elasticsearch2FacetingMockSearchIndex() + self.ui.build(indexes=[self.smmi]) + connections['elasticsearch2']._index = self.ui + self.sb = connections['elasticsearch2'].get_backend() + + # Force the backend to rebuild the mapping each time. + self.sb.existing_mapping = {} + self.sb.setup() + + self.sample_objs = [] + + for i in range(1, 10): + mock = AFourthMockModel() + mock.id = i + if i > 5: + mock.editor = 'George Taylor' + else: + mock.editor = 'Perry White' + if i % 2: + mock.author = 'Daniel Lindsley' + else: + mock.author = 'Dan Watson' + mock.pub_date = datetime.date(2013, 9, (i % 4) + 1) + self.sample_objs.append(mock) + + def tearDown(self): + connections['elasticsearch2']._index = self.old_ui + super(Elasticsearch2FacetingTestCase, self).tearDown() + + def test_facet(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 5), + ('Dan Watson', 4), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ('George Taylor', 4), + ]) + counts = SearchQuerySet('elasticsearch2').filter(content='white').facet('facet_field', + order='reverse_count').facet_counts() + self.assertEqual(counts['fields']['facet_field'], [ + ('Dan Watson', 2), + ('Daniel Lindsley', 3), + ]) + + def test_multiple_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').narrow('editor_exact:"Perry White"').narrow( + 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ]) + + def test_narrow(self): + self.sb.update(self.smmi, self.sample_objs) + counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').narrow( + 'editor_exact:"Perry White"').facet_counts() + self.assertEqual(counts['fields']['author'], [ + ('Daniel Lindsley', 3), + ('Dan Watson', 2), + ]) + self.assertEqual(counts['fields']['editor'], [ + ('Perry White', 5), + ]) + + def test_date_facet(self): + self.sb.update(self.smmi, self.sample_objs) + start = datetime.date(2013, 9, 1) + end = datetime.date(2013, 9, 30) + # Facet by day + counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 2), + (datetime.datetime(2013, 9, 2), 3), + (datetime.datetime(2013, 9, 3), 2), + (datetime.datetime(2013, 9, 4), 2), + ]) + # By month + counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() + self.assertEqual(counts['dates']['pub_date'], [ + (datetime.datetime(2013, 9, 1), 9), + ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py new file mode 100644 index 000000000..777334fb2 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_inputs.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, division, print_function, unicode_literals + +from django.test import TestCase + +from haystack import connections, inputs + + +class Elasticsearch2InputTestCase(TestCase): + def setUp(self): + super(Elasticsearch2InputTestCase, self).setUp() + self.query_obj = connections['elasticsearch2'].get_query() + + def 
test_raw_init(self): + raw = inputs.Raw('hello OR there, :you') + self.assertEqual(raw.query_string, 'hello OR there, :you') + self.assertEqual(raw.kwargs, {}) + self.assertEqual(raw.post_process, False) + + raw = inputs.Raw('hello OR there, :you', test='really') + self.assertEqual(raw.query_string, 'hello OR there, :you') + self.assertEqual(raw.kwargs, {'test': 'really'}) + self.assertEqual(raw.post_process, False) + + def test_raw_prepare(self): + raw = inputs.Raw('hello OR there, :you') + self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') + + def test_clean_init(self): + clean = inputs.Clean('hello OR there, :you') + self.assertEqual(clean.query_string, 'hello OR there, :you') + self.assertEqual(clean.post_process, True) + + def test_clean_prepare(self): + clean = inputs.Clean('hello OR there, :you') + self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you') + + def test_exact_init(self): + exact = inputs.Exact('hello OR there, :you') + self.assertEqual(exact.query_string, 'hello OR there, :you') + self.assertEqual(exact.post_process, True) + + def test_exact_prepare(self): + exact = inputs.Exact('hello OR there, :you') + self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') + + exact = inputs.Exact('hello OR there, :you', clean=True) + self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"') + + def test_not_init(self): + not_it = inputs.Not('hello OR there, :you') + self.assertEqual(not_it.query_string, 'hello OR there, :you') + self.assertEqual(not_it.post_process, True) + + def test_not_prepare(self): + not_it = inputs.Not('hello OR there, :you') + self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)') + + def test_autoquery_init(self): + autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') + self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') + self.assertEqual(autoquery.post_process, False) + + def test_autoquery_prepare(self): + autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') + self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') + + def test_altparser_init(self): + altparser = inputs.AltParser('dismax') + self.assertEqual(altparser.parser_name, 'dismax') + self.assertEqual(altparser.query_string, '') + self.assertEqual(altparser.kwargs, {}) + self.assertEqual(altparser.post_process, False) + + altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) + self.assertEqual(altparser.parser_name, 'dismax') + self.assertEqual(altparser.query_string, 'douglas adams') + self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) + self.assertEqual(altparser.post_process, False) + + def test_altparser_prepare(self): + altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) + self.assertEqual(altparser.prepare(self.query_obj), + u"""{!dismax mm=1 qf=author v='douglas adams'}""") diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py new file mode 100644 index 000000000..968180686 --- /dev/null +++ b/test_haystack/elasticsearch2_tests/test_query.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals + +import datetime + +import elasticsearch +from django.test import TestCase + +from haystack import connections +from haystack.inputs import Exact +from haystack.models import SearchResult +from haystack.query import SearchQuerySet, SQ +from 
haystack.utils.geo import D, Point +from ..core.models import AnotherMockModel, MockModel + + +class Elasticsearch2SearchQueryTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQueryTestCase, self).setUp() + self.sq = connections['elasticsearch2'].get_query() + + def test_build_query_all(self): + self.assertEqual(self.sq.build_query(), '*:*') + + def test_build_query_single_word(self): + self.sq.add_filter(SQ(content='hello')) + self.assertEqual(self.sq.build_query(), '(hello)') + + def test_build_query_boolean(self): + self.sq.add_filter(SQ(content=True)) + self.assertEqual(self.sq.build_query(), '(True)') + + def test_regression_slash_search(self): + self.sq.add_filter(SQ(content='hello/')) + self.assertEqual(self.sq.build_query(), '(hello\\/)') + + def test_build_query_datetime(self): + self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28))) + self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00)') + + def test_build_query_multiple_words_and(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_filter(SQ(content='world')) + self.assertEqual(self.sq.build_query(), '((hello) AND (world))') + + def test_build_query_multiple_words_not(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))') + + def test_build_query_multiple_words_or(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(SQ(content='hello'), use_or=True) + self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))') + + def test_build_query_multiple_words_mixed(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(content='hello'), use_or=True) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') + + def test_build_query_phrase(self): + self.sq.add_filter(SQ(content='hello world')) + self.assertEqual(self.sq.build_query(), '(hello AND world)') + + self.sq.add_filter(SQ(content__exact='hello world')) + self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') + + def test_build_query_boost(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_boost('world', 5) + self.assertEqual(self.sq.build_query(), "(hello) world^5") + + def test_build_query_multiple_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) + self.sq.add_filter(SQ(author__gt='daniel')) + self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) + self.sq.add_filter(SQ(title__gte='B')) + self.sq.add_filter(SQ(id__in=[1, 2, 3])) + self.sq.add_filter(SQ(rating__range=[3, 5])) + self.assertEqual(self.sq.build_query(), + u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') + + def test_build_query_multiple_filter_types_with_datetimes(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0))) + self.sq.add_filter(SQ(author__gt='daniel')) + self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0))) + self.sq.add_filter(SQ(title__gte='B')) + self.sq.add_filter(SQ(id__in=[1, 2, 3])) + self.sq.add_filter(SQ(rating__range=[3, 5])) + self.assertEqual(self.sq.build_query(), + u'((why) AND pub_date:([* TO "2009-02-10T01:59:00"]) AND author:({"daniel" 
TO *}) AND created:({* TO "2009-02-12T12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') + + def test_build_query_in_filter_multiple_words(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))') + + def test_build_query_in_filter_datetime(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) + self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21"))') + + def test_build_query_in_with_set(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) + self.assertTrue('((why) AND title:(' in self.sq.build_query()) + self.assertTrue('"A Famous Paper"' in self.sq.build_query()) + self.assertTrue('"An Infamous Article"' in self.sq.build_query()) + + def test_build_query_wildcard_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__startswith='haystack')) + self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') + + def test_build_query_fuzzy_filter_types(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__fuzzy='haystack')) + self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') + + def test_clean(self): + self.assertEqual(self.sq.clean('hello world'), 'hello world') + self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') + self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'), + 'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world') + self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), + 'so please NOTe i am in a bAND and bORed') + + def test_build_query_with_models(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_model(MockModel) + self.assertEqual(self.sq.build_query(), '(hello)') + + self.sq.add_model(AnotherMockModel) + self.assertEqual(self.sq.build_query(), u'(hello)') + + def test_set_result_class(self): + # Assert that we're defaulting to ``SearchResult``. + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + # Custom class. + class IttyBittyResult(object): + pass + + self.sq.set_result_class(IttyBittyResult) + self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) + + # Reset to default. 
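+ # Passing None falls back to the stock SearchResult class.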
+ self.sq.set_result_class(None) + self.assertTrue(issubclass(self.sq.result_class, SearchResult)) + + def test_in_filter_values_list(self): + self.sq.add_filter(SQ(content='why')) + self.sq.add_filter(SQ(title__in=[1, 2, 3])) + self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') + + def test_narrow_sq(self): + sqs = SearchQuerySet(using='elasticsearch2').narrow(SQ(foo='moof')) + self.assertTrue(isinstance(sqs, SearchQuerySet)) + self.assertEqual(len(sqs.query.narrow_queries), 1) + self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') + + +class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch2'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (0, 9, 9) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions < 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': 0.5, 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) + + +class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): + def setUp(self): + super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() + self.backend = connections['elasticsearch2'].get_backend() + self._elasticsearch_version = elasticsearch.VERSION + elasticsearch.VERSION = (1, 0, 0) + + def tearDown(self): + elasticsearch.VERSION = self._elasticsearch_version + + def test_build_query_with_dwithin_range(self): + """ + Test build_search_kwargs with dwithin range for Elasticsearch versions >= 1.0.0 + """ + search_kwargs = self.backend.build_search_kwargs('where', dwithin={ + 'field': "location_field", + 'point': Point(1.2345678, 2.3456789), + 'distance': D(m=500) + }) + self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], + {'distance': "0.500000km", 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index d676c0de0..7a796164d 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -84,6 +84,12 @@ 'INDEX_NAME': 'test_default', 'INCLUDE_SPELLING': True, }, + 'elasticsearch2': { + 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', + 'URL': 'http://192.168.99.100:9200/', + 'INDEX_NAME': 'test_default', + 'INCLUDE_SPELLING': True, + }, 'simple': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, From 60ff1fc3721879bd3804f9ecc82eed2f8cd25c6a Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Wed, 23 Mar 2016 16:29:00 +0100 Subject: [PATCH 30/51] Elasticsearch 2.x support - Fix localhost IP in elasticsearch2 settings --- test_haystack/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_haystack/settings.py b/test_haystack/settings.py index 7a796164d..dca6dd4e7 100644 --- a/test_haystack/settings.py +++ b/test_haystack/settings.py @@ -86,7 +86,7 @@ }, 'elasticsearch2': { 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', - 'URL': 'http://192.168.99.100:9200/', + 'URL': 
'http://127.0.0.1:9200/',
 'INDEX_NAME': 'test_default',
 'INCLUDE_SPELLING': True,
 },

From 7b14d23ac891f2ece7b30df621b61285665d4011 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:26:30 +0100
Subject: [PATCH 31/51] Launches ES 2.x on Travis - Connect on port 29200

---
 .travis.yml | 3 +++
 test_haystack/settings.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index eee4d5886..9063cbd8c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,9 @@ addons:

 before_install:
 - mkdir -p $HOME/download-cache
+ - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
+ - tar zxf elasticsearch-2.2.1.tar.gz
+ - elasticsearch-2.2.1/bin/elasticsearch -Dtransport.tcp.port=29300 -Dhttp.port=29200

 install:
 - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi
diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index dca6dd4e7..0fa301297 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -86,7 +86,7 @@
 },
 'elasticsearch2': {
 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
- 'URL': 'http://127.0.0.1:9200/',
+ 'URL': 'http://127.0.0.1:29200/',
 'INDEX_NAME': 'test_default',
 'INCLUDE_SPELLING': True,
 },

From e2f66c854bec0106d91d88f36058b7dad812ae74 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:30:34 +0100
Subject: [PATCH 32/51] Launches ES 2.x on Travis - daemonize ES 2.x

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 9063cbd8c..d435f060e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,7 @@ before_install:
 - mkdir -p $HOME/download-cache
 - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz
 - tar zxf elasticsearch-2.2.1.tar.gz
- - elasticsearch-2.2.1/bin/elasticsearch -Dtransport.tcp.port=29300 -Dhttp.port=29200
+ - elasticsearch-2.2.1/bin/elasticsearch -d -Dtransport.tcp.port=29300 -Dhttp.port=29200

From 86a2ab5e87125fc4f459b4403f34aa3fc88cafae Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:39:33 +0100
Subject: [PATCH 33/51] Launches ES 2.x on Travis - comment out ES 1.x service

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d435f060e..bb01867e2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -51,8 +51,8 @@ matrix:
 allow_failures:
 - python: "pypy"

-services:
- - elasticsearch
+#services:
+# - elasticsearch

notifications:
 irc: "irc.freenode.org#haystack"
 email: false

From 6ca562d77a29302e3f4febc15b8d574c9696e58f Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 18:28:28 +0100
Subject: [PATCH 34/51] Launches ES 2.x on Travis - Fix exception catching when skipping tests

---
 test_haystack/elasticsearch2_tests/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py
index 8433081cd..a456c9829 100644
--- a/test_haystack/elasticsearch2_tests/__init__.py
+++ b/test_haystack/elasticsearch2_tests/__init__.py
@@ -10,7 +10,7 @@
 def setup():
 try:
- from elasticsearch import Elasticsearch, ElasticsearchException
+ from elasticsearch import Elasticsearch, exceptions
 except ImportError:
raise unittest.SkipTest("elasticsearch-py not installed.") @@ -18,5 +18,5 @@ def setup(): es = Elasticsearch(url) try: es.info() - except ElasticsearchException as e: + except exceptions.ConnectionError as e: raise unittest.SkipTest("elasticsearch not running on %r" % url, e) From 2a950de3b933bcfca77e0e6f97d547b4c9e6c55f Mon Sep 17 00:00:00 2001 From: Pedro Aquilino Date: Thu, 24 Mar 2016 01:59:57 +0100 Subject: [PATCH 35/51] Uses a build matrix to test ES 1.x or ES 2.x --- .travis.yml | 25 +- setup.py | 1 - .../elasticsearch2_tests/__init__.py | 7 +- .../elasticsearch2_tests/test_backend.py | 270 +++++++++--------- .../elasticsearch2_tests/test_inputs.py | 2 +- .../elasticsearch2_tests/test_query.py | 8 +- test_haystack/elasticsearch_tests/__init__.py | 7 +- test_haystack/settings.py | 14 +- tox.ini | 126 ++++++-- 9 files changed, 284 insertions(+), 176 deletions(-) diff --git a/.travis.yml b/.travis.yml index bb01867e2..eac4d35d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,13 +26,21 @@ addons: before_install: - mkdir -p $HOME/download-cache - - wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz - - tar zxf elasticsearch-2.2.1.tar.gz - - elasticsearch-2.2.1/bin/elasticsearch -d -Dtransport.tcp.port=29300 -Dhttp.port=29200 + - > + if [[ $VERSION_ES == '>=1.0.0,<2.0.0' ]]; + then + wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.tar.gz + tar zxf elasticsearch-1.7.5.tar.gz + elasticsearch-1.7.5/bin/elasticsearch -d -Dhttp.port=9200 + else + wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.2.1/elasticsearch-2.2.1.tar.gz + tar zxf elasticsearch-2.2.1.tar.gz + elasticsearch-2.2.1/bin/elasticsearch -d -Dhttp.port=9200 + fi install: - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install mock==1.0.1; fi - - pip install requests "Django${DJANGO_VERSION}" + - pip install requests "Django${DJANGO_VERSION}" "elasticsearch${VERSION_ES}" - python setup.py clean build install before_script: @@ -44,16 +52,15 @@ script: env: matrix: - - DJANGO_VERSION=">=1.8,<1.9" - - DJANGO_VERSION=">=1.9,<1.10" + - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=1.0.0,<2.0.0" + - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=1.0.0,<2.0.0" + - DJANGO_VERSION=">=1.8,<1.9" VERSION_ES=">=2.0.0,<3.0.0" + - DJANGO_VERSION=">=1.9,<1.10" VERSION_ES=">=2.0.0,<3.0.0" matrix: allow_failures: - python: "pypy" -#services: -# - elasticsearch - notifications: irc: "irc.freenode.org#haystack" email: false diff --git a/setup.py b/setup.py index 6345d858d..0ad15e441 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,6 @@ ] tests_require = [ - 'elasticsearch>=2.0.0,<3.0.0', 'pysolr>=3.3.2', 'whoosh>=2.5.4,<3.0', 'python-dateutil', diff --git a/test_haystack/elasticsearch2_tests/__init__.py b/test_haystack/elasticsearch2_tests/__init__.py index a456c9829..ba6384f46 100644 --- a/test_haystack/elasticsearch2_tests/__init__.py +++ b/test_haystack/elasticsearch2_tests/__init__.py @@ -10,11 +10,14 @@ def setup(): try: + import elasticsearch + if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): + raise ImportError from elasticsearch import Elasticsearch, exceptions except ImportError: - raise unittest.SkipTest("elasticsearch-py not installed.") + raise unittest.SkipTest("'elasticsearch>=2.0.0,<3.0.0' not installed.") - url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] es = 
Elasticsearch(url) try: es.info() diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py index 0ba061eda..cdefaacc7 100644 --- a/test_haystack/elasticsearch2_tests/test_backend.py +++ b/test_haystack/elasticsearch2_tests/test_backend.py @@ -36,16 +36,16 @@ def clear_elasticsearch_index(): # Wipe it clean. - raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) try: - raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) raw_es.indices.refresh() except elasticsearch.TransportError: pass # Since we've just completely deleted the index, we'll reset setup_complete so the next access will # correctly define the mappings: - connections['elasticsearch2'].get_backend().setup_complete = False + connections['elasticsearch'].get_backend().setup_complete = False class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): @@ -217,7 +217,7 @@ class TestSettings(TestCase): def test_kwargs_are_passed_on(self): from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend backend = ElasticsearchSearchBackend('alias', **{ - 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'], + 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], 'INDEX_NAME': 'testing', 'KWARGS': {'max_retries': 42} }) @@ -230,18 +230,18 @@ def setUp(self): super(Elasticsearch2SearchBackendTestCase, self).setUp() # Wipe it clean. - self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. 
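# setup() only re-puts the mapping when it differs from existing_mapping,
# so clearing it guarantees a fresh schema for each test run.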
self.sb.existing_mapping = {} @@ -257,19 +257,19 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2SearchBackendTestCase, self).tearDown() self.sb.silently_fail = True def raw_search(self, query): try: - return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) except elasticsearch.TransportError: return {} def test_non_silent(self): - bad_sb = connections['elasticsearch2'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', - SILENTLY_FAIL=False, TIMEOUT=1) + bad_sb = connections['elasticsearch'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', + SILENTLY_FAIL=False, TIMEOUT=1) try: bad_sb.update(self.smmi, self.sample_objs) @@ -296,14 +296,14 @@ def test_non_silent(self): pass def test_update_no_documents(self): - url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] - index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME'] + url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'] - sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) self.assertEqual(sb.update(self.smmi, []), None) - sb = connections['elasticsearch2'].backend('elasticsearch2', URL=url, INDEX_NAME=index_name, - SILENTLY_FAIL=False) + sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, + SILENTLY_FAIL=False) try: sb.update(self.smmi, []) self.fail() @@ -502,7 +502,7 @@ def test_more_like_this(self): self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) def test_build_schema(self): - old_ui = connections['elasticsearch2'].get_unified_index() + old_ui = connections['elasticsearch'].get_unified_index() (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) self.assertEqual(content_field_name, 'text') @@ -542,17 +542,17 @@ def test_build_schema(self): }) def test_verify_type(self): - old_ui = connections['elasticsearch2'].get_unified_index() + old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() ui.build(indexes=[smtmmi]) - connections['elasticsearch2']._index = ui - sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = ui + sb = connections['elasticsearch'].get_backend() sb.update(smtmmi, self.sample_objs) self.assertEqual(sb.search('*:*')['hits'], 3) self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) - connections['elasticsearch2']._index = old_ui + connections['elasticsearch']._index = old_ui class CaptureHandler(std_logging.Handler): @@ -576,26 +576,26 @@ def setUp(self): # Stow. # Point the backend at a URL that doesn't exist so we can watch the # sparks fly. 
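# Each request against the unreachable URL should fail and be recorded by CaptureHandler.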
- self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = "%s/foo/" % self.old_es_url + self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url self.cap = CaptureHandler() logging.getLogger('haystack').addHandler(self.cap) import haystack logging.getLogger('haystack').removeHandler(haystack.stream) # Setup the rest of the bits. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = ui + self.sb = connections['elasticsearch'].get_backend() def tearDown(self): import haystack # Restore. - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL'] = self.old_es_url - connections['elasticsearch2']._index = self.old_ui + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url + connections['elasticsearch']._index = self.old_ui logging.getLogger('haystack').removeHandler(self.cap) logging.getLogger('haystack').addHandler(haystack.stream) @@ -633,47 +633,47 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() - self.sq = connections['elasticsearch2'].get_query() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() + self.sq = connections['elasticsearch'].get_query() # Force indexing of the content. - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() def test_log_query(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=False): len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. - self.sq = connections['elasticsearch2'].query(using='elasticsearch2') + self.sq = connections['elasticsearch'].query(using='elasticsearch') self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) - self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + self.assertEqual(len(connections['elasticsearch'].queries), 1) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. 
- self.sq = connections['elasticsearch2'].query('elasticsearch2') + self.sq = connections['elasticsearch'].query('elasticsearch') self.sq.add_filter(SQ(name='bar')) self.sq.add_filter(SQ(text='moof')) len(self.sq.get_results()) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) - self.assertEqual(connections['elasticsearch2'].queries[0]['query_string'], + self.assertEqual(len(connections['elasticsearch'].queries), 2) + self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') - self.assertEqual(connections['elasticsearch2'].queries[1]['query_string'], + self.assertEqual(connections['elasticsearch'].queries[1]['query_string'], u'(name:(bar) AND text:(moof))') @@ -689,14 +689,14 @@ def setUp(self): super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') - self.rsqs = RelatedSearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') + self.rsqs = RelatedSearchQuerySet('elasticsearch') # Ugly but not constantly reindexing saves us almost 50% runtime. global lssqstc_all_loaded @@ -708,11 +708,11 @@ def setUp(self): clear_elasticsearch_index() # Force indexing of the content. - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown() def test_load_all(self): @@ -724,28 +724,28 @@ def test_load_all(self): def test_iter(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() results = sorted([int(result.pk) for result in sqs]) self.assertEqual(results, list(range(1, 24))) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_slice(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_values_slicing(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends @@ -762,50 +762,50 @@ def test_values_slicing(self): results = self.sqs.all().order_by('pub_date').values_list('pk', 
flat=True) self.assertListEqual(results[1:11], expected_pks) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_count(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() self.assertEqual(sqs.count(), 23) self.assertEqual(sqs.count(), 23) self.assertEqual(len(sqs), 23) self.assertEqual(sqs.count(), 23) # Should only execute one query to count the length of the result set. - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_manual_iter(self): results = self.sqs.all() reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all() self.assertEqual(len(results._result_cache), 0) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_cache_is_full(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), False) results = self.sqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) def test___and__(self): sqs1 = self.sqs.filter(content='foo') @@ -905,62 +905,62 @@ def test_related_load_all_queryset(self): def test_related_iter(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.rsqs.all() results = set([int(result.pk) for result in sqs]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_slice(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in 
results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) - self.assertEqual(len(connections['elasticsearch2'].queries), 3) + self.assertEqual(len(connections['elasticsearch'].queries), 3) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_manual_iter(self): results = self.rsqs.all() reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = sorted([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, list(range(1, 24))) - self.assertEqual(len(connections['elasticsearch2'].queries), 4) + self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_fill_cache(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all() self.assertEqual(len(results._result_cache), 0) - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) - self.assertEqual(len(connections['elasticsearch2'].queries), 1) + self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) - self.assertEqual(len(connections['elasticsearch2'].queries), 2) + self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_related_cache_is_full(self): reset_search_queries() - self.assertEqual(len(connections['elasticsearch2'].queries), 0) + self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.rsqs._cache_is_full(), False) results = self.rsqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) - self.assertEqual(len(connections['elasticsearch2'].queries), 5) + self.assertEqual(len(connections['elasticsearch'].queries), 5) def test_quotes_regression(self): sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") @@ -1036,26 +1036,26 @@ def setUp(self): super(LiveElasticsearch2SpellingTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSpellingIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. 
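# setup() recreates the index settings and mapping against the just-wiped index.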
- self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() self.sb.setup() - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SpellingTestCase, self).tearDown() def test_spelling(self): @@ -1074,21 +1074,21 @@ def setUp(self): # Wipe it clean. clear_elasticsearch_index() - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') - self.smmi.update(using='elasticsearch2') - self.sammi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() def test_more_like_this(self): @@ -1132,30 +1132,30 @@ def setUp(self): super(LiveElasticsearch2AutocompleteTestCase, self).setUp() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. - self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() self.sb.setup() - self.smmi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() def test_build_schema(self): - self.sb = connections['elasticsearch2'].get_backend() + self.sb = connections['elasticsearch'].get_backend() content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, @@ -1231,14 +1231,14 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.srtsi = Elasticsearch2RoundTripSearchIndex() self.ui.build(indexes=[self.srtsi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') # Fake indexing. mock = MockModel() @@ -1247,7 +1247,7 @@ def setUp(self): def tearDown(self): # Restore. 
- connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2RoundTripTestCase, self).tearDown() def test_round_trip(self): @@ -1282,21 +1282,21 @@ def setUp(self): clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) - connections['elasticsearch2']._index = self.ui + connections['elasticsearch']._index = self.ui - self.sqs = SearchQuerySet('elasticsearch2') + self.sqs = SearchQuerySet('elasticsearch') - self.smmi.update(using='elasticsearch2') - self.sammi.update(using='elasticsearch2') + self.smmi.update(using='elasticsearch') + self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2PickleTestCase, self).tearDown() def test_pickling(self): @@ -1317,16 +1317,16 @@ def setUp(self): super(Elasticsearch2BoostBackendTestCase, self).setUp() # Wipe it clean. - self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. - self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2BoostMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() self.sample_objs = [] @@ -1345,17 +1345,17 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2BoostBackendTestCase, self).tearDown() def raw_search(self, query): - return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch2']['INDEX_NAME']) + return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) def test_boost(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) - results = SearchQuerySet(using='elasticsearch2').filter(SQ(author='daniel') | SQ(editor='daniel')) + results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual(set([result.id for result in results]), set([ 'core.afourthmockmodel.4', @@ -1379,12 +1379,12 @@ def test__to_python(self): class RecreateIndexTestCase(TestCase): def setUp(self): self.raw_es = elasticsearch.Elasticsearch( - settings.HAYSTACK_CONNECTIONS['elasticsearch2']['URL']) + settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) def test_recreate_index(self): clear_elasticsearch_index() - sb = connections['elasticsearch2'].get_backend() + sb = connections['elasticsearch'].get_backend() sb.silently_fail = True sb.setup() @@ -1410,12 +1410,12 @@ def setUp(self): clear_elasticsearch_index() # Stow. 
- self.old_ui = connections['elasticsearch2'].get_unified_index() + self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2FacetingMockSearchIndex() self.ui.build(indexes=[self.smmi]) - connections['elasticsearch2']._index = self.ui - self.sb = connections['elasticsearch2'].get_backend() + connections['elasticsearch']._index = self.ui + self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. self.sb.existing_mapping = {} @@ -1438,12 +1438,12 @@ def setUp(self): self.sample_objs.append(mock) def tearDown(self): - connections['elasticsearch2']._index = self.old_ui + connections['elasticsearch']._index = self.old_ui super(Elasticsearch2FacetingTestCase, self).tearDown() def test_facet(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').facet_counts() + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 5), ('Dan Watson', 4), @@ -1452,7 +1452,7 @@ def test_facet(self): ('Perry White', 5), ('George Taylor', 4), ]) - counts = SearchQuerySet('elasticsearch2').filter(content='white').facet('facet_field', + counts = SearchQuerySet('elasticsearch').filter(content='white').facet('facet_field', order='reverse_count').facet_counts() self.assertEqual(counts['fields']['facet_field'], [ ('Dan Watson', 2), @@ -1461,7 +1461,7 @@ def test_facet(self): def test_multiple_narrow(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').narrow('editor_exact:"Perry White"').narrow( + counts = SearchQuerySet('elasticsearch').narrow('editor_exact:"Perry White"').narrow( 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), @@ -1469,7 +1469,7 @@ def test_multiple_narrow(self): def test_narrow(self): self.sb.update(self.smmi, self.sample_objs) - counts = SearchQuerySet('elasticsearch2').facet('author').facet('editor').narrow( + counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').narrow( 'editor_exact:"Perry White"').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), @@ -1484,8 +1484,8 @@ def test_date_facet(self): start = datetime.date(2013, 9, 1) end = datetime.date(2013, 9, 30) # Facet by day - counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, - gap_by='day').facet_counts() + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='day').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 2), (datetime.datetime(2013, 9, 2), 3), @@ -1493,8 +1493,8 @@ def test_date_facet(self): (datetime.datetime(2013, 9, 4), 2), ]) # By month - counts = SearchQuerySet('elasticsearch2').date_facet('pub_date', start_date=start, end_date=end, - gap_by='month').facet_counts() + counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, + gap_by='month').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 9), ]) diff --git a/test_haystack/elasticsearch2_tests/test_inputs.py b/test_haystack/elasticsearch2_tests/test_inputs.py index 777334fb2..adc87d16d 100644 --- a/test_haystack/elasticsearch2_tests/test_inputs.py +++ b/test_haystack/elasticsearch2_tests/test_inputs.py 
@@ -10,7 +10,7 @@ class Elasticsearch2InputTestCase(TestCase):
 
     def setUp(self):
         super(Elasticsearch2InputTestCase, self).setUp()
-        self.query_obj = connections['elasticsearch2'].get_query()
+        self.query_obj = connections['elasticsearch'].get_query()
 
     def test_raw_init(self):
         raw = inputs.Raw('hello OR there, :you')
diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py
index 968180686..65d3cfef0 100644
--- a/test_haystack/elasticsearch2_tests/test_query.py
+++ b/test_haystack/elasticsearch2_tests/test_query.py
@@ -17,7 +17,7 @@ class Elasticsearch2SearchQueryTestCase(TestCase):
 
     def setUp(self):
         super(Elasticsearch2SearchQueryTestCase, self).setUp()
-        self.sq = connections['elasticsearch2'].get_query()
+        self.sq = connections['elasticsearch'].get_query()
 
     def test_build_query_all(self):
         self.assertEqual(self.sq.build_query(), '*:*')
@@ -157,7 +157,7 @@ def test_in_filter_values_list(self):
         self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))')
 
     def test_narrow_sq(self):
-        sqs = SearchQuerySet(using='elasticsearch2').narrow(SQ(foo='moof'))
+        sqs = SearchQuerySet(using='elasticsearch').narrow(SQ(foo='moof'))
         self.assertTrue(isinstance(sqs, SearchQuerySet))
         self.assertEqual(len(sqs.query.narrow_queries), 1)
         self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)')
@@ -166,7 +166,7 @@ class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase):
 
     def setUp(self):
         super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp()
-        self.backend = connections['elasticsearch2'].get_backend()
+        self.backend = connections['elasticsearch'].get_backend()
         self._elasticsearch_version = elasticsearch.VERSION
         elasticsearch.VERSION = (0, 9, 9)
 
@@ -189,7 +189,7 @@ def test_build_query_with_dwithin_range(self):
 
 class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase):
     def setUp(self):
         super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp()
-        self.backend = connections['elasticsearch2'].get_backend()
+        self.backend = connections['elasticsearch'].get_backend()
         self._elasticsearch_version = elasticsearch.VERSION
         elasticsearch.VERSION = (1, 0, 0)
diff --git a/test_haystack/elasticsearch_tests/__init__.py b/test_haystack/elasticsearch_tests/__init__.py
index 4066af099..d27d2a93c 100644
--- a/test_haystack/elasticsearch_tests/__init__.py
+++ b/test_haystack/elasticsearch_tests/__init__.py
@@ -10,6 +10,9 @@
 
 def setup():
     try:
+        import elasticsearch
+        if not ((1, 0, 0) <= elasticsearch.__version__ < (2, 0, 0)):
+            raise ImportError
         from elasticsearch import Elasticsearch, ElasticsearchException
     except ImportError:
         raise unittest.SkipTest("elasticsearch-py not installed.")
@@ -18,5 +21,5 @@ def setup():
     try:
         es.info()
     except ElasticsearchException as e:
-        raise unittest.SkipTest("elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e)
-
+        raise unittest.SkipTest(
+            "elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e)
diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index 0fa301297..14cfb7517 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -84,12 +84,6 @@
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },
-    'elasticsearch2': {
-        'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://127.0.0.1:29200/',
-        'INDEX_NAME': 'test_default',
-        'INCLUDE_SPELLING': True,
-    },
     'simple': {
         'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
     },
@@ -99,3 +93,11 @@
         'INCLUDE_SPELLING': True,
     },
 }
+
+if os.getenv('VERSION_ES') == ">=2.0.0,<3.0.0":
+    HAYSTACK_CONNECTIONS['elasticsearch'] = {
+        'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
+        'URL': '127.0.0.1:9200/',
+        'INDEX_NAME': 'test_default',
+        'INCLUDE_SPELLING': True,
+    }
diff --git a/tox.ini b/tox.ini
index b63dc9c0f..1c71c9bbc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,13 +1,21 @@
 [tox]
 envlist = docs,
-    py27-django1.8,
-    py27-django1.9,
-    py34-django1.8,
-    py34-django1.9,
-    py35-django1.8,
-    py35-django1.9,
-    pypy-django1.8,
-    pypy-django1.9,
+    py27-django1.8-es1.x,
+    py27-django1.9-es1.x,
+    py34-django1.8-es1.x,
+    py34-django1.9-es1.x,
+    py35-django1.8-es1.x,
+    py35-django1.9-es1.x,
+    pypy-django1.8-es1.x,
+    pypy-django1.9-es1.x,
+    py27-django1.8-es2.x,
+    py27-django1.9-es2.x,
+    py34-django1.8-es2.x,
+    py34-django1.9-es2.x,
+    py35-django1.8-es2.x,
+    py35-django1.9-es2.x,
+    pypy-django1.8-es2.x,
+    pypy-django1.9-es2.x,
 [base]
 deps =
     requests
@@ -20,54 +28,140 @@ deps =
 deps =
     Django>=1.8,<1.9
 
+[es2.x]
+deps =
+    elasticsearch>=2.0.0,<3.0.0
+
+[es1.x]
+deps =
+    elasticsearch>=1.0.0,<2.0.0
+
 [testenv]
 commands =
     python test_haystack/solr_tests/server/wait-for-solr
     python {toxinidir}/setup.py test
 
-[testenv:pypy-django1.8]
+[testenv:pypy-django1.8-es1.x]
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.8]deps}
+    {[base]deps}
+
+[testenv:pypy-django1.9-es1.x]
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.9]deps}
+    {[base]deps}
+
+[testenv:py27-django1.8-es1.x]
+basepython = python2.7
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.8]deps}
+    {[base]deps}
+
+[testenv:py27-django1.9-es1.x]
+basepython = python2.7
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.9]deps}
+    {[base]deps}
+
+[testenv:py34-django1.8-es1.x]
+basepython = python3.4
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.8]deps}
+    {[base]deps}
+
+[testenv:py34-django1.9-es1.x]
+basepython = python3.4
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.9]deps}
+    {[base]deps}
+
+[testenv:py35-django1.8-es1.x]
+basepython = python3.5
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.8]deps}
+    {[base]deps}
+
+[testenv:py35-django1.9-es1.x]
+basepython = python3.5
+setenv = VERSION_ES=>=1.0.0,<2.0.0
+deps =
+    {[es1.x]deps}
+    {[django1.9]deps}
+    {[base]deps}
+
+[testenv:pypy-django1.8-es2.x]
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.8]deps}
     {[base]deps}
 
-[testenv:pypy-django1.9]
+[testenv:pypy-django1.9-es2.x]
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.9]deps}
     {[base]deps}
 
-[testenv:py27-django1.8]
+[testenv:py27-django1.8-es2.x]
 basepython = python2.7
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.8]deps}
     {[base]deps}
 
-[testenv:py27-django1.9]
+[testenv:py27-django1.9-es2.x]
 basepython = python2.7
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.9]deps}
     {[base]deps}
 
-[testenv:py34-django1.8]
+[testenv:py34-django1.8-es2.x]
 basepython = python3.4
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.8]deps}
     {[base]deps}
 
-[testenv:py34-django1.9]
+[testenv:py34-django1.9-es2.x]
 basepython = python3.4
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.9]deps}
     {[base]deps}
 
-[testenv:py35-django1.8]
+[testenv:py35-django1.8-es2.x]
 basepython = python3.5
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.8]deps}
     {[base]deps}
 
-[testenv:py35-django1.9]
+[testenv:py35-django1.9-es2.x]
 basepython = python3.5
+setenv = VERSION_ES=>=2.0.0,<3.0.0
 deps =
+    {[es2.x]deps}
     {[django1.9]deps}
     {[base]deps}

From cbf9b446d4dae5ccd5144a10e1c40eb58ac3e4ba Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Thu, 24 Mar 2016 21:54:30 +0100
Subject: [PATCH 36/51] Uses a build matrix to test ES 1.x or ES 2.x - set literals on tests

---
 .../elasticsearch2_tests/test_backend.py | 45 +++++++++----------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py
index cdefaacc7..b321a4eb4 100644
--- a/test_haystack/elasticsearch2_tests/test_backend.py
+++ b/test_haystack/elasticsearch2_tests/test_backend.py
@@ -417,7 +417,7 @@ def test_search(self):
         self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []})
         self.assertEqual(self.sb.search('*:*')['hits'], 3)
-        self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3']))
+        self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), {u'2', u'1', u'3'})
 
         self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []})
         self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3)
@@ -434,7 +434,7 @@ def test_search(self):
         self.assertEqual(results['hits'], 3)
         self.assertSetEqual(
             set(results['facets']['fields']['name']),
-            set([('daniel3', 1), ('daniel2', 1), ('daniel1', 1)])
+            {('daniel3', 1), ('daniel2', 1), ('daniel1', 1)}
         )
 
         self.assertEqual(self.sb.search('', date_facets={
@@ -451,8 +451,8 @@ def test_search(self):
         self.assertEqual(results['hits'], 3)
         self.assertEqual(results['facets']['queries'], {u'name': 3})
 
-        self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []})
-        results = self.sb.search('Index', narrow_queries=set(['name:daniel1']))
+        self.assertEqual(self.sb.search('', narrow_queries={'name:daniel1'}), {'hits': 0, 'results': []})
+        results = self.sb.search('Index', narrow_queries={'name:daniel1'})
         self.assertEqual(results['hits'], 1)
 
         # Ensure that swapping the ``result_class`` works.
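
A note on the mechanism the build-matrix patch above sets up: every tox environment exports VERSION_ES with the exact pip range it installs, and test_haystack/settings.py reads that variable back to decide whether the shared 'elasticsearch' connection should point at the 2.x engine. A minimal, self-contained sketch of that selection logic (the helper function is illustrative and not part of the patch; the engine paths and the VERSION_ES value are the ones used in the diffs above):

    # -*- coding: utf-8 -*-
    import os

    # Engine paths as they appear in test_haystack/settings.py.
    ES1_ENGINE = 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine'
    ES2_ENGINE = 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine'

    def elasticsearch_connection():
        """Build the 'elasticsearch' connection dict for the installed lib."""
        es2 = os.getenv('VERSION_ES') == '>=2.0.0,<3.0.0'
        return {
            'ENGINE': ES2_ENGINE if es2 else ES1_ENGINE,
            'URL': 'http://127.0.0.1:9200/',
            'INDEX_NAME': 'test_default',
            'INCLUDE_SPELLING': True,
        }
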
@@ -782,7 +782,7 @@ def test_manual_iter(self):
         self.assertEqual(len(connections['elasticsearch'].queries), 0)
         results = set([int(result.pk) for result in results._manual_iter()])
         self.assertEqual(results,
-                         set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20]))
+                         {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20})
         self.assertEqual(len(connections['elasticsearch'].queries), 3)
 
     def test_fill_cache(self):
@@ -900,8 +900,8 @@ def test_related_load_all_queryset(self):
         sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10))
         self.assertTrue(isinstance(sqs, SearchQuerySet))
         self.assertEqual(len(sqs._load_all_querysets), 1)
-        self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20]))
-        self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23]))
+        self.assertEqual(set([obj.object.id for obj in sqs]), {12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20})
+        self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), {21, 22, 23})
 
     def test_related_iter(self):
         reset_search_queries()
@@ -909,7 +909,7 @@ def test_related_iter(self):
         sqs = self.rsqs.all()
         results = set([int(result.pk) for result in sqs])
         self.assertEqual(results,
-                         set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20]))
+                         {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20})
         self.assertEqual(len(connections['elasticsearch'].queries), 4)
 
     def test_related_slice(self):
@@ -928,7 +928,7 @@ def test_related_slice(self):
         reset_search_queries()
         self.assertEqual(len(connections['elasticsearch'].queries), 0)
         results = self.rsqs.all().order_by('pub_date')
-        self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23]))
+        self.assertEqual(set([int(result.pk) for result in results[20:30]]), {21, 22, 23})
         self.assertEqual(len(connections['elasticsearch'].queries), 4)
 
     def test_related_manual_iter(self):
@@ -1095,19 +1095,19 @@ def test_more_like_this(self):
         mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1))
         results = [result.pk for result in mlt]
         self.assertEqual(mlt.count(), 11)
-        self.assertEqual(set(results), set([u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14']))
+        self.assertEqual(set(results), {u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'})
         self.assertEqual(len(results), 10)
 
         alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2))
         results = [result.pk for result in alt_mlt]
         self.assertEqual(alt_mlt.count(), 9)
-        self.assertEqual(set(results), set([u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23']))
+        self.assertEqual(set(results), {u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'})
         self.assertEqual(len(results), 9)
 
         alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1))
         results = [result.pk for result in alt_mlt_with_models]
         self.assertEqual(alt_mlt_with_models.count(), 10)
-        self.assertEqual(set(results), set([u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16']))
+        self.assertEqual(set(results), {u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'})
         self.assertEqual(len(results), 10)
 
         if hasattr(MockModel.objects, 'defer'):
@@ -1184,8 +1184,8 @@ def test_build_schema(self):
     def test_autocomplete(self):
         autocomplete = self.sqs.autocomplete(text_auto='mod')
         self.assertEqual(autocomplete.count(), 16)
-        self.assertEqual(set([result.pk for result in autocomplete]), set(
-            ['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2']))
+        self.assertEqual(set([result.pk for result in autocomplete]),
+                         {'1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'})
         self.assertTrue('mod' in autocomplete[0].text.lower())
         self.assertTrue('mod' in autocomplete[1].text.lower())
         self.assertTrue('mod' in autocomplete[2].text.lower())
@@ -1197,7 +1197,7 @@ def test_autocomplete(self):
         autocomplete_2 = self.sqs.autocomplete(text_auto='your mod')
         self.assertEqual(autocomplete_2.count(), 13)
         self.assertEqual(set([result.pk for result in autocomplete_2]),
-                         set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15']))
+                         {'1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'})
         map_results = {result.pk: result for result in autocomplete_2}
         self.assertTrue('your' in map_results['1'].text.lower())
         self.assertTrue('mod' in map_results['1'].text.lower())
@@ -1209,18 +1209,18 @@ def test_autocomplete(self):
         # Test multiple fields.
         autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan')
         self.assertEqual(autocomplete_3.count(), 4)
-        self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14']))
+        self.assertEqual(set([result.pk for result in autocomplete_3]), {'12', '1', '22', '14'})
         self.assertEqual(len([result.pk for result in autocomplete_3]), 4)
 
         # Test numbers in phrases
         autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867')
         self.assertEqual(autocomplete_4.count(), 1)
-        self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20']))
+        self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'})
 
         # Test numbers alone
         autocomplete_4 = self.sqs.autocomplete(text_auto='867')
         self.assertEqual(autocomplete_4.count(), 1)
-        self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20']))
+        self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'})
 
 
 class LiveElasticsearch2RoundTripTestCase(TestCase):
@@ -1357,12 +1357,9 @@ def test_boost(self):
 
         results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel'))
 
-        self.assertEqual(set([result.id for result in results]), set([
-            'core.afourthmockmodel.4',
-            'core.afourthmockmodel.3',
-            'core.afourthmockmodel.1',
-            'core.afourthmockmodel.2'
-        ]))
+        self.assertEqual(set([result.id for result in results]),
+                         {'core.afourthmockmodel.4', 'core.afourthmockmodel.3', 'core.afourthmockmodel.1',
+                          'core.afourthmockmodel.2'})
 
     def test__to_python(self):
         self.assertEqual(self.sb._to_python('abc'), 'abc')

From 7be7e03251dc05073282b777ea2405005150d1be Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Thu, 24 Mar 2016 22:33:27 +0100
Subject: [PATCH 37/51] Uses a build matrix to test ES 1.x or ES 2.x - set literals on tests

---
 test_haystack/elasticsearch2_tests/test_query.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_haystack/elasticsearch2_tests/test_query.py b/test_haystack/elasticsearch2_tests/test_query.py
index 65d3cfef0..c66191c59 100644
--- a/test_haystack/elasticsearch2_tests/test_query.py
+++ b/test_haystack/elasticsearch2_tests/test_query.py
@@ -105,7 +105,7 @@ def test_build_query_in_filter_datetime(self):
 
     def test_build_query_in_with_set(self):
         self.sq.add_filter(SQ(content='why'))
-        self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"])))
+        self.sq.add_filter(SQ(title__in={"A Famous Paper", "An Infamous Article"}))
         self.assertTrue('((why) AND title:(' in self.sq.build_query())
         self.assertTrue('"A Famous Paper"' in self.sq.build_query())
         self.assertTrue('"An Infamous Article"' in self.sq.build_query())

From 19b0c74a3ad43963989005704932a62ca636168e Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Thu, 24 Mar 2016 23:49:50 +0100
Subject: [PATCH 38/51] Adds delay on test_multiprocessing
 test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing

---
 test_haystack/solr_tests/test_management_commands.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py
index 9cdaf7948..fbf475e4d 100644
--- a/test_haystack/solr_tests/test_management_commands.py
+++ b/test_haystack/solr_tests/test_management_commands.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 import datetime
+import time
 from tempfile import mkdtemp
 
 import pysolr
@@ -157,6 +158,7 @@ def test_multiprocessing(self):
 
         # TODO: Watch the output, make sure there are multiple pids.
         call_command('update_index', verbosity=2, workers=2, batchsize=5)
+        time.sleep(2)
         self.assertEqual(self.solr.search('*:*').hits, 23)
 
         call_command('clear_index', interactive=False, verbosity=0)

From e95de985ef8fa0e192fc4ecbcb901bd3b5ddf4a2 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Fri, 25 Mar 2016 00:01:39 +0100
Subject: [PATCH 39/51] Revert: Adds delay on test_multiprocessing
 test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing

---
 test_haystack/solr_tests/test_management_commands.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py
index fbf475e4d..9cdaf7948 100644
--- a/test_haystack/solr_tests/test_management_commands.py
+++ b/test_haystack/solr_tests/test_management_commands.py
@@ -3,7 +3,6 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 import datetime
-import time
 from tempfile import mkdtemp
 
 import pysolr
@@ -158,7 +157,6 @@ def test_multiprocessing(self):
 
         # TODO: Watch the output, make sure there are multiple pids.
         call_command('update_index', verbosity=2, workers=2, batchsize=5)
-        time.sleep(2)
         self.assertEqual(self.solr.search('*:*').hits, 23)
 
         call_command('clear_index', interactive=False, verbosity=0)

From e287b4a8885f99beb0cf38c202a9df498afcbb83 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Fri, 25 Mar 2016 12:58:02 +0100
Subject: [PATCH 40/51] Removes pool.join on command update_index

---
 haystack/management/commands/update_index.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py
index 995a1db78..52c489f37 100755
--- a/haystack/management/commands/update_index.py
+++ b/haystack/management/commands/update_index.py
@@ -217,7 +217,6 @@ def update_backend(self, label, using):
             pool = multiprocessing.Pool(self.workers)
             pool.map(worker, ghetto_queue)
             pool.close()
-            pool.join()
 
         if self.remove:
             if self.start_date or self.end_date or total <= 0:

From 9babc67b298a3ef2a3740e0b471b1bd74f991981 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Fri, 25 Mar 2016 13:04:57 +0100
Subject: [PATCH 41/51] Revert: Test multiprocessing with context manager

---
 haystack/management/commands/update_index.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py
index 52c489f37..995a1db78 100755
--- a/haystack/management/commands/update_index.py
+++ b/haystack/management/commands/update_index.py
@@ -217,6 +217,7 @@ def update_backend(self, label, using):
             pool = multiprocessing.Pool(self.workers)
             pool.map(worker, ghetto_queue)
             pool.close()
+            pool.join()
 
         if self.remove:
             if self.start_date or self.end_date or total <= 0:

From 2e372ee5050bf15133affa0d62f202dec57d6705 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 16:18:19 +0100
Subject: [PATCH 42/51] Elasticsearch 2.x support

---
 test_haystack/settings.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index 14cfb7517..31ad91875 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -84,6 +84,12 @@
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },
+    'elasticsearch2': {
+        'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
+        'URL': 'http://192.168.99.100:9200/',
+        'INDEX_NAME': 'test_default',
+        'INCLUDE_SPELLING': True,
+    },
     'simple': {
         'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
     },

From 1c97df85e935014b64a8042eaf32b2a22c6b4756 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 16:29:00 +0100
Subject: [PATCH 43/51] Elasticsearch 2.x support - Fix localhost IP in elasticsearch2 settings

---
 test_haystack/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index 31ad91875..8c9ffdfa3 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -86,7 +86,7 @@
     },
     'elasticsearch2': {
         'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://192.168.99.100:9200/',
+        'URL': 'http://127.0.0.1:9200/',
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },

From a79ee14c583126c3bda30afcda3c75ecc09837d4 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Wed, 23 Mar 2016 17:26:30 +0100
Subject: [PATCH 44/51] Launches ES 2.x on Travis - connects on port 29200

---
 test_haystack/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index 8c9ffdfa3..a15346112 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -86,7 +86,7 @@
     },
     'elasticsearch2': {
         'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://127.0.0.1:9200/',
+        'URL': 'http://127.0.0.1:29200/',
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },

From 092a486c322d9d5fee3bc0471bd660fe84a79ba7 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Thu, 24 Mar 2016 01:59:57 +0100
Subject: [PATCH 45/51] Uses a build matrix to test ES 1.x or ES 2.x

---
 test_haystack/elasticsearch_tests/__init__.py | 3 ++-
 test_haystack/settings.py                     | 6 ------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/test_haystack/elasticsearch_tests/__init__.py b/test_haystack/elasticsearch_tests/__init__.py
index d27d2a93c..1736e1590 100644
--- a/test_haystack/elasticsearch_tests/__init__.py
+++ b/test_haystack/elasticsearch_tests/__init__.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# -*- coding: utf-8 -*-
 
 import unittest
 import warnings
@@ -8,6 +8,7 @@
 
 warnings.simplefilter('ignore', Warning)
 
+
 def setup():
     try:
         import elasticsearch
diff --git a/test_haystack/settings.py b/test_haystack/settings.py
index a15346112..14cfb7517 100644
--- a/test_haystack/settings.py
+++ b/test_haystack/settings.py
@@ -84,12 +84,6 @@
         'INDEX_NAME': 'test_default',
         'INCLUDE_SPELLING': True,
     },
-    'elasticsearch2': {
-        'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine',
-        'URL': 'http://127.0.0.1:29200/',
-        'INDEX_NAME': 'test_default',
-        'INCLUDE_SPELLING': True,
-    },
     'simple': {
         'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
     },

From b809b31abc3f267eb74f3e9f18a232975e9d1dbc Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Thu, 24 Mar 2016 23:49:50 +0100
Subject: [PATCH 46/51] Adds delay on test_multiprocessing
 test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing

---
 test_haystack/solr_tests/test_management_commands.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py
index 9cdaf7948..fbf475e4d 100644
--- a/test_haystack/solr_tests/test_management_commands.py
+++ b/test_haystack/solr_tests/test_management_commands.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 import datetime
+import time
 from tempfile import mkdtemp
 
 import pysolr
@@ -157,6 +158,7 @@ def test_multiprocessing(self):
 
         # TODO: Watch the output, make sure there are multiple pids.
         call_command('update_index', verbosity=2, workers=2, batchsize=5)
+        time.sleep(2)
         self.assertEqual(self.solr.search('*:*').hits, 23)
 
         call_command('clear_index', interactive=False, verbosity=0)

From 8960ca55e6c38fc59ec05b401d46f5932a50049b Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Fri, 25 Mar 2016 00:01:39 +0100
Subject: [PATCH 47/51] Revert: Adds delay on test_multiprocessing
 test_haystack.solr_tests.test_management_commands.ManagementCommandTestCase#test_multiprocessing

---
 test_haystack/solr_tests/test_management_commands.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py
index fbf475e4d..9cdaf7948 100644
--- a/test_haystack/solr_tests/test_management_commands.py
+++ b/test_haystack/solr_tests/test_management_commands.py
@@ -3,7 +3,6 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 import datetime
-import time
 from tempfile import mkdtemp
 
 import pysolr
@@ -158,7 +157,6 @@ def test_multiprocessing(self):
 
         # TODO: Watch the output, make sure there are multiple pids.
         call_command('update_index', verbosity=2, workers=2, batchsize=5)
-        time.sleep(2)
         self.assertEqual(self.solr.search('*:*').hits, 23)
 
         call_command('clear_index', interactive=False, verbosity=0)

From 4b9dcf8fa0bc990e8d157f02d23d801d011df1bb Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Fri, 25 Mar 2016 12:58:02 +0100
Subject: [PATCH 48/51] Removes pool.join on command update_index

---
 haystack/management/commands/update_index.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py
index 995a1db78..52c489f37 100755
--- a/haystack/management/commands/update_index.py
+++ b/haystack/management/commands/update_index.py
@@ -217,7 +217,6 @@ def update_backend(self, label, using):
             pool = multiprocessing.Pool(self.workers)
             pool.map(worker, ghetto_queue)
             pool.close()
-            pool.join()
 
         if self.remove:
             if self.start_date or self.end_date or total <= 0:

From 30a6a1d9b007cb786866183a7777d3f77a1cb28f Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Fri, 25 Mar 2016 13:04:57 +0100
Subject: [PATCH 49/51] Revert: Test multiprocessing with context manager

---
 haystack/management/commands/update_index.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py
index 52c489f37..995a1db78 100755
--- a/haystack/management/commands/update_index.py
+++ b/haystack/management/commands/update_index.py
@@ -217,6 +217,7 @@ def update_backend(self, label, using):
             pool = multiprocessing.Pool(self.workers)
             pool.map(worker, ghetto_queue)
             pool.close()
+            pool.join()
 
         if self.remove:
             if self.start_date or self.end_date or total <= 0:

From fe0d37498e639f336c76a549273a90926f2de345 Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Sun, 27 Mar 2016 16:37:36 +0200
Subject: [PATCH 50/51] Renames a non-existent fixture file

---
 test_haystack/elasticsearch2_tests/test_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py
index b321a4eb4..d8388359d 100644
--- a/test_haystack/elasticsearch2_tests/test_backend.py
+++ b/test_haystack/elasticsearch2_tests/test_backend.py
@@ -624,7 +624,7 @@ def test_all_cases(self):
 
 
 class LiveElasticsearch2SearchQueryTestCase(TestCase):
-    fixtures = ['initial_data.json']
+    fixtures = ['base_data.json']
 
     def setUp(self):
         super(LiveElasticsearch2SearchQueryTestCase, self).setUp()

From d42090f5dda4323e5d377a9ec4fda719c8ea365d Mon Sep 17 00:00:00 2001
From: Pedro Aquilino
Date: Mon, 28 Mar 2016 11:06:42 +0200
Subject: [PATCH 51/51] Fixes differences in connection.queries counts

---
 .../elasticsearch2_tests/test_backend.py | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/test_haystack/elasticsearch2_tests/test_backend.py b/test_haystack/elasticsearch2_tests/test_backend.py
index d8388359d..14fd3b1aa 100644
--- a/test_haystack/elasticsearch2_tests/test_backend.py
+++ b/test_haystack/elasticsearch2_tests/test_backend.py
@@ -4,9 +4,11 @@
 import datetime
 import logging as std_logging
 import operator
+import unittest
 from decimal import Decimal
 
 import elasticsearch
+from django.apps import apps
 from django.conf import settings
 from django.test import TestCase
 from django.test.utils import override_settings
@@ -21,7 +23,6 @@
 from haystack.utils.loading import UnifiedIndex
 from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel
 from ..mocks import MockSearchResult
-from ..utils import unittest
 
 test_pickling = True
 
@@ -580,8 +581,8 @@ def setUp(self):
         settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url
         self.cap = CaptureHandler()
         logging.getLogger('haystack').addHandler(self.cap)
-        import haystack
-        logging.getLogger('haystack').removeHandler(haystack.stream)
+        config = apps.get_app_config('haystack')
+        logging.getLogger('haystack').removeHandler(config.stream)
 
         # Setup the rest of the bits.
         self.old_ui = connections['elasticsearch'].get_unified_index()
@@ -592,12 +593,12 @@ def setUp(self):
         self.sb = connections['elasticsearch'].get_backend()
 
     def tearDown(self):
-        import haystack
         # Restore.
         settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url
         connections['elasticsearch']._index = self.old_ui
+        config = apps.get_app_config('haystack')
         logging.getLogger('haystack').removeHandler(self.cap)
-        logging.getLogger('haystack').addHandler(haystack.stream)
+        logging.getLogger('haystack').addHandler(config.stream)
 
     @unittest.expectedFailure
     def test_all_cases(self):
@@ -910,26 +911,26 @@ def test_related_iter(self):
         results = set([int(result.pk) for result in sqs])
         self.assertEqual(results,
                          {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20})
-        self.assertEqual(len(connections['elasticsearch'].queries), 4)
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
 
     def test_related_slice(self):
         reset_search_queries()
         self.assertEqual(len(connections['elasticsearch'].queries), 0)
         results = self.rsqs.all().order_by('pub_date')
         self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
-        self.assertEqual(len(connections['elasticsearch'].queries), 3)
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
 
         reset_search_queries()
         self.assertEqual(len(connections['elasticsearch'].queries), 0)
         results = self.rsqs.all().order_by('pub_date')
         self.assertEqual(int(results[21].pk), 22)
-        self.assertEqual(len(connections['elasticsearch'].queries), 4)
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
 
         reset_search_queries()
         self.assertEqual(len(connections['elasticsearch'].queries), 0)
         results = self.rsqs.all().order_by('pub_date')
         self.assertEqual(set([int(result.pk) for result in results[20:30]]), {21, 22, 23})
-        self.assertEqual(len(connections['elasticsearch'].queries), 4)
+        self.assertEqual(len(connections['elasticsearch'].queries), 1)
 
     def test_related_manual_iter(self):
         results = self.rsqs.all()
@@ -938,7 +939,7 @@ def test_related_manual_iter(self):
         self.assertEqual(len(connections['elasticsearch'].queries), 0)
         results = sorted([int(result.pk) for result in results._manual_iter()])
         self.assertEqual(results, list(range(1, 24)))
-        self.assertEqual(len(connections['elasticsearch'].queries), 4)
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
 
     def test_related_fill_cache(self):
         reset_search_queries()
@@ -960,7 +961,7 @@ def test_related_cache_is_full(self):
         results = self.rsqs.all()
         fire_the_iterator_and_fill_cache = [result for result in results]
         self.assertEqual(results._cache_is_full(), True)
-        self.assertEqual(len(connections['elasticsearch'].queries), 5)
+        self.assertEqual(len(connections['elasticsearch'].queries), 3)
 
     def test_quotes_regression(self):
         sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E")
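
The revised counts in this final patch track how the ES2 backend fills the SearchQuerySet result cache: every backend round trip appends an entry to connections['elasticsearch'].queries, so a backend that satisfies a slice in one larger fetch records fewer queries than one that pages through several small ones. A rough, self-contained sketch of the counting behaviour (plain Python, not Haystack code; the batch size and row count are illustrative):

    # Toy stand-in for a lazily filled result cache; not Haystack code.
    class LazyResults(object):
        def __init__(self, backend, batch_size=10):
            self.backend = backend      # callable: (start, stop) -> list of rows
            self.batch_size = batch_size
            self.cache = []
            self.queries = 0            # analogue of connections[...].queries

        def __getitem__(self, index):
            # Fetch batches until the requested index is cached; each batch
            # costs one backend query, so bigger batches mean fewer queries.
            while len(self.cache) <= index:
                start = len(self.cache)
                batch = self.backend(start, start + self.batch_size)
                if not batch:
                    break
                self.cache.extend(batch)
                self.queries += 1
            return self.cache[index]

    fake_backend = lambda start, stop: list(range(start, min(stop, 23)))
    results = LazyResults(fake_backend)
    results[21]             # rows 0-21 arrive in three batches of ten
    print(results.queries)  # -> 3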
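
On the pool.close()/pool.join() back-and-forth in the update_index patches above: pool.map() already blocks until every queued batch has been processed, close() only forbids further submissions, and join() waits for the worker processes themselves to exit, so the re-added join is about shutting the workers down cleanly before the command returns rather than about result delivery. A minimal sketch of the final shape of that block (standard library only; the worker body and queue contents are illustrative):

    import multiprocessing

    def worker(args):
        # Illustrative stand-in for haystack's per-batch index update.
        start, end = args
        return end - start

    if __name__ == '__main__':
        ghetto_queue = [(0, 10), (10, 20), (20, 23)]
        pool = multiprocessing.Pool(2)
        pool.map(worker, ghetto_queue)
        pool.close()   # no new tasks may be submitted
        pool.join()    # wait for the worker processes to exit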