Skip to content

Commit

Permalink
Added support for Elasticsearch 1.0+
Browse files Browse the repository at this point in the history
Elasticsearch 0.90.x is quite out-of-date, and this requirement is preventing us from sharing an ES cluster with newer IDAs.

ECOM-3261 and SOL-295
  • Loading branch information
clintonb committed Jan 5, 2016
1 parent 14ca307 commit 753fa9c
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 56 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ cache:
- $HOME/.cache/pip

before_install:
- curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.zip
- unzip elasticsearch-0.90.13.zip
- elasticsearch-0.90.13/bin/elasticsearch
- curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.5.2.zip
- unzip elasticsearch-1.5.2.zip
- elasticsearch-1.5.2/bin/elasticsearch -d


install:
Expand Down
55 changes: 23 additions & 32 deletions search/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.conf import settings
from django.core.cache import cache
from elasticsearch import Elasticsearch, exceptions
from elasticsearch.helpers import bulk
from elasticsearch.helpers import bulk, BulkIndexError

from search.search_engine_base import SearchEngine
from search.utils import ValueRange, _is_iterable
Expand Down Expand Up @@ -244,25 +244,25 @@ def _get_mappings(self, doc_type):
We cache the properties of each doc_type, if they are not available, we'll load them again from Elasticsearch
"""
doc_mappings = ElasticSearchEngine.get_mappings(self.index_name, doc_type)
if not doc_mappings:
try:
doc_mappings = self._es.indices.get_mapping(
index=self.index_name,
doc_type=doc_type,
)[doc_type]
# Try loading the mapping from the cache.
mapping = ElasticSearchEngine.get_mappings(self.index_name, doc_type)

# Fall back to Elasticsearch
if not mapping:
mapping = self._es.indices.get_mapping(
index=self.index_name,
doc_type=doc_type,
).get(self.index_name, {}).get('mappings', {}).get(doc_type, {})

# Cache the mapping, if one was retrieved
if mapping:
ElasticSearchEngine.set_mappings(
self.index_name,
doc_type,
doc_mappings
mapping
)
except exceptions.NotFoundError:
# In this case there are no mappings for this doc_type on the elasticsearch server
# This is a normal case when a new doc_type is being created, and it is expected that
# we'll hit it for new doc_type s
return {}

return doc_mappings
return mapping

def _clear_mapping(self, doc_type):
""" Remove the cached mappings, so that they get loaded from ES next time they are requested """
Expand Down Expand Up @@ -393,30 +393,21 @@ def remove(self, doc_type, doc_ids, **kwargs):
# pylint: disable=unexpected-keyword-arg
actions = []
for doc_id in doc_ids:
log.debug("remove index for %s object with id %s", doc_type, doc_id)
log.debug("Removing document of type %s and index %s", doc_type, doc_id)
action = {
'_op_type': 'delete',
"_index": self.index_name,
"_type": doc_type,
"_id": doc_id
}
actions.append(action)
# bulk() returns a tuple with summary information
# number of successfully executed actions and number of errors if stats_only is set to True.
_, indexing_errors = bulk(
self._es,
actions,
# let notfound not cause error
ignore=[404],
**kwargs
)
if indexing_errors:
ElasticSearchEngine.log_indexing_error(indexing_errors)
# Broad exception handler to protect around bulk call
except Exception as ex:
# log information and re-raise
log.exception("error while deleting document from index - %s", ex.message)
raise ex
bulk(self._es, actions, **kwargs)
except BulkIndexError as ex:
valid_errors = [error for error in ex.errors if error['delete']['status'] != 404]

if valid_errors:
log.exception("An error occurred while removing documents from the index.")
raise

# A few disabled pylint violations here:
# This procedure takes each of the possible input parameters and builds the query with each argument
Expand Down
23 changes: 14 additions & 9 deletions search/tests/test_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@
# Some of the subclasses that get used as settings-overrides will yield this pylint
# error, but they do get used when included as part of the override_settings
# pylint: disable=too-few-public-methods
""" Tests for search functionalty """
from datetime import datetime
""" Tests for search functionality """
import json
import os
from datetime import datetime

from mock import patch
from django.test import TestCase
from django.test.utils import override_settings
from elasticsearch import exceptions
from elasticsearch.helpers import BulkIndexError
from mock import patch

from search.api import perform_search, NoSearchEngineError
from search.elastic import RESERVED_CHARACTERS
from search.tests.mock_search_engine import MockSearchEngine, json_date_to_datetime
from search.tests.tests import MockSearchTests
from search.tests.utils import ErroringElasticImpl, SearcherMixin
from search.api import perform_search, NoSearchEngineError

from .mock_search_engine import MockSearchEngine, json_date_to_datetime
from .tests import MockSearchTests


@override_settings(SEARCH_ENGINE="search.tests.utils.ForceRefreshElasticSearchEngine")
Expand Down Expand Up @@ -201,8 +201,13 @@ def test_search_failure(self):

def test_remove_failure_bulk(self):
""" the remove operation should fail """
with patch('search.elastic.bulk', return_value=[0, [exceptions.ElasticsearchException()]]):
with self.assertRaises(exceptions.ElasticsearchException):
doc_id = 'test_id'
doc_type = 'test_doc'
error = {'delete': {
'status': 500, '_type': doc_type, '_index': 'test_index', '_version': 1, 'found': True, '_id': doc_id
}}
with patch('search.elastic.bulk', side_effect=BulkIndexError('Simulated error', [error])):
with self.assertRaises(BulkIndexError):
self.searcher.remove("test_doc", ["test_id"])

def test_remove_failure_general(self):
Expand Down
11 changes: 1 addition & 10 deletions search/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,16 +369,7 @@ def test_delete_item_slashes(self):

def test_delete_item_not_present(self):
""" make sure that we get no error removing an item that does not exist """
test_string = "This is a test of the emergency broadcast system"
self.searcher.index("test_doc", [{"id": "FAKE_ID", "content": {"name": "abc"}}])
self.searcher.remove("test_doc", ["FAKE_ID"])

response = self.searcher.search(test_string)
self.assertEqual(response["total"], 0)

self.searcher.remove("test_doc", ["FAKE_ID"])
response = self.searcher.search(test_string)
self.assertEqual(response["total"], 0)
self.searcher.remove("test_doc", ["TOTALLY_FAKE_ID"])

def test_filter_items(self):
""" Make sure that filters work """
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='edx-search',
version='0.1.1',
version='1.0.0',
description='Search and Index routines for index access',
author='edX',
url='https://github.com/edx/edx-search',
Expand All @@ -22,6 +22,6 @@
packages=['search'],
install_requires=[
"django >= 1.8, < 1.9",
"elasticsearch<1.0.0"
"elasticsearch>=1.0.0,<2.0.0"
]
)

0 comments on commit 753fa9c

Please sign in to comment.