Adding support for setting specific index_analyzer and search_analyzer properties on string fields.
Warren Kiser committed Apr 27, 2015
1 parent 5da24f3 commit 937ce23
Showing 4 changed files with 76 additions and 15 deletions.
4 changes: 3 additions & 1 deletion elasticsearch_dsl/field.py
@@ -159,7 +159,9 @@ def _to_python(self, data):
 class String(Field):
     _param_defs = {
         'fields': {'type': 'field', 'hash': True},
-        'analyzer': {'type': 'analyzer'}
+        'analyzer': {'type': 'analyzer'},
+        'index_analyzer': {'type': 'analyzer'},
+        'search_analyzer': {'type': 'analyzer'},
     }
     name = 'string'
 
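With the two new _param_defs entries, index_analyzer and search_analyzer are serialized the same way analyzer already is. A minimal usage sketch, mirroring the tests added below (built-in analyzers are passed by name and pass through verbatim):

from elasticsearch_dsl import field

# Both parameters flow straight into the field's mapping definition.
f = field.String(index_analyzer='snowball', search_analyzer='keyword')
assert f.to_dict() == {
    'type': 'string',
    'index_analyzer': 'snowball',
    'search_analyzer': 'keyword',
}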
29 changes: 15 additions & 14 deletions elasticsearch_dsl/mapping.py
@@ -35,20 +35,21 @@ def from_es(cls, index, doc_type, using='default'):
 
     def _collect_analysis(self):
         analysis = {}
         for f in self.properties._collect_fields():
-            if not hasattr(f, 'analyzer'):
-                continue
-            analyzer = f.analyzer
-            if analyzer.name != 'custom':
-                continue
-            d = analyzer.get_analysis_definition()
-            # empty custom analyzer, probably already defined out of our control
-            if not d:
-                continue
-
-            # merge the definition
-            # TODO: conflict detection/resolution
-            for key in d:
-                analysis.setdefault(key, {}).update(d[key])
+            for analyzer_name in ('analyzer', 'index_analyzer', 'search_analyzer'):
+                if not hasattr(f, analyzer_name):
+                    continue
+                analyzer = getattr(f, analyzer_name)
+                if analyzer.name != 'custom':
+                    continue
+                d = analyzer.get_analysis_definition()
+                # empty custom analyzer, probably already defined out of our control
+                if not d:
+                    continue
+
+                # merge the definition
+                # TODO: conflict detection/resolution
+                for key in d:
+                    analysis.setdefault(key, {}).update(d[key])
 
         return analysis

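The rewritten loop checks each of the three analyzer attributes in turn; only custom analyzers contribute a definition, and each definition is merged into the result section by section with a shallow dict update. A standalone sketch of just that merge step (the two definition dicts are hypothetical stand-ins for get_analysis_definition() output):

analysis = {}
definitions = [
    {'analyzer': {'my_analyzer1': {'type': 'custom', 'tokenizer': 'keyword'}},
     'filter': {'my_filter1': {'type': 'stop', 'stopwords': ['a', 'b']}}},
    {'analyzer': {'my_analyzer2': {'type': 'custom', 'tokenizer': 'trigram'}},
     'filter': {'my_filter2': {'type': 'stop', 'stopwords': ['c', 'd']}}},
]
for d in definitions:
    for key in d:
        # Sections ('analyzer', 'filter', 'tokenizer', ...) are merged by
        # name; a clashing name silently overwrites (hence the TODO above).
        analysis.setdefault(key, {}).update(d[key])

assert sorted(analysis['analyzer']) == ['my_analyzer1', 'my_analyzer2']
assert sorted(analysis['filter']) == ['my_filter1', 'my_filter2']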
23 changes: 23 additions & 0 deletions test_elasticsearch_dsl/test_field.py
@@ -43,3 +43,26 @@ def test_nested_provides_direct_access_to_its_fields():
 
     assert 'name' in f
     assert f['name'] == field.String(index='not_analyzed')
+
+
+def test_field_supports_multiple_analyzers():
+    f = field.String(index_analyzer='snowball', search_analyzer='keyword')
+    assert {'index_analyzer': 'snowball', 'search_analyzer': 'keyword', 'type': 'string'} == f.to_dict()
+
+
+def test_multifield_supports_multiple_analyzers():
+    f = field.String(fields={
+        'f1': field.String(search_analyzer='keyword', index_analyzer='snowball'),
+        'f2': field.String(analyzer='keyword')
+    })
+    assert {
+        'fields': {
+            'f1': {'index_analyzer': 'snowball',
+                   'search_analyzer': 'keyword',
+                   'type': 'string'
+                   },
+            'f2': {
+                'analyzer': 'keyword', 'type': 'string'}
+        },
+        'type': 'string'
+    } == f.to_dict()
35 changes: 35 additions & 0 deletions test_elasticsearch_dsl/test_mapping.py
@@ -93,3 +93,38 @@ def test_mapping_can_collect_all_analyzers():
             'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'},
         }
     } == m._collect_analysis()
+
+
+def test_mapping_can_collect_multiple_analyzers():
+    a1 = analysis.analyzer(
+        'my_analyzer1',
+        tokenizer='keyword',
+        filter=['lowercase', analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])],
+    )
+    a2 = analysis.analyzer(
+        'my_analyzer2',
+        tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
+        filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])],
+    )
+    m = mapping.Mapping('article')
+    m.field('title', 'string', analyzer=a1, index_analyzer=a1, search_analyzer=a2)
+    m.field(
+        'text', 'string', analyzer=a1,
+        fields={
+            'english': String(index_analyzer=a1),
+            'unknown': String(index_analyzer=a1, search_analyzer=a2),
+        }
+    )
+    assert {
+        'analyzer': {
+            'my_analyzer1': {'filter': ['lowercase', 'my_filter1'],
+                             'tokenizer': 'keyword',
+                             'type': 'custom'},
+            'my_analyzer2': {'filter': ['my_filter2'],
+                             'tokenizer': 'trigram',
+                             'type': 'custom'}},
+        'filter': {
+            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
+            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'}},
+        'tokenizer': {'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'}}
+    } == m._collect_analysis()
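Taken together: a custom analyzer attached through index_analyzer or search_analyzer now surfaces in _collect_analysis(), the dict the library uses for the index's analysis settings. A minimal end-to-end sketch reusing only names from the tests above (my_analyzer1 and my_filter1 are the test's own definitions); the expected output is inferred from the existing test, not verified against the library:

from elasticsearch_dsl import analysis, mapping

a = analysis.analyzer(
    'my_analyzer1',
    tokenizer='keyword',
    filter=[analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])],
)

m = mapping.Mapping('article')
# Before this commit only the plain analyzer attribute was collected;
# search_analyzer alone should now be enough for the definition to be picked up.
m.field('title', 'string', search_analyzer=a)

assert m._collect_analysis() == {
    'analyzer': {'my_analyzer1': {'filter': ['my_filter1'],
                                  'tokenizer': 'keyword',
                                  'type': 'custom'}},
    'filter': {'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'}},
}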
