Adding support for setting specific index_analyzer and search_analyzer properties on string fields.
Warren Kiser committed Apr 27, 2015
1 parent 5da24f3 commit 937ce23
Showing 4 changed files with 76 additions and 15 deletions.
4 changes: 3 additions & 1 deletion elasticsearch_dsl/field.py
@@ -159,7 +159,9 @@ def _to_python(self, data):
 class String(Field):
     _param_defs = {
         'fields': {'type': 'field', 'hash': True},
-        'analyzer': {'type': 'analyzer'}
+        'analyzer': {'type': 'analyzer'},
+        'index_analyzer': {'type': 'analyzer'},
+        'search_analyzer': {'type': 'analyzer'},
     }
     name = 'string'
 
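With the two new _param_defs entries, index_analyzer and search_analyzer are serialized the same way analyzer already is. A minimal usage sketch, mirroring the tests added below (built-in analyzers are passed by name and pass through verbatim):

from elasticsearch_dsl import field

# Both parameters flow straight into the field's mapping definition.
f = field.String(index_analyzer='snowball', search_analyzer='keyword')
assert f.to_dict() == {
    'type': 'string',
    'index_analyzer': 'snowball',
    'search_analyzer': 'keyword',
}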
29 changes: 15 additions & 14 deletions elasticsearch_dsl/mapping.py
@@ -35,20 +35,21 @@ def from_es(cls, index, doc_type, using='default'):
 
     def _collect_analysis(self):
         analysis = {}
         for f in self.properties._collect_fields():
-            if not hasattr(f, 'analyzer'):
-                continue
-            analyzer = f.analyzer
-            if analyzer.name != 'custom':
-                continue
-            d = analyzer.get_analysis_definition()
-            # empty custom analyzer, probably already defined out of our control
-            if not d:
-                continue
-
-            # merge the definition
-            # TODO: conflict detection/resolution
-            for key in d:
-                analysis.setdefault(key, {}).update(d[key])
+            for analyzer_name in ('analyzer', 'index_analyzer', 'search_analyzer'):
+                if not hasattr(f, analyzer_name):
+                    continue
+                analyzer = getattr(f, analyzer_name)
+                if analyzer.name != 'custom':
+                    continue
+                d = analyzer.get_analysis_definition()
+                # empty custom analyzer, probably already defined out of our control
+                if not d:
+                    continue
+
+                # merge the definition
+                # TODO: conflict detection/resolution
+                for key in d:
+                    analysis.setdefault(key, {}).update(d[key])
 
         return analysis

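The rewritten loop checks each of the three analyzer attributes in turn; only custom analyzers contribute a definition, and each definition is merged into the result section by section with a shallow dict update. A standalone sketch of just that merge step (the two definition dicts are hypothetical stand-ins for get_analysis_definition() output):

analysis = {}
definitions = [
    {'analyzer': {'my_analyzer1': {'type': 'custom', 'tokenizer': 'keyword'}},
     'filter': {'my_filter1': {'type': 'stop', 'stopwords': ['a', 'b']}}},
    {'analyzer': {'my_analyzer2': {'type': 'custom', 'tokenizer': 'trigram'}},
     'filter': {'my_filter2': {'type': 'stop', 'stopwords': ['c', 'd']}}},
]
for d in definitions:
    for key in d:
        # Sections ('analyzer', 'filter', 'tokenizer', ...) are merged by
        # name; a clashing name silently overwrites (hence the TODO above).
        analysis.setdefault(key, {}).update(d[key])

assert sorted(analysis['analyzer']) == ['my_analyzer1', 'my_analyzer2']
assert sorted(analysis['filter']) == ['my_filter1', 'my_filter2']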
23 changes: 23 additions & 0 deletions test_elasticsearch_dsl/test_field.py
@@ -43,3 +43,26 @@ def test_nested_provides_direct_access_to_its_fields():
 
     assert 'name' in f
     assert f['name'] == field.String(index='not_analyzed')
+
+
+def test_field_supports_multiple_analyzers():
+    f = field.String(index_analyzer='snowball', search_analyzer='keyword')
+    assert {'index_analyzer': 'snowball', 'search_analyzer': 'keyword', 'type': 'string'} == f.to_dict()
+
+
+def test_multifield_supports_multiple_analyzers():
+    f = field.String(fields={
+        'f1': field.String(search_analyzer='keyword', index_analyzer='snowball'),
+        'f2': field.String(analyzer='keyword')
+    })
+    assert {
+        'fields': {
+            'f1': {'index_analyzer': 'snowball',
+                   'search_analyzer': 'keyword',
+                   'type': 'string'
+                   },
+            'f2': {
+                'analyzer': 'keyword', 'type': 'string'}
+        },
+        'type': 'string'
+    } == f.to_dict()
35 changes: 35 additions & 0 deletions test_elasticsearch_dsl/test_mapping.py
@@ -93,3 +93,38 @@ def test_mapping_can_collect_all_analyzers():
             'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'},
         }
     } == m._collect_analysis()
+
+
+def test_mapping_can_collect_multiple_analyzers():
+    a1 = analysis.analyzer(
+        'my_analyzer1',
+        tokenizer='keyword',
+        filter=['lowercase', analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])],
+    )
+    a2 = analysis.analyzer(
+        'my_analyzer2',
+        tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
+        filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])],
+    )
+    m = mapping.Mapping('article')
+    m.field('title', 'string', analyzer=a1, index_analyzer=a1, search_analyzer=a2)
+    m.field(
+        'text', 'string', analyzer=a1,
+        fields={
+            'english': String(index_analyzer=a1),
+            'unknown': String(index_analyzer=a1, search_analyzer=a2),
+        }
+    )
+    assert {
+        'analyzer': {
+            'my_analyzer1': {'filter': ['lowercase', 'my_filter1'],
+                             'tokenizer': 'keyword',
+                             'type': 'custom'},
+            'my_analyzer2': {'filter': ['my_filter2'],
+                             'tokenizer': 'trigram',
+                             'type': 'custom'}},
+        'filter': {
+            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
+            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'}},
+        'tokenizer': {'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'}}
+    } == m._collect_analysis()
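Taken together: a custom analyzer attached through index_analyzer or search_analyzer now surfaces in _collect_analysis(), the dict the library uses for the index's analysis settings. A minimal end-to-end sketch reusing only names from the tests above (my_analyzer1 and my_filter1 are the test's own definitions); the expected output is inferred from the existing test, not verified against the library:

from elasticsearch_dsl import analysis, mapping

a = analysis.analyzer(
    'my_analyzer1',
    tokenizer='keyword',
    filter=[analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])],
)

m = mapping.Mapping('article')
# Before this commit only the plain analyzer attribute was collected;
# search_analyzer alone should now be enough for the definition to be picked up.
m.field('title', 'string', search_analyzer=a)

assert m._collect_analysis() == {
    'analyzer': {'my_analyzer1': {'filter': ['my_filter1'],
                                  'tokenizer': 'keyword',
                                  'type': 'custom'}},
    'filter': {'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'}},
}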
