In [1]:
from elasticsearch import Elasticsearch

In [2]:
# Connect to the local single-node cluster. The node is given as a full URL
# (scheme + host + port): elasticsearch-py 8.x rejects host entries without an
# explicit scheme, while 7.x accepts the URL form as well.
es = Elasticsearch(["http://localhost:9200"])

In [3]:
# Synonym rules handed to the "my_synonyms" token filter. The first two rules
# list equivalent terms; the last two use "=>" to map the terms on the left to
# the terms on the right (see the Elasticsearch synonym token filter docs for
# the exact rule syntax).
synonym_rules = [
    "母亲 , 妈妈 , mother",
    "父亲 , 爸爸 , father",
    "老虎 , 狮子 => 动物",
    "西红柿 , 黄瓜, 香蕉 => 水果, 蔬菜",
]

# Token filter definition.
# NOTE(review): `updateable` is documented for search-time analyzers whose
# synonyms can be reloaded -- confirm it is actually wanted for inline rules.
synonym_filter = {
    "type": "synonym",
    "synonyms": synonym_rules,
    "updateable": True,
}

# Custom analyzer: split on whitespace, then apply the synonym filter.
synonym_analyzer = {
    # The whitespace tokenizer breaks text into terms whenever it encounters
    # a whitespace character.
    "tokenizer": "whitespace",
    "filter": ["my_synonyms"],
}

# Index-creation request body: only analysis settings, no mappings.
body = {
    "settings": {
        "analysis": {
            "analyzer": {"synonym_analyzer": synonym_analyzer},
            "filter": {"my_synonyms": synonym_filter},
        }
    }
}

# Recreate the 'es0' index from scratch so that re-running this cell always
# applies the current analysis settings in `body`.
# The index name is passed by keyword: positional arguments to the indices API
# are deprecated in recent 7.x clients and rejected by elasticsearch-py 8.x.
if es.indices.exists(index='es0'):
    es.indices.delete(index='es0')
es.indices.create(index='es0', body=body)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'es0'}

In [4]:
# Run the custom analyzer over two sample sentences and print the raw
# responses, each followed by a blank line.
sample_texts = (
    "我 爱 妈妈",    # 母亲 and mother are emitted as synonyms as well
    "我 爱 mother",  # 母亲 and 妈妈 are emitted as synonyms as well
)
for sample_text in sample_texts:
    analysis = es.indices.analyze(
        index="es0",
        body={"analyzer": "synonym_analyzer", "text": sample_text},
    )
    print(analysis, end='\n\n')

{'tokens': [{'token': '我', 'start_offset': 0, 'end_offset': 1, 'type': 'word', 'position': 0}, {'token': '爱', 'start_offset': 2, 'end_offset': 3, 'type': 'word', 'position': 1}, {'token': '妈妈', 'start_offset': 4, 'end_offset': 6, 'type': 'word', 'position': 2}, {'token': '母亲', 'start_offset': 4, 'end_offset': 6, 'type': 'SYNONYM', 'position': 2}, {'token': 'mother', 'start_offset': 4, 'end_offset': 6, 'type': 'SYNONYM', 'position': 2}]}

{'tokens': [{'token': '我', 'start_offset': 0, 'end_offset': 1, 'type': 'word', 'position': 0}, {'token': '爱', 'start_offset': 2, 'end_offset': 3, 'type': 'word', 'position': 1}, {'token': 'mother', 'start_offset': 4, 'end_offset': 10, 'type': 'word', 'position': 2}, {'token': '母亲', 'start_offset': 4, 'end_offset': 10, 'type': 'SYNONYM', 'position': 2}, {'token': '妈妈', 'start_offset': 4, 'end_offset': 10, 'type': 'SYNONYM', 'position': 2}]}

