In [4]:
from elasticsearch import Elasticsearch

# Client for the Elasticsearch node listening on localhost:9200.
es = Elasticsearch(hosts=[{"host": "localhost", "port": 9200}])

In [5]:
# Index definition: custom analysis chain plus the mapping of the single
# `information` text field.
#
# * `analyzer_index`  - used when documents are indexed (no synonym expansion).
# * `analyzer_search` - used for full-text queries; additionally applies the
#   `my_synonyms` filter so synonyms are expanded at search time only.
body = {
    "settings": {
        "analysis": {
            "analyzer": {
                "analyzer_index": {
                    "tokenizer": "ik_max_word",
                    "char_filter": ["emoticons"],
                    "filter": ["english_stop"]
                },
                "analyzer_search": {
                    "tokenizer": "ik_max_word",
                    "char_filter": ["emoticons"],
                    "filter": [
                        "my_synonyms",  # applying synonyms at search time is the recommended setup
                        "english_stop"
                    ]
                }
            },
            "filter": {
                "my_synonyms": {
                    "type": "synonym",
                    "synonyms": [
                        # Comma-separated terms are equivalent (two-way) synonyms.
                        "母亲 , 妈妈 , mother",
                        "父亲 , 爸爸 , father",
                        # `a , b => c` is a one-way rule: the left-hand terms are
                        # replaced by the right-hand terms.
                        "老虎 , 狮子 => 动物",
                        "西红柿 , 黄瓜, 香蕉 => 水果, 蔬菜"
                    ],
                    # Marks the filter as reloadable through the reload-search-analyzers
                    # API; such a filter is only referenced from the search analyzer here.
                    "updateable": True
                },
                "english_stop": {
                    "type": "stop",
                    "stopwords": "_english_"
                }
            },
            "char_filter": {
                # Normalizes Roman numerals 2 and 3 (ASCII and single-glyph forms)
                # to Arabic digits before tokenization.
                "emoticons": {
                    "type": "mapping",
                    "mappings": [
                        "II => 2",
                        "ii => 2",
                        # The comma after this entry matters: without it Python joins
                        # this string with the next one and the "III" rule is lost.
                        "Ⅱ => 2",
                        "III => 3",
                        "iii => 3",
                        "Ⅲ => 3"
                    ]
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "information": {
                "type": "text",
                "analyzer": "analyzer_index",
                # Important: search_analyzer only takes effect for full-text queries.
                "search_analyzer": "analyzer_search"
            }
        }
    }
}

# Recreate the index from scratch so this cell can be re-run safely.
# The index name is passed by keyword: positional arguments to the indices
# API are deprecated in newer elasticsearch-py releases.
if es.indices.exists(index='es0'):
    es.indices.delete(index='es0')
es.indices.create(index='es0', body=body)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'es0'}

In [6]:
# Sample documents; the position in the list is used as the document id (0-7).
sample_texts = [
    "我爱妈妈",
    "我爱mother",
    "我爱father",
    "动物很可爱",
    "小孩很可爱",
    "蔬菜有利于健康",
    "香蕉很好吃",
    "水果很好吃",
]

# Index every document, keeping the most recent response so the cell still
# displays the result of the last index call.
for doc_id, text in enumerate(sample_texts):
    last_index_response = es.index(index='es0', id=doc_id, body={"information": text})

# Newly indexed documents only become searchable after a refresh. Refresh
# explicitly so the search cells below see all documents even when the
# notebook is executed top to bottom without pauses.
es.indices.refresh(index='es0')

last_index_response

{'_index': 'es0',
 '_type': '_doc',
 '_id': '7',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 7,
 '_primary_term': 1}

In [7]:
# Same query text as in the notebook
# "setting参数_同近义词过滤_搜索时指定analyzer_demo.ipynb".
es.search(
    index='es0',
    body={"query": {"match": {"information": {"query": "我爱我的母亲"}}}},
)

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 3, 'relation': 'eq'},
  'max_score': 2.8251233,
  'hits': [{'_index': 'es0',
    '_type': '_doc',
    '_id': '0',
    '_score': 2.8251233,
    '_source': {'information': '我爱妈妈'}},
   {'_index': 'es0',
    '_type': '_doc',
    '_id': '1',
    '_score': 2.8251233,
    '_source': {'information': '我爱mother'}},
   {'_index': 'es0',
    '_type': '_doc',
    '_id': '2',
    '_score': 0.9751481,
    '_source': {'information': '我爱father'}}]}}

In [8]:
# Full-text (match) query on the `information` field.
es.search(
    index='es0',
    body={"query": {"match": {"information": {"query": "老虎很可爱"}}}},
)

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 4.4950805,
  'hits': [{'_index': 'es0',
    '_type': '_doc',
    '_id': '3',
    '_score': 4.4950805,
    '_source': {'information': '动物很可爱'}},
   {'_index': 'es0',
    '_type': '_doc',
    '_id': '4',
    '_score': 2.6451051,
    '_source': {'information': '小孩很可爱'}}]}}

In [9]:
# 香蕉 is rewritten to 水果 by the one-way synonym rule, which is why the
# document "水果很好吃" receives the higher _score.
es.search(
    index='es0',
    body={"query": {"match": {"information": {"query": "香蕉很好吃"}}}},
)

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 3, 'relation': 'eq'},
  'max_score': 4.4950805,
  'hits': [{'_index': 'es0',
    '_type': '_doc',
    '_id': '7',
    '_score': 4.4950805,
    '_source': {'information': '水果很好吃'}},
   {'_index': 'es0',
    '_type': '_doc',
    '_id': '6',
    '_score': 2.6451051,
    '_source': {'information': '香蕉很好吃'}},
   {'_index': 'es0',
    '_type': '_doc',
    '_id': '5',
    '_score': 1.4683185,
    '_source': {'information': '蔬菜有利于健康'}}]}}

In [10]:
# Term-level query: look up the exact term 妈妈 in the `information` field.
es.search(
    index='es0',
    body={"query": {"term": {"information": {"value": "妈妈"}}}},
)

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.8499753,
  'hits': [{'_index': 'es0',
    '_type': '_doc',
    '_id': '0',
    '_score': 1.8499753,
    '_source': {'information': '我爱妈妈'}}]}}

In [11]:
# Term-level queries do not run the search analyzer on the query value, so the
# synonym filter is never applied: 母亲 is matched verbatim against the terms
# stored in the index and is not expanded to 妈妈 / mother.
es.search(
    index='es0',
    body={"query": {"term": {"information": {"value": "母亲"}}}},
)

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [None]:
# Reload the search analyzers of the index together with their resources.
# Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.11/indices-reload-analyzers.html
# See also: the `updateable` field in the notebook "setting参数_同近义词过滤0.ipynb".
# The index name is passed by keyword: positional arguments to the indices
# API are deprecated in newer elasticsearch-py releases.
es.indices.reload_search_analyzers(index='es0')