In [20]:
from elasticsearch import Elasticsearch
# Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.11/analysis-custom-analyzer.html

In [21]:
# Connect to the Elasticsearch node at localhost:9200.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Index definition: analysis settings declaring one custom analyzer, plus a
# mapping that applies that analyzer to a single field.
body = {
    "settings": {
        "analysis": {
            "analyzer": {
                "my_custom_analyzer": {
                    "type": "custom",
                    # A built-in or customised tokenizer. (Required)
                    "tokenizer": "standard",
                    # Character filters preprocess the stream of characters
                    # before it is passed to the tokenizer.
                    # Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.11/analysis-charfilters.html
                    "char_filter": [
                        "html_strip"
                    ],
                    # Token filters accept a stream of tokens from a tokenizer
                    # and can modify tokens (e.g. lowercasing), delete tokens
                    # (e.g. remove stopwords) or add tokens (e.g. synonyms).
                    # Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.11/analysis-tokenfilters.html
                    "filter": [
                        "lowercase",
                        # Custom filter declared under "analysis.filter" below.
                        "english_stop"
                    ]
                }
            },
            "filter": {
                # Stop-word filter configured with the predefined "_english_" list.
                "english_stop": {
                    "type": "stop",
                    "stopwords": "_english_"
                }
            }
        }
    },
    "mappings": {
        "properties": {
            # No analyzer given here, so this field uses the index default.
            "information": {
                "type": "text"
            },
            "say": {
                "type": "text",
                # The `analyzer` mapping parameter is documented for text
                # fields; the token_count field type also accepts an
                # analyzer setting of its own.
                "analyzer": "my_custom_analyzer"
            }
        }
    }
}

# Drop any index left over from an earlier run so the cell can be re-executed.
# The index name is passed by keyword: positional API arguments are deprecated
# in later 7.x clients and rejected by the 8.x client.
if es.indices.exists(index='es0'):
    es.indices.delete(index='es0')
# Last expression of the cell, so the create response is displayed as output.
es.indices.create(index='es0', body=body)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'es0'}

In [22]:
# Fetch the stored settings and mappings of the index to confirm that the
# custom analyzer was registered; the index name is passed by keyword because
# positional API arguments are deprecated in later client releases.
es.indices.get(index='es0')

{'es0': {'aliases': {},
  'mappings': {'properties': {'information': {'type': 'text'},
    'say': {'type': 'text', 'analyzer': 'my_custom_analyzer'}}},
  'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}},
    'number_of_shards': '1',
    'provided_name': 'es0',
    'creation_date': '1693382944259',
    'analysis': {'filter': {'english_stop': {'type': 'stop',
       'stopwords': '_english_'}},
     'analyzer': {'my_custom_analyzer': {'filter': ['lowercase',
        'english_stop'],
       'char_filter': ['html_strip'],
       'type': 'custom',
       'tokenizer': 'standard'}}},
    'number_of_replicas': '1',
    'uuid': 'CH92wqqOTmaWYqhqkX04jg',
    'version': {'created': '7110299'}}}}}

In [23]:
# Clean up: remove the demo index; the index name is passed by keyword because
# positional API arguments are deprecated in later client releases.
es.indices.delete(index='es0')

{'acknowledged': True}