<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#查询时实时搜索" data-toc-modified-id="查询时实时搜索-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>查询时实时搜索</a></span><ul class="toc-item"><li><span><a href="#数据准备" data-toc-modified-id="数据准备-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>数据准备</a></span></li><li><span><a href="#搜索" data-toc-modified-id="搜索-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>搜索</a></span></li></ul></li><li><span><a href="#索引时实时搜索" data-toc-modified-id="索引时实时搜索-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>索引时实时搜索</a></span><ul class="toc-item"><li><span><a href="#创建数据表" data-toc-modified-id="创建数据表-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>创建数据表</a></span></li><li><span><a href="#检查设置" data-toc-modified-id="检查设置-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>检查设置</a></span></li><li><span><a href="#数据准备" data-toc-modified-id="数据准备-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>数据准备</a></span></li><li><span><a href="#搜索" data-toc-modified-id="搜索-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>搜索</a></span></li></ul></li></ul></div>

## Setup

In [1]:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from datetime import datetime
import pprint

In [2]:
# Shared client used by every cell below. No hosts are passed, so the client
# library's default connection settings apply (presumably localhost:9200 -- confirm).
es = Elasticsearch()

## 查询时实时搜索

### 数据准备

In [3]:
# Index that holds the sample documents for the query-time example.
test_score_index = "test-index-2"

In [4]:
# Sample documents for the query-time example. doc1 and doc3 carry the same
# text; doc2 starts with the same words but is considerably longer.
doc1 = {
    'cmd_format_t': 'This is a brown dog a b',
}
doc2 = {
    'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s',
}
doc3 = {
    'cmd_format_t': 'This is a brown dog a b',
}
# One bulk action per document; the position in the list is used as the _id.
actions = [
    {"_index": test_score_index, "_type": "my_type", "_id": idx, "_source": value}
    for idx, value in enumerate([doc1, doc2, doc3])
]
# refresh=True is forwarded to the bulk API so the documents are searchable as
# soon as this cell returns. Without it, a search issued right after indexing
# (e.g. on "Run All") may not see them until the next periodic refresh.
helpers.bulk(es, actions, refresh=True)

(3, [])

In [5]:
# Sanity check: list everything stored in the index with a match_all query.
match_all_body = {"query": {"match_all": {}}}
es.search(index=test_score_index, body=match_all_body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '0',
    '_index': 'test-index-2',
    '_score': 1.0,
    '_source': {'cmd_format_t': 'This is a brown dog a b'},
    '_type': 'my_type'},
   {'_id': '2',
    '_index': 'test-index-2',
    '_score': 1.0,
    '_source': {'cmd_format_t': 'This is a brown dog a b'},
    '_type': 'my_type'},
   {'_id': '1',
    '_index': 'test-index-2',
    '_score': 1.0,
    '_source': {'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s'},
    '_type': 'my_type'}],
  'max_score': 1.0,
  'total': 3},
 'timed_out': False,
 'took': 0}

### 搜索

使用 match_phrase_prefix 查询时，最后一个词会被当作前缀来匹配，因此搜索 "is dog b" 等价于搜索 "is dog b*"。

In [25]:
# Query-time search-as-you-type: phrase-prefix match on cmd_format_t with a
# slop of 30, plus highlighting of the matched field.
phrase_prefix_query = {
    "match_phrase_prefix": {
        "cmd_format_t": {"query": "is dog b", "slop": 30},
    },
}
highlight_spec = {"fields": {"cmd_format_t": {}}}

es.search(
    index=test_score_index,
    explain=False,
    body={"query": phrase_prefix_query, "highlight": highlight_spec},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '0',
    '_index': 'test-index-2',
    '_score': 0.55034834,
    '_source': {'cmd_format_t': 'This is a brown dog a b'},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This <em>is</em> a <em>brown</em> <em>dog</em> a b']}},
   {'_id': '2',
    '_index': 'test-index-2',
    '_score': 0.55034834,
    '_source': {'cmd_format_t': 'This is a brown dog a b'},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This <em>is</em> a <em>brown</em> <em>dog</em> a b']}},
   {'_id': '1',
    '_index': 'test-index-2',
    '_score': 0.41276127,
    '_source': {'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s'},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This <em>is</em> a <em>brown</em> <em>dog</em> a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s']}}],
  'max_score': 0.55034834,
  'total': 3},
 'timed_out': False,
 'took': 2}

## 索引时实时搜索

### 创建数据表

In [38]:
# Index that holds the sample documents for the index-time (edge n-gram) example.
test_score_index_3 = "test-index-3"

In [39]:
# Token filter producing the leading edge n-grams (1 to 20 characters) of each token.
edge_ngram_filter = {
    "type": "edge_ngram",
    "min_gram": 1,
    "max_gram": 20,
}

# Custom analyzer: standard tokenizer, then lowercase, then the edge n-gram
# filter registered below under the name "autocomplete_filter".
autocomplete_analyzer = {
    "type": "custom",
    "tokenizer": "standard",
    "filter": ["lowercase", "autocomplete_filter"],
}

# cmd_format_t is analyzed with "autocomplete" when indexing, while search
# input is analyzed with the plain "standard" analyzer.
cmd_format_t_mapping = {
    "type": "text",
    "analyzer": "autocomplete",
    "search_analyzer": "standard",
}

# Complete index-creation body: single shard, analysis settings, and the
# mapping for the "my_type" document type.
my_data_settting = {
    "settings": {
        "number_of_shards": 1,
        "analysis": {
            "filter": {"autocomplete_filter": edge_ngram_filter},
            "analyzer": {"autocomplete": autocomplete_analyzer},
        },
    },
    "mappings": {
        "my_type": {
            "properties": {"cmd_format_t": cmd_format_t_mapping},
        },
    },
}

In [41]:
# Make the cell re-runnable: indices.create fails when the index already
# exists, so drop any copy left over from an earlier run first.
if es.indices.exists(index=test_score_index_3):
    es.indices.delete(index=test_score_index_3)
# Create the index with the analysis settings and mapping defined in my_data_settting.
es.indices.create(index=test_score_index_3, body=my_data_settting)

{'acknowledged': True, 'index': 'test-index-3', 'shards_acknowledged': True}

### 检查设置

In [42]:
# Pretty-print the settings first and then the mapping of the new index,
# as reported back by the cluster.
for fetch in (es.indices.get_settings, es.indices.get_mapping):
    pprint.pprint(fetch(index=test_score_index_3))

{'test-index-3': {'settings': {'index': {'analysis': {'analyzer': {'autocomplete': {'filter': ['lowercase',
                                                                                               'autocomplete_filter'],
                                                                                    'tokenizer': 'standard',
                                                                                    'type': 'custom'}},
                                                      'filter': {'autocomplete_filter': {'max_gram': '20',
                                                                                         'min_gram': '1',
                                                                                         'type': 'edge_ngram'}}},
                                         'creation_date': '1524801416578',
                                         'number_of_replicas': '1',
                                         'number_of_shards': '1',
                        

In [47]:
# Run the index's "autocomplete" analyzer on a sample string to inspect the
# tokens it emits.
analyze_request = {"analyzer": "autocomplete", "text": "mpls-te"}
es.indices.analyze(index=test_score_index_3, body=analyze_request)

{'tokens': [{'end_offset': 4,
   'position': 0,
   'start_offset': 0,
   'token': 'm',
   'type': '<ALPHANUM>'},
  {'end_offset': 4,
   'position': 0,
   'start_offset': 0,
   'token': 'mp',
   'type': '<ALPHANUM>'},
  {'end_offset': 4,
   'position': 0,
   'start_offset': 0,
   'token': 'mpl',
   'type': '<ALPHANUM>'},
  {'end_offset': 4,
   'position': 0,
   'start_offset': 0,
   'token': 'mpls',
   'type': '<ALPHANUM>'},
  {'end_offset': 7,
   'position': 1,
   'start_offset': 5,
   'token': 't',
   'type': '<ALPHANUM>'},
  {'end_offset': 7,
   'position': 1,
   'start_offset': 5,
   'token': 'te',
   'type': '<ALPHANUM>'}]}

### 数据准备

In [48]:
# Sample documents for the index-time example.
doc1 = {
    'cmd_format_t': 'Brown foxes',
}
doc2 = {
    'cmd_format_t': 'Yellow furballs',
}
doc3 = {
    'cmd_format_t': 'This is a brown dog a b',
}
# One bulk action per document; the position in the list is used as the _id.
actions = [
    {"_index": test_score_index_3, "_type": "my_type", "_id": idx, "_source": value}
    for idx, value in enumerate([doc1, doc2, doc3])
]
# refresh=True is forwarded to the bulk API so the documents are searchable as
# soon as this cell returns. Without it, a search issued right after indexing
# (e.g. on "Run All") may not see them until the next periodic refresh.
helpers.bulk(es, actions, refresh=True)

(3, [])

### 搜索

In [53]:
# Index-time search-as-you-type: an ordinary match query with the partial
# words "brow fo", plus highlighting of the matched field.
partial_match_query = {
    "match": {
        "cmd_format_t": {"query": "brow fo"},
    },
}
highlight_spec = {"fields": {"cmd_format_t": {}}}

es.search(
    index=test_score_index_3,
    explain=False,
    body={"query": partial_match_query, "highlight": highlight_spec},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': '0',
    '_index': 'test-index-3',
    '_score': 2.2293887,
    '_source': {'cmd_format_t': 'Brown foxes'},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['<em>Brown</em> <em>foxes</em>']}},
   {'_id': '2',
    '_index': 'test-index-3',
    '_score': 0.58717906,
    '_source': {'cmd_format_t': 'This is a brown dog a b'},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is a <em>brown</em> dog a b']}}],
  'max_score': 2.2293887,
  'total': 2},
 'timed_out': False,
 'took': 1}