<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#参考" data-toc-modified-id="参考-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>参考</a></span></li><li><span><a href="#Setup" data-toc-modified-id="Setup-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#数据准备" data-toc-modified-id="数据准备-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>数据准备</a></span></li><li><span><a href="#获取评分详情" data-toc-modified-id="获取评分详情-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>获取评分详情</a></span></li></ul></div>

## 参考

https://www.scienjus.com/elasticsearch-function-score-query/

## Setup

In [172]:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from datetime import datetime
import pprint

In [173]:
es = Elasticsearch()

## 数据准备

In [174]:
test_score_index = "test-index"

In [175]:
my_data_settting = {
    "settings": {
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "standard",
                    "char_filter": [
                        "my_char_filter"
                    ]
                } 
            },
            "char_filter": {
                "my_char_filter": {
                    "type": "mapping",
                    "mappings": [
                        "- => _hyphen_",
                    ]
                }
            }
        }
    },
    "mappings": {
        "my_type": {
            "properties": {
                "cmd_format_t": {
                    "type": "text",
                    "search_analyzer": "my_analyzer",
                    "analyzer": "my_analyzer",
                    "norms": { "enabled": False }
                }
            }
        }
    }
}

In [176]:
es.indices.delete(index=test_score_index)
es.indices.create(index=test_score_index, body=my_data_settting)

{'acknowledged': True}

{'acknowledged': True, 'index': 'test-index', 'shards_acknowledged': True}

In [251]:
doc1 = {
    'author': 'tangting',
    'cmd_format_t': 'This is a brown dog a b',
    'len': 3,
    'cmd_format_min_len': 4,
    'cmd_format_max_len': 6
}
doc2 = {
    'author': 'tangting',
    'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s',
    'len': 3,
    'cmd_format_min_len': 4,
    'cmd_format_max_len': 8
}
doc3 = {
    'author': 'tangting',
    'cmd_format_t': 'This is a brown dog a b',
    'len': 3,
    'cmd_format_min_len': 4,
    'cmd_format_max_len': 10
} 
actions = [{ "_index": test_score_index, "_type": "my_type", "_id": idx, "_source": value} for idx, value in enumerate([doc1, doc2, doc3])]
helpers.bulk(es, actions)  

(3, [])

In [178]:
es.search(index=test_score_index, body={"query": {"match_all": {}}})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '0',
    '_index': 'test-index',
    '_score': 1.0,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a b',
     'len': 3},
    '_type': 'my_type'},
   {'_id': '2',
    '_index': 'test-index',
    '_score': 1.0,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a b',
     'len': 15},
    '_type': 'my_type'},
   {'_id': '1',
    '_index': 'test-index',
    '_score': 1.0,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s',
     'len': 4},
    '_type': 'my_type'}],
  'max_score': 1.0,
  'total': 3},
 'timed_out': False,
 'took': 1}

## 获取评分详情

In [152]:
es.search(index=test_score_index, explain=False, body={
    "query": {
        "match_phrase": {
            "cmd_format_t": {
                "query": "brown  a dog", "slop": 30
            }
        }
    },
    "highlight": {
        "fields" : {
            "cmd_format_t" : {}
        }
    }
})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '0',
    '_index': 'test-index',
    '_score': 0.6781078,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a b',
     'len': 3},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is <em>a</em> <em>brown</em> <em>dog</em> <em>a</em> b']}},
   {'_id': '2',
    '_index': 'test-index',
    '_score': 0.6781078,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a b',
     'len': 5},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is <em>a</em> <em>brown</em> <em>dog</em> <em>a</em> b']}},
   {'_id': '1',
    '_index': 'test-index',
    '_score': 0.6781078,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s',
     'len': 4},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is <em>a</em> <em>brown</em> <em>dog</em> <em>a<

In [164]:
es.search(index=test_score_index, explain=False, body={
    "query": {
        "function_score": {
            "query": {
                "match": {
                    "cmd_format_t": {
                        "query": "brown dog"
                    }
                }
            },
            "field_value_factor": {
                "field": "len",
                "modifier": "reciprocal",
                "factor": 0.9
            },
            "boost_mode": "multiply"
        }
    },
    "highlight": {
        "fields" : {
            "cmd_format_t" : {}
        }
    }
})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '0',
    '_index': 'test-index',
    '_score': 0.21309784,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a b',
     'len': 3},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is a <em>brown</em> <em>dog</em> a b']}},
   {'_id': '1',
    '_index': 'test-index',
    '_score': 0.15982339,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s',
     'len': 4},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is a <em>brown</em> <em>dog</em> a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s']}},
   {'_id': '2',
    '_index': 'test-index',
    '_score': 0.12785871,
    '_source': {'author': 'tangting',
     'cmd_format_t': 'This is a brown dog a b',
     'len': 5},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is a <em>brown</em> <em>dog</em> a b']}}],


In [179]:
es.search(index=test_score_index, explain=True, body={
    "query": {
        "function_score": {
            "query": {
                "match": {
                    "cmd_format_t": {
                        "query": "brown dog"
                    }
                }
            },
            "gauss": {
                "len": { 
                    "origin": "3",  # 这里应该是输入的句子长度
                    "offset": "1",
                    "scale":  "20"
                }
            },
            "boost_mode": "multiply"
        }
    },
    "highlight": {
        "fields" : {
            "cmd_format_t" : {}
        }
    }
})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_explanation': {'description': 'function score, product of:',
     'details': [{'description': 'sum of:',
       'details': [{'description': 'weight(cmd_format_t:brown in 0) [PerFieldSimilarity], result of:',
         'details': [{'description': 'score(doc=0,freq=1.0 = termFreq=1.0\n), product of:',
           'details': [{'description': 'idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:',
             'details': [{'description': 'docFreq',
               'details': [],
               'value': 1.0},
              {'description': 'docCount', 'details': [], 'value': 1.0}],
             'value': 0.2876821},
            {'description': 'tfNorm, computed as (freq * (k1 + 1)) / (freq + k1) from:',
             'details': [{'description': 'termFreq=1.0',
               'details': [],
               'value': 1.0},
              {'description': 'parameter k1', 'details': [], 'va

In [200]:
es.search(index=test_score_index, explain=True, body={
    "query": {
        "function_score": {
            "query": {
                "match_phrase": {
                    "cmd_format_t": {
                        "query": "brown dog",
                        "slop": 30
                    }
                }
            },
            "functions": [
                {   
                    "field_value_factor": {
                        "field": "len",
                        "modifier": "reciprocal",
                        "factor": 0.9
                    }
                },
                {
                    "gauss": {
                        "len": { 
                            "origin": "3",  # 这里应该是输入的句子长度
                            "offset": "1",
                            "scale":  "20"
                        }
                    }
                }
            ],
            "score_mode": "multiply",
            "boost_mode": "multiply"
        }
    },
    "highlight": {
        "fields" : {
            "cmd_format_t" : {}
        }
    }
})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_explanation': {'description': 'function score, product of:',
     'details': [{'description': 'weight(cmd_format_t:"brown dog"~30 in 0) [PerFieldSimilarity], result of:',
       'details': [{'description': 'score(doc=0,freq=1.0 = phraseFreq=1.0\n), product of:',
         'details': [{'description': 'idf(), sum of:',
           'details': [{'description': 'idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:',
             'details': [{'description': 'docFreq',
               'details': [],
               'value': 1.0},
              {'description': 'docCount', 'details': [], 'value': 1.0}],
             'value': 0.2876821},
            {'description': 'idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:',
             'details': [{'description': 'docFreq',
               'details': [],
               'value': 1.0},
              {'description': 'd

In [231]:
"return  1 / (Math.max((Math.abs(%s - %d) - 1), 0) + 1)" % ("doc['len'].value + doc['len'].value) / 2", target_len)

"return  1 / (Math.max((Math.abs(doc['len'].value + doc['len'].value) / 2 - 1) - 1), 0) + 1)"

In [256]:
target_len = 9
len_max = 'cmd_format_max_len'
len_min = 'cmd_format_min_len'
len_mid = "((doc['%s'].value + doc['%s'].value) / 2)" % (len_max, len_min)
len_offset = "((doc['%s'].value - doc['%s'].value) / 2)" % (len_max, len_min)
custom_script = "return  1 / (Math.max((Math.abs(%s - %d) - %s), 0) + 1)" % (
    len_mid, 
    target_len,
    len_offset
)

# custom_script = "return  1 / (Math.max((Math.abs(doc['len'].value  + doc['len'].value - %d) - 1), 0) + 1)" % (target_len)
es.search(index=test_score_index, explain=False, body={
    "query": {
        "function_score": {
            "query": {
                "match_phrase": {
                    "cmd_format_t": {
                        "query": "brown dog",
                        "slop": 30
                    }
                }
            },
            "functions": [
                {   
                    "field_value_factor": {
                        "field": "len",
                        "modifier": "reciprocal",
                        "factor": 0.9
                    }
                },
                {
                    "script_score": {
                        "script": custom_script
                    }
                }
            ],
            "score_mode": "multiply",
            "boost_mode": "multiply"
        }
    },
    "highlight": {
        "fields" : {
            "cmd_format_t" : {}
        }
    }
})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '2',
    '_index': 'test-index',
    '_score': 0.21309784,
    '_source': {'author': 'tangting',
     'cmd_format_max_len': 10,
     'cmd_format_min_len': 4,
     'cmd_format_t': 'This is a brown dog a b',
     'len': 3},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is a <em>brown</em> <em>dog</em> a b']}},
   {'_id': '1',
    '_index': 'test-index',
    '_score': 0.10654892,
    '_source': {'author': 'tangting',
     'cmd_format_max_len': 8,
     'cmd_format_min_len': 4,
     'cmd_format_t': 'This is a brown dog a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s',
     'len': 3},
    '_type': 'my_type',
    'highlight': {'cmd_format_t': ['This is a <em>brown</em> <em>dog</em> a asf sdfsdf s sdfsdfsdfsf  sdf s s s s s']}},
   {'_id': '0',
    '_index': 'test-index',
    '_score': 0.05327446,
    '_source': {'author': 'tangting',
     'cmd_format_max_len': 6,
     'cmd_format_min_

In [186]:
0.3703704 * 0.3703704

0.13717423319615998