An example of using the "function_score" query in Elasticsearch

In [1]:
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.client import CatClient

In [2]:
# Connect to the Elasticsearch node at localhost:9200 and wrap the connection
# in the two namespaced helper clients used throughout this notebook.
elastic_db = Elasticsearch(hosts=['localhost:9200'])
cat_client = CatClient(elastic_db)          # human-readable listings (cat API)
indices_client = IndicesClient(elastic_db)  # index management (indices API)

In [3]:
# List the indices currently on Elasticsearch; v=True includes the header row.
indices_table = cat_client.indices(v=True)
print(indices_table)

health status index   uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   .kibana _RRwYu7rSd2H7CuBLtZ4dw   1   1          2            1     12.2kb         12.2kb



In [4]:
# Create the 'fscore_idx' index with default settings and an explicit mapping
# for the 'doc' type. The mapping is generated from a field -> type table.
field_types = {
    "sku": "text",
    "productFamily": "text",
    "price": "integer",
    "vcpu": "integer",
    "memory": "integer",
}

idx_structure = {
    "settings": {},
    "mappings": {
        "doc": {
            "properties": {
                field: {"type": es_type} for field, es_type in field_types.items()
            }
        }
    },
}

response = indices_client.create(
    index='fscore_idx',
    body=idx_structure,
)
print(response)

{'shards_acknowledged': True, 'index': 'fscore_idx', 'acknowledged': True}


In [5]:
# Index document 1 (sku1) into 'fscore_idx'.
# refresh=True forces a refresh so the document is searchable immediately.
doc_body = dict(
    sku="sku1",
    productFamily="sku1_product",
    price=25,
    vcpu=2,
    memory=4,
)

response = elastic_db.index(
    index='fscore_idx',
    doc_type='doc',
    body=doc_body,
    refresh=True,
)
print(response)

{'_version': 1, '_id': 'SX00PWUBOrnFUpGSCBWq', '_shards': {'failed': 0, 'total': 2, 'successful': 1}, '_primary_term': 1, '_index': 'fscore_idx', '_seq_no': 0, '_type': 'doc', 'result': 'created', 'forced_refresh': True}


In [6]:
# Index document 2 (sku2) into 'fscore_idx'.
# refresh=True forces a refresh so the document is searchable immediately.
doc_body = dict(
    sku="sku2",
    productFamily="sku2_product",
    price=50,
    vcpu=4,
    memory=8,
)

response = elastic_db.index(
    index='fscore_idx',
    doc_type='doc',
    body=doc_body,
    refresh=True,
)
print(response)

{'_version': 1, '_id': 'Sn00PWUBOrnFUpGSCRV1', '_shards': {'failed': 0, 'total': 2, 'successful': 1}, '_primary_term': 1, '_index': 'fscore_idx', '_seq_no': 0, '_type': 'doc', 'result': 'created', 'forced_refresh': True}


In [7]:
# Index document 3 (sku3) into 'fscore_idx'.
# refresh=True forces a refresh so the document is searchable immediately.
doc_body = dict(
    sku="sku3",
    productFamily="sku3_product",
    price=75,
    vcpu=8,
    memory=16,
)

response = elastic_db.index(
    index='fscore_idx',
    doc_type='doc',
    body=doc_body,
    refresh=True,
)
print(response)

{'_version': 1, '_id': 'S300PWUBOrnFUpGSChUv', '_shards': {'failed': 0, 'total': 2, 'successful': 1}, '_primary_term': 1, '_index': 'fscore_idx', '_seq_no': 0, '_type': 'doc', 'result': 'created', 'forced_refresh': True}


In [8]:
# Index document 4 (sku4) into 'fscore_idx'.
# refresh=True forces a refresh so the document is searchable immediately.
doc_body = dict(
    sku="sku4",
    productFamily="sku4_product",
    price=100,
    vcpu=16,
    memory=32,
)

response = elastic_db.index(
    index='fscore_idx',
    doc_type='doc',
    body=doc_body,
    refresh=True,
)
print(response)

{'_version': 1, '_id': 'TH00PWUBOrnFUpGSChXU', '_shards': {'failed': 0, 'total': 2, 'successful': 1}, '_primary_term': 1, '_index': 'fscore_idx', '_seq_no': 0, '_type': 'doc', 'result': 'created', 'forced_refresh': True}


In [9]:
# List the indices on Elasticsearch again (v=True includes the header row)
# to confirm that 'fscore_idx' now exists and holds the indexed documents.
indices_table = cat_client.indices(v=True)
print(indices_table)

health status index      uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   fscore_idx pB9m3sUVTDiZJ3CImdp4IQ   5   1          4            0      3.8kb          3.8kb
yellow open   .kibana    _RRwYu7rSd2H7CuBLtZ4dw   1   1          2            1     12.2kb         12.2kb



In [10]:
# QUERY FUNCTION.
# Ranks every document (match_all) by how close its vcpu and memory values are
# to the requested ones, using one gauss decay function per field.
# Other fields can be added as further entries in "functions".
#   origin - the value being searched for; a document at the origin scores 1.0
#   offset - distance from the origin within which the score is still 1.0
#   scale  - distance beyond the offset at which the score drops to `decay`
#   decay  - score assigned at distance offset + scale
# offset/scale/decay have to be tuned by trial and error depending on the data
# and the requirement.
# score_mode "sum" adds the per-field scores together; boost_mode "replace"
# uses that sum as the final _score instead of combining it with the query score.

# Single switch for score explanations. Set it to True to get an
# '_explanation' entry (the break-up of the score) on every hit. It is applied
# only in the request body, so no query-string 'explain' parameter can
# silently override it.
explain_scores = False

sch_body = {
    "explain": explain_scores,
    "query":{
        "function_score":{
            "query":{
                "match_all":{}
            },
            "functions":[
                {
                    "gauss":{
                        "vcpu":{
                            "origin":"4",
                            "offset": "0.5",
                            "scale":"0.75",
                            "decay":"0.01"
                        }
                    }
                },
                {
                    "gauss":{
                        "memory":{
                            "origin":"7",
                            "offset": "0.5",
                            "scale":"0.75",
                            "decay":"0.0001"
                        }
                    }
                }
            ],
            "score_mode":"sum",
            "boost_mode": "replace"
        }
    }
}

response = elastic_db.search(index='fscore_idx', body=sch_body)

# Show the best-scoring hit; guard against an empty result so the cell does
# not fail with an IndexError when the index holds no documents.
hits = response['hits']['hits']
print(hits[0] if hits else 'No matching documents found')

{'_index': 'fscore_idx', '_id': 'Sn00PWUBOrnFUpGSCRV1', '_source': {'price': 50, 'sku': 'sku2', 'productFamily': 'sku2_product', 'memory': 8, 'vcpu': 4}, '_score': 1.016681, '_type': 'doc'}
