An example of searching nested fields in Elasticsearch

In [1]:
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.client import CatClient

In [2]:
# Connect to the Elasticsearch node running on localhost.
elastic_db = Elasticsearch(['localhost:9200'])

# The Elasticsearch object already exposes the index-management and cat APIs
# as namespaced clients, so reuse those instead of constructing separate
# IndicesClient / CatClient instances around the same connection.
indices_client = elastic_db.indices
cat_client = elastic_db.cat

In [3]:
# List the indices currently on Elasticsearch (v=True includes the header row)
indices_table = cat_client.indices(v=True)
print(indices_table)

health status index   uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   .kibana _RRwYu7rSd2H7CuBLtZ4dw   1   1          2            1     12.2kb         12.2kb



In [4]:
# Create nested_idx with the required settings and mappings.
# "project" is a nested field and every project carries its own nested
# "asset" list, which gives the two levels of nesting searched later on.
idx_structure = {
    "settings": {},
    "mappings": {
        "doc": {
            "properties": {
                "user_name": {"type": "text"},
                "user_email": {"type": "text"},
                "project": {
                    "type": "nested",
                    "properties": {
                        "project_name": {"type": "text"},
                        "project_id": {"type": "text"},
                        "asset": {
                            "type": "nested",
                            "properties": {
                                "asset_name": {"type": "text"},
                                "asset_id": {"type": "text"}
                            }
                        }
                    }
                }
            }
        }
    }
}

# Creating an index that already exists raises an error, so drop any copy of
# this example index left over from an earlier run. That keeps the notebook
# re-runnable from a fresh kernel and stops the documents indexed afterwards
# from accumulating as duplicates across runs.
if indices_client.exists(index='nested_idx'):
    indices_client.delete(index='nested_idx')

response = indices_client.create(index='nested_idx', body=idx_structure)
print(response)

{'shards_acknowledged': True, 'acknowledged': True, 'index': 'nested_idx'}


In [5]:
# Index document 1: user u1 with projects p1 and p2, two assets in each
project_p1 = {
    "project_name": "p1",
    "project_id": "p1Id",
    "asset": [
        {"asset_name": "a1", "asset_id": "a1Id"},
        {"asset_name": "a2", "asset_id": "a2Id"},
    ],
}
project_p2 = {
    "project_name": "p2",
    "project_id": "p2Id",
    "asset": [
        {"asset_name": "a3", "asset_id": "a3Id"},
        {"asset_name": "a4", "asset_id": "a4Id"},
    ],
}
doc_body = {
    "user_name": "u1",
    "user_email": "u1@u1.com",
    "project": [project_p1, project_p2],
}

# refresh=True forces a refresh so the document is searchable right away
response = elastic_db.index(
    index='nested_idx',
    doc_type='doc',
    body=doc_body,
    refresh=True,
)
print(response)

{'result': 'created', '_type': 'doc', '_seq_no': 0, 'forced_refresh': True, '_version': 1, '_shards': {'successful': 1, 'failed': 0, 'total': 2}, '_index': 'nested_idx', '_id': 'cX0cQmUBOrnFUpGSLBWR', '_primary_term': 1}


In [6]:
# Index document 2: user u2 with projects p3 and p4, two assets in each
project_p3 = {
    "project_name": "p3",
    "project_id": "p3Id",
    "asset": [
        {"asset_name": "a5", "asset_id": "a5Id"},
        {"asset_name": "a6", "asset_id": "a6Id"},
    ],
}
project_p4 = {
    "project_name": "p4",
    "project_id": "p4Id",
    "asset": [
        {"asset_name": "a7", "asset_id": "a7Id"},
        {"asset_name": "a8", "asset_id": "a8Id"},
    ],
}
doc_body = {
    "user_name": "u2",
    "user_email": "u2@u2.com",
    "project": [project_p3, project_p4],
}

# refresh=True forces a refresh so the document is searchable right away
response = elastic_db.index(
    index='nested_idx',
    doc_type='doc',
    body=doc_body,
    refresh=True,
)
print(response)

{'result': 'created', '_type': 'doc', '_seq_no': 0, 'forced_refresh': True, '_version': 1, '_shards': {'successful': 1, 'failed': 0, 'total': 2}, '_index': 'nested_idx', '_id': 'cn0cQmUBOrnFUpGSLRVs', '_primary_term': 1}


In [7]:
# Search on user_name: a top-level field, so a plain match query is enough
user_name_match = {"match": {"user_name": "u2"}}
query_body = {"query": user_name_match}

response = elastic_db.search(index='nested_idx', body=query_body)
print(response)

{'timed_out': False, 'hits': {'hits': [{'_score': 0.2876821, '_index': 'nested_idx', '_type': 'doc', '_source': {'user_email': 'u2@u2.com', 'user_name': 'u2', 'project': [{'project_name': 'p3', 'project_id': 'p3Id', 'asset': [{'asset_id': 'a5Id', 'asset_name': 'a5'}, {'asset_id': 'a6Id', 'asset_name': 'a6'}]}, {'project_name': 'p4', 'project_id': 'p4Id', 'asset': [{'asset_id': 'a7Id', 'asset_name': 'a7'}, {'asset_id': 'a8Id', 'asset_name': 'a8'}]}]}, '_id': 'cn0cQmUBOrnFUpGSLRVs'}], 'max_score': 0.2876821, 'total': 1}, 'took': 6, '_shards': {'skipped': 0, 'successful': 5, 'failed': 0, 'total': 5}}


In [8]:
# Search on project_name: the field sits inside the nested "project" object,
# so the match is wrapped in a nested query whose path is "project"
project_name_match = {"match": {"project.project_name": "p3"}}
query_body = {
    "query": {
        "nested": {
            "path": "project",
            "query": project_name_match,
        }
    }
}

response = elastic_db.search(index='nested_idx', body=query_body)
print(response)

{'timed_out': False, 'hits': {'hits': [{'_score': 0.6931472, '_index': 'nested_idx', '_type': 'doc', '_source': {'user_email': 'u2@u2.com', 'user_name': 'u2', 'project': [{'project_name': 'p3', 'project_id': 'p3Id', 'asset': [{'asset_id': 'a5Id', 'asset_name': 'a5'}, {'asset_id': 'a6Id', 'asset_name': 'a6'}]}, {'project_name': 'p4', 'project_id': 'p4Id', 'asset': [{'asset_id': 'a7Id', 'asset_name': 'a7'}, {'asset_id': 'a8Id', 'asset_name': 'a8'}]}]}, '_id': 'cn0cQmUBOrnFUpGSLRVs'}], 'max_score': 0.6931472, 'total': 1}, 'took': 4, '_shards': {'skipped': 0, 'successful': 5, 'failed': 0, 'total': 5}}


In [9]:
# Search on asset_name, two levels of nesting deep: one nested query per
# level, the outer on "project" and the inner on "project.asset"
asset_name_match = {"match": {"project.asset.asset_name": "a6"}}
asset_level_query = {
    "nested": {
        "path": "project.asset",
        "query": asset_name_match,
    }
}
query_body = {
    "query": {
        "nested": {
            "path": "project",
            "query": asset_level_query,
        }
    }
}

response = elastic_db.search(index='nested_idx', body=query_body)
print(response)

{'timed_out': False, 'hits': {'hits': [{'_score': 1.2039728, '_index': 'nested_idx', '_type': 'doc', '_source': {'user_email': 'u2@u2.com', 'user_name': 'u2', 'project': [{'project_name': 'p3', 'project_id': 'p3Id', 'asset': [{'asset_id': 'a5Id', 'asset_name': 'a5'}, {'asset_id': 'a6Id', 'asset_name': 'a6'}]}, {'project_name': 'p4', 'project_id': 'p4Id', 'asset': [{'asset_id': 'a7Id', 'asset_name': 'a7'}, {'asset_id': 'a8Id', 'asset_name': 'a8'}]}]}, '_id': 'cn0cQmUBOrnFUpGSLRVs'}], 'max_score': 1.2039728, 'total': 1}, 'took': 2, '_shards': {'skipped': 0, 'successful': 5, 'failed': 0, 'total': 5}}
