In [9]:

from typing import Tuple
import json
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk
from loguru import logger

In [10]:
def get_opensearch(host: str = 'localhost',
                   port: int = 9200,
                   auth: Tuple[str, str] = ('admin', 'admin'),
                   ) -> OpenSearch:
    """Build an OpenSearch client that talks to a single node without SSL.

    Args:
        host: Hostname of the node to connect to.
        port: Port of the node.
        auth: ``(username, password)`` pair passed through as ``http_auth``.

    Returns:
        An ``OpenSearch`` client with request compression enabled and with
        SSL, certificate verification, hostname assertion and SSL warnings
        all switched off.
    """
    #### Step 2.a: Create a connection to OpenSearch
    node = {'host': host, 'port': port}
    connection_options = dict(
        http_compress=True,  # gzip-compress request bodies
        http_auth=auth,
        # No client_cert / client_key is supplied.
        use_ssl=False,
        verify_certs=False,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
    )
    return OpenSearch(hosts=[node], **connection_options)

In [11]:
# Connect using get_opensearch's defaults: localhost:9200, admin/admin, SSL off.
client = get_opensearch()

In [12]:
# Quick connectivity check against the cluster; the recorded run returned True.
client.ping()

True

## Indexing

In [17]:
# NOTE(review): machine-specific absolute path; callers on another machine
# should pass file_path explicitly.
mapping_path = '/Users/americanthinker/Training/search_fundamentals_course/opensearch/bbuy_products.json'


def get_mapping(file_path: str=mapping_path):
    """Load an index definition from a JSON file.

    Args:
        file_path: Path of the JSON file to read. Defaults to ``mapping_path``.

    Returns:
        The parsed JSON document (a ``dict`` for the mapping file used in
        this notebook, with ``settings`` and ``mappings`` keys).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON files are UTF-8; without an explicit encoding open() falls back to
    # the platform locale, which can mis-decode non-ASCII text.
    with open(file_path, encoding='utf-8') as f:
        return json.load(f)

In [63]:
# Load the mapping from the default path (mapping_path) and echo it for inspection.
mapping = get_mapping()
mapping

{'settings': {'index.refresh_interval': '5s'},
 'mappings': {'properties': {'sku': {'type': 'long'},
   'productID': {'type': 'integer', 'coerce': True},
   'name': {'type': 'text',
    'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
    'analyzer': 'english'},
   'type': {'type': 'text',
    'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
   'regularPrice': {'type': 'float'},
   'salePrice': {'type': 'float'},
   'onSale': {'type': 'boolean'},
   'salesRankShortTerm': {'type': 'integer'},
   'salesRankMediumTerm': {'type': 'integer'},
   'salesRankLongTerm': {'type': 'integer'},
   'bestSellingRank': {'type': 'integer'},
   'url': {'type': 'text',
    'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
   'categoryPath': {'type': 'text',
    'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
   'categoryPathIds': {'type': 'text',
    'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
   'categoryLeaf': {'ty

In [113]:
#client.indices.delete('bbuy_queries')

In [118]:
# List the cluster's indices via the cat API; 'v' turns on the column-header row.
print(client.cat.indices(params={'v':'true'}))

health status index                                     uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   full-load                                 yHVRCl33QxeTCal-WSWs-Q   1   1    1275077            0    850.1mb        850.1mb
yellow open   search_fun_bulk                           IqX_TVi3QAyyyLCM4VJmYg   1   1          4            0      8.8kb          8.8kb
yellow open   search_fun_revisited_custom_mappings      mUYsVGTGSOWQIumfCsW2aw   1   1          4            0     21.2kb         21.2kb
green  open   opensearch_dashboards_sample_data_flights t72TeXP-RLGfPu2GO5q9kg   1   0      13059            0      5.7mb          5.7mb
yellow open   bbuy_queries                              2q-9Ce5RSOuC3hBF290Znw   1   1    1865269            0    421.3mb        421.3mb
green  open   .kibana_1                                 JnrGXHSsQvq9t9oOKrLRtw   1   0         60            0     41.3kb         41.3kb



In [120]:
# Inspect the field mapping of the 'bbuy_queries' index.
client.indices.get_mapping('bbuy_queries')
#client.count(index='bbuy_queries')['count']

{'bbuy_queries': {'mappings': {'properties': {'@timestamp': {'type': 'date'},
    '@version': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'category': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'click_time': {'type': 'date'},
    'query': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256},
      'stemmed': {'type': 'text', 'analyzer': 'english'}}},
    'query_time': {'type': 'date'},
    'sku': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'user': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}}}

## Queries

In [125]:
index_name = 'bbuy_queries'


def query(q: str, size: int, fields: list=['category', 'query']):
    """Build the request body for a ``multi_match`` search.

    Args:
        q: Text to search for.
        size: Value placed in the body's ``size`` key.
        fields: Names of the fields to match ``q`` against.

    Returns:
        A new ``dict`` of the form
        ``{'size': size, 'query': {'multi_match': {'query': q, 'fields': [...]}}}``.
    """
    return {
        'size': size,
        'query': {
            'multi_match': {
                'query': q,
                # Copy the list: embedding the caller's (or the default) list
                # directly would let edits to the returned body leak back
                # into it and into every later call using the default.
                'fields': list(fields),
            }
        },
    }

In [127]:
def search(q: str, size: int=10, full: bool=False, fields: list=['name', 'shortDescription'] ):
    """Run a ``multi_match`` search against ``index_name`` and show the hits.

    Args:
        q: Text to search for.
        size: Passed to ``query`` as the body's ``size`` value.
        full: When true, return the raw response instead of printing.
        fields: Fields to search, and to print for each hit.

    Returns:
        The raw response from ``client.search`` when ``full`` is true;
        otherwise ``None`` after printing one line per (hit, field) pair.

    NOTE(review): the default ``fields`` do not appear in the 'bbuy_queries'
    mapping printed in this notebook; pass ``fields`` explicitly for that
    index. TODO confirm the intended default.
    """
    resp = client.search(body=query(q, size=size, fields=fields), index=index_name)
    if full:
        return resp

    for ind, hit in enumerate(resp['hits']['hits'], 1):
        source = hit.get('_source', {})
        for field in fields:
            # A hit need not contain every requested field, so use .get()
            # (prints None) instead of raising KeyError part-way through.
            print(f'{ind}:  {field}:\t\t{source.get(field)}')
        

In [133]:
# Example: search for 'corona' in the 'query' field only, with size=10.
search('corona', size=10, fields=['query'])

1:  query:		Corona
