# Using a Fast and Powerful Full-Text Search Engine: Elasticsearch

To set up a single-node Elasticsearch instance using Docker:

```
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.10.0  
docker run -d -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.10.0
```

**Verify Elasticsearch is running**

In [1]:
!curl -XGET "localhost:9200"

{
  "name" : "fdccf21a1a80",
  "cluster_name" : "docker-cluster",
  "cluster_uuid" : "9Q1vsCQXQd6hrzheq0YiiA",
  "version" : {
    "number" : "7.10.0",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "51e9d6f22758d0374a0f3f5c6e8f3a7997850f96",
    "build_date" : "2020-11-09T21:30:33.964949Z",
    "build_snapshot" : false,
    "lucene_version" : "8.7.0",
    "minimum_wire_compatibility_version" : "6.8.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}


**Import necessary libraries**

In [2]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import json
import time

In [4]:
%%writefile elastic.py

def create_index(es_client):
    """Create the `INDEX_NAME` index, replacing any existing index of that name.

    The index settings are loaded with `load_index_settings(INDEX_FILE)`.
    `INDEX_NAME`, `INDEX_FILE` and `load_index_settings` are not defined in
    this block and must be available at module level.

    Args:
        es_client: Elasticsearch client exposing the `indices` API
            (`exists`, `delete`, `create`).

    Returns:
        True if the index was created, False if an `Exception` was raised
        while checking for, deleting or creating the index (the error is
        printed, not re-raised).
    """
    # Index settings
    settings = load_index_settings(INDEX_FILE)
    print(f'[*] Creating `{INDEX_NAME}` index...')
    try:
        # Drop a stale index first so the new settings/mappings take effect.
        if es_client.indices.exists(index=INDEX_NAME):
            es_client.indices.delete(index=INDEX_NAME, ignore=[404])
        es_client.indices.create(index=INDEX_NAME, body=settings)
    except Exception as ex:
        # Best-effort: report the failure and signal it through the return
        # value. No `return` inside a `finally` here, so KeyboardInterrupt
        # and SystemExit propagate instead of being silently discarded.
        print(str(ex))
        return False
    print(f'[*] Index `{INDEX_NAME}` created successfully.')
    return True



def index_data(es_client, data, BATCH_SIZE=10000):
    """Index every row of `data` into the `INDEX_NAME` index in batches.

    Each row is wrapped in a document of the form ``{'body': row}``.
    Documents are sent in bulk requests of `BATCH_SIZE` documents; any
    remainder is sent as a final smaller batch. The index is refreshed once
    all batches have been sent. `INDEX_NAME` is not defined in this block
    and must be available at module level.

    Args:
        es_client: Elasticsearch client used for both the bulk requests and
            the final index refresh.
        data: Iterable of rows to index.
        BATCH_SIZE: Number of documents per bulk request.
    """
    docs = []
    count = 0
    for count, line in enumerate(data, start=1):
        docs.append({'body': line})

        if count % BATCH_SIZE == 0:
            # Use the client we were given, not whatever global happens to
            # be named `es_client`.
            index_batch(docs, es_client)
            docs = []
            print('Indexed {} documents.'.format(count))
    if docs:
        # Flush the final, partially filled batch.
        index_batch(docs, es_client)
        print('Indexed {} documents.'.format(count))

    es_client.indices.refresh(index=INDEX_NAME)
    print("Done indexing.")


def index_batch(docs, client=None):
    """Index one batch of documents with a single bulk request.

    Args:
        docs: Sequence of dicts; each dict's fields are sent as one document.
            The dicts are not modified.
        client: Elasticsearch client to send the request with. When omitted,
            the module-level `es_client` is used, which keeps existing
            `index_batch(docs)` callers working (that global is not defined
            in this block).
    """
    # Imported locally so the function does not rely on a module-level
    # `bulk` import, which the cell content written to elastic.py lacks.
    from elasticsearch.helpers import bulk

    if client is None:
        client = es_client
    # Build fresh action dicts instead of adding bulk metadata keys to the
    # caller's documents in place.
    actions = [
        {**doc, "_op_type": "index", "_index": INDEX_NAME}
        for doc in docs
    ]
    bulk(client, actions)

def run_query_loop():
    """Keep prompting for and answering queries until the user hits Ctrl-C."""
    try:
        # Each call handles one query; the loop only ends when the user
        # interrupts it.
        while True:
            handle_query()
    except KeyboardInterrupt:
        return


def handle_query():
    """Prompt for a query, run a `match` search on `body`, and print the hits.

    Reads one line from standard input, searches the `INDEX_NAME` index for
    up to `SEARCH_SIZE` matches using the module-level `es_client`, then
    prints the total hit count, the time spent in the search call, and the
    id, score and source of each hit. `INDEX_NAME`, `SEARCH_SIZE` and
    `es_client` are not defined in this block and must be available at
    module level.
    """
    # Imported locally so the function does not rely on a module-level
    # `time` import, which the cell content written to elastic.py lacks.
    import time

    query = input("Enter query: ")

    search = {"size": SEARCH_SIZE, "query": {"match": {"body": query}}}
    print(search)
    # Start the clock immediately before the request so the reported time
    # covers only the search call, not building and printing the query.
    # perf_counter is monotonic, unlike the wall clock.
    search_start = time.perf_counter()
    response = es_client.search(index=INDEX_NAME, body=search)
    search_time = time.perf_counter() - search_start
    print()
    print("{} total hits.".format(response["hits"]["total"]["value"]))
    print("search time: {:.2f} ms".format(search_time * 1000))
    for hit in response["hits"]["hits"]:
        print("id: {}, score: {}".format(hit["_id"], hit["_score"]))
        print(hit["_source"])
        print()

Writing elastic.py


In [5]:
%reload_ext autoreload
%autoreload 2
from elastic import *