In [4]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import json
import time

In [5]:
# Build a client with no explicit arguments; the bare `es` expression below
# makes the notebook cell echo the client's repr.
es = Elasticsearch()
es

<Elasticsearch([{}])>

In [None]:
def create_index(es_client):
    """Create the index named by ``INDEX_NAME``, dropping any existing one first.

    Args:
        es_client: Client object exposing ``indices.exists``,
            ``indices.delete`` and ``indices.create``.

    Returns:
        True if the index was created; False if an ``Exception`` was raised
        along the way (its message is printed).
    """
    # Index settings
    settings = {
        "settings": {
            "number_of_shards": 2,
            "number_of_replicas": 1
        },
        "mappings": {
            "dynamic": "true",
            "_source": {
                "enabled": "true"
            },
            "properties": {
                "body": {
                    "type": "text"
                }
            }
        }
    }
    print('Creating `Question` index...')
    try:
        if es_client.indices.exists(index=INDEX_NAME):
            es_client.indices.delete(index=INDEX_NAME, ignore=[404])
        es_client.indices.create(index=INDEX_NAME, body=settings)
    except Exception as ex:
        # Best-effort: report the failure and signal it through the return
        # value. There is deliberately no `return` in a `finally` clause,
        # because that would also swallow KeyboardInterrupt / SystemExit.
        print(str(ex))
        return False
    print('index `Question` created successfully.')
    return True



def index_data(es_client, data, BATCH_SIZE=100000):
    """Index every item of ``data`` as a document, sending them in batches.

    Each item is wrapped as ``{'body': item}``. A batch is handed to
    ``index_batch`` whenever the running count is a multiple of
    ``BATCH_SIZE``; leftovers are sent after the loop. Finally the index
    named by ``INDEX_NAME`` is refreshed through ``es_client``.

    Args:
        es_client: Client used for the final ``indices.refresh`` call.
        data: Iterable of values to store under the ``body`` key.
        BATCH_SIZE: Number of documents per batch.
    """
    pending = []
    count = 0
    for count, line in enumerate(data, start=1):
        pending.append({'body': line})
        if count % BATCH_SIZE != 0:
            continue
        # Batch is full: ship it and start a fresh list.
        index_batch(pending)
        pending = []
        print('Indexed {} documents.'.format(count))

    # Flush whatever did not fill a complete batch.
    if pending:
        index_batch(pending)
        print('Indexed {} documents.'.format(count))

    es_client.indices.refresh(index=INDEX_NAME)
    print("Done indexing.")


def index_batch(docs):
    """Index a batch of documents with a single ``bulk`` call.

    Each action is a copy of the document's own keys plus ``_op_type``
    ("index") and ``_index`` (``INDEX_NAME``). The actions are sent through
    the module-level ``es_client``.

    Args:
        docs: Iterable of dicts, one per document.
    """
    # Build fresh dicts instead of aliasing `doc`, so the caller's documents
    # are not polluted with bulk metadata keys.
    actions = [
        {**doc, "_op_type": "index", "_index": INDEX_NAME}
        for doc in docs
    ]
    bulk(es_client, actions)

def run_query_loop():
    """Keep calling ``handle_query`` until the user interrupts with Ctrl-C."""
    try:
        while True:
            handle_query()
    except KeyboardInterrupt:
        # An interrupt is the normal way to leave the loop.
        return


def handle_query():
    """Prompt for a query, run a ``match`` search on ``body`` and print the hits.

    Reads the query from standard input and searches ``INDEX_NAME`` through
    the module-level ``es_client``, asking for at most ``SEARCH_SIZE``
    results. Prints the total hit count, the elapsed search time in
    milliseconds, and for every hit its id, score and ``_source``.
    """
    query = input("Enter query: ")

    search = {"size": SEARCH_SIZE, "query": {"match": {"body": query}}}
    print(search)

    # Time only the search call, using a monotonic high-resolution clock, so
    # the figure is not skewed by the debug print or wall-clock adjustments.
    search_start = time.perf_counter()
    response = es_client.search(index=INDEX_NAME, body=json.dumps(search))
    search_time = time.perf_counter() - search_start

    print()
    print("{} total hits.".format(response["hits"]["total"]["value"]))
    print("search time: {:.2f} ms".format(search_time * 1000))
    for hit in response["hits"]["hits"]:
        print("id: {}, score: {}".format(hit["_id"], hit["_score"]))
        print(hit["_source"])
        print()
