In [18]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
from elasticsearch_dsl import Q
# Shared client used by es_search(). It is created with no arguments, so it relies on
# the library's default connection settings (presumably a local node — confirm for your setup).
es_client = Elasticsearch()
from pprint import pprint

In [12]:
# search_results_data (from the local app module) builds the search-results object used below
from app import search_results_data

In [13]:
# We use elasticsearch-dsl this time: a higher-level Python client for Elasticsearch.
# Install it first if needed: %pip install elasticsearch-dsl

In [14]:
def es_search(query, cutoff = 1000, index="msmacro-full", fields = ("passage", "query")):
    '''
    Run a multi_match query against an Elasticsearch index and return the raw hits.

    Parameters:
        query: the query text matched against every field in ``fields``.
        cutoff: value sent as the request's ``size`` (how many hits to ask for).
        index: name of the index to search.
        fields: field name(s) the multi_match query runs over; either a single
            string or any iterable of strings.

    Returns:
        A ``(result_count, hits)`` tuple. ``hits`` is the list stored under
        ``['hits']['hits']`` of the response converted to a dict, and
        ``result_count`` is ``len(hits)`` — the number of hits actually
        returned, not the total number of matches in the index.

    Uses the module-level ``es_client`` for the connection.
    '''
    # Build a fresh list on every call so the query body never aliases the
    # default value (or the caller's own sequence). A bare string is wrapped
    # rather than split into characters.
    if isinstance(fields, str):
        match_fields = [fields]
    else:
        match_fields = list(fields)

    q = Q({"multi_match": {"query": query, "fields": match_fields}})

    s = Search(using=es_client, index=index).query(q)

    # Merge the hit limit into the request body of the Search object.
    s.update_from_dict({"size": cutoff})

    response = s.execute()
    # Work with plain dicts so callers can index hits as hit['_source'][...].
    hits = response.to_dict()['hits']['hits']
    return len(hits), hits

In [45]:
# Sample query against the msmacro-full index, asking for at most 3 hits.
result_count, hits = es_search("what is python", cutoff = 3, index="msmacro-full")



In [46]:
# Inspect the first raw hit to see which keys a hit and its _source carry.
pprint(hits[0])

{'_id': 'hW5H-HoBVnrKUyODw7l4',
 '_index': 'msmacro-full',
 '_score': 18.307333,
 '_source': {'passage': 'What is the Python Software Foundation?¶. The Python '
                        'Software Foundation is an independent non-profit '
                        'organization that holds the copyright on Python '
                        'versions 2.1 and newer. The PSF’s mission is to '
                        'advance open source technology related to the Python '
                        'programming language and to publicize the use of '
                        'Python.',
             'pid': 2713990,
             'qid': 267012,
             'query': 'how long should i wait to handle my ball python after '
                      'feeding'},
 '_type': '_doc'}


In [53]:
def direct_es_search_result(search_session_id, query_input, hits):
    '''
    Convert raw Elasticsearch hits into a search results object.

    Each hit contributes one entry to six parallel lists: its 1-based position
    in ``hits`` (rank), the ``qid``, ``pid``, ``query`` and ``passage`` values
    from its ``_source``, and its ``_score``. The lists are handed to
    ``search_results_data`` together with ``search_session_id`` and
    ``query_input``; whatever that call returns is returned unchanged.
    '''
    # One tuple per hit, in the column order expected below.
    rows = []
    for rank, hit in enumerate(hits, start=1):
        source = hit['_source']
        rows.append((
            rank,
            source['qid'],
            source['pid'],
            source['query'],
            source['passage'],
            hit['_score'],
        ))

    # Transpose rows into columns; zip(*[]) yields nothing, so handle no hits explicitly.
    if rows:
        columns = [list(column) for column in zip(*rows)]
    else:
        columns = [[], [], [], [], [], []]
    rank_col, qid_col, pid_col, label_col, passage_col, score_col = columns

    return search_results_data(
        search_session_id=search_session_id,
        query_input=query_input,
        ranks=rank_col,
        qids=qid_col,
        pids=pid_col,
        query_labels=label_col,
        passages=passage_col,
        scores=score_col,
    )


In [57]:
# Try the parser on the hits fetched above; the session id and query text
# appear to be arbitrary placeholder values.
temp = direct_es_search_result(25234, "aksjdf", hits)

In [58]:
# Inspect what the result object exposes: the session id, the query text,
# and the per-hit table.
pprint(temp.search_session_id)
pprint(temp.query_input)
pprint(temp.table)

25234
'aksjdf'
[{'passage': 'What is the Python Software Foundation?¶. The Python Software '
             'Foundation is an independent non-profit organization that holds '
             'the copyright on Python versions 2.1 and newer. The PSF’s '
             'mission is to advance open source technology related to the '
             'Python programming language and to publicize the use of Python.',
  'pid': 2713990,
  'qid': 267012,
  'query_label': 'how long should i wait to handle my ball python after '
                 'feeding',
  'rank': 1,
  'score': 18.307333},
 {'passage': 'Python Glob. The module glob (in Python) is what you want to be '
             'using if you are trying to use wildcards with Python. '
             'glob.glob(). In case you need to learn or review about wildcards '
             '(in Mac OS X): koldfyre’s tutorial.And here is the official site '
             'with slightly complicated explanations.he module glob (in '
             'Python) is what you want