In [1]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
from elasticsearch_dsl import Q
# client used by es_search below; no arguments are passed, so the library's default connection settings apply
es_client = Elasticsearch()
from pprint import pprint

In [2]:
# search_results_data (from app) is the search-result object that direct_es_search_result fills in below
from app import search_results_data

In [3]:
# We use elasticsearch-dsl this time: a higher-level Python client for Elasticsearch.
# !pip install elasticsearch-dsl

In [17]:
def es_search(query, cutoff = 1000, index="msmacro-full", fields = None):
    '''
    run a multi_match query against an elasticsearch index and return the raw hits

    query:  the text to search for
    cutoff: maximum number of hits to request (sent as the request "size")
    index:  name of the index to search
    fields: document fields to match the query against;
            None (the default) means ["passage", "query"]

    returns (result_count, hits), where hits is the list found under
    ['hits']['hits'] of the response and result_count is len(hits), i.e. the
    number of hits actually returned (at most cutoff), not the total number
    of matching documents
    '''
    # a fresh list per call instead of a mutable default shared between calls
    if fields is None:
        fields = ["passage", "query"]

    q = Q({"multi_match": {"query": query, "fields": fields}})

    # extra() returns a new Search carrying "size" in the request body
    s = Search(using=es_client, index=index).query(q).extra(size=cutoff)

    response = s.execute()
    hits = response.to_dict()['hits']['hits']
    return len(hits), hits

In [18]:
# fetch the top three hits for a sample query from msmacro-full
result_count, hits = es_search(
    "what is python",
    cutoff=3,
    index="msmacro-full",
)



In [19]:
# pretty-print the first hit to inspect the structure of a raw result
pprint(hits[0])

{'_id': 'hW5H-HoBVnrKUyODw7l4',
 '_index': 'msmacro-full',
 '_score': 18.307333,
 '_source': {'passage': 'What is the Python Software Foundation?¶. The Python '
                        'Software Foundation is an independent non-profit '
                        'organization that holds the copyright on Python '
                        'versions 2.1 and newer. The PSF’s mission is to '
                        'advance open source technology related to the Python '
                        'programming language and to publicize the use of '
                        'Python.',
             'pid': 2713990,
             'qid': 267012,
             'query': 'how long should i wait to handle my ball python after '
                      'feeding'},
 '_type': '_doc'}


In [20]:
def direct_es_search_result(search_session_id, query_input, hits):
    '''
    turn raw elasticsearch hits into a search results object

    each hit contributes its 1-based position in hits as the rank, the
    qid / pid / query / passage values from its '_source', and its '_score';
    the per-field lists are handed to search_results_data together with
    search_session_id and query_input
    '''
    columns = {
        "ranks": [],
        "qids": [],
        "pids": [],
        "query_labels": [],
        "passages": [],
        "scores": [],
    }

    for position, hit in enumerate(hits, start=1):
        source = hit['_source']
        columns["ranks"].append(position)
        columns["qids"].append(source['qid'])
        columns["pids"].append(source['pid'])
        columns["query_labels"].append(source['query'])
        columns["passages"].append(source['passage'])
        columns["scores"].append(hit['_score'])

    # the dict keys are exactly the keyword names search_results_data is called with
    return search_results_data(
        search_session_id=search_session_id,
        query_input=query_input,
        **columns,
    )


In [21]:
# wrap the current hits in a search results object
# NOTE(review): the session id and query input look like throwaway test values - confirm
temp = direct_es_search_result(25234, "aksjdf", hits)

In [22]:
# inspect the result object: session id, query input, then the table of hits
pprint(temp.search_session_id)
pprint(temp.query_input)
pprint(temp.table)

25234
'aksjdf'
[{'passage': 'What is the Python Software Foundation?¶. The Python Software '
             'Foundation is an independent non-profit organization that holds '
             'the copyright on Python versions 2.1 and newer. The PSF’s '
             'mission is to advance open source technology related to the '
             'Python programming language and to publicize the use of Python.',
  'pid': 2713990,
  'qid': 267012,
  'query_label': 'how long should i wait to handle my ball python after '
                 'feeding',
  'rank': 1,
  'score': 18.307333},
 {'passage': 'Python Glob. The module glob (in Python) is what you want to be '
             'using if you are trying to use wildcards with Python. '
             'glob.glob(). In case you need to learn or review about wildcards '
             '(in Mac OS X): koldfyre’s tutorial.And here is the official site '
             'with slightly complicated explanations.he module glob (in '
             'Python) is what you want

# Some experiments on the efaqa-70 index

In [26]:
# search efaqa-70 over its pseudo_passage / pseudo_query fields, then show how many hits came back
result_count, hits = es_search(
    "你",
    cutoff=1000,
    index="efaqa-70",
    fields=["pseudo_passage", "pseudo_query"],
)
result_count

1000

In [27]:
# pretty-print the first hit of the current result list to inspect its fields
pprint(hits[0])

{'_id': 'MoDx1HsBVPLpt6nxIGK3',
 '_index': 'efaqa-70',
 '_score': 3.4125881,
 '_source': {'pseudo_passage': '您的观点似乎有些 '
                               "''硬''的感觉人贵有自知的能力，找到一个爱自己的，同时自己也爱他的，两全其美只有学会爱自己，才会真正爱别人！一个真正爱自己的人，别人才会爱她，她也才会真实的爱别人你需要心理成长，爱让你投入到忘我，这本身也是不成熟的表现。正所谓君子有所为，有所不为。如果你想成长自己，更成熟一些，可以和我咨询婚姻里如何权衡好爱人与被爱的关系并不容易。把握好亲密关系不是一味的付出或者索取得到的。面对家庭婚姻和孩子，如何成为积极面对生活的人，一切问题就都不会是问题。专业婚恋咨询可以关注我，希望帮能解除困扰。",
             'pseudo_query': '爱你的人无论怎样都会帮你，为你付出，而你爱的人往往是背叛你的人。当你爱一个人很深时，你会发现已经失去自我，迷失方向，当那个人深深伤你，丢下你，你才会发现你的爱是多么的卑微，原来在那一方的爱根本不存在，于是你陷入自责，自伤，你寻求帮助，寻求安慰，可是你却发现只有自己可以相信有时候还是自我一些好吧现在的生活让我出现了恐慌，也对婚姻生活出现了不信任，不知道该让自己如何走。如果没有孩子我会选择离婚，恢复到原来的自己，好好让自己生活',
             'short_query': '爱你的人无论怎样都会帮你，为你付出，而你爱的人往往是背叛你的人。'},
 '_type': '_doc'}
