In [31]:
%load_ext autoreload
%autoreload 2

import sys
import os
import re
import pandas as pd
from tqdm.notebook import tqdm
from elasticsearch import Elasticsearch

# Add the parent of the current working directory to sys.path (only when it is
# not already listed) so the `utils.*` imports below can be resolved.
# NOTE(review): assumes the kernel's working directory sits directly under the repo root — confirm.
module_path = os.path.dirname(os.getcwd())
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.data_utils import load_pkl_file
from utils.elastic_utils import connect_es, run_question_query
from utils.model_utils import DocumentReader

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Example Notebook

This notebook demonstrates how to work with NQ data in Elasticsearch. It assumes that `prepare_data.py` has already been executed per the repo's README.md.

## Connect to Local Elasticsearch Instance

In [3]:
# Assumes an Elasticsearch server is already running and reachable at localhost:9200.
es = connect_es(host='localhost', port=9200)

In [4]:
# Connectivity check: confirm the client can reach the Elasticsearch server.
es.ping()

True

In [5]:
# ensure our demo_index is available
# Ask Elasticsearch about this one index directly instead of fetching the
# alias table of every index and searching its keys.
es.indices.exists(index='demo_index')

True

In [15]:
# Number of documents currently stored in demo_index.
es.count(index='demo_index')['count']

40971

In [22]:
# NOTE(review): `qa_records` is only assigned further down, in the
# "Load NQ Question/Answer Records" section. On a fresh kernel this cell raises
# NameError unless that load cell has been run first — consider moving this
# cell below the load.
len(qa_records)

287854

In [6]:
# List every index visible to this client together with its aliases.
es.indices.get_alias('*')

{'cases': {'aliases': {}},
 'baseline': {'aliases': {}},
 'nq_wiki_data_test': {'aliases': {}},
 'nq_wiki_data': {'aliases': {}},
 'demo_index': {'aliases': {}}}

### Load NQ Question/Answer Records 

In [21]:
# Load the question/answer records used for evaluation.
# NOTE(review): load_pkl_file presumably unpickles this path — only load pickle
# files that come from a trusted source.
qa_records = load_pkl_file('../data/eval_data/qa_records_fullsys.pkl')

In [21]:
# Inspect a single record to see which fields each entry carries.
qa_records[0]

{'example_id': 5328212470870865242,
 'document_title': 'The_Mother_(How_I_Met_Your_Mother)',
 'document_url': 'https://en.wikipedia.org//w/index.php?title=The_Mother_(How_I_Met_Your_Mother)&amp;oldid=802354471',
 'question_text': 'how i.met your mother who is the mother',
 'short_answer': 'Tracy McConnell'}

### Ask Elasticsearch a Question

In [35]:
question = 'Who is the mother in how i met your mother?'

# Run the question against demo_index, requesting n_results=5.
res = run_question_query(
    es_obj=es,
    index_name='demo_index',
    question_text=question,
    n_results=5,
)

In [13]:
# Show the raw response returned by run_question_query.
# NOTE(review): this prints every hit's full `document_text_clean`, so the
# output is very long; consider displaying only each hit's `_score` and
# `document_title` instead.
res

{'took': 296,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 10000, 'relation': 'gte'},
  'max_score': 14.754465,
  'hits': [{'_index': 'demo_index',
    '_type': '_doc',
    '_id': '34652',
    '_score': 14.754465,
    '_source': {'document_title': 'Lily_Aldrin',
     'document_url': 'https://en.wikipedia.org//w/index.php?title=Lily_Aldrin&amp;oldid=836074294',
     'document_text_clean': "Lily Aldrin - wikipedia  Lily Aldrin  Jump to : navigation , search      This article needs additional citations for verification . Please help improve this article by adding citations to reliable sources . Unsourced material may be challenged and removed . ( May 2014 ) ( Learn how and when to remove this template message )       Lily Aldrin     How I Met Your Mother character     Alyson Hannigan as Lily Aldrin     First appearance   `` Pilot ''     Last appearance   `` Last Forever ''     Created by   Carter Bays Craig Thomas 

### Apply BERT Reader to Get Answer

In [32]:
# Build the reader used to extract an answer from a retrieved article.
# NOTE(review): the string looks like a Hugging Face model id for a BERT model
# fine-tuned on SQuAD 2.0 — confirm how DocumentReader uses it.
reader = DocumentReader("deepset/bert-base-cased-squad2")



In [36]:
# let's check the first article returned

hits = res['hits']['hits']
# Fail with a readable message instead of a bare IndexError when the query
# matched no documents.
assert hits, f'No documents were returned for question: {question!r}'

# Use the cleaned text of the first (highest-scoring) hit as the reader's context.
context = hits[0]['_source']['document_text_clean']
reader.tokenize(question, context)

print(f'Question: {question} \n')
print(f"Answer: {reader.get_answer()}")



Question: Who is the mother in how i met your mother? 

Answer: Janice Aldrin / Tracy McConnell / 
