In [None]:
In [74]:
import numpy as np
import requests
import html2text
from googlesearch import search
import json
import re
from simpletransformers.question_answering import QuestionAnsweringModel
from IPython.display import display
from IPython.html import widgets
from bs4 import BeautifulSoup
from markdown import markdown

In [None]:

In [64]:
def query_pages(query, n=5):
    return list(search(query, num=n, stop=n, pause=2))

def query_to_text(query, n=5):
    html_conv = html2text.HTML2Text()
    html_conv.ignore_links = True
    html_conv.escape_all = True
    
    text = []
    for link in query_pages(query, n):
        req = requests.get(link)
        text.append(html_conv.handle(req.text))
        
    return text
    

In [None]:
In [75]:
# Source: https://gist.github.com/lorey/eb15a7f3338f959a78cc3661fbc255fe
def markdown_to_text(markdown_string):
    """ Converts a markdown string to plaintext """

    # md -> html -> text since BeautifulSoup can extract text cleanly
    html = markdown(markdown_string)

    # remove code snippets
    html = re.sub(r'<pre>(.*?)</pre>', ' ', html)
    html = re.sub(r'<code>(.*?)</code >', ' ', html)

    # extract text
    soup = BeautifulSoup(html, "html.parser")
    text = ''.join(soup.findAll(text=True))

    return text

def format_text(text):
    text = markdown_to_text(text)
    text.replace('\n', ' ')
#     text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
#     text = re.sub(r'^http?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    return text
    

In [None]:
In [76]:
format_text(query_to_text('How many fingers do humans have?', n=1)[0])

In [None]:

Out[76]:
"writing\nHow ToGrammar TipsTrendsInspiration\nProductCompany\nstart writing\n\n\n\n\n\nCan You Call Your Thumb a Finger?\nShundalyn Allen\nWriting\n\nA thumb is a digit, but not technically a finger.\nMany people don’t make the distinction between thumbs and other digits.\n\n\nHow many fingers do you have? Your answer might depend on whether you consider\nyour thumb to be a finger. Let’s look into whether or not you can accurately\ncall your thumb a finger.\nHere’s a tip: Want to make sure your writing always looks great? Grammarly\ncan save you from misspellings, grammatical and punctuation mistakes, and\nother writing issues on all your favorite websites.\nYour writing, at its best.\nBe the best writer in the office.\nGet Grammarly\nFinger Definition\nIf you look up finger on OxfordDictionaries.com, you will find this\ndefinition: “Each of the four slender jointed parts attached to either hand. .\n.” Doesn’t this definition seem to exclude the thumb? Why exclude it?\nThumb vs. Finger\nYour thumb is different from your fingers. Your fingers have two joints and\nthree bones called phalanges or phalanxes. A thumb only has one joint and two\nphalanges. Anatomically, a thumb is unique. The position also sets the thumb\napart. The thumb is out to the side of the hand and lower than the four\nfingers. So if a thumb is not really a finger, what is it?\nThumb Definition\nThumb refers to the first digit of the human hand, set apart and opposable to\nthe other four digits of the hand. Appendages like your fingers, toes, and\nthumbs are digits. So humans usually have five digits per hand, one of which\nis the thumb.\nThumb and Finger Examples\nWhat about in practice? Do writers make the distinction between thumbs and\nfingers? Let’s look at some examples:\nMeanwhile, the original poster, who wished to be identified as Vicky,\nexplained to Buzzfeed that her photo was staged as a joke – and she does\nindeed have five fingers after all.\nNews.com.au\n“We had an ultrasound,” said his mom, Sheryl Lilya. “And they were going,\n‘There’s the five fingers, there’s five fingers here.”\nCBS Minnesota\nIt's very close to a human hand in the way it operates, even though it doesn't\nhave four fingers and a thumb.\nARS Technica Uk\nFleck, asked what kind of television rating he expects Tuesday when WMU plays\nat Kent State, curled his four fingers onto the tip of his thumb to form a\nzero.\nM Live\nWhat has four fingers and a thumb, but no hand?\nPersephone Magazine\nAs the examples indicate, some people make the distinction, some don’t, and\nothers only do to contrast the fingers with the thumb. Whether you make the\ndistinction will probably depend on your personality and your audience. Are\nyou wondering about the answer to the riddle from Persephone Magazine? It’s a\nglove!\n\nYour writing, at its best.\nGet Grammarly for free\nWorks on all your favorite websites\n\nRelated Articles\nWritingGrammarly’s Writing Encyclopedia: 2019 in Language\nFrom A to ZWriting5 Most Effective\nMethods for Avoiding PlagiarismWritingRecent\nGrad? Here's How to Approach Your Job SearchWritingAn\nArgument for Exclamation Points (!!!)WritingHow to Ask for HelpWritingThe\nDos and Don’ts of Business Email Etiquette\nWriting, grammar, and communication tips for your inbox.\nsubscribe\nYou have been successfully subscribed to the Grammarly blog.\nView Comments\nproductPlansGrammarly PremiumGrammarly BusinessGrammarly @eduGrammarly Desktop\nAppsThe Grammarly KeyboardGrammarly for MS OfficecompanyAboutCareers &\nCultureBlogPressAffiliatesContact UsfeaturesProduct Feature NewsTone\nDetectorGrammar CheckerPlagiarism CheckersupportHelp CenterPrivacy PolicyTerms\nof ServiceSecuritycommunityFacebookInstagramTwitterLinkedIn\n2019 © Grammarly Inc."

In [None]:

In [3]:

In [None]:


In [59]:
def create_model():
     return QuestionAnsweringModel('distilbert', 'distilbert-base-uncased-distilled-squad')
    
def predict_answer(model, question, contexts, seq_len=512, debug=False):
    split_context = []
    
    if not isinstance(contexts, list):
        contexts = [contexts]
    
    for context in contexts:
        for i in range(0, len(context), seq_len):
            split_context.append(context[i:i+seq_len])
    
    f_data = []
    
    for i, c in enumerate(split_context):
        f_data.append(
            {'qas': 
              [{'question': question,
               'id': i,
               'answers': [{'text': ' ', 'answer_start': 0}],
               'is_impossible': False}],
              'context': c
            })
        
    prediction = model.predict(f_data)
    if debug:
        print(prediction)
    preds = [x['answer'].lower() for x in prediction]
    return max(set(preds), key = preds.count)

In [None]:
In [60]:
def q_to_a(model, question, n=2, debug=False):
    context = query_to_text(question, n=n)
    pred = predict_answer(model, question, context, debug=debug)
    return pred

In [None]:
In [5]:
model = create_model()

In [None]:

In [6]:
predict_answer(model, 'what color is the bird?', 'the bird is red.')
100%|██████████| 1/1 [00:00<00:00, 436.23it/s]
Converting to features started.
HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

In [None]:

Out[6]:
[{'id': 0, 'answer': 'red'}]
In [32]:
question = 'What is the bone on the back of your skull called?'
context = query_to_text(question, n=3)
pred = predict_answer(model, question, context)
print(pred)
 14%|█▎        | 13/95 [00:00<00:00, 127.41it/s]
Converting to features started.
100%|██████████| 95/95 [00:00<00:00, 150.34it/s]
HBox(children=(IntProgress(value=0, max=12), HTML(value='')))
occipital bone


In [None]:

In [35]:
q_to_a(model, 'What is the bone on the back of your skull called?')
 33%|███▎      | 15/45 [00:00<00:00, 143.47it/s]
Converting to features started.
100%|██████████| 45/45 [00:00<00:00, 131.05it/s]
HBox(children=(IntProgress(value=0, max=6), HTML(value='')))
Out[35]:
'occipital bone'
In [45]:
model = create_model()

In [None]:

In [54]:
text = widgets.Text(description='Question:', width=300)
display(text)

button = widgets.Button(description='Get an Answer')
display(button)

def on_button_click(b)
    answer = q_to_a(model, text.value)
    print('Answer:', answer)
    
button.on_click(on_button_click)
Text(value='', description='Question:')
Button(description='Get an Answer', style=ButtonStyle())
How many fingers do humans have?
 28%|██▊       | 15/53 [00:00<00:00, 144.69it/s]
Converting to features started.
100%|██████████| 53/53 [00:00<00:00, 141.50it/s]
HBox(children=(IntProgress(value=0, max=7), HTML(value='')))
Answer: 
In [57]:
q_to_a(model, 'How many fingers do humans have?')
 32%|███▏      | 17/53 [00:00<00:00, 167.23it/s]
Converting to features started.
100%|██████████| 53/53 [00:00<00:00, 150.85it/s]
HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

In [None]:

Out[57]:
''
In [62]:
answer = q_to_a(model, 'How many fingers do humans have?', debug=True)
 32%|███▏      | 17/53 [00:00<00:00, 165.33it/s]
Converting to features started.
100%|██████████| 53/53 [00:00<00:00, 149.82it/s]
HBox(children=(IntProgress(value=0, max=7), HTML(value='')))
[{'id': 0, 'answer': ''}, {'id': 1, 'answer': 'How many'}, {'id': 2, 'answer': 'four'}, {'id': 3, 'answer': 'four'}, {'id': 4, 'answer': 'five'}, {'id': 5, 'answer': 'five'}, {'id': 6, 'answer': 'thumb'}, {'id': 7, 'answer': ''}, {'id': 8, 'answer': ''}, {'id': 9, 'answer': '356'}, {'id': 10, 'answer': ''}, {'id': 11, 'answer': '443cf72c056c479de112086ea9ccadf9-235x124.jpeg'}, {'id': 12, 'answer': ''}, {'id': 13, 'answer': '2019'}, {'id': 14, 'answer': ''}, {'id': 15, 'answer': 'five'}, {'id': 16, 'answer': 'five'}, {'id': 17, 'answer': 'five'}, {'id': 18, 'answer': 'four'}, {'id': 19, 'answer': ''}, {'id': 20, 'answer': 'two'}, {'id': 21, 'answer': 'Fingers do not contain muscles'}, {'id': 22, 'answer': 'two'}, {'id': 23, 'answer': 'human thumb also has other muscles in the thenar group'}, {'id': 24, 'answer': 'two'}, {'id': 25, 'answer': 'small finger'}, {'id': 26, 'answer': '5'}, {'id': 27, 'answer': '6'}, {'id': 28, 'answer': '8'}, {'id': 29, 'answer': 'fingertips.[8] ### Brain representation Each finger'}, {'id': 30, 'answer': '32'}, {'id': 31, 'answer': '13'}, {'id': 32, 'answer': '500'}, {'id': 33, 'answer': 'seven'}, {'id': 34, 'answer': '17'}, {'id': 35, 'answer': 'five'}, {'id': 36, 'answer': '380 3'}, {'id': 37, 'answer': ''}, {'id': 38, 'answer': ''}, {'id': 39, 'answer': ''}, {'id': 40, 'answer': ''}, {'id': 41, 'answer': '16'}, {'id': 42, 'answer': ''}, {'id': 43, 'answer': ''}, {'id': 44, 'answer': ''}, {'id': 45, 'answer': ''}, {'id': 46, 'answer': ''}, {'id': 47, 'answer': '926793311'}, {'id': 48, 'answer': '2017'}, {'id': 49, 'answer': ''}, {'id': 50, 'answer': ''}, {'id': 51, 'answer': ''}, {'id': 52, 'answer': ''}]

In [None]:

In [61]:
query_to_text('How many fingers do humans have?', n=2)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-61-2c5d8914128b> in <module>
----> 1 query_to_text('How many fingers do humans have?', n=2, debug=True)

TypeError: query_to_text() got an unexpected keyword argument 'debug'