
> The order in which each method is called after loading text data and requesting an answer.

```
_load_path()
_load_path()
_load_path()
add_url()
texts_from_url()
answer()
_load_context()
_load_spelling_context()
_save_spelling_context()
build_paragraph()
max_model()
```

In [48]:
from qaam_nlp import QAAM

# Short aliases for the pipeline/model names that can be handed to QAAM.
models = {
    "spacy-sm": "en_core_web_sm",
    "spacy-lg": "en_core_web_lg",
    "bert-uncased": "en_trf_bertbaseuncased_lg"
}

# Build the question-answering object on the large spaCy pipeline.
# NOTE(review): top_k presumably caps how many candidates are considered — confirm against QAAM.
qaam = QAAM(model=models["spacy-lg"], top_k=15)
# NOTE(review): hard-coded ngrok tunnel address; such URLs are usually temporary,
# so this cell likely needs a fresh endpoint before it can be re-run.
qaam.add_url("http://dfaff921.ngrok.io")

In [49]:
# Let's load the blog post's text into the model:
qaam.texts_from_url("https://explosion.ai/blog/spacy-transformers")
# The length of qaam.doc is reported as the number of extracted tokens.
print(f"number of tokens extracted: {len(qaam.doc)}")

number of tokens extracted: 2840


In [50]:
from pprint import pprint

# The misspellings in the question ("fine-tunme", "Trnsformer") appear deliberate:
# the following cell reads the corrected form back from qaam.history["spelling"].
prediction = qaam.answer("How can I fine-tunme a Trnsformer model?")
pprint(prediction)

{'answer': 'Internally, the transformer model will predict over sentences',
 'context': 'Internally, the transformer model will predict over sentences, '
            'and the resulting tensor features will be reconstructed to '
            'produce document-level annotations.'}


In [51]:
# First entry of the spelling history: the question text after correction.
qaam.history["spelling"][0]

'How can I fine-tune a transformer model?'

In [52]:
# QAAM has built-in methods to handle spelling errors, drawing on a large
# English dictionary and the actual context of the extracted texts.
# The misspelling "improev" below appears intentional, to exercise that correction.
pprint(qaam.answer("What to do to improev predictions?"))

{'answer': 'perform length-based subbatching internally',
 'context': 'In order to further improve efficiency and reduce memory '
            'requirements, we also perform length-based subbatching '
            'internally. The aligned tokenization should be especially helpful '
            'for answering questions like "Do these two transformers pay '
            'attention to the same words?".'}


In [53]:
# Most recent entry of the spelling history (the slice yields a one-element list).
qaam.history["spelling"][-1:]

['What to do to improve predictions?']

In [54]:
# A correctly spelled question, for comparison with the misspelled ones above.
pprint(qaam.answer("Is there anything wrong with this approach?"))

{'answer': 'a disadvantage when the model is small or data is limited',
 'context': 'This (slightly more) "blank slate" approach is a disadvantage '
            'when the model is small or data is limited, but with a big enough '
            'model and sufficient examples, transformers are able to reach a '
            'much more subtle understanding of linguistic information.'}


In [55]:
# Most recent spelling-history entry; for a correctly spelled question it matches the input.
qaam.history["spelling"][-1:]

['Is there anything wrong with this approach?']

In [56]:
# Embed the question and the context stored at index 1 of the history, then compare them.
# NOTE(review): the index is a position in qaam.history, which presumably grows with every
# answer() call in this kernel, so re-running cells may change which pair is compared — confirm.
embed_question = qaam.embedd_sequence(qaam.history["spelling"][1])
embed_context = qaam.embedd_sequence(qaam.history["context"][1])
# NOTE(review): the method is named cosine_distance, but the result is stored under a
# "similarity"-style name (sic) — confirm whether higher means closer.
similary = qaam.cosine_distance(embed_question, embed_context)
print(similary)

0.6673790216445923


In [57]:
# Ask a question whose wording is close to a sentence in the source text.
prediction = qaam.answer("How do models pay attention to same words in a sentence or context?")
pprint(prediction)

{'answer': 'aligned tokenization',
 'context': 'The aligned tokenization should be especially helpful for '
            'answering questions like "Do these two transformers pay attention '
            'to the same words?".'}


In [58]:
import random
def similarities(doc_a, doc_b, model=None):
    """Score how closely two texts relate using the model's sequence embeddings.

    Args:
        doc_a: First text (for example a question).
        doc_b: Second text (for example a context passage).
        model: Object exposing ``embedd_sequence`` and ``cosine_distance``.
            Defaults to the notebook-level ``qaam`` instance, so existing
            two-argument calls behave exactly as before.

    Returns:
        Whatever ``model.cosine_distance`` returns for the two embeddings.
    """
    if model is None:
        # Looked up at call time so the helper always follows the current ``qaam``.
        model = qaam
    embed_a = model.embedd_sequence(doc_a)
    embed_b = model.embedd_sequence(doc_b)
    return model.cosine_distance(embed_a, embed_b)

In [59]:
# Fixed seed so the two shuffles below give the same pairing on every run.
random.seed(0)
# X holds indices into history['spelling'] (questions); Y holds indices into
# history['context'] and history['answer'].
# NOTE(review): X has 8 entries and Y has 12; zip() stops at the shorter one,
# so only the first 8 shuffled Y values are used — confirm this is intended.
# NOTE(review): the indices assume at least four entries already exist in
# qaam.history, which depends on the cells run earlier in this kernel.
X = [0, 1, 2, 3] * 2
Y = [1, 0, 1, 3] * 3
random.shuffle(X)
random.shuffle(Y)

results = []
for x, y in zip(X, Y):
    # Pair question x with the context and answer recorded at position y.
    (Q, C, A) = (
        qaam.history['spelling'][x],
        qaam.history['context'][y],
        qaam.history["answer"][y],)
    # Score the question against the (possibly unrelated) context.
    S = similarities(Q, C)
    results.append((Q, A, S))
# Order the (question, answer, score) tuples by score, highest first.
results = sorted(results, key=lambda i: i[2], reverse=True)

In [60]:
# Print each (question, answer, score) tuple on its own line.
for doc in results:
    print(doc)

('How can I fine-tune a transformer model?', 'Internally, the transformer model will predict over sentences', 0.6673790216445923)
('How can I fine-tune a transformer model?', 'Internally, the transformer model will predict over sentences', 0.6673790216445923)
('How can I fine-tune a transformer model?', 'Internally, the transformer model will predict over sentences', 0.6673790216445923)
('How can I fine-tune a transformer model?', 'a disadvantage when the model is small or data is limited', 0.5869237780570984)
('What to do to improve predictions?', 'Internally, the transformer model will predict over sentences', 0.5800701379776001)
('Is there anything wrong with this approach?', 'Internally, the transformer model will predict over sentences', 0.4755496084690094)
('Is there anything wrong with this approach?', 'Internally, the transformer model will predict over sentences', 0.4755496084690094)
('What to do to improve predictions?', 'a disadvantage when the model is small or data is limi

In [64]:
from collections import Counter
def get_blog_entities(spacy_doc: object, top_k=10):
    """Return the most frequent named-entity strings found in a document.

    Args:
        spacy_doc: Object exposing an ``ents`` iterable whose items carry a
            ``text`` attribute (for example a parsed spaCy document).
        top_k: Maximum number of entries to return; passed straight to
            ``Counter.most_common``.

    Returns:
        List of ``(entity_text, count)`` tuples ordered from most to least
        frequent.
    """
    # Counter tallies the iterable directly, replacing the manual dict
    # bookkeeping (and the loop-variable rebinding it required).
    return Counter(ent.text for ent in spacy_doc.ents).most_common(top_k)

# Show the 15 most frequent entity strings in the loaded blog post.
get_blog_entities(qaam.doc, top_k=15)

[('BERT', 6),
 ('NLP', 6),
 ('Prodigy', 4),
 ('2019', 4),
 ('API', 3),
 ('one', 3),
 ('two', 3),
 ('GPT-2', 2),
 ('GPU', 2),
 ('TPU', 2),
 ('Transformers', 2),
 ('IMDB', 2),
 ('❄', 2),
 ('XLNet', 1),
 ("Hugging Face's", 1)]