In [1]:
from adaptnlp import EasyTokenTagger

# Examples of using EasyTokenTagger

In [2]:
# Example passage shared by the tagging calls in this notebook.
# The first line ends with a space before the newline; it is spelled out
# explicitly so it cannot be lost to trailing-whitespace stripping.
example_text = (
    "Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions. \n"
    "The Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert."
)

# A single tagger instance is reused for all of the models below
tagger = EasyTokenTagger()

## Named Entity Recognition

In [3]:
# Run the "ner-ontonotes" model over the example text
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="ner-ontonotes",
)

2020-02-14 07:05:33,824 loading file /home/ubuntu/.flair/models/en-ner-ontonotes-v0.4.pt


In [4]:
# Show each sentence as plain text with its tags written inline
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <B-ORG> Solutions <E-ORG> is the best . Albert <B-PERSON> Einstein <E-PERSON> used to be employed at Novetta <B-ORG> Solutions <E-ORG> . The Wright <S-PERSON> brothers loved to visit the JBF <S-ORG> headquarters , and they would have a chat with Albert <S-PERSON> .


In [5]:
# Print one line per span stored under the "ner" tag type
print("List entities tagged:\n")
for sentence in sentences:
    ner_spans = sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [7,8]: "Albert Einstein"
ORG-span [14,15]: "Novetta Solutions"
PERSON-span [18]: "Wright"
ORG-span [24]: "JBF"
PERSON-span [34]: "Albert"


In [6]:
# Dump the "ner" tagging results of each sentence as a dictionary
print("Get json of tagged information:\n")
for sentence in sentences:
    ner_payload = sentence.to_dict(tag_type="ner")
    # The extra "\n" argument leaves a blank line after each dictionary
    print(ner_payload, "\n")

Get json of tagged information:

{'text': 'Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions.  The Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert.', 'labels': [], 'entities': [{'text': 'Novetta Solutions', 'start_pos': 0, 'end_pos': 17, 'type': 'ORG', 'confidence': 0.9644385576248169}, {'text': 'Albert Einstein', 'start_pos': 31, 'end_pos': 46, 'type': 'PERSON', 'confidence': 0.9968637228012085}, {'text': 'Novetta Solutions', 'start_pos': 70, 'end_pos': 87, 'type': 'ORG', 'confidence': 0.9795641601085663}, {'text': 'Wright', 'start_pos': 94, 'end_pos': 100, 'type': 'PERSON', 'confidence': 0.9994567036628723}, {'text': 'JBF', 'start_pos': 129, 'end_pos': 132, 'type': 'ORG', 'confidence': 0.9897594451904297}, {'text': 'Albert', 'start_pos': 179, 'end_pos': 185, 'type': 'PERSON', 'confidence': 0.9998805522918701}]} 



## Parts of Speech

In [7]:
# Tag the example text with the "pos" model
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="pos",
)

2020-02-14 07:05:42,907 loading file /home/ubuntu/.flair/models/en-pos-ontonotes-v0.4.pt


In [8]:
# Show each sentence as plain text with its tags written inline
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <PROPN> Solutions <PROPN> is <VERB> the <DET> best <ADJ> . <PUNCT> Albert <PROPN> Einstein <PROPN> used <VERB> to <PART> be <VERB> employed <VERB> at <ADP> Novetta <PROPN> Solutions <PROPN> . <PUNCT> The <DET> Wright <PROPN> brothers <NOUN> loved <VERB> to <PART> visit <VERB> the <DET> JBF <PROPN> headquarters <NOUN> , <PUNCT> and <CCONJ> they <PRON> would <AUX> have <VERB> a <DET> chat <NOUN> with <ADP> Albert <PROPN> . <PUNCT>


In [9]:
# Print one line per span stored under the "pos" tag type
print("List text/entities tagged:\n")
for sentence in sentences:
    pos_spans = sentence.get_spans("pos")
    for span in pos_spans:
        print(span)

List text/entities tagged:

PROPN-span [1]: "Novetta"
PROPN-span [2]: "Solutions"
VERB-span [3]: "is"
DET-span [4]: "the"
ADJ-span [5]: "best"
PUNCT-span [6]: "."
PROPN-span [7]: "Albert"
PROPN-span [8]: "Einstein"
VERB-span [9]: "used"
PART-span [10]: "to"
VERB-span [11]: "be"
VERB-span [12]: "employed"
ADP-span [13]: "at"
PROPN-span [14]: "Novetta"
PROPN-span [15]: "Solutions"
PUNCT-span [16]: "."
DET-span [17]: "The"
PROPN-span [18]: "Wright"
NOUN-span [19]: "brothers"
VERB-span [20]: "loved"
PART-span [21]: "to"
VERB-span [22]: "visit"
DET-span [23]: "the"
PROPN-span [24]: "JBF"
NOUN-span [25]: "headquarters"
PUNCT-span [26]: ","
CCONJ-span [27]: "and"
PRON-span [28]: "they"
AUX-span [29]: "would"
VERB-span [30]: "have"
DET-span [31]: "a"
NOUN-span [32]: "chat"
ADP-span [33]: "with"
PROPN-span [34]: "Albert"
PUNCT-span [35]: "."


In [10]:
# Dump the "pos" tagging results of each sentence as a dictionary
print("Get json of tagged information:\n")
for sentence in sentences:
    pos_payload = sentence.to_dict(tag_type="pos")
    # The extra "\n" argument leaves a blank line after each dictionary
    print(pos_payload, "\n")

Get json of tagged information:

{'text': 'Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions.  The Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert.', 'labels': [], 'entities': [{'text': 'Novetta', 'start_pos': 0, 'end_pos': 7, 'type': 'PROPN', 'confidence': 0.996782660484314}, {'text': 'Solutions', 'start_pos': 8, 'end_pos': 17, 'type': 'PROPN', 'confidence': 0.9987585544586182}, {'text': 'is', 'start_pos': 18, 'end_pos': 20, 'type': 'VERB', 'confidence': 0.9999998807907104}, {'text': 'the', 'start_pos': 21, 'end_pos': 24, 'type': 'DET', 'confidence': 0.999997615814209}, {'text': 'best', 'start_pos': 25, 'end_pos': 29, 'type': 'ADJ', 'confidence': 0.988896906375885}, {'text': '.', 'start_pos': 29, 'end_pos': 30, 'type': 'PUNCT', 'confidence': 0.9999998807907104}, {'text': 'Albert', 'start_pos': 31, 'end_pos': 37, 'type': 'PROPN', 'confidence': 0.9998212456703186}, {'text': 'Einstein', 'start_pos': 38, 'en

## Chunk

In [11]:
# Tag the example text with the "chunk" model
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="chunk",
)

2020-02-14 07:05:44,730 loading file /home/ubuntu/.flair/models/en-chunk-conll2000-v0.4.pt


In [12]:
# Show each sentence as plain text with its tags written inline
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <B-NP> Solutions <E-NP> is <S-VP> the <B-NP> best <E-NP> . Albert <B-NP> Einstein <E-NP> used <B-VP> to <I-VP> be <I-VP> employed <E-VP> at <S-PP> Novetta <B-NP> Solutions <E-NP> . The <B-NP> Wright <I-NP> brothers <E-NP> loved <B-VP> to <I-VP> visit <E-VP> the <B-NP> JBF <I-NP> headquarters <E-NP> , and they <S-NP> would <B-VP> have <E-VP> a <B-NP> chat <E-NP> with <S-PP> Albert <S-NP> .


In [13]:
# Print one line per chunk span.
# Note that the spans are requested with the "np" tag type here, even
# though the model was selected by the name "chunk".
print("List text/entities tagged:\n")
for sentence in sentences:
    chunk_spans = sentence.get_spans("np")
    for span in chunk_spans:
        print(span)

List text/entities tagged:

NP-span [1,2]: "Novetta Solutions"
VP-span [3]: "is"
NP-span [4,5]: "the best"
NP-span [7,8]: "Albert Einstein"
VP-span [9,10,11,12]: "used to be employed"
PP-span [13]: "at"
NP-span [14,15]: "Novetta Solutions"
NP-span [17,18,19]: "The Wright brothers"
VP-span [20,21,22]: "loved to visit"
NP-span [23,24,25]: "the JBF headquarters"
NP-span [28]: "they"
VP-span [29,30]: "would have"
NP-span [31,32]: "a chat"
PP-span [33]: "with"
NP-span [34]: "Albert"


## Frame

In [14]:
# Tag the example text with the "frame" model
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="frame",
)

2020-02-14 07:05:45,281 loading file /home/ubuntu/.flair/models/en-frame-ontonotes-v0.4.pt


In [15]:
# Show each sentence as plain text with its tags written inline
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <_> Solutions <_> is <be.01> the <_> best <_> . <_> Albert <_> Einstein <_> used <use.03> to <_> be <be.03> employed <employ.01> at <_> Novetta <_> Solutions <_> . <_> The <_> Wright <_> brothers <_> loved <love.02> to <_> visit <visit.01> the <_> JBF <_> headquarters <_> , <_> and <_> they <_> would <_> have <have.03> a <_> chat <chat.01> with <_> Albert <_> . <_>


## Fast Sequence Labeling
#### Make sure to check out some of Flair's "fast" CPU-oriented models, which produce results similar to the SOTA models

### NER

In [16]:
# Run the "ner-ontonotes-fast" model over the example text
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="ner-ontonotes-fast",
)

2020-02-14 07:05:46,098 loading file /home/ubuntu/.flair/models/en-ner-ontonotes-fast-v0.4.pt


In [17]:
# Print one line per span stored under the "ner" tag type
print("List entities tagged:\n")
for sentence in sentences:
    ner_spans = sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [7,8]: "Albert Einstein"
ORG-span [14,15]: "Novetta Solutions"
PERSON-span [18]: "Wright"
ORG-span [24]: "JBF"
PERSON-span [34]: "Albert"


## Tag Tokens with All Loaded Models At Once

#### After the tagging calls above, the tagger now has multiple models loaded
#### This means that a single call to `tag_all(text)` tags the text with all of the loaded models

In [18]:
# Tag the example text with all of the models loaded on `tagger`, in one call
sentences = tagger.tag_all(example_text)

In [19]:
# Print one line per span stored under the "ner" tag type
print("List entities tagged:\n")
for sentence in sentences:
    ner_spans = sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [7,8]: "Albert Einstein"
ORG-span [14,15]: "Novetta Solutions"
PERSON-span [18]: "Wright"
ORG-span [24]: "JBF"
PERSON-span [34]: "Albert"


In [20]:
# Print one line per span stored under the "pos" tag type
print("List entities tagged:\n")
for sentence in sentences:
    pos_spans = sentence.get_spans("pos")
    for span in pos_spans:
        print(span)

List entities tagged:

PROPN-span [1]: "Novetta"
PROPN-span [2]: "Solutions"
VERB-span [3]: "is"
DET-span [4]: "the"
ADJ-span [5]: "best"
PUNCT-span [6]: "."
PROPN-span [7]: "Albert"
PROPN-span [8]: "Einstein"
VERB-span [9]: "used"
PART-span [10]: "to"
VERB-span [11]: "be"
VERB-span [12]: "employed"
ADP-span [13]: "at"
PROPN-span [14]: "Novetta"
PROPN-span [15]: "Solutions"
PUNCT-span [16]: "."
DET-span [17]: "The"
PROPN-span [18]: "Wright"
NOUN-span [19]: "brothers"
VERB-span [20]: "loved"
PART-span [21]: "to"
VERB-span [22]: "visit"
DET-span [23]: "the"
PROPN-span [24]: "JBF"
NOUN-span [25]: "headquarters"
PUNCT-span [26]: ","
CCONJ-span [27]: "and"
PRON-span [28]: "they"
AUX-span [29]: "would"
VERB-span [30]: "have"
DET-span [31]: "a"
NOUN-span [32]: "chat"
ADP-span [33]: "with"
PROPN-span [34]: "Albert"
PUNCT-span [35]: "."


In [21]:
# Print one line per span stored under the "np" tag type
print("List entities tagged:\n")
for sentence in sentences:
    chunk_spans = sentence.get_spans("np")
    for span in chunk_spans:
        print(span)

List entities tagged:

NP-span [1,2]: "Novetta Solutions"
VP-span [3]: "is"
NP-span [4,5]: "the best"
NP-span [7,8]: "Albert Einstein"
VP-span [9,10,11,12]: "used to be employed"
PP-span [13]: "at"
NP-span [14,15]: "Novetta Solutions"
NP-span [17,18,19]: "The Wright brothers"
VP-span [20,21,22]: "loved to visit"
NP-span [23,24,25]: "the JBF headquarters"
NP-span [28]: "they"
VP-span [29,30]: "would have"
NP-span [31,32]: "a chat"
PP-span [33]: "with"
NP-span [34]: "Albert"
