In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
from termcolor import colored
from texar.torch import HParams

from forte import Pipeline
from forte.data.ontology import conll03_ontology
from forte.data.ontology.conll03_ontology import (
    Token, Sentence, EntityMention, PredicateLink)
from forte.data.readers import StringReader
from forte.processors import (
    NLTKWordTokenizer, NLTKSentenceSegmenter, NLTKPOSTagger, SRLPredictor, 
    CoNLLNERPredictor)

# Create the pipeline here:

## In a pipeline, processors should follow a consistent ontology.

In [None]:
# Start with an empty pipeline; its reader and processors are attached in the cells below.
pl = Pipeline()

# Set the reader of the pipeline

In [None]:
# Use a StringReader as the pipeline's reader: `pl.process` is later called directly with a Python string.
pl.set_reader(StringReader())

# Add processors
## The processors can wrap any external tools. For example, we are wrapping some NLTK tools.

In [None]:
# Register the NLTK-backed processors in this order:
# sentence segmentation, then word tokenization, then POS tagging.
for nltk_processor_cls in (NLTKSentenceSegmenter, NLTKWordTokenizer, NLTKPOSTagger):
    pl.add_processor(nltk_processor_cls())

# We now load our own NER predictor

In [None]:
# Directory holding the trained NER model. It defaults to the original location;
# set the NER_MODEL_DIR environment variable to use a copy stored elsewhere
# instead of editing this cell.
ner_model_dir = os.environ.get('NER_MODEL_DIR', '/home/hector/models/NER_model')

# Only `storage_path` is overridden here; the predictor's default hparams are
# passed as the second argument.
ner_configs = HParams(
    {
        'storage_path': os.path.join(ner_model_dir, 'resources.pkl')
    },
    CoNLLNERPredictor.default_hparams())

pl.add_processor(CoNLLNERPredictor(), ner_configs)

# And here is our SRL predictor

In [None]:
# Directory holding the trained SRL model. It defaults to the original location;
# set the SRL_MODEL_DIR environment variable to use a copy stored elsewhere
# instead of editing this cell.
srl_model_dir = os.environ.get('SRL_MODEL_DIR', '/home/hector/models/SRL_model/')

# Only `storage_path` is overridden here; the predictor's default hparams are
# passed as the second argument.
srl_configs = HParams(
    {
        'storage_path': srl_model_dir,
    },
    SRLPredictor.default_hparams()
)
pl.add_processor(SRLPredictor(), srl_configs)

In [None]:
# Initialize every processor added to the pipeline above; text is only processed after this call.
pl.initialize_processors()

# Our pipeline is ready, now let's try out some text snippets.

In [None]:
# Sample news snippets used as pipeline input. Adjacent string literals inside
# the parentheses are concatenated into a single string, so no line-continuation
# backslashes are needed and each statement ends at its closing parenthesis.
search_engine_text = (
    "A Scottish firm is looking to attract web surfers with a search engine that reads out results."
    " Called Speegle, it has the look and feel of a normal search engine, with the added feature of being able to read"
    " out the results. Scottish speech technology firm CEC Systems launched the site in November. But experts have"
    " questioned whether talking search engines are of any real benefit to people with visual impairments. The"
    " Edinburgh-based firm CEC has married speech technology with ever-popular internet search. The ability to search is"
    " becoming increasingly crucial to surfers baffled by the huge amount of information available on the web."
)

win_medal_text = (
    "British hurdler Sarah Claxton is confident she can win her first major medal at next "
    "month's European Indoor Championships in Madrid. Claxton will see if her new training "
    "regime pays dividends at the European Indoors which take place on 5-6 March."
)

# Process this snippet with one simple command.

In [None]:
# Run the full pipeline on one snippet; the returned `pack` holds the entries queried in the cells below.
pack = pl.process(win_medal_text)

# Now all the results are ready.
## We have added the results as "entries" into our data.
## Let's first take a look at the sentences.

In [None]:
# `pack.get(Sentence)` iterates over the sentences in this pack;
# print the text of each one behind a red label.
sentence_label = colored("Sentence:", 'red')
for sentence in pack.get(Sentence):
    print(sentence_label, sentence.text, "\n")

# We can access more fine-grained data in the sentences using our magical "get" function.
## Let's get all the tokens in the first sentence and print out their Part-of-Speech value.

In [None]:
# Only the first sentence of the pack is inspected in this cell.
first_sentence = next(iter(pack.get(Sentence)), None)
if first_sentence is not None:
    # Passing the sentence as the second argument restricts the query to
    # tokens in the span of that sentence.
    tagged_tokens = []
    for token in pack.get(Token, first_sentence):
        tagged_tokens.append((token.text, token.pos_tag))
    print(colored("Tokens:", 'red'), tagged_tokens, "\n")

## Similarly, we can get all the named entities in the first sentence. Let's look at their types.

In [None]:
# Only the first sentence of the pack is inspected in this cell.
first_sentence = next(iter(pack.get(Sentence)), None)
if first_sentence is not None:
    mention_label = colored("EntityMention:", 'red')
    # Entity mentions in the span of the sentence, each with its NER type in blue.
    for entity in pack.get(EntityMention, first_sentence):
        entity_type = colored(entity.ner_type, 'blue')
        print(mention_label, entity.text, 'has type', entity_type, "\n")

## With this simple "get" function we can do a lot more. Let's see how one can play with semantic role labeling and NER at the same time.

In [None]:
# Only the first sentence of the pack is inspected in this cell.
first_sentence = next(iter(pack.get(Sentence)), None)
if first_sentence is not None:
    print(colored("Semantic role labels:", 'red'))
    # Each PredicateLink in the sentence connects a predicate (its parent)
    # to one of that predicate's arguments (its child).
    for link in pack.get(PredicateLink, first_sentence):
        parent = link.get_parent()
        child = link.get_child()
        print(f"  - \"{child.text}\" is role {link.arg_type} of predicate \"{parent.text}\"")
        # Collect the entity mentions in the span of the argument.
        arg_entities = []
        for entity in pack.get(EntityMention, child):
            arg_entities.append(entity.text)
        print("      Has entities:", arg_entities, "\n")

In [None]:
# For every entity mention in every sentence, list the tokens in the span of the mention.
mention_label = colored("EntityMention:", 'red')
for sentence in pack.get(Sentence):
    for entity in pack.get(EntityMention, sentence):
        print(mention_label, entity.text)
        entity_tokens = [tok.text for tok in pack.get(Token, entity)]
        print("    Has tokens:", entity_tokens, "\n")