In [None]:
from termcolor import colored
import pickle
import os
from nlp.pipeline.data.readers.conll03_reader import CoNLL03Ontology as Ont
from nlp.pipeline.pipeline import Pipeline
from nlp.pipeline.processors.impl.tokenization_predictor import NLTKWordTokenizer
from nlp.pipeline.processors.impl.sentence_predictor import NLTKSentenceSegmenter
from nlp.pipeline.processors.impl.postag_predictor import NLTKPOSTagger
from nlp.pipeline.processors.impl.srl_predictor import SRLPredictor
from nlp.pipeline.processors.impl.ner_predictor import CoNLLNERPredictor

# Create the pipeline

In [None]:
# Instantiate the pipeline with no constructor arguments; processors are
# appended to `pl.processors` in the cells that follow.
pl = Pipeline()

# The pipeline can wrap any external tool; for example, here we wrap some NLTK tools.

In [None]:
# Register the NLTK-backed processors in the order they are listed:
# sentence segmentation, then word tokenization, then POS tagging.
for nltk_processor_cls in (NLTKSentenceSegmenter, NLTKWordTokenizer, NLTKPOSTagger):
    pl.processors.append(nltk_processor_cls())

# We now load our own NER predictor

In [None]:
# Read the NER config and resources.
# NOTE: this absolute path is machine-specific; point it at your local copy
# of the NER model directory before running.
ner_path = '/home/hector/models/NER_model/'

# Open the resources file in a `with` block so the handle is closed as soon as
# loading finishes (a bare `pickle.load(open(...))` leaves it open).
# WARNING: unpickling can execute arbitrary code -- only load files you trust.
with open(os.path.join(ner_path, 'resources.pkl'), 'rb') as resource_file:
    ner_resource = pickle.load(resource_file)

# Initialize the NER predictor with the loaded resources.
ner_predictor = CoNLLNERPredictor()
ner_predictor.initialize(ner_resource)

# Add it to the processors.
pl.processors.append(ner_predictor)

# And here is our SRL predictor

In [None]:
# Directory passed to the SRL predictor as `model_dir`.
srl_model_dir = "/home/hector/models/SRL_model/"

# Build the predictor, then register it with the pipeline.
srl_predictor = SRLPredictor(model_dir=srl_model_dir)
pl.processors.append(srl_predictor)

# Our pipeline is ready. Now let's try out some text snippets.

In [None]:
# Sample inputs for the pipeline.
# The fragments are joined by implicit string-literal concatenation inside
# parentheses rather than by backslash continuations: a trailing backslash
# after the last fragment silently glues the statement to whatever line
# follows it, which is easy to break when editing the cell.
search_engine_text = (
    "A Scottish firm is looking to attract web surfers with a search engine that reads out results."
    " Called Speegle, it has the look and feel of a normal search engine, with the added feature of being able to read"
    " out the results. Scottish speech technology firm CEC Systems launched the site in November. But experts have"
    " questioned whether talking search engines are of any real benefit to people with visual impairments. The"
    " Edinburgh-based firm CEC has married speech technology with ever-popular internet search. The ability to search is"
    " becoming increasingly crucial to surfers baffled by the huge amount of information available on the web."
)

win_medal_text = (
    "British hurdler Sarah Claxton is confident she can win her first major medal at next "
    "month's European Indoor Championships in Madrid."
)

# Process this snippet with one simple command.

In [None]:
# Run the sample text through every processor registered on the pipeline's
# `processors` list; the returned object is queried with `pack.get(...)` in
# the cells below.
pack = pl.process(win_medal_text)

# Now all the results are ready.
## We have added the results as "entries" into our data.
## Let's take a look at the sentences.

In [None]:
# Print the text of every sentence entry in the pack, each followed by a
# blank line.
for sent in pack.get(Ont.Sentence):
    print(colored("Sentence:", 'red'), sent.text, "\n")

# We can access more fine-grained data in the sentences using our magical "get" function.
## Let's get all the tokens in the first sentence and print out their Part-of-Speech value.

In [None]:
# Show (text, POS tag) pairs for the tokens of the first sentence only.
for first_sentence in pack.get(Ont.Sentence):
    token_tags = []
    # Passing the sentence as the second argument restricts the query to it.
    for tok in pack.get(Ont.Token, first_sentence):
        token_tags.append((tok.text, tok.pos_tag))
    print(colored("Tokens:", 'red'), token_tags, "\n")
    break  # stop after the first sentence

## Similarly, we can get all the named entities in the sentences. Let's look at their types.

In [None]:
# For each sentence, list its entity mentions together with their NER type.
for sent in pack.get(Ont.Sentence):
    for mention in pack.get(Ont.EntityMention, sent):
        label = colored("EntityMention:", 'red')
        mention_text = mention.text
        type_label = colored(mention.ner_type, 'blue')
        print(label, mention_text, 'has type', type_label, "\n")

## With this simple "get" function we can do a lot more. Let's see how one can play with semantic role labeling and NER at the same time.

In [None]:
# For each sentence, print its predicate links and the entity mentions found
# inside each link's child span.
for sentence in pack.get(Ont.Sentence):
    print(colored("Semantic role labels:", 'red'))
    # Query only the links that fall within this sentence.
    for link in pack.get(Ont.PredicateLink, sentence):
        parent, child = link.get_parent(), link.get_child()
        print(f"  - \"{child.text}\" is role {link.arg_type} of predicate \"{parent.text}\"")
        # Collect the text of every entity mention located within the child span.
        mentions_in_argument = []
        for mention in pack.get(Ont.EntityMention, child):
            mentions_in_argument.append(mention.text)
        print("      Entities in predicate argument:", mentions_in_argument, "\n")
    print()

In [None]:
# Lazily walk every entity mention, sentence by sentence, and list the tokens
# that each mention covers.
entity_mentions = (
    entity
    for sentence in pack.get(Ont.Sentence)
    for entity in pack.get(Ont.EntityMention, sentence)
)
for entity in entity_mentions:
    print(f"Entity: {entity.text}")
    for token in pack.get(Ont.Token, entity):
        print(f"Has token {token.text}")