In [1]:
# Widen the notebook container to the full browser width.
# `display` and `HTML` are taken from the public `IPython.display` module:
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Reload edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
# Render matplotlib figures inside the notebook output.
%matplotlib inline  

import numpy as np

In [2]:
from fnsa.lexicon import get_en2scope, Lexicon
from fnsa.scope import DRScopeDetector, IFScopeDetector, score_candidate
from fnsa.graph import make_graph
from fnsa.util import *
import spacy
# Load the small English spaCy pipeline; it is handed to Lexicon in the next cell.
nlp = spacy.load('en_core_web_sm')
# `load_sentences` is not imported by name; presumably it comes from the
# `fnsa.util` star import above -- TODO confirm.
sentences = load_sentences()

In [3]:
# Shared objects used by every example below.
# `lexicon` is called on raw sentences (see show_detection) to produce `doc`.
lexicon = Lexicon(nlp)
# Presumably DR = directionality and IF = influencer, matching the section
# titles further down -- TODO confirm against fnsa.scope.
dr_detector = DRScopeDetector()
if_detector = IFScopeDetector()

In [4]:
def show_detection(sentence, detectors=None):
    """Annotate a sentence, run scope detectors on it, and display the result.

    Parameters
    ----------
    sentence : str
        Raw sentence text; it is passed to the module-level ``lexicon``.
    detectors : iterable of callables, optional
        Each detector is called as ``detector(doc, graph, lex2tokens)``.
        When omitted (``None``), the current module-level ``dr_detector`` and
        ``if_detector`` are used, in that order. An empty iterable runs no
        detectors.

    Returns
    -------
    The ``doc`` object returned by ``lexicon(sentence)`` after the detectors
    have been called on it.
    """
    if detectors is None:
        # Resolve the defaults at call time rather than at definition time, so
        # re-running the cell that constructs the detectors takes effect here
        # without having to re-run this cell, and no mutable list is shared
        # between calls.
        detectors = [dr_detector, if_detector]

    doc = lexicon(sentence)
    # First display: text and parse tree, before the detectors are called.
    show(doc, index=None, include_text=True, include_tree=True)

    graph = make_graph(doc)
    lex2tokens = build_lex2tokens(doc)
    for detector in detectors:
        detector(doc, graph, lex2tokens)

    # Second display: per-token view after the detectors have been called.
    show_tokens(doc, include_annotations=False, include_text=False)
    return doc
# Run the detectors on the last loaded sentence only; the slice (rather than
# sentences[-1]) means an empty list simply runs nothing.
for sentence in sentences[-1:]: 
    doc = show_detection(sentence)
    print("\n")

The acquisition would give CIBC control of FirstCaribbean with a stake of 87.4 percent.
 The acquisition [would/if:mod] give [CIBC/xe:org] control of [FirstCaribbean/xe:org] with a [stake/fe:+] of [87.4 percent/xe:per] .
                  give                                              
   ________________|________________________________                 
  |        |              |                        with             
  |        |              |                         |                
  |        |           control                    stake             
  |        |        ______|__________           ____|________        
  |   acquisition  |                 of        |             of     
  |        |       |                 |         |             |       
would     The     CIBC         FirstCaribbean  a        87.4 percent

   [nd||?]                  The -->> =||=||?
   [nd||?]          acquisition -->> =||=||?
 [if||mod]                would -->> =||=||?
   [nd||?]        

## Scope Too Wide Bug - PATCHED!

In the sentence

> Comptel Corporation has received a significant long-term order as the mediation process wraps up.

the litigious influencer "mediation" should only apply to one financial entity.

In [5]:
from fnsa.graph import make_graph, get_distance
from fnsa.util import build_lex2tokens, DEFAULT_LEX, EXTRACTED_ENTITY_LEX
import numpy as np

In [8]:
# Alternative probe sentence for the "mediation" case; it used to be assigned
# here and immediately overwritten, so it never ran. Swap it in to test it:
#   'Comptel Corporation has made significant improvements even while the long-term order is under mediation.'
sentence = 'The move was triggered by weak demand for forestry equipment and the uncertain market situation .'
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

The move was triggered by weak demand for forestry equipment and the uncertain market situation .
 The [move/lm:=] was triggered by [weak/lm:-] [demand/fe:+] for forestry equipment and the [uncertain/if:mod] [market situation/fe:+] .
              triggered                                         
  ________________|_________                                     
 |   |                      by                                  
 |   |                      |                                    
 |   |                    demand                                
 |   |     _________________|________________                    
 |   |    |       |        for               |                  
 |   |    |       |         |                |                   
 |  move  |       |     equipment     market situation          
 |   |    |       |         |       _________|_____________      
was The  weak    and     forestry the                  uncertain

   [nd||?]                  The -->> =||=||?


In [None]:
# Rebuild the detector inputs for the current `doc` (show_detection keeps its
# own copies local, so they are not available here otherwise).
lex2tokens = build_lex2tokens(doc)
graph = make_graph(doc)
# List the sources and targets that if_detector extracts from `doc`.
sources = if_detector.get_sources(doc, lex2tokens)
print("sources:", sources)
targets = if_detector.get_targets(doc, lex2tokens)
print("targets:", targets)

In [None]:
# Score every in-range (source, target) pair, then pair them greedily so that
# each source and each target is used at most once.
sources = if_detector.get_sources(doc, lex2tokens)
targets = if_detector.get_targets(doc, lex2tokens)

scores = []
for source in sources:
    # Keep only targets whose token index is strictly within max_delta of the source.
    candidates = [t for t in targets if np.abs(t.i - source.i) < if_detector.max_delta]
    scores.extend(
        (source, candidate, score_candidate(graph, source, candidate), np.abs(candidate.i - source.i))
        for candidate in candidates
    )
# Ascending by (score, token distance): the lowest-valued pairs are consumed first.
scores.sort(key=lambda item: item[-2:])

used_candidates, used_sources = set(), set()
for source, candidate, *rest in scores:
    already_paired = candidate.i in used_candidates or source.i in used_sources
    if already_paired:
        continue
    if_detector.apply(source, candidate)
    used_candidates.add(candidate.i)
    used_sources.add(source.i)



## Directionality Scope Bug - PATCHED!

In the sentence

> The number of collection errors fell considerably, and operations speeded up.

the scope of the directionality "fell" should be the financial entity phrase "collection errors."


In [None]:
# Example for the directionality-scope case described in the text above.
sentence = "The number of collection errors fell considerably, and operations speeded up."
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

## Influencer Scope Bug - PATCHED!

In the sentence

> Despite the difficult conditions, the operating profit has jumped 36%.

the scope of the reversal influencer "Despite" should be the LM term "difficult."

In [None]:
# Example for the influencer-scope case described in the text above.
sentence = "Despite the difficult conditions, the operating profit has jumped 36%."
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

## A Simple Example

In [None]:
# Simple example sentence run through both detectors; `doc` is reused by the
# scratch cells below.
sentence = "Google expects its EBIT in 2016 to increase 5-10% from 2015."
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

## Miscellaneous Stuff - Can be deleted.

In [None]:
import networkx as nx
def make_dir_graph(doc, include_punctuation=False):
    """Build a directed head -> child graph from the tokens of ``doc``.

    Nodes are keyed by the token index as a string and carry a ``text``
    attribute of the form ``"<index>-<token text>"``. Each edge runs from a
    token's head to the token and carries the dependency label as ``dep``.
    Tokens that are their own head are skipped as children, and so are tokens
    with ``pos_ == 'PUNCT'`` unless ``include_punctuation`` is true.
    """
    graph = nx.DiGraph()
    for token in doc:
        head = token.head
        if head.i == token.i:
            # A token that is its own head contributes no incoming edge.
            continue
        if not include_punctuation and token.pos_ == 'PUNCT':
            continue
        # Register the head first, then the child, only if not yet present.
        for member in (head, token):
            key = str(member.i)
            if key not in graph:
                graph.add_node(key, text="%s-%s" % (key, member.text))
        graph.add_edge(str(head.i), str(token.i), dep=token.dep_)
    return graph

In [None]:
# Build the project graph, the directed scratch graph and the lexicon lookup
# for whichever `doc` was produced last.
graph = make_graph(doc)
dir_graph = make_dir_graph(doc)
lex2tokens = build_lex2tokens(doc)

In [None]:
# List the sources and targets that if_detector extracts from the current `doc`.
sources = if_detector.get_sources(doc, lex2tokens)
print("sources:", sources)
targets = if_detector.get_targets(doc, lex2tokens)
print("targets:", targets)

In [None]:
def score(graph, source, candidate):
    """Return the smaller of the two directed distances between the tokens.

    ``get_distance`` is evaluated source -> candidate first, then
    candidate -> source; on a tie the source -> candidate value is returned.
    This helper is not called in the visible cells, which use
    ``score_candidate`` instead.
    """
    forward = get_distance(graph, source, candidate)
    backward = get_distance(graph, candidate, source)
    return backward if backward < forward else forward
# Collect (source, candidate, score, token distance) for every in-range pair.
scores = []
for source in sources:
    print(source)
    # Keep only targets whose token index is strictly within max_delta of the source.
    candidates = [t for t in targets if np.abs(t.i - source.i) < if_detector.max_delta]
    scores.extend(
        (source, candidate, score_candidate(graph, source, candidate), np.abs(candidate.i - source.i))
        for candidate in candidates
    )
# Ascending by the last two fields: score first, token distance as tie-breaker.
scores.sort(key=lambda item: item[-2:])
scores

In [None]:
# Walk the scored pairs in ascending (score, token distance) order and apply
# the first pair seen for each candidate; later pairs for it are skipped.
assigned = set()
scores = sorted(scores, key=lambda item: item[-2:])
# Each item built in the previous cell is a 4-tuple
# (source, candidate, score, distance); unpacking it into three names raised
# ValueError. The score is bound to `candidate_score` so the loop no longer
# shadows the score() helper defined above.
# NOTE(review): sources/targets come from if_detector but the pair is applied
# with dr_detector -- confirm this is intentional.
for source, candidate, candidate_score, distance in scores:
    print(source, candidate, candidate_score, candidate._.direction, source._.category)
    if candidate.i in assigned:
        continue
    print("Assigning %s to %s." % (source, candidate))
    dr_detector.apply(source, candidate)
    assigned.add(candidate.i)

In [None]:
# Re-render `doc` (text, tree, then the per-token view) to inspect it after
# the manual assignment in the previous cell.
show(doc, index=None, include_text=True, include_tree=True)
show_tokens(doc, include_annotations=False, include_text=False)