In [1]:
# Use the public IPython.display API; importing `display` from
# IPython.core.display is a deprecated path.
from IPython.display import display, HTML
# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline  

import numpy as np

In [11]:
from fnsa.lexicon import get_en2scope, Lexicon
from fnsa.scope import DRScopeDetector, IFScopeDetector, score_candidate
from fnsa.graph import make_graph
from fnsa.util import *
import spacy
# Load spaCy's small English pipeline; it is passed to Lexicon below.
nlp = spacy.load('en_core_web_sm')
# Example sentences used by the demo cells.
# NOTE(review): load_sentences is presumably provided by the wildcard import
# from fnsa.util -- confirm, and consider importing it explicitly.
sentences = load_sentences()

In [12]:
# Callable built on the spaCy pipeline; show_detection calls it on a raw
# sentence to obtain the doc that is displayed and passed to the detectors.
lexicon = Lexicon(nlp)
# Scope detectors, each invoked as detector(doc, graph, lex2tokens).
dr_detector = DRScopeDetector()
if_detector = IFScopeDetector()

In [13]:
def show_detection(sentence, detectors=None):
    """Annotate ``sentence``, run scope detectors over it, and display the result.

    Parameters
    ----------
    sentence : str
        Raw sentence text; passed to the module-level ``lexicon`` callable.
    detectors : iterable of callables, optional
        Each one is invoked as ``detector(doc, graph, lex2tokens)``, in order.
        When omitted (or ``None``), the current module-level ``dr_detector``
        and ``if_detector`` are used, in that order.

    Returns
    -------
    The doc produced by ``lexicon(sentence)``, after all detectors have run.
    """
    if detectors is None:
        # Resolve the defaults at call time rather than at definition time, so
        # that re-creating the detectors (e.g. re-running their cell, or an
        # autoreload) is picked up without having to redefine this function.
        detectors = (dr_detector, if_detector)
    doc = lexicon(sentence)
    # Show the annotated text and parse tree before any detector runs.
    show(doc, index=None, include_text=True, include_tree=True)
    graph = make_graph(doc)
    lex2tokens = build_lex2tokens(doc)
    for detector in detectors:
        detector(doc, graph, lex2tokens)
    # Show the per-token table after the detectors have been applied.
    show_tokens(doc, include_annotations=False, include_text=False)
    return doc
# Run the detection demo on the last loaded sentence only (the [-1:] slice
# yields nothing, rather than raising, if no sentences were loaded).
for sentence in sentences[-1:]: 
    doc = show_detection(sentence)
    print("\n")

The acquisition would give CIBC control of FirstCaribbean with a stake of 87.4 percent.
 The acquisition [would/if:mod] give [CIBC/xe:org] control of [FirstCaribbean/xe:org] with a [stake/fe:+] of [87.4 percent/xe:per] .
                  give                                              
   ________________|________________________________                 
  |        |              |                        with             
  |        |              |                         |                
  |        |           control                    stake             
  |        |        ______|__________           ____|________        
  |   acquisition  |                 of        |             of     
  |        |       |                 |         |             |       
would     The     CIBC         FirstCaribbean  a        87.4 percent

   [nd||?]                  The -->> =||=||?
   [nd||?]          acquisition -->> =||=||?
 [if||mod]                would -->> =||=||?
   [nd||?]        

## Bug 01. Directionality Scope Fail - PATCHED!

In the sentence

> The number of collection errors fell considerably, and operations speeded up.

the scope of the directionality term "fell" should be the financial-entity phrase "collection errors".


In [14]:
from fnsa.graph import make_graph, get_distance
from fnsa.util import build_lex2tokens, DEFAULT_LEX, EXTRACTED_ENTITY_LEX
import numpy as np

In [15]:
# Bug 01 reproduction: run both detectors on the sentence described above.
sentence = "The number of collection errors fell considerably, and operations speeded up."
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

The number of collection errors fell considerably, and operations speeded up.
 The number of [collection errors/fe:-] [fell/dr:-] [considerably/lm:=] , and [operations/fe:+] [speeded up/dr:+] .
                 fell                                   
      ____________|_______________________________       
     |        |       number                      |     
     |        |    _____|___________              |      
     |        |   |                 of        speeded up
     |        |   |                 |             |      
considerably and The         collection error operations
                                    s                   

   [nd||?]                  The -->> =||=||?
   [nd||?]               number -->> =||=||?
   [nd||?]                   of -->> =||=||?
   [fe||-]    collection errors -->> -||=||?
   [dr||-]                 fell -->> =||=||?
   [lm||=]         considerably -->> =||=||?
   [nd||?]                    , -->> =||=||?
   [nd||?]                  and

## Bug 02. Influencer Scope Fail - PATCHED!

In the sentence

> Despite the difficult conditions, the operating profit has jumped 36%.

the scope of the reversal influencer "Despite" should be the LM term "difficult".

In [16]:
# Bug 02 reproduction: run both detectors on the sentence described above.
sentence = "Despite the difficult conditions, the operating profit has jumped 36%."
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

Despite the difficult conditions, the operating profit has jumped 36%.
 [Despite/if:rev] the [difficult/lm:-] conditions , the [operating profit/fe:+] has [jumped/dr:+] [36%/xe:per] .
        jumped                                      
  ________|________________________________          
 |   |          Despite                    |        
 |   |             |                       |         
 |   |         conditions           operating profit
 |   |     ________|__________             |         
has 36%  the              difficult       the       

 [if||rev]              Despite -->> =||=||?
   [nd||?]                  the -->> =||=||?
   [lm||-]            difficult -->> =||rev||?
   [nd||?]           conditions -->> =||=||?
   [nd||?]                    , -->> =||=||?
   [nd||?]                  the -->> =||=||?
   [fe||+]     operating profit -->> +||rev||?
   [nd||?]                  has -->> =||=||?
   [dr||+]               jumped -->> =||=||?
 [xe||per]                  36% 

## A Simple Example

In [17]:
# Baseline example: run both detectors on a simple forecast sentence.
sentence = "Google expects its EBIT in 2016 to increase 5-10% from 2015."
doc = show_detection(sentence, detectors=[dr_detector, if_detector])

Google expects its EBIT in 2016 to increase 5-10% from 2015.
 [Google/xe:org] expects its [EBIT/fe:+] in [2016/xe:dat] to [increase/dr:+] [5-10%/xe:per] from [2015/xe:dat] .
            expects                  
   ____________|___________           
  |     |      |        increase     
  |     |      |      _____|______    
  |    EBIT    in    |     |     from
  |     |      |     |     |      |   
Google its    2016   to  5-10%   2015

 [xe||org]               Google -->> =||=||?
   [nd||?]              expects -->> =||=||?
   [nd||?]                  its -->> =||=||?
   [fe||+]                 EBIT -->> +||=||?
   [nd||?]                   in -->> =||=||?
 [xe||dat]                 2016 -->> =||=||?
   [nd||?]                   to -->> =||=||?
   [dr||+]             increase -->> =||=||?
 [xe||per]                5-10% -->> =||=||?
   [nd||?]                 from -->> =||=||?
 [xe||dat]                 2015 -->> =||=||?
   [nd||?]                    . -->> =||=||?


## Miscellaneous Stuff - Can be deleted.

In [None]:
import networkx as nx
def make_dir_graph(doc, include_punctuation=False):
    """Build a directed head -> child graph from the tokens of ``doc``.

    Nodes are keyed by the token index as a string and carry a ``text``
    attribute of the form ``"<index>-<token text>"``. Each edge runs from a
    token's head to the token and carries the token's ``dep_`` label as its
    ``dep`` attribute.

    Tokens that are their own head are never added as children. Tokens whose
    ``pos_`` is ``'PUNCT'`` are skipped as children unless
    ``include_punctuation`` is true.
    """
    def ensure_node(digraph, tok):
        # Register the token once; an existing node keeps its original label.
        key = str(tok.i)
        if not digraph.has_node(key):
            digraph.add_node(key, text="%s-%s" % (key, tok.text))
        return key

    dep_graph = nx.DiGraph()
    for child in doc:
        parent = child.head
        if parent.i == child.i:
            # Token is its own head: no incoming edge to add.
            continue
        if not include_punctuation and child.pos_ == 'PUNCT':
            continue
        # Head node first, then the child, then the edge between them.
        head_key = ensure_node(dep_graph, parent)
        child_key = ensure_node(dep_graph, child)
        dep_graph.add_edge(head_key, child_key, dep=child.dep_)
    return dep_graph

In [None]:
# Rebuild the intermediate structures for the most recent `doc` so the
# detector steps can be run by hand in the cells below.
graph = make_graph(doc)
# Directed variant of the dependency graph; not used by the cells visible below.
dir_graph = make_dir_graph(doc)
lex2tokens = build_lex2tokens(doc)

In [None]:
# Ask the influencer detector for its source tokens and candidate targets in
# `doc`, and print both for inspection.
sources = if_detector.get_sources(doc, lex2tokens)
print("sources:", sources)
targets = if_detector.get_targets(doc, lex2tokens)
print("targets:", targets)

In [None]:
def score(graph, source, candidate):
    """Return the smaller of the two directed distances between the tokens.

    ``get_distance`` is evaluated source -> candidate first, then
    candidate -> source, and the minimum of the two results is returned.
    """
    return min(
        get_distance(graph, source, candidate),
        get_distance(graph, candidate, source),
    )
# Build one (source, candidate, score, token distance) entry for every target
# that lies within the influencer detector's max_delta of a source.
scores = []
for source in sources:
    print(source)
    candidates = [
        target
        for target in targets
        if np.abs(target.i - source.i) < if_detector.max_delta
    ]
    for candidate in candidates:
        graph_score = score_candidate(graph, source, candidate)
        token_gap = np.abs(candidate.i - source.i)
        scores.append((source, candidate, graph_score, token_gap))
# Order by the last two fields: score first, token distance as tie-breaker.
scores.sort(key=lambda item: item[-2:])
scores

In [None]:
# Greedy assignment: walk the pairs in ascending (score, token distance)
# order and let the first source that reaches a target claim it.
assigned = set()
scores = sorted(scores, key=lambda item: item[-2:])
# Each entry is a 4-tuple (source, candidate, score, token distance), so all
# four fields must be unpacked; unpacking only three raises ValueError.
# The score is bound to `pair_score` so the `score` function is not clobbered.
for source, candidate, pair_score, delta in scores:
    print(source, candidate, pair_score, candidate._.direction, source._.category)
    if candidate.i in assigned:
        # Target already claimed by a better-ranked source.
        continue
    print("Assigning %s to %s." % (source, candidate))
    # NOTE(review): sources/targets were obtained from if_detector, yet the
    # assignment goes through dr_detector.apply -- confirm this is intended.
    dr_detector.apply(source, candidate)
    assigned.add(candidate.i)

In [None]:
# Re-display the doc (text + parse tree) and its token table to inspect the
# effect of the manual assignment performed above.
show(doc, index=None, include_text=True, include_tree=True)
show_tokens(doc, include_annotations=False, include_text=False)