In [32]:
import spacy
# Load the en_core_web_lg pipeline; `nlp` is the shared object the following cells call.
nlp = spacy.load("en_core_web_lg")
# Append the "merge_entities" component. add_pipe is the cell's last expression,
# so its return value is what the notebook echoes as this cell's output.
nlp.add_pipe("merge_entities")

<function spacy.pipeline.functions.merge_entities(doc: spacy.tokens.doc.Doc)>

In [33]:
# Parse a sample sentence and render the result with displaCy's "dep" style.
doc = nlp("A leak was noticed from the RCP pump 1A.")
spacy.displacy.render(doc, style="dep")

In [34]:
# Parse and render a sentence that ends in the two-word phrase "not operating".
doc = nlp("RCP pump 1A pressure gauge was found not operating.")
spacy.displacy.render(doc, style="dep")

In [10]:
# Same sentence as the previous cell, except it ends in "inoperative" instead of "not operating".
doc = nlp("RCP pump 1A pressure gauge was found inoperative.")
spacy.displacy.render(doc, style="dep")

In [11]:
# Parse and render a sentence built around "caused", with a single stated effect.
doc = nlp("Rupture of pump bearings caused shaft degradation.")
spacy.displacy.render(doc, style="dep")

In [12]:
# Previous sentence extended with a second effect joined by "and".
doc = nlp("Rupture of pump bearings caused shaft degradation and consequent flow reduction.")
spacy.displacy.render(doc, style="dep")

In [13]:
# Parse and render a sentence that ends in the bare word "burnout".
doc = nlp("Pump power supply has been found burnout.")
spacy.displacy.render(doc, style="dep")

In [14]:
# Parse and render a sentence that states its cause with "due to".
doc = nlp("Pump test failed due to power supply failure.")
spacy.displacy.render(doc, style="dep")

In [15]:
# Parse and render a sentence reporting an inspection finding.
doc = nlp("Pump inspection revealed excessive impeller degradation.")
spacy.displacy.render(doc, style="dep")

In [16]:
# Previous sentence extended with "likely due to cavitation".
doc = nlp("Pump inspection revealed excessive impeller degradation likely due to cavitation.")
spacy.displacy.render(doc, style="dep")

In [19]:
# Same sentence as the previous cell, rendered with style="ent" instead of style="dep".
doc = nlp("Pump inspection revealed excessive impeller degradation likely due to cavitation.")
spacy.displacy.render(doc, style="ent")



In [27]:
# Remove any ruler / merger left over from an earlier run so the cell can be re-executed.
for pipe_name in ("entity_ruler", "merge_entities"):
    if nlp.has_pipe(pipe_name):
        nlp.remove_pipe(pipe_name)

# Register one exact-phrase ORG pattern, then put the entity merger back at the end.
patterns = [{"label": "ORG", "pattern": "MyCorp Inc."}]
ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns(patterns)
nlp.add_pipe("merge_entities")

doc = nlp("MyCorp Inc. is a company in the U.S.")
entity_pairs = [(span.text, span.label_) for span in doc.ents]
print(entity_pairs)
spacy.displacy.render(doc, style="ent")

[('MyCorp Inc.', 'ORG'), ('U.S.', 'GPE')]


In [28]:

# Re-render the `doc` left by the previous cell (the "MyCorp Inc." sentence) with the "dep" style.
spacy.displacy.render(doc, style='dep')

In [23]:
# Parse the person/organization example sentence with the current `nlp` pipeline.
doc = nlp("Alex Smith worked at Acme Corp Inc.")
# If you're not in a Jupyter / IPython environment, use displacy.serve
# No style is passed, so displaCy's default style applies; "fine_grained" asks the
# renderer to show fine-grained tags (Token.tag_) rather than coarse ones.
spacy.displacy.render(doc, options={"fine_grained": True})

## Example: Relation Extraction After Named Entity Recognition

In [31]:
import spacy
from spacy.language import Language
from spacy import displacy

# Rebind `nlp` to a freshly loaded en_core_web_sm pipeline; this replaces the
# en_core_web_lg pipeline (and the components added to it) used by the cells above.
nlp = spacy.load("en_core_web_sm")

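# Earlier draft of the component, kept for reference: it sets 'past' from the
# verb's own tag only (head.tag_ == "VBD"). Superseded by the definition below.
# @Language.component("extract_person_orgs")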
# def extract_person_orgs(doc):
#     person_entities = [ent for ent in doc.ents if ent.label_ == "PERSON"]
#     for ent in person_entities:
#         head = ent.root.head
#         if head.lemma_ == "work":
#             preps = [token for token in head.children if token.dep_ == "prep"]
#             for prep in preps:
#                 orgs = [token for token in prep.children if token.ent_type_ == "ORG"]
#                 print({'person': ent, 'orgs': orgs, 'past': head.tag_ == "VBD"})
#     return doc

@Language.component("extract_person_orgs")
def extract_person_orgs(doc):
    """Print the organizations that PERSON entities are said to work at.

    For every entity in ``doc.ents`` labelled "PERSON" whose root token's head
    has the lemma "work", each child of that head with dependency "prep" is
    inspected, and the preposition's children whose entity type is "ORG" are
    collected. One dict is printed per preposition, with the keys
    ``'person'`` (the entity), ``'orgs'`` (list of matching tokens, possibly
    empty) and ``'past'`` (bool).

    ``'past'`` is True when the head verb is tagged "VBD", or when it is tagged
    "VBG" and has an "aux" child tagged "VBD".

    Args:
        doc: The document to inspect. Only read; never modified here.

    Returns:
        The same ``doc`` object that was passed in.
    """
    for ent in doc.ents:
        if ent.label_ != "PERSON":
            continue
        head = ent.root.head
        if head.lemma_ != "work":
            continue

        # Tense depends only on the verb and its auxiliaries, so compute it
        # once per verb instead of once per preposition.
        past_aux = any(
            child.dep_ == "aux" and child.tag_ == "VBD" for child in head.children
        )
        # Parentheses make the intended grouping explicit (`and` binds tighter
        # than `or`, so this is the same grouping Python applied before).
        past = head.tag_ == "VBD" or (head.tag_ == "VBG" and past_aux)

        preps = [token for token in head.children if token.dep_ == "prep"]
        for prep in preps:
            # Only direct children of the preposition are checked for ORG.
            orgs = [t for t in prep.children if t.ent_type_ == "ORG"]
            print({'person': ent, 'orgs': orgs, 'past': past})
    return doc

# To make the entities easier to work with, we'll merge them into single tokens
# To make the entities easier to work with, we'll merge them into single tokens
nlp.add_pipe("merge_entities")
# Add the custom component after the merger; it prints its findings whenever
# the pipeline processes a text.
nlp.add_pipe("extract_person_orgs")

# Running the pipeline triggers extract_person_orgs, which prints the dict
# shown in this cell's output.
doc = nlp("Alex Smith worked at Acme Corp Inc.")
# If you're not in a Jupyter / IPython environment, use displacy.serve
displacy.render(doc, options={"fine_grained": True})

{'person': Alex Smith, 'orgs': [Acme Corp Inc.], 'past': True}
