In [4]:
import spacy

nlp = spacy.blank("en")

# Adding and storing entity_ruler component
entity_ruler = nlp.add_pipe("entity_ruler")

# Defining Entity Patterns
patterns = [
    # Phrase entity pattern (exact match)
    {"label": "ORG", "pattern": "Microsoft"},

    # Token entity pattern (case-insensitive)
    {"label": "GPE", "pattern": [
        {"LOWER": "san"}, 
        {"LOWER": "francisco"}
    ]},

    # Additional Patterns
    {"label": "ORG", "pattern": "Manhattan Associates"},
    {"label": "PERSON", "pattern": "John Smith"}
]

# Adding patterns to entity ruler
entity_ruler.add_patterns(patterns)

# Process text
text = "Microsoft is headquartered in Seattle. San Francisco is a beautiful city."
doc = nlp(text)

# Extract Entities
for ent in doc.ents:
    print(ent.text, " - ", ent.label_)



Microsoft  -  ORG
San Francisco  -  GPE


In [None]:
# EntityRuler with existing NER model
nlp = spacy.load("en_core_web_sm")

# adding entityruler before existing NER model
nlp.add_pipe("entity_ruler", before="ner")

# get entityruler
ruler = nlp.get_pipe("entity_ruler")
ruler.add_patterns([
    {"label": "ORG", "pattern": "Manhattan Associates"}
])

text = "Manhattan Associates is a company in the US"
doc = nlp(text)

# Extract Entities
for ent in doc.ents:
    print(ent.text, " - ", ent.label_)

"""
Manhattan Associates  -  ORG
US  -  GPE
"""

nlp.analyze_pipes(pretty=True)
    # adds entity ruler to the existing pipe


Manhattan Associates  -  ORG
US  -  GPE
[1m

#   Component         Assigns               Requires   Scores             Retokenizes
-   ---------------   -------------------   --------   ----------------   -----------
0   tok2vec           doc.tensor                                          False      
                                                                                     
1   tagger            token.tag                        tag_acc            False      
                                                       pos_acc                       
                                                       tag_micro_p                   
                                                       tag_micro_r                   
                                                       tag_micro_f                   
                                                                                     
2   parser            token.dep                        dep_uas            False      
        

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc',
    'pos_acc',
    'tag_micro_p',
    'tag_micro_r',
    'tag_micro_f'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_