# Custom Components/Modifications

In [11]:
from pathlib import Path

import spacy

In [12]:
# Load the small English pipeline and run it over a two-sentence sample;
# `nlp` and `doc` are reused by the cells below.
nlp = spacy.load("en_core_web_sm")
doc = nlp("Britain is a place. Mary is a doctor.")

In [13]:
# Show each recognised entity as "<text> <label>", one per line.
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Britain GPE
Mary PERSON


### Modifications

We want to remove every entity labelled `GPE` (geopolitical entity, e.g. *Britain*) from `doc.ents`.

In [14]:
from spacy.language import Language

### We create a component to add to the pipeline

In [15]:
@Language.component("remove_gpe")
def remove_gpe(doc):
    """Drop every entity labelled ``GPE`` from ``doc.ents``.

    Args:
        doc: The document passed along the pipeline; its ``ents`` are read
            and then reassigned.

    Returns:
        The same ``doc``, with ``doc.ents`` set to the entities whose
        ``label_`` is not ``"GPE"``.
    """
    # Build a filtered list rather than calling ``remove`` on the list being
    # iterated: removing during iteration skips the element that follows each
    # removed one, so a second GPE entity directly after another in
    # ``doc.ents`` would be left in place.
    doc.ents = [ent for ent in doc.ents if ent.label_ != "GPE"]

    return doc

In [16]:

# Add the custom component to the end of the pipeline.
# The membership check keeps this cell re-runnable: add_pipe raises if a
# component with the same name is already present.
if "remove_gpe" not in nlp.pipe_names:
    nlp.add_pipe("remove_gpe")
nlp.analyze_pipes()

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'remove_gpe': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False}},
 'problems': {'tok2vec': [],
  

In [17]:
# Re-process the same sample now that `remove_gpe` is part of the pipeline,
# then list the remaining entities as "<text> <label>".
doc = nlp("Britain is a place. Mary is a doctor.")
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Mary PERSON


# Saving your model to disk

The `nlp.to_disk("data/new_en_core_web_sm")` command saves your current spaCy model and its pipeline components to the specified directory, making it easy to persist and later reload the model for further use. 

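This is particularly useful for deploying models, sharing them with others, or simply preserving your work. Note that the Python code of custom components such as `remove_gpe` is not saved with the model; the pipeline only records the component by name, so the `@Language.component("remove_gpe")` definition must be run (or imported) before the saved pipeline is loaded again with `spacy.load`.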

In [18]:
# Make sure the parent "data" directory exists before saving, so the cell
# also works on a fresh checkout where it has not been created yet.
Path("data").mkdir(parents=True, exist_ok=True)
nlp.to_disk("data/new_en_core_web_sm")