In [1]:
# Import and load the spacy model
import spacy
nlp=spacy.load("en_core_web_sm") 

# Getting the ner component
ner=nlp.get_pipe('ner')

In [21]:
# New label to add
LABEL = "FOOD"

# Training examples in the required format
TRAIN_DATA =[ ("Pizza is a common fast food.", {"entities": [(0, 5, "FOOD")]}),
              ("Pasta is an italian recipe", {"entities": [(0, 5, "FOOD")]}),
              ("China's noodles are very famous", {"entities": [(8,15, "FOOD")]}),
              ("Shrimps are famous in China too", {"entities": [(0,7, "FOOD")]}),
              ("Lasagna is another classic of Italy", {"entities": [(0,7, "FOOD")]}),
              ("Sushi and kougefdsjofs is extemely famous and expensive Japanese dish", {"entities": [(0,5, "FOOD"),(10,22, "FOOD")]}),
              ("Unagi is a famous seafood of Japan", {"entities": [(0,5, "FOOD")]}),
              ("Tempura , Soba are other famous dishes of Japan", {"entities": [(0,7, "FOOD")]}),
              ("Udon is a healthy type of noodles", {"entities": [(0,4, "ORG")]}),
              ("Chocolate soufflé is extremely famous french cuisine", {"entities": [(0,17, "FOOD")]}),
              ("Flamiche is french pastry", {"entities": [(0,8, "FOOD")]}),
              ("Burgers are the most commonly consumed fastfood", {"entities": [(0,7, "FOOD")]}),
              ("Frenchfries are considered too oily", {"entities": [(0,11, "FOOD")]})
           ]

In [22]:
# Add the new label to ner
ner.add_label(LABEL)

# Resume training
optimizer = nlp.resume_training()
move_names = list(ner.move_names)

# List of pipes you want to train
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]

# List of pipes which should remain unaffected in training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]

In [23]:
# Importing requirements
from spacy.util import minibatch, compounding
from spacy.training.example import Example
import random

# Begin training by disabling other pipeline components
with nlp.disable_pipes(*other_pipes) :

  sizes = compounding(1.0, 4.0, 1.001)
  # Training for 30 iterations     
  for itn in range(30):
    # shuffle examples before training
    random.shuffle(TRAIN_DATA)
    # batch up the examples using spaCy's minibatch
    batches = minibatch(TRAIN_DATA, size=sizes)
    # ictionary to store losses
    losses = {}
    for batch in batches:
        for text, annotations in batch:
            # create Example
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotations)
            # Update the model
            nlp.update([example], losses=losses, drop=0.5)
            print("Losses", losses)

Losses {'ner': 3.650657084887754e-07}
Losses {'ner': 3.6507662455295716e-07}
Losses {'ner': 3.6644789678293706e-07}
Losses {'ner': 1.2137943594505864}
Losses {'ner': 1.2137943637667201}
Losses {'ner': 1.2137943829998579}
Losses {'ner': 3.2137931190812594}
Losses {'ner': 3.2137931190858313}
Losses {'ner': 3.2137931190877684}
Losses {'ner': 3.28247000830861}
Losses {'ner': 3.282470012618467}
Losses {'ner': 3.282470150611731}
Losses {'ner': 3.282470150629006}
Losses {'ner': 3.0404648412112055e-10}
Losses {'ner': 3.925307365126699e-09}
Losses {'ner': 4.084106691064048e-09}
Losses {'ner': 2.1067670620316077e-08}
Losses {'ner': 2.5757029786412258e-08}
Losses {'ner': 2.701678506644333e-08}
Losses {'ner': 2.702041593003945e-08}
Losses {'ner': 1.5328647664980477}
Losses {'ner': 1.5332842456387767}
Losses {'ner': 1.53328428256892}
Losses {'ner': 1.5333230026367932}
Losses {'ner': 1.5333230026917246}
Losses {'ner': 1.5333230028126157}
Losses {'ner': 2.345343845283784e-07}
Losses {'ner': 2.3463870

In [24]:
# Testing the NER

test_text = "I ate kougefdsjof and kougefdsjofs and kougefds yesterday. they are a common fast food "
doc = nlp(test_text)
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Entities [('kougefdsjofs', 'FOOD')]


In [6]:
# Output directory
from pathlib import Path
output_dir=Path('/content/')

# Saving the model to the output directory
if not output_dir.exists():
  output_dir.mkdir()
nlp.meta['name'] = 'my_ner'  # rename model
nlp.to_disk(output_dir)
print("Saved model to", output_dir)

# Loading the model from the directory
print("Loading from", output_dir)
nlp2 = spacy.load(output_dir)
assert nlp2.get_pipe("ner").move_names == move_names
doc2 = nlp2(' Dosa is an extremely famous south Indian dish')
for ent in doc2.ents:
  print(ent.label_, ent.text)

Saved model to \content
Loading from \content
FOOD Dosa
