In [1]:
import json
import spacy

In [2]:
# load the training data
with open('data.spacy') as fp:
  training_data = json.load(fp)

In [3]:
training_data

{'classes': ['HEALTH',
  'CRIME',
  'HYGIENE',
  'EDUCATION',
  'TRANSPORT',
  'NEUTRAL'],
 'annotations': [['Samantha, a 60-year-old woman with diabetes, is walking in the park when she suddenly trips and falls, injuring her arm and leg. She also has a deep cut on her arm that is bleeding heavily. A bystander calls an ambulance, and Samantha is taken to the hospital.\r',
   {'entities': [[35, 43, 'HEALTH'],
     [48, 55, 'NEUTRAL'],
     [77, 85, 'NEUTRAL'],
     [96, 101, 'HEALTH'],
     [103, 111, 'HEALTH'],
     [116, 119, 'HEALTH'],
     [124, 128, 'HEALTH'],
     [144, 152, 'CRIME'],
     [164, 168, 'NEUTRAL'],
     [172, 180, 'CRIME'],
     [181, 189, 'NEUTRAL'],
     [192, 201, 'NEUTRAL'],
     [202, 207, 'NEUTRAL'],
     [211, 220, 'HEALTH'],
     [238, 243, 'TRANSPORT'],
     [251, 259, 'HEALTH']]}],
  ['\r', {'entities': []}],
  ["At the hospital, healthcare professionals assess Samantha's injuries and provide immediate treatment for the bleeding. They also diagnose her arm 

In [5]:
# prepare an empty model to train
nlp = spacy.blank('en')
nlp.vocab.vectors.name = 'DATA'
ner = nlp.add_pipe('ner')

In [7]:
# Add the custom NER Tags as entities into the model
for label in training_data["classes"]:
  ner.add_label(label)

In [8]:
# Train the model
optimizer = nlp.begin_training()

In [9]:
for text, annotations in training_data["annotations"]:
    if len(text) > 0:
        nlp.update([text], [annotations], sgd=optimizer)

ValueError: [E989] `nlp.update()` was called with two positional arguments. This may be due to a backwards-incompatible change to the format of the training data in spaCy 3.0 onwards. The 'update' function should now be called with a batch of Example objects, instead of `(text, annotation)` tuples. 

In [15]:
import random
import spacy
from spacy.util import minibatch, compounding
from spacy.training.example import Example

# convert the data into spaCy format
examples = []
for text, annotations in training_data["annotations"]:
    if len(text) > 0:
        examples.append(Example.from_dict(nlp.make_doc(text), annotations))

# train the NER model
for i in range(50):
    random.shuffle(examples)
    for batch in minibatch(examples, size=16):
        nlp.update(batch, sgd=optimizer)


In [16]:
training_data["annotations"]

[['Samantha, a 60-year-old woman with diabetes, is walking in the park when she suddenly trips and falls, injuring her arm and leg. She also has a deep cut on her arm that is bleeding heavily. A bystander calls an ambulance, and Samantha is taken to the hospital.\r',
  {'entities': [[35, 43, 'HEALTH'],
    [48, 55, 'NEUTRAL'],
    [77, 85, 'NEUTRAL'],
    [96, 101, 'HEALTH'],
    [103, 111, 'HEALTH'],
    [116, 119, 'HEALTH'],
    [124, 128, 'HEALTH'],
    [144, 152, 'CRIME'],
    [164, 168, 'NEUTRAL'],
    [172, 180, 'CRIME'],
    [181, 189, 'NEUTRAL'],
    [192, 201, 'NEUTRAL'],
    [202, 207, 'NEUTRAL'],
    [211, 220, 'HEALTH'],
    [238, 243, 'TRANSPORT'],
    [251, 259, 'HEALTH']]}],
 ['\r', {'entities': []}],
 ["At the hospital, healthcare professionals assess Samantha's injuries and provide immediate treatment for the bleeding. They also diagnose her arm and leg injuries, and provide pain relief and medication for her diabetes. Samantha undergoes surgery to repair her broken bo

In [None]:
! python3 -m spacy download en_core_web_sm

Python was not found; run without arguments to install from the Microsoft Store, or disable this shortcut from Settings > Manage App Execution Aliases.


In [25]:
import en_core_web_sm

nlp = en_core_web_sm.load()

In [28]:
import spacy 
import random
nlp = spacy.load('en_core_web_sm')

In [36]:
nlp.entity.add_label('CELEBRITY')
TRAIN_DATA = [
        (u"Modi", {"entities": [(0, 4, "CELEBRITY")]})]

optimizer = nlp.begin_training()
for i in range(20):
    random.shuffle(TRAIN_DATA)
    for text, annotations in TRAIN_DATA:
        nlp.update([text], [annotations],drop=0.3, sgd=optimizer)


text = "But Modi is starting India. The company made a late push\ninto hardware, and Apple’s Siri and Google available on iPhones, and Amazon’s Alexa\nsoftware, which runs on its Echo and Dot devices, have clear leads in\nconsumer adoption."
doc = nlp(text)
for ent in doc.ents:
    print(ent.text,ent.label_)

AttributeError: 'English' object has no attribute 'entity'

In [31]:
optimizer = nlp.begin_training()
for i in range(20):
    random.shuffle(training_data["annotations"])
    for text, annotations in training_data["annotations"]:
        nlp.update([text], [annotations],drop=0.3, sgd=optimizer

SyntaxError: unexpected EOF while parsing (1423188335.py, line 5)

In [32]:
for text, annotations in training_data["annotations"]:
    print(len(text),len([annotations]))

261 1
1 1
427 1
1 1
328 1
1 1
322 1
1 1
231 1
1 1
466 1
1 1
266 1
1 1
222 1
1 1
347 1
1 1
262 1
1 1
293 1
1 1
258 1
1 1
329 1
1 1
310 1
1 1
304 1
1 1
371 1
1 1
270 1
1 1
250 1
1 1
274 1
1 1
264 1
1 1
330 1
1 1
311 1
1 1
270 1
1 1
339 1
1 1
335 1
1 1
248 1
1 1
297 1
297 1


In [33]:
text = "I will kill you."

In [35]:
doc = nlp(text)
for ent in doc.ents:
    print(ent.text,ent.label_)