In [1]:
# %pip install spacy  # uncomment to install spaCy into the active kernel's environment

In [1]:
import spacy
from spacy.training.example import Example
from spacy.util import minibatch, compounding
import random
import json




In [2]:
# Load the annotated training data from a JSON file.
# The encoding is set explicitly: JSON is UTF-8 by specification, and relying on
# the platform default (e.g. cp1252 on Windows) can mis-decode non-ASCII text,
# which in turn shifts the entity offsets relative to the text.
with open('annotations.json', 'r', encoding='utf-8') as file:
    TRAIN_DATA = json.load(file)

# Fail with a clear message instead of a bare IndexError on TRAIN_DATA[0] below.
if not TRAIN_DATA:
    raise ValueError("annotations.json contains no training items")

# Quick sanity check of the loaded structure (container type, then item type).
print(type(TRAIN_DATA))
print(type(TRAIN_DATA[0]))

# Convert each item into the (text, {"entities": [...]}) pair used for training.
# NOTE(review): the raw text is read from the 'annotations' key — confirm this
# matches the schema of the annotation export.
spacy_train_data = []
for item in TRAIN_DATA:
    text = item['annotations']
    # Each entity is a 3-element sequence; normalise it to a (start, end, label) tuple.
    entities = [(start, end, label) for start, end, label in item['entities']]
    spacy_train_data.append((text, {"entities": entities}))



<class 'list'>
<class 'dict'>


In [None]:
# Start from a blank English pipeline
nlp = spacy.blank("en")

# Reuse the NER pipe when one is already registered; otherwise add a new one
ner = nlp.get_pipe("ner") if "ner" in nlp.pipe_names else nlp.add_pipe("ner")

# Register every entity label that appears in the training annotations
for _text, annot in spacy_train_data:
    for _start, _end, label in annot.get("entities"):
        ner.add_label(label)



In [None]:
# Training hyperparameters
N_ITERATIONS = 30  # number of passes over the training data
DROPOUT = 0.5      # dropout rate applied during each update

# Initialise the pipeline and keep the optimizer it returns.
# `nlp.initialize()` is the spaCy v3 replacement for the deprecated
# `nlp.begin_training()`.
optimizer = nlp.initialize()

for itn in range(N_ITERATIONS):
    losses = {}
    # Shuffle in place so each pass sees the examples in a different order
    random.shuffle(spacy_train_data)
    # Batch size grows from 4 towards 32 by a factor of 1.001 per batch
    batches = minibatch(spacy_train_data, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        # Build the Example objects for the whole batch and update once per
        # batch; updating one example at a time would make the minibatching
        # (and the compounding batch size) have no effect.
        examples = [
            Example.from_dict(nlp.make_doc(text), annotations)
            for text, annotations in batch
        ]
        nlp.update(examples, sgd=optimizer, drop=DROPOUT, losses=losses)
    print(f"Iteration {itn}, Losses: {losses}")



In [None]:
# Write the trained pipeline to a directory, then rebind `nlp` to the copy
# loaded back from that same directory
model_dir = "custom_ner_model"
nlp.to_disk(model_dir)
nlp = spacy.load(model_dir)



In [None]:
# Run the pipeline on a sample string and list the entities it finds
test_text = ""  # placeholder: put the text to analyse here
doc = nlp(test_text)

# One line per detected entity: its text followed by its label
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")