In [7]:
import spacy
from spacy.training import Example
import random

# Step 1: Load the pre-existing trained model
nlp = spacy.load('custom_ner_model')  # Load your custom NER model


# Step 3: Add new labels to the NER component (if necessary)
ner = nlp.get_pipe("ner")
for _, annotations in train_data:
    for ent in annotations["entities"]:
        if ent[2] not in ner.labels:
            ner.add_label(ent[2])

# Step 4 (Optional): Freeze other components if you want to only train the NER component
# Freeze all other components (e.g., tagger, parser) so that only the NER is trained
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes):
    optimizer = nlp.resume_training()  # Use resume_training to continue training

    # Step 5: Fine-tune the model on the new data
    for epoch in range(50):  # Set the number of epochs
        random.shuffle(train_data)
        losses = {}
        batches = spacy.util.minibatch(train_data, size=2)  # Adjust batch size based on your dataset
        
        for batch in batches:
            examples = []
            for text, annotations in batch:
                doc = nlp.make_doc(text)
                example = Example.from_dict(doc, annotations)
                examples.append(example)
            # Fine-tune the model
            nlp.update(examples, drop=0.2, losses=losses, sgd=optimizer)

        print(f"Epoch {epoch + 1}, Losses: {losses}")

# Step 6: Save the updated model
nlp.to_disk('updated_custom_ner_model')

# Test the updated model
test_texts = [
    "Call me at 123-456-7890.",
    "The total payment is $100.50."
]

trained_nlp = spacy.load('updated_custom_ner_model')
for text in test_texts:
    doc = trained_nlp(text)
    print(f"Text: {text}")
    print("Entities:", [(ent.text, ent.label_) for ent in doc.ents])
    print()




Epoch 1, Losses: {'ner': 45.81808793274505}
Epoch 2, Losses: {'ner': 16.295298339279757}
Epoch 3, Losses: {'ner': 14.59564108731989}
Epoch 4, Losses: {'ner': 20.008329536300707}
Epoch 5, Losses: {'ner': 40.954626914526976}
Epoch 6, Losses: {'ner': 17.06036545567365}
Epoch 7, Losses: {'ner': 24.529566698906024}
Epoch 8, Losses: {'ner': 22.803412675670533}
Epoch 9, Losses: {'ner': 15.335315185953514}
Epoch 10, Losses: {'ner': 22.45877991623957}
Epoch 11, Losses: {'ner': 19.38217812605407}
Epoch 12, Losses: {'ner': 15.769239747748744}
Epoch 13, Losses: {'ner': 21.517595382808114}
Epoch 14, Losses: {'ner': 18.699779454294166}
Epoch 15, Losses: {'ner': 10.570956973972024}
Epoch 16, Losses: {'ner': 13.295246402103302}
Epoch 17, Losses: {'ner': 13.782807103687897}
Epoch 18, Losses: {'ner': 20.852216237982027}
Epoch 19, Losses: {'ner': 9.760146121430608}
Epoch 20, Losses: {'ner': 14.227844892181428}
Epoch 21, Losses: {'ner': 11.023349606694024}
Epoch 22, Losses: {'ner': 12.166356355069562}
Epo

In [17]:
# Test your model
nlp = spacy.load("custom_ner_model_v2")
texts = [
    "My phone number is 123-456-7890. robinrose@example.net",
    "I paid $150.75 for groceries. 4444555566668888 ABC123"
    "The card number 4123456789012345 is used.",
    "I have paid $1,000.99.",
    "Her phone number is +44 20 7946 0958.",
    "Please email sales@company.com.",
    "You owe $150.00.",
    "Contact support at +18005551212.",
    "The invoice number is N987654321.",
]

for text in texts:
    doc = nlp(text)
    print(f"Text: {text}")
    print("Entities:", [(ent.text, ent.label_) for ent in doc.ents])

Text: My phone number is 123-456-7890. robinrose@example.net
Entities: [('robinrose@example.net', 'EMAIL')]
Text: I paid $150.75 for groceries. 4444555566668888 ABC123The card number 4123456789012345 is used.
Entities: []
Text: I have paid $1,000.99.
Entities: []
Text: Her phone number is +44 20 7946 0958.
Entities: [('20 7946 0958', 'CARD_NUMBER')]
Text: Please email sales@company.com.
Entities: [('sales@company.com', 'EMAIL')]
Text: You owe $150.00.
Entities: []
Text: Contact support at +18005551212.
Entities: []
Text: The invoice number is N987654321.
Entities: []
