Install Libraries

In [2]:
pip install spacy pandas scikit-learn





[notice] A new release of pip is available: 25.1.1 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Load Pretrained NER Model

In [3]:
import spacy

nlp = spacy.load("en_core_web_sm")

text = "Microsoft acquired LinkedIn for $26 billion"
doc = nlp(text)

for ent in doc.ents:
    print(ent.text, ent.label_)

Microsoft ORG
LinkedIn ORG
$26 billion MONEY


Preparing Labeled Dataset

In [4]:
TRAIN_DATA = [
    ("Apple is looking at buying U.K. startup", 
     {"entities":[(0,5,"ORG"),(27,30,"GPE")]}),
    ("Google launched Pixel phone", 
     {"entities":[(0,6,"ORG"),(15,20,"PRODUCT")]})
]

Train Custom NER Model

In [5]:
import random
import spacy
from spacy.training.example import Example

nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")

for _, annotations in TRAIN_DATA:
    for ent in annotations["entities"]:
        ner.add_label(ent[2])

optimizer = nlp.begin_training()

for i in range(20):
    random.shuffle(TRAIN_DATA)
    losses = {}
    for text, annotations in TRAIN_DATA:
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], drop=0.2, losses=losses)
    print("Iteration", i, "Loss:", losses)



Iteration 0 Loss: {'ner': np.float32(8.448783)}
Iteration 1 Loss: {'ner': np.float32(8.096018)}
Iteration 2 Loss: {'ner': np.float32(7.566228)}
Iteration 3 Loss: {'ner': np.float32(6.468427)}
Iteration 4 Loss: {'ner': np.float32(5.4494004)}
Iteration 5 Loss: {'ner': np.float32(4.370829)}
Iteration 6 Loss: {'ner': np.float32(2.6672633)}
Iteration 7 Loss: {'ner': np.float32(2.667318)}
Iteration 8 Loss: {'ner': np.float32(2.3777966)}
Iteration 9 Loss: {'ner': np.float32(2.3713791)}
Iteration 10 Loss: {'ner': np.float32(1.5866959)}
Iteration 11 Loss: {'ner': np.float32(0.6882662)}
Iteration 12 Loss: {'ner': np.float32(0.41336906)}
Iteration 13 Loss: {'ner': np.float32(0.18595564)}
Iteration 14 Loss: {'ner': np.float32(0.08965218)}
Iteration 15 Loss: {'ner': np.float32(0.004574527)}
Iteration 16 Loss: {'ner': np.float32(0.0005029749)}
Iteration 17 Loss: {'ner': np.float32(0.00053766544)}
Iteration 18 Loss: {'ner': np.float32(3.547708e-05)}
Iteration 19 Loss: {'ner': np.float32(1.4510962e-06

Save and Load Model

In [6]:
nlp.to_disk("ner_model")
nlp = spacy.load("ner_model")

Testing Model

In [7]:
doc = nlp("Amazon opened a new office in Hyderabad")
for ent in doc.ents:
    print(ent.text, ent.label_)

Amazon ORG
new ORG


Compute Precision, Recall, F1, Accuracy

In [8]:
from sklearn.metrics import classification_report, accuracy_score

y_true = ["ORG","O","PRODUCT","O","GPE"]
y_pred = ["ORG","O","PRODUCT","O","ORG"]

print("Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred))

Accuracy: 0.8
              precision    recall  f1-score   support

         GPE       0.00      0.00      0.00         1
           O       1.00      1.00      1.00         2
         ORG       0.50      1.00      0.67         1
     PRODUCT       1.00      1.00      1.00         1

    accuracy                           0.80         5
   macro avg       0.62      0.75      0.67         5
weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
