In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
# Hand-labelled evaluation examples. Each item pairs a sentence with the list
# of (entity text, entity label) tuples expected for that sentence.
data = [
    (
        "Barack Obama was born in Hawaii.",
        [
            ("Barack Obama", "PERSON"),
            ("Hawaii", "GPE"),
        ],
    ),
    (
        "Apple Inc. is headquartered in California.",
        [
            ("Apple Inc.", "ORG"),
            ("California", "GPE"),
        ],
    ),
    (
        "Elon Musk is the CEO of Tesla.",
        [
            ("Elon Musk", "PERSON"),
            ("Tesla", "ORG"),
        ],
    ),
]


In [4]:
import spacy

# Load the small English pipeline fetched by the install cell.
nlp = spacy.load('en_core_web_sm')

# y_true[i] / y_pred[i] hold the gold and predicted entities for data[i],
# each as a set of (entity text, entity label) pairs. Because these are sets,
# a pair that occurs more than once in a sentence is counted only once.
y_true, y_pred = [], []

for text, gold_entities in data:
    doc = nlp(text)
    y_true.append(set(gold_entities))
    y_pred.append({(ent.text, ent.label_) for ent in doc.ents})

print("True Entities:", y_true)
print("Predicted Entities:", y_pred)


True Entities: [{('Hawaii', 'GPE'), ('Barack Obama', 'PERSON')}, {('Apple Inc.', 'ORG'), ('California', 'GPE')}, {('Elon Musk', 'PERSON'), ('Tesla', 'ORG')}]
Predicted Entities: [{('Hawaii', 'GPE'), ('Barack Obama', 'PERSON')}, {('Apple Inc.', 'ORG'), ('California', 'GPE')}, {('Elon Musk', 'PERSON'), ('Tesla', 'ORG')}]


In [5]:
# Tally entity-level outcomes over all sentences, pairing each gold set with
# the prediction set at the same position:
#   TP - pairs present in both the gold and the predicted set
#   FP - pairs predicted but absent from the gold set
#   FN - gold pairs that were not predicted
TP = sum(len(gold & guess) for gold, guess in zip(y_true, y_pred))
FP = sum(len(guess - gold) for gold, guess in zip(y_true, y_pred))
FN = sum(len(gold - guess) for gold, guess in zip(y_true, y_pred))

TP, FP, FN


(6, 0, 0)

In [7]:
# Entity-level metrics derived from the TP / FP / FN counts. Each ratio falls
# back to 0 when its denominator is 0 (for example when nothing was predicted
# or there are no gold entities) instead of raising ZeroDivisionError.
Precision = TP / (TP + FP) if (TP + FP) > 0 else 0
Recall    = TP / (TP + FN) if (TP + FN) > 0 else 0
F1_score  = 2 * (Precision * Recall) / (Precision + Recall) if (Precision + Recall) > 0 else 0
# NOTE: there are no true negatives in this tally, so this ratio is
# TP / (TP + FP + FN) rather than the usual (TP + TN) / total.
Accuracy  = TP / (TP + FP + FN) if (TP + FP + FN) > 0 else 0


In [8]:
# Start every metric at 0 and overwrite it only when its denominator is
# positive, so empty predictions or empty gold data never divide by zero.
precision = recall = f1_score = accuracy = 0

if (TP + FP) > 0:
    precision = TP / (TP + FP)
if (TP + FN) > 0:
    recall = TP / (TP + FN)
if (precision + recall) > 0:
    f1_score = 2 * precision * recall / (precision + recall)
# NOTE: no true negatives are tracked, so "accuracy" here is TP / (TP + FP + FN).
if (TP + FP + FN) > 0:
    accuracy = TP / (TP + FP + FN)

# Report the metrics in a fixed order; labels are padded so the values align.
for label, value in (
    ("Accuracy :", accuracy),
    ("Precision:", precision),
    ("Recall   :", recall),
    ("F1 Score :", f1_score),
):
    print(label, value)


Accuracy : 1.0
Precision: 1.0
Recall   : 1.0
F1 Score : 1.0
