In [None]:
%pip install -q spacy scikit-learn
!python -m spacy download en_core_web_sm

In [3]:
import spacy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
# Load the `en_core_web_sm` pipeline fetched in the setup cell. `nlp` is called on
# each input text below and the resulting doc's `.ents` supply the predictions.
nlp = spacy.load("en_core_web_sm")

In [5]:
# Sample sentences fed to the NER pipeline. The `true_entities` list defined later
# enumerates the entities expected from these sentences, so keep the two in sync.
texts = [
    "Apple is looking to buy a startup in the United Kingdom.",
    "Elon Musk is the CEO of Tesla and SpaceX.",
    "Google was founded in California."
]

**Performing NER**

In [6]:
# Run every sentence through the pipeline and flatten the detected spans into a
# single list of (entity text, entity label) pairs, in document order.
predicted_entities = [
    (span.text, span.label_)
    for sentence in texts
    for span in nlp(sentence).ents
]

print(predicted_entities)

[('Apple', 'ORG'), ('the United Kingdom', 'GPE'), ('Elon Musk', 'PERSON'), ('Tesla', 'ORG'), ('Google', 'ORG'), ('California', 'GPE')]


**Ground Truth Labels**

In [7]:
# Expected (entity text, label) pairs for the sentences in `texts`, listed in the
# order they occur. The evaluation below matches entities by exact text, so an
# entry only takes part in scoring if the model emits the identical string
# (e.g. 'United Kingdom' here vs. the predicted 'the United Kingdom').
true_entities = [
    ('Apple', 'ORG'), ('United Kingdom', 'GPE'),
    ('Elon Musk', 'PERSON'), ('Tesla', 'ORG'), ('SpaceX', 'ORG'),
    ('Google', 'ORG'), ('California', 'GPE')
]

**Convert to Label Lists for Evaluation**

In [27]:
# Index each list by entity text so labels can be paired entity-by-entity instead
# of by list position (if the same text occurs more than once, its last label wins).
true_label_by_text = dict(true_entities)
predicted_label_by_text = dict(predicted_entities)

# Only entities whose exact text appears in BOTH lists are evaluated.
# NOTE: this drops entities the model missed (e.g. 'SpaceX') and boundary
# mismatches ('United Kingdom' vs 'the United Kingdom'), so the scores below
# measure label agreement on matched entities only, not end-to-end NER quality.
common_entities = set(true_label_by_text) & set(predicted_label_by_text)
filtered_true_entities = [entity for entity in true_entities if entity[0] in common_entities]
filtered_predicted_entities = [entity for entity in predicted_entities if entity[0] in common_entities]
print("Filtered True Entities:", filtered_true_entities)
print("Filtered Predicted Entities:", filtered_predicted_entities)

# Build both label lists from one shared ordering (ground-truth order) so that
# y_true[i] and y_pred[i] always refer to the same entity and have equal length,
# even if the model returns entities in a different order or repeats one.
aligned_texts = [text for text in true_label_by_text if text in common_entities]
y_true = [true_label_by_text[text] for text in aligned_texts]
y_pred = [predicted_label_by_text[text] for text in aligned_texts]

Filtered True Entities: [('Apple', 'ORG'), ('Elon Musk', 'PERSON'), ('Tesla', 'ORG'), ('Google', 'ORG'), ('California', 'GPE')]
Filtered Predicted Entities: [('Apple', 'ORG'), ('Elon Musk', 'PERSON'), ('Tesla', 'ORG'), ('Google', 'ORG'), ('California', 'GPE')]


**Evaluation Metrics**

In [30]:
from sklearn.metrics import accuracy_score, classification_report

# Fraction of evaluated entities whose predicted label equals the true label.
accuracy = accuracy_score(y_true, y_pred)
# Text table of precision / recall / F1 / support for each label.
per_label_report = classification_report(y_true, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n")
print(per_label_report)

Accuracy: 1.0

Classification Report:

              precision    recall  f1-score   support

         GPE       1.00      1.00      1.00         1
         ORG       1.00      1.00      1.00         3
      PERSON       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5



In [31]:
import pandas as pd

# Persist every predicted (text, label) pair as a two-column CSV, without the index.
entity_columns = ["Entity", "Label"]
df_entities = pd.DataFrame(data=predicted_entities, columns=entity_columns)
df_entities.to_csv("ner_entities.csv", index=False)

In [32]:
# Compute the aggregate scores written to the CSV below; without this the cell
# fails with NameError because precision/recall/f1 are never defined.
# The labels are multi-class (ORG / GPE / PERSON), so an explicit averaging mode
# is required; "weighted" corresponds to the "weighted avg" row of
# classification_report. zero_division=0 scores a label with no predicted or
# true samples as 0 instead of emitting an undefined-metric warning.
precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
recall = recall_score(y_true, y_pred, average="weighted", zero_division=0)
f1 = f1_score(y_true, y_pred, average="weighted", zero_division=0)

# One-row summary table of the headline metrics.
metrics_df = pd.DataFrame({
    "Accuracy": [accuracy],
    "Precision": [precision],
    "Recall": [recall],
    "F1-score": [f1]
})

metrics_df.to_csv("ner_metrics.csv", index=False)