In [1]:
# CADL3: Named Entity Recognition with displacy
import spacy
from spacy import displacy
import pandas as pd

# Load the "en_core_web_sm" English pipeline; doc.ents below comes from it.
nlp = spacy.load("en_core_web_sm")

# Example dataset: news headlines, one per line. The assembled text starts
# with a newline and ends with one, with every headline on its own line.
headlines = (
    "Apple is planning to open a new campus in Bangalore next year.",
    "Elon Musk gave a keynote at the Tesla conference in Berlin.",
    "Amazon invested $4 billion in Anthropic, an AI startup.",
)
news_text = "\n" + "".join(f"{headline}\n" for headline in headlines)

# Run the full pipeline over the combined text.
doc = nlp(news_text)

# Gather (entity text, entity label) pairs once; both the plain-text listing
# and the DataFrame below are built from this list.
ner_data = [(span.text, span.label_) for span in doc.ents]

# ---- 1. Plain-text listing of the named entities ----
print("🔹 Named Entities Found")
for entity_text, entity_label in ner_data:
    # Left-align the entity text in a 20-character column so labels line up.
    print(f"{entity_text:<20} --> {entity_label}")

# ---- 2. Same entities as a two-column DataFrame ----
df = pd.DataFrame(ner_data, columns=["Entity", "Label"])

print("\nStructured DataFrame:\n")
print(df)

# ---- 3. Inline visualization with displacy ----
# style="ent" selects the entity-highlighting view; jupyter=True asks displacy
# to render into the notebook output.
print("\n🔹 Visualizing Named Entities...\n")
displacy.render(doc, style="ent", jupyter=True)


🔹 Named Entities Found
Apple                --> ORG
Bangalore            --> GPE
next year            --> DATE
Elon Musk            --> PERSON
Tesla                --> NORP
Berlin               --> GPE
Amazon               --> ORG
$4 billion           --> MONEY
Anthropic            --> GPE
AI                   --> GPE

Structured DataFrame:

       Entity   Label
0       Apple     ORG
1   Bangalore     GPE
2   next year    DATE
3   Elon Musk  PERSON
4       Tesla    NORP
5      Berlin     GPE
6      Amazon     ORG
7  $4 billion   MONEY
8   Anthropic     GPE
9          AI     GPE

🔹 Visualizing Named Entities...

