In [8]:
import nltk
# Fetch the 'punkt_tab' tokenizer data package. Because the call is the last
# expression in the cell, its return value (True in the recorded run) is what
# the notebook displays as the cell output.
# NOTE(review): presumably needed by word_tokenize() in the analysis cell on
# recent NLTK releases — confirm against the installed NLTK version.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [10]:
import nltk
# Fetch the 'averaged_perceptron_tagger_eng' tagger data package; the call's
# return value is displayed as the cell output (True in the recorded run).
# NOTE(review): presumably needed by pos_tag() in the analysis cell on recent
# NLTK releases — confirm against the installed NLTK version.
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [12]:
import nltk
# Fetch the 'maxent_ne_chunker_tab' chunker data package; the call's return
# value is displayed as the cell output (True in the recorded run).
# NOTE(review): presumably needed by ne_chunk() in the analysis cell on recent
# NLTK releases — confirm against the installed NLTK version.
nltk.download('maxent_ne_chunker_tab')

[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker_tab.zip.


True

In [6]:
import nltk
# Fetch the 'words' data package; the call's return value is displayed as the
# cell output (True in the recorded run, where the package was already present).
# The analysis cell requests this same package again before running.
nltk.download('words')

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [17]:

!pip install nltk spacy tabulate

import nltk
import spacy
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.tree import Tree
from tabulate import tabulate
# Fetch every NLTK data package the pipeline below relies on, so this cell runs
# on a fresh kernel without depending on separately executed download cells.
# Recent NLTK releases load the '*_tab' / '*_eng' variants of the tokenizer,
# tagger and chunker data, while older releases load the legacy names, so both
# spellings are requested. nltk.download() skips packages that are already
# up to date, which keeps re-running this cell cheap.
NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker',
    'maxent_ne_chunker_tab',
    'words',
)
for resource_id in NLTK_RESOURCES:
    nltk.download(resource_id)


text = '''alan musk said, "The mission of xAI and Grok is to understand the universe.
We want to answer the biggest questions: Where are the aliens? What’s the meaning of life?
How does the universe end? To do that, we must rigorously pursue truth.”'''


# NLTK pipeline: split the text into tokens, tag each token with its part of
# speech, then chunk the tagged tokens into a named-entity tree.
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
ne_tree = ne_chunk(pos_tags)

# Children of the root that are themselves Tree objects are the recognised
# entities; every other child is skipped. Each row is
# [entity text, entity label, source library].
nltk_entities = [
    [" ".join(word for word, _tag in chunk.leaves()), chunk.label(), 'NLTK']
    for chunk in ne_tree
    if isinstance(chunk, Tree)
]

# Draw the full chunk tree as ASCII art under a heading.
print("NLTK Named Entity Parse Tree:")
ne_tree.pretty_print()


# spaCy pipeline: load the en_core_web_sm model and run it over the same text.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# One [entity text, entity label, source library] row per entity in doc.ents.
spacy_entities = [[span.text, span.label_, 'spaCy'] for span in doc.ents]


# Stack the rows from both libraries (NLTK first, then spaCy) and render them
# as a single grid table so the two outputs can be compared side by side.
headers = ["Entity", "Label", "Library"]
all_entities = [*nltk_entities, *spacy_entities]
print("\nNamed Entities (Tabular Form):")
print(tabulate(all_entities, headers=headers, tablefmt="grid"))


# Gold-standard (entity text, label) pairs for `text`, written in the short
# label scheme (PERSON / ORG / ...).
# NOTE(review): 'alan' and 'musk' are annotated as separate entities with
# different labels and 'xAI' is absent — confirm this annotation is intended.
ground_truth = {
    ('alan', 'PERSON'),
    ('musk', 'ORG'),
    ('Grok', 'ORG'),
}

# NLTK's chunker spells several labels differently from the short scheme used
# by `ground_truth` (and by spaCy). Without this mapping an NLTK
# 'ORGANIZATION' hit could never equal a gold 'ORG' entry and would be scored
# as a false positive.
NLTK_TO_COMMON_LABEL = {
    'ORGANIZATION': 'ORG',
    'LOCATION': 'LOC',
    'FACILITY': 'FAC',
}


def _normalize_label(label, library):
    """Return `label` translated into the label scheme used by `ground_truth`.

    Only labels produced by the 'NLTK' library are translated; labels from any
    other library are returned unchanged.
    """
    if library == 'NLTK':
        return NLTK_TO_COMMON_LABEL.get(label, label)
    return label


def _confusion_counts(gold, pred):
    """Return (true positives, false positives, false negatives) for two sets."""
    return len(gold & pred), len(pred - gold), len(gold - pred)


def _precision_recall_f1(tp, fp, fn):
    """Return (precision, recall, F1); each is 0 when its denominator is 0."""
    p = tp / (tp + fp) if tp + fp != 0 else 0
    r = tp / (tp + fn) if tp + fn != 0 else 0
    f = 2 * p * r / (p + r) if p + r != 0 else 0
    return p, r, f


# Pooled predictions from both libraries, de-duplicated, with labels normalised.
predicted = {
    (entity, _normalize_label(label, library))
    for entity, label, library in all_entities
}

TP, FP, FN = _confusion_counts(ground_truth, predicted)
precision, recall, f1_score = _precision_recall_f1(TP, FP, FN)

print("\nNER Evaluation Metrics:")
metrics_table = [["Precision", round(precision, 2)],
                 ["Recall", round(recall, 2)],
                 ["F1 Score", round(f1_score, 2)]]
print(tabulate(metrics_table, headers=["Metric", "Value"], tablefmt="grid"))

# The pooled score hides how each library performs on its own, so also score
# every library separately against the same gold set.
per_library_table = []
for library_name in ('NLTK', 'spaCy'):
    library_predicted = {
        (entity, _normalize_label(label, library))
        for entity, label, library in all_entities
        if library == library_name
    }
    lib_precision, lib_recall, lib_f1 = _precision_recall_f1(
        *_confusion_counts(ground_truth, library_predicted)
    )
    per_library_table.append([
        library_name,
        round(lib_precision, 2),
        round(lib_recall, 2),
        round(lib_f1, 2),
    ])

print("\nNER Evaluation Metrics by Library:")
print(tabulate(per_library_table,
               headers=["Library", "Precision", "Recall", "F1 Score"],
               tablefmt="grid"))


report = """
Common Named Entity Types:
- PERSON: People, including fictional.
- ORG: Companies, agencies, institutions, etc.
- GPE: Countries, cities, states.
- LOC: Non-GPE locations, mountain ranges, bodies of water.
- DATE: Absolute or relative dates or periods.
- TIME: Times smaller than a day.
"""
print(report)




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


NLTK Named Entity Parse Tree:
                                                                                                                                                                                                                             S                                                                                                                                                                                                               
    _________________________________________________________________________________________________________________________________________________________________________________________________________________________|__________________________________________________________________________________________________________________________________________________________________________________________________________      
   |       |       |      |    |     |        |        |     |      |      |         |         |         |   